/*
 * Copyright 2010 Stefan Lankes, Chair for Operating Systems,
 *                RWTH Aachen University
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * This file is part of MetalSVM.
 */

/**
 * @author Stefan Lankes
 * @file kernel/tasks.c
 * @brief Implementations of task loading, killing, scheduling.
 *
 * This file contains the implementations of the functions
 * used to start, wake up, block, and schedule tasks.
 */

#include <metalsvm/stddef.h>
#include <metalsvm/stdlib.h>
#include <metalsvm/stdio.h>
#include <metalsvm/string.h>
#include <metalsvm/errno.h>
#include <metalsvm/time.h>
#include <metalsvm/processor.h>
#include <metalsvm/tasks.h>
#include <metalsvm/tasks_types.h>
#include <metalsvm/spinlock.h>
#include <metalsvm/mailbox.h>
#include <metalsvm/syscall.h>
#include <metalsvm/fs.h>
#include <metalsvm/vma.h>
#include <metalsvm/elf.h>

DEFINE_PER_CORE(task_t*, current_task, NULL);

/** @brief Array of task structures
 *
 * A task's id will be its position in this array.
 */
static task_t task_table[MAX_TASKS] = {
	[0 ... MAX_TASKS-1] = {0, TASK_INVALID, ATOMIC_INIT(0),
		SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, FS_INIT, 0, 0, 0, 0}};
static spinlock_irqsave_t table_lock = SPINLOCK_IRQSAVE_INIT;

/** @brief Helper function for the assembly code to determine the current task
 * @return Pointer to the task_t structure of the current task
 */
task_t* get_current_task(void)
{
	return per_core(current_task);
}

int multitasking_init(void) {
	if (task_table[0].status == TASK_INVALID) {
		task_table[0].id = 0;
		task_table[0].status = TASK_RUNNING;
		atomic_int32_set(&task_table[0].user_usage, 0);
		mailbox_wait_msg_init(&task_table[0].inbox);
		memset(task_table[0].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
		per_core(current_task) = task_table+0;
		per_core(current_task)->pgd = get_boot_pgd();
		task_table[0].flags = TASK_DEFAULT_FLAGS;
		return 0;
	}

	return -ENOMEM;
}

/** @brief Wake up tasks which are waiting for a message from the current one
 *
 * @param result Current task's resulting return value
 */
static void wakeup_blocked_tasks(int result)
{
	wait_msg_t tmp = { per_core(current_task)->id, result };
	unsigned int i;

	spinlock_irqsave_lock(&table_lock);

	/* wake up blocked tasks */
	for(i=0; i<MAX_TASKS; i++) {
		if (per_core(current_task)->outbox[i]) {
			mailbox_wait_msg_post(per_core(current_task)->outbox[i], tmp);
			per_core(current_task)->outbox[i] = NULL;
		}
	}

	spinlock_irqsave_unlock(&table_lock);
}

/** @brief Internal exit handler, called by every procedure
 * through which a task terminates.
 */
static void NORETURN do_exit(int arg)
{
	vma_t* tmp;

	kprintf("Terminate task: %u, return value %d\n", per_core(current_task)->id, arg);

	wakeup_blocked_tasks(arg);

	//vma_dump(per_core(current_task));
	spinlock_lock(&(per_core(current_task)->vma_lock));

	// remove memory regions
	while((tmp = per_core(current_task)->vma_list) != NULL) {
		kfree((void*) tmp->start, tmp->end - tmp->start + 1);
		per_core(current_task)->vma_list = tmp->next;
		kfree((void*) tmp, sizeof(vma_t));
	}

	spinlock_unlock(&(per_core(current_task)->vma_lock));

	drop_pgd(); // delete page directory and its page tables

	if (atomic_int32_read(&per_core(current_task)->user_usage))
		kprintf("Memory leak! Task %d did not release %d pages\n",
			per_core(current_task)->id,
			atomic_int32_read(&per_core(current_task)->user_usage));

	per_core(current_task)->status = TASK_FINISHED;
	reschedule();

	kputs("Kernel panic: scheduler found no valid task\n");
	while(1) {
		HALT;
	}
}
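/*
 * The inbox/outbox fields tie task creation to task exit: create_task()
 * registers the creator's inbox in the new task's outbox, and do_exit()
 * (via wakeup_blocked_tasks()) posts the return value back to every
 * registered inbox. A minimal sketch of that rendezvous follows; the
 * demo_* names are hypothetical and the block is compiled out.
 */
#if 0
static int demo_child(void* arg)
{
	return 42;	/* delivered to the creator by wakeup_blocked_tasks() */
}

static void demo_parent(void)
{
	tid_t id;
	int32_t result;

	if (!create_kernel_task(&id, demo_child, NULL))
		wait(&result);	/* blocks in mailbox_wait_msg_fetch(); result == 42 */
}
#endif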
/** @brief A procedure to be called by kernel tasks */
void NORETURN leave_kernel_task(void) {
	int result;

	result = get_return_value();
	do_exit(result);
}

/** @brief To be called by the system call to exit tasks */
void NORETURN sys_exit(int arg)
{
	do_exit(arg);
}

/** @brief Aborting a task is like exiting it with result -1 */
void NORETURN abort(void) {
	do_exit(-1);
}

/** @brief Create a task with a specific entry point
 *
 * @param id Pointer to a tid_t struct where the id shall be set
 * @param ep Pointer to the function the task shall start with
 * @param arg Arguments list
 * @return
 * - 0 on success
 * - -ENOMEM (-12) or -EINVAL (-22) on failure
 */
static int create_task(tid_t* id, entry_point_t ep, void* arg)
{
	int ret = -ENOMEM;
	unsigned int i;

	if (BUILTIN_EXPECT(!ep, 0))
		return -EINVAL;

	spinlock_irqsave_lock(&table_lock);

	for(i=0; i<MAX_TASKS; i++) {
		if (task_table[i].status == TASK_INVALID) {
			atomic_int32_set(&task_table[i].user_usage, 0);

			// create a page directory for the new task
			ret = create_pgd(task_table+i, 0);
			if (ret < 0) {
				ret = -ENOMEM;
				goto create_task_out;
			}

			task_table[i].id = i;
			spinlock_init(&task_table[i].vma_lock);
			task_table[i].vma_list = NULL;
			mailbox_wait_msg_init(&task_table[i].inbox);
			memset(task_table[i].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
			task_table[i].outbox[per_core(current_task)->id] = &per_core(current_task)->inbox;
			task_table[i].fildes_table[0].node = findnode_fs("/dev/stdin");
			task_table[i].fildes_table[1].node = findnode_fs("/dev/stdout");
			task_table[i].fildes_table[2].node = findnode_fs("/dev/stderr");

			if (id)
				*id = i;

			ret = create_default_frame(task_table+i, ep, arg);

			task_table[i].flags = TASK_DEFAULT_FLAGS;
			task_table[i].start_heap = 0;
			task_table[i].end_heap = 0;
			task_table[i].start_tick = get_clock_tick();
			task_table[i].status = TASK_READY;
			break;
		}
	}

create_task_out:
	spinlock_irqsave_unlock(&table_lock);

	return ret;
}

int sys_fork(void)
{
	int ret = -ENOMEM;
	unsigned int i;
	task_t* parent_task = per_core(current_task);
	vma_t** child;
	vma_t* parent;
	vma_t* tmp;

	spinlock_lock(&per_core(current_task)->vma_lock);
	spinlock_irqsave_lock(&table_lock);

	for(i=0; i<MAX_TASKS; i++) {
		if (task_table[i].status == TASK_INVALID) {
			atomic_int32_set(&task_table[i].user_usage, 0);

			// create a page directory as a copy of the parent's
			ret = create_pgd(task_table+i, 1);
			if (ret < 0) {
				ret = -ENOMEM;
				goto create_task_out;
			}

			task_table[i].id = i;
			spinlock_init(&task_table[i].vma_lock);
			task_table[i].vma_list = NULL;

			// copy the parent's VMA list
			child = &task_table[i].vma_list;
			parent = per_core(current_task)->vma_list;
			tmp = NULL;

			while(parent) {
				*child = (vma_t*) kmalloc(sizeof(vma_t));
				if (BUILTIN_EXPECT(!*child, 0))
					break;

				(*child)->start = parent->start;
				(*child)->end = parent->end;
				(*child)->type = parent->type;
				(*child)->prev = tmp;
				(*child)->next = NULL;

				parent = parent->next;
				tmp = *child;
				child = &((*child)->next);
			}

			task_table[i].fildes_table[0].node = findnode_fs("/dev/stdin");
			task_table[i].fildes_table[1].node = findnode_fs("/dev/stdout");
			task_table[i].fildes_table[2].node = findnode_fs("/dev/stderr");

			mailbox_wait_msg_init(&task_table[i].inbox);
			memset(task_table[i].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
			task_table[i].outbox[per_core(current_task)->id] = &per_core(current_task)->inbox;
			task_table[i].flags = per_core(current_task)->flags;
			memcpy(&(task_table[i].fpu), &(per_core(current_task)->fpu), sizeof(union fpu_state));
			task_table[i].start_tick = get_clock_tick();
			task_table[i].start_heap = 0;
			task_table[i].end_heap = 0;

			ret = arch_fork(task_table+i);

			if (parent_task != per_core(current_task)) {
				// Oh, the current task is the new child task!
				// Leave the function without releasing the locks,
				// because the locks have already been released
				// by the parent task!
				return 0;
			}

			if (!ret) {
				task_table[i].status = TASK_READY;
				ret = i;
			}
			break;
		}
	}

create_task_out:
	spinlock_irqsave_unlock(&table_lock);
	spinlock_unlock(&per_core(current_task)->vma_lock);

	return ret;
}

int create_kernel_task(tid_t* id, entry_point_t ep, void* arg)
{
	return create_task(id, ep, arg);
}
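/*
 * sys_fork() follows the UNIX return convention: the parent leaves the
 * loop with ret set to the child's table index, while the child wakes up
 * inside arch_fork() and takes the early-return path above. A hypothetical
 * caller (sketch only, compiled out):
 */
#if 0
static void demo_fork(void)
{
	int ret = sys_fork();

	if (ret == 0) {
		/* child: runs on a copy of the parent's VMA list */
	} else if (ret > 0) {
		/* parent: ret is the child's task id */
	} else {
		/* error: -ENOMEM, no free slot or out of memory */
	}
}
#endif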
#define MAX_ARGS	(PAGE_SIZE - 2*sizeof(int) - sizeof(vfs_node_t*))

/** @brief Structure which keeps all
 * relevant data for a new task to start */
typedef struct {
	/// Points to the node with the executable in the file system
	vfs_node_t* node;
	/// Argument count
	int argc;
	/// Environment var count
	int envc;
	/// Buffer for env and argv values
	char buffer[MAX_ARGS];
} load_args_t;

/** @brief Internally used function to load tasks with a load_args_t structure
 * keeping all the information needed to launch.
 *
 * This is where the serious loading action is done.
 */
static int load_task(load_args_t* largs)
{
	uint32_t i, offset, idx;
	uint32_t addr, npages, flags, stack = 0;
	elf_header_t header;
	elf_program_header_t prog_header;
	//elf_section_header_t sec_header;
	vfs_node_t* node;

	if (!largs)
		return -EINVAL;

	node = largs->node;
	if (!node)
		return -EINVAL;

	read_fs(node, (uint8_t*)&header, sizeof(elf_header_t), 0);
	if (BUILTIN_EXPECT(header.ident.magic != ELF_MAGIC, 0))
		goto invalid;
	if (BUILTIN_EXPECT(header.type != ELF_ET_EXEC, 0))
		goto invalid;
	if (BUILTIN_EXPECT(header.machine != ELF_EM_386, 0))
		goto invalid;
	if (BUILTIN_EXPECT(header.ident._class != ELF_CLASS_32, 0))
		goto invalid;
	if (BUILTIN_EXPECT(header.ident.data != ELF_DATA_2LSB, 0))
		goto invalid;
	if (header.entry <= KERNEL_SPACE)
		goto invalid;

	// interpret program header table
	for (i=0; i<header.ph_entry_count; i++) {
		read_fs(node, (uint8_t*)&prog_header, sizeof(elf_program_header_t),
			header.ph_offset+i*header.ph_entry_size);

		switch(prog_header.type) {
		case ELF_PT_LOAD:	// load program segment
			npages = (prog_header.mem_size >> PAGE_SHIFT);
			if (prog_header.mem_size & (PAGE_SIZE-1))
				npages++;

			addr = get_pages(npages);

			flags = MAP_USER_SPACE;
			if (prog_header.flags & PF_X)
				flags |= MAP_CODE;

			// map page frames in the address space of the current task
			if (!map_region(prog_header.virt_addr, addr, npages, flags))
				kprintf("Could not map 0x%x at 0x%x\n", addr, prog_header.virt_addr);

			// clear pages
			memset((void*) prog_header.virt_addr, 0, npages*PAGE_SIZE);

			// set starting point of the heap
			if (per_core(current_task)->start_heap < prog_header.virt_addr+prog_header.mem_size)
				per_core(current_task)->start_heap = per_core(current_task)->end_heap = prog_header.virt_addr+prog_header.mem_size;

			// load program
			read_fs(node, (uint8_t*)prog_header.virt_addr, prog_header.file_size, prog_header.offset);

			flags = VMA_CACHEABLE;
			if (prog_header.flags & PF_R)
				flags |= VMA_READ;
			if (prog_header.flags & PF_W)
				flags |= VMA_WRITE;
			if (prog_header.flags & PF_X)
				flags |= VMA_EXECUTE;
			vma_add(per_core(current_task), prog_header.virt_addr,
				prog_header.virt_addr+npages*PAGE_SIZE-1, flags);

			if (!(prog_header.flags & PF_W))
				change_page_permissions(prog_header.virt_addr,
					prog_header.virt_addr+npages*PAGE_SIZE-1, flags);
			break;

		case ELF_PT_GNU_STACK:	// Indicates stack executability
			// create user-level stack
			npages = DEFAULT_STACK_SIZE >> PAGE_SHIFT;
			if (DEFAULT_STACK_SIZE & (PAGE_SIZE-1))
				npages++;

			addr = get_pages(npages);
			stack = header.entry*2; // virtual address of the stack

			if (!map_region(stack, addr, npages, MAP_USER_SPACE)) {
				kprintf("Could not map stack at 0x%x\n", stack);
				return -ENOMEM;
			}
			memset((void*) stack, 0, npages*PAGE_SIZE);

			// create vma regions for the user-level stack
			flags = VMA_CACHEABLE;
			if (prog_header.flags & PF_R)
				flags |= VMA_READ;
			if (prog_header.flags & PF_W)
				flags |= VMA_WRITE;
			if (prog_header.flags & PF_X)
				flags |= VMA_EXECUTE;
			vma_add(per_core(current_task), stack, stack+npages*PAGE_SIZE-1, flags);
			break;
		}
	}

#if 0
	// interpret section header table
	for (i=0; i<header.sh_entry_count; i++) {
		read_fs(node, (uint8_t*)&sec_header, sizeof(elf_section_header_t),
			header.sh_offset+i*header.sh_entry_size);
	}
#endif

	if (BUILTIN_EXPECT(!stack, 0))
		return -EINVAL;

	// push the argv and env strings onto the stack
	offset = DEFAULT_STACK_SIZE - MAX_ARGS;
	memcpy((void*) (stack+offset), largs->buffer, MAX_ARGS);
	idx = offset;

	// push argv on the stack
	offset -= largs->argc * sizeof(char*);
	for(i=0; i<largs->argc; i++) {
		((char**) (stack+offset))[i] = (char*) (stack+idx);

		while(((char*) stack)[idx] != '\0')
			idx++;
		idx++;
	}

	// push env on the stack
	offset -= (largs->envc+1) * sizeof(char*);
	for(i=0; i<largs->envc; i++) {
		((char**) (stack+offset))[i] = (char*) (stack+idx);

		while(((char*) stack)[idx] != '\0')
			idx++;
		idx++;
	}
	((char**) (stack+offset))[largs->envc] = NULL;

	// push pointer to env
	offset -= sizeof(char**);
	if (!(largs->envc))
		*((char***) (stack+offset)) = NULL;
	else
		*((char***) (stack+offset)) = (char**) (stack + offset + sizeof(char**));

	// push pointer to argv
	offset -= sizeof(char**);
	*((char***) (stack+offset)) = (char**) (stack + offset + 2*sizeof(char**) + (largs->envc+1) * sizeof(char*));

	// push argc on the stack
	offset -= sizeof(int);
	*((int*) (stack+offset)) = largs->argc;

	kfree(largs, sizeof(load_args_t));

	// clear fpu state
	per_core(current_task)->flags &= ~(TASK_FPU_USED|TASK_FPU_INIT);

	jump_to_user_code(header.entry, stack+offset);

	return 0;

invalid:
	kprintf("Invalid executable!\n");
	return -EINVAL;
}
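/*
 * For reference, the user stack prepared by load_task() ends up with the
 * following layout (a sketch derived from the offset arithmetic above;
 * low addresses at the bottom, user code is entered with the stack
 * pointer at stack+offset):
 *
 *	+--------------------------------------+  stack + DEFAULT_STACK_SIZE
 *	| argv/env strings (MAX_ARGS bytes)    |
 *	+--------------------------------------+
 *	| argv[0] ... argv[argc-1]             |  point into the string area
 *	+--------------------------------------+
 *	| env[0] ... env[envc-1], NULL         |
 *	+--------------------------------------+
 *	| char** env (NULL if envc == 0)       |
 *	+--------------------------------------+
 *	| char** argv                          |
 *	+--------------------------------------+
 *	| int argc                             |  <- stack + offset
 *	+--------------------------------------+
 */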
/** @brief This call is used to adapt create_task calls
 * which want to have a start function and argument list */
static int STDCALL user_entry(void* arg)
{
	return load_task((load_args_t*) arg);
}

/** @brief Convenience variant of the task-creation functions:
 * just call it with an executable's name.
 *
 * @param id Pointer to the tid_t structure which shall be filled
 * @param fname Executable's path and filename
 * @param argv Arguments list
 * @return
 * - 0 on success
 * - -ENOMEM (-12) or -EINVAL (-22) on failure
 */
int create_user_task(tid_t* id, const char* fname, char** argv)
{
	vfs_node_t* node;
	int argc = 0;
	size_t i, buffer_size = 0;
	load_args_t* load_args = NULL;
	char *dest, *src;

	node = findnode_fs((char*) fname);
	if (!node || !(node->type == FS_FILE))
		return -EINVAL;

	// determine buffer size of argv
	if (argv) {
		while (argv[argc]) {
			buffer_size += (strlen(argv[argc]) + 1);
			argc++;
		}
	}

	if (argc <= 0)
		return -EINVAL;
	if (buffer_size >= MAX_ARGS)
		return -EINVAL;

	load_args = kmalloc(sizeof(load_args_t));
	if (BUILTIN_EXPECT(!load_args, 0))
		return -ENOMEM;
	load_args->node = node;
	load_args->argc = argc;
	load_args->envc = 0;

	// copy the argv strings into the buffer
	dest = load_args->buffer;
	for (i=0; i<argc; i++) {
		src = argv[i];
		while ((*dest++ = *src++) != 0);
	}

	/* create new task */
	return create_task(id, user_entry, load_args);
}

/** @brief Used by the execve system call to replace the current
 * process image by a new executable.
 *
 * @param fname Executable's path and filename
 * @param argv Arguments list
 * @param env Environment variables
 * @return
 * - 0 on success
 * - -ENOMEM (-12) or -EINVAL (-22) on failure
 */
int sys_execve(const char* fname, char** argv, char** env)
{
	vfs_node_t* node;
	vma_t* tmp;
	int i, ret, argc = 0, envc = 0;
	size_t buffer_size = 0;
	load_args_t* load_args = NULL;
	char *dest, *src;

	node = findnode_fs((char*) fname);
	if (!node || !(node->type == FS_FILE))
		return -EINVAL;

	// determine total buffer size of argv and env
	if (argv) {
		while (argv[argc]) {
			buffer_size += (strlen(argv[argc]) + 1);
			argc++;
		}
	}
	if (env) {
		while (env[envc]) {
			buffer_size += (strlen(env[envc]) + 1);
			envc++;
		}
	}

	if (argc <= 0)
		return -EINVAL;
	if (buffer_size >= MAX_ARGS)
		return -EINVAL;

	load_args = kmalloc(sizeof(load_args_t));
	if (BUILTIN_EXPECT(!load_args, 0))
		return -ENOMEM;
	load_args->node = node;
	load_args->argc = argc;
	load_args->envc = envc;

	// copy the argv and env strings into the buffer
	dest = load_args->buffer;
	for (i=0; i<argc; i++) {
		src = argv[i];
		while ((*dest++ = *src++) != 0);
	}
	for (i=0; i<envc; i++) {
		src = env[i];
		while ((*dest++ = *src++) != 0);
	}

	spinlock_lock(&(per_core(current_task)->vma_lock));

	// remove old program
	while((tmp = per_core(current_task)->vma_list) != NULL) {
		kfree((void*) tmp->start, tmp->end - tmp->start + 1);
		per_core(current_task)->vma_list = tmp->next;
		kfree((void*) tmp, sizeof(vma_t));
	}

	spinlock_unlock(&(per_core(current_task)->vma_lock));

	/*
	 * we use a trap gate to enter the kernel
	 * => eflags are not changed
	 * => interrupts are enabled
	 * => we could directly load the new task
	 */
	ret = load_task(load_args);

	kfree(load_args, sizeof(load_args_t));

	return ret;
}
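/*
 * Example (sketch): spawning an executable from the mounted file system
 * and collecting its exit code via wait(). The path "/bin/hello" is
 * hypothetical; the block is compiled out.
 */
#if 0
static void demo_spawn(void)
{
	tid_t id;
	int32_t result;
	char* argv[] = { "/bin/hello", NULL };

	if (!create_user_task(&id, "/bin/hello", argv))
		wait(&result);	/* returns the tid of the terminated task */
}
#endif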
/** @brief Called by tasks which are waiting for another task's
 * return value.
 */
tid_t wait(int32_t* result)
{
	wait_msg_t tmp = { -1, -1 };

	/*
	 * idle tasks are not allowed to wait for another task
	 * they should always run...
	 */
	if (BUILTIN_EXPECT(per_core(current_task)->status == TASK_IDLE, 0))
		return -EINVAL;

	mailbox_wait_msg_fetch(&per_core(current_task)->inbox, &tmp);

	if (result)
		*result = tmp.result;

	return tmp.id;
}

/** @brief Wake up a blocked task
 * @param id Id of the task to wake up
 * @return
 * - 0 on success
 * - -EINVAL (-22) on failure
 */
int wakeup_task(tid_t id)
{
	int ret = -EINVAL;

	/* avoid nested locking */
	spinlock_irqsave_lock(&table_lock);

	if (task_table[id].status != TASK_BLOCKED) {
		kprintf("Task %d is not blocked!\n", id);
	} else {
		task_table[id].status = TASK_READY;
		ret = 0;
	}

	spinlock_irqsave_unlock(&table_lock);

	return ret;
}

/** @brief Block a running or ready task.
 * @param id Id of the task to block
 * @return
 * - 0 on success
 * - -EINVAL (-22) on failure
 */
int block_task(tid_t id)
{
	int ret = -EINVAL;

	spinlock_irqsave_lock(&table_lock);

	if ((task_table[id].status == TASK_RUNNING) || (task_table[id].status == TASK_READY)) {
		task_table[id].status = TASK_BLOCKED;
		ret = 0;
	} else
		kprintf("Unable to block task %d!\n", id);

	spinlock_irqsave_unlock(&table_lock);

	return ret;
}

/** @brief _The_ scheduler procedure
 *
 * Manages scheduling - right now this is just a round robin scheduler.
 */
void scheduler(void)
{
	unsigned int i;
	unsigned int new_id;

#if MAX_CORES > 1
	spinlock_irqsave_lock(&table_lock);
#endif

	/* signal that this task slot could be reused */
	if (per_core(current_task)->status == TASK_FINISHED)
		per_core(current_task)->status = TASK_INVALID;

	/* if the task is using the FPU, we need to save the FPU context */
	if (per_core(current_task)->flags & TASK_FPU_USED) {
		save_fpu_state(&(per_core(current_task)->fpu));
		per_core(current_task)->flags &= ~TASK_FPU_USED;
	}

	for(i=1, new_id=(per_core(current_task)->id + 1) % MAX_TASKS;
	    i<MAX_TASKS; i++, new_id=(new_id+1) % MAX_TASKS)
	{
		if (task_table[new_id].status == TASK_READY) {
			if (per_core(current_task)->status == TASK_RUNNING)
				per_core(current_task)->status = TASK_READY;
			task_table[new_id].status = TASK_RUNNING;
			per_core(current_task) = task_table+new_id;
			goto get_task_out;
		}
	}

	if ((per_core(current_task)->status == TASK_RUNNING) || (per_core(current_task)->status == TASK_IDLE))
		goto get_task_out;

	/*
	 * we switch to the idle task, if the current task terminates
	 * and no other is ready
	 */
	for(i=0; i<MAX_TASKS; i++) {
		if (task_table[i].status == TASK_IDLE) {
			per_core(current_task) = task_table+i;
			goto get_task_out;
		}
	}

get_task_out:
#if MAX_CORES > 1
	spinlock_irqsave_unlock(&table_lock);
#else
	return;
#endif
}
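/*
 * Note on the scan order above: the READY search is a plain round robin
 * over the task table, starting right after the current task's slot.
 * For example, with MAX_TASKS == 8 and the current task in slot 5, the
 * loop inspects slots 6, 7, 0, 1, 2, 3, 4 and picks the first READY
 * task; only if none is found does the scheduler keep the current task
 * or fall back to an idle task.
 */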