metalsvm/kernel/tasks.c
Stefan Lankes 7e0179f5f7 first version to support the FPU
- seems to work
- currently, we didn't support SIMD instructions
2011-04-20 15:16:22 +02:00

741 lines
18 KiB
C

/*
* Copyright 2010 Stefan Lankes, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
/**
* @author Stefan Lankes
* @file kernel/tasks.c
* @brief Implementations of task loading, killing, scheduling.
*
* This file contains the implementations of the functions used to
* start tasks, wake them up, schedule them, etc.
*/
#include <metalsvm/stdio.h>
#include <metalsvm/stdlib.h>
#include <metalsvm/string.h>
#include <metalsvm/errno.h>
#include <metalsvm/mmu.h>
#include <metalsvm/page.h>
#include <metalsvm/tasks.h>
#include <metalsvm/processor.h>
#include <metalsvm/spinlock.h>
#include <metalsvm/mailbox.h>
#include <metalsvm/syscall.h>
#include <metalsvm/fs.h>
#include <asm/apic.h>
#include <asm/elf.h>
/** @brief Pointer to the task that is currently running
*
* Declared through DEFINE_PER_CORE with NULL as initial value and
* accessed via per_core(current_task). multitasking_init() points it
* at task_table[0].
*/
DEFINE_PER_CORE(task_t*, current_task, NULL);
/** @brief Array of task structures
*
* A task's id will be its position in this array.
* All MAX_TASKS entries share the same positional initializer; the
* functions below treat an entry whose status is TASK_INVALID as a
* free slot.
*/
static task_t task_table[MAX_TASKS] = {[0 ... MAX_TASKS-1] = {0, TASK_INVALID, ATOMIC_INIT(0), \
SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, 0}};
/** @brief Lock held while task_table is searched or modified
*
* Taken by create_task(), sys_fork(), wakeup_task(), block_task() and
* wakeup_blocked_tasks(); the scheduler only takes it if MAX_CORES > 1.
*/
static spinlock_irqsave_t table_lock = SPINLOCK_IRQSAVE_INIT;
/** @brief Accessor that lets the assembly code look up the current task
 *
 * @return Pointer to the task_t structure of the current task
 */
task_t* get_current_task(void)
{
	task_t* running = per_core(current_task);

	return running;
}
int multitasking_init(void) {
if (task_table[0].status == TASK_INVALID) {
task_table[0].id = 0;
task_table[0].status = TASK_RUNNING;
atomic_int32_set(&task_table[0].user_usage, 0);
mailbox_wait_msg_init(&task_table[0].inbox);
memset(task_table[0].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
per_core(current_task) = task_table+0;
per_core(current_task)->pgd = get_boot_pgd();
task_table[0].fpu_used = 0;
return 0;
}
return -ENOMEM;
}
/** @brief Wakeup tasks which are waiting for a message from the current one
*
* @param result Current task's resulting return value
*/
static void wakeup_blocked_tasks(int result)
{
wait_msg_t tmp = { per_core(current_task)->id, result };
unsigned int i;
spinlock_irqsave_lock(&table_lock);
/* wake up blocked tasks */
for(i=0; i<MAX_TASKS; i++) {
if (per_core(current_task)->outbox[i]) {
mailbox_wait_msg_post(per_core(current_task)->outbox[i], tmp);
per_core(current_task)->outbox[i] = NULL;
}
}
spinlock_irqsave_unlock(&table_lock);
}
/** @brief Common termination path of all exiting tasks
 *
 * Reports the return value to waiting tasks, releases every region of
 * the task's VMA list, drops the page directory, marks the task as
 * TASK_FINISHED and hands the CPU to the scheduler.
 *
 * @param arg Return value of the terminating task
 */
static void NORETURN do_exit(int arg) {
	task_t* self = per_core(current_task);
	vma_t* region;

	kprintf("Terminate task: %u, return value %d\n", self->id, arg);

	wakeup_blocked_tasks(arg);

	// release all memory regions; the list head always moves on to the
	// successor before the descriptor itself is freed
	spinlock_lock(&(self->vma_lock));
	for(region = self->vma_list; region != NULL; region = self->vma_list) {
		kfree((void*) region->start, region->end - region->start + 1);
		self->vma_list = region->next;
		kfree((void*) region, sizeof(vma_t));
	}
	spinlock_unlock(&(self->vma_lock));

	drop_pgd(); // delete page directory and its page tables

	if (atomic_int32_read(&self->user_usage))
		kprintf("Memory leak! Task %d did not release %d pages\n",
			self->id, atomic_int32_read(&self->user_usage));

	self->status = TASK_FINISHED;
	reschedule();

	// only reached if the scheduler did not switch to another task
	kputs("Kernel panic: scheduler found no valid task\n");
	for(;;) {
		HALT;
	}
}
/** @brief Exit path of kernel tasks
*
* Obtains the task's return value via get_return_value() and terminates
* the calling task with it through do_exit(). Never returns.
*/
void NORETURN leave_kernel_task(void) {
int result;
// NOTE(review): presumably get_return_value() picks up the value left
// behind by the task's entry function and therefore has to be the first
// thing evaluated here - confirm against its definition
result = get_return_value();
do_exit(result);
}
/** @brief To be called by the system call to exit tasks
*
* @param arg Return value of the task; handed unchanged to do_exit()
*
* Never returns.
*/
void NORETURN sys_exit(int arg) {
do_exit(arg);
}
/** @brief Aborting a task is like exiting it with result -1
*
* Never returns.
*/
void NORETURN abort(void) {
do_exit(-1);
}
/** @brief Create a task with a specific entry point
 *
 * Searches the task table for a free (TASK_INVALID) slot, gives it a
 * fresh page directory, empty VMA list and mailboxes, registers the
 * creator's inbox in the new task's outbox (so the creator can wait()
 * for it) and builds the initial stack frame.
 *
 * The slot is only marked TASK_READY - and *id only written - if the
 * initial frame could be created. This mirrors sys_fork(), which also
 * publishes the child only on success, and prevents the scheduler from
 * picking a task without a valid frame.
 *
 * @param id Pointer to a tid_t where the id shall be stored (may be NULL)
 * @param ep Pointer to the function the task shall start with
 * @param arg Argument handed to the entry point
 * @return
 * - 0 on success
 * - -ENOMEM (-12) or -EINVAL (-22) on failure
 */
static int create_task(tid_t* id, entry_point_t ep, void* arg)
{
	int ret = -ENOMEM;
	unsigned int i;

	if (BUILTIN_EXPECT(!ep, 0))
		return -EINVAL;

	spinlock_irqsave_lock(&table_lock);

	for(i=0; i<MAX_TASKS; i++) {
		if (task_table[i].status != TASK_INVALID)
			continue;

		atomic_int32_set(&task_table[i].user_usage, 0);

		ret = create_pgd(task_table+i, 0);
		if (ret < 0) {
			ret = -ENOMEM;
			goto create_task_out;
		}

		task_table[i].id = i;
		spinlock_init(&task_table[i].vma_lock);
		task_table[i].vma_list = NULL;
		mailbox_wait_msg_init(&task_table[i].inbox);
		memset(task_table[i].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
		// the creator is informed when the new task terminates
		task_table[i].outbox[per_core(current_task)->id] = &per_core(current_task)->inbox;

		ret = create_default_frame(task_table+i, ep, arg);
		task_table[i].fpu_used = 0;

		if (ret < 0) {
			// Leave the slot TASK_INVALID so that it is never scheduled.
			// NOTE(review): the page directory created above is not
			// released here - confirm whether create_pgd() on reuse of
			// the slot takes care of it.
			break;
		}

		if (id)
			*id = i;
		task_table[i].status = TASK_READY;
		break;
	}

create_task_out:
	spinlock_irqsave_unlock(&table_lock);

	return ret;
}
int sys_fork(void)
{
int ret = -ENOMEM;
unsigned int i;
task_t* parent_task = per_core(current_task);
vma_t** child;
vma_t* parent;
vma_t* tmp;
spinlock_lock(&per_core(current_task)->vma_lock);
spinlock_irqsave_lock(&table_lock);
for(i=0; i<MAX_TASKS; i++) {
if (task_table[i].status == TASK_INVALID) {
atomic_int32_set(&task_table[i].user_usage, 0);
ret = create_pgd(task_table+i, 1);
if (ret < 0) {
ret = -ENOMEM;
goto create_task_out;
}
task_table[i].id = i;
spinlock_init(&task_table[i].vma_lock);
// copy VMA list
child = &task_table[i].vma_list;
parent = per_core(current_task)->vma_list;
tmp = NULL;
while(parent) {
*child = (vma_t*) kmalloc(sizeof(vma_t));
if (BUILTIN_EXPECT(!child, 0))
break;
(*child)->start = parent->start;
(*child)->end = parent->end;
(*child)->type = parent->type;
(*child)->prev = tmp;
(*child)->next = NULL;
parent = parent->next;
tmp = *child;
child = &((*child)->next);
}
mailbox_wait_msg_init(&task_table[i].inbox);
memset(task_table[i].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
task_table[i].outbox[per_core(current_task)->id] = &per_core(current_task)->inbox;
task_table[i].fpu_used = 0x00;
ret = arch_fork(task_table+i);
if (parent_task != per_core(current_task)) {
// Oh, the current task is the new child task!
// Leave the function without releasing the locks
// because the locks are already released
// by the parent task!
return 0;
}
if (!ret) {
task_table[i].status = TASK_READY;
ret = i;
}
break;
}
}
create_task_out:
spinlock_irqsave_unlock(&table_lock);
spinlock_unlock(&per_core(current_task)->vma_lock);
return ret;
}
/** @brief Create a kernel task
*
* Public wrapper which forwards all arguments unchanged to create_task().
*
* @param id Pointer to a tid_t where the id of the new task shall be stored
* @param ep Pointer to the function the task shall start with
* @param arg Argument handed to the entry point
* @return The result of create_task(): 0 on success,
* -ENOMEM (-12) or -EINVAL (-22) on failure
*/
int create_kernel_task(tid_t* id, entry_point_t ep, void* arg)
{
return create_task(id, ep, arg);
}
/** @brief Size of the argv/env string buffer of load_args_t
*
* One page minus the sizes of the other three members of load_args_t
* (two ints and one pointer).
*/
#define MAX_ARGS (PAGE_SIZE - 2*sizeof(int) - sizeof(vfs_node_t*))
/** @brief Structure which keeps all
* relevant data for a new task to start */
typedef struct {
/// Points to the node with the executable in the file system
vfs_node_t* node;
/// Argument count
int argc;
/// Environment var count
int envc;
/// Buffer for env and argv values: the '\0'-terminated argv strings
/// come first, directly followed by the env strings
char buffer[MAX_ARGS];
} load_args_t;
/** @brief Internally used function to load tasks with a load_args_t structure
 * keeping all the information needed to launch.
 *
 * This is where the serious loading action is done:
 * - the ELF header of largs->node is read and validated,
 * - every ELF_PT_LOAD segment is mapped into the address space of the
 *   current task, cleared and filled from the file,
 * - an ELF_PT_GNU_STACK entry triggers the creation of the user-level
 *   stack at the virtual address header.entry*2,
 * - the argv and env strings, the pointer arrays and argc are pushed onto
 *   that stack, and finally the function jumps to the user code.
 *
 * Ownership of largs: the structure is released right before the jump
 * to user code. On every error path it is NOT released, i.e. it still
 * belongs to the caller.
 *
 * @param largs Executable node, argument/environment counts and strings
 * @return
 * - 0 if jump_to_user_code() returns
 * - -EINVAL (-22) for missing arguments or an invalid executable
 * - -ENOMEM (-12) if a segment or the stack could not be mapped, or if
 *   the executable does not request a stack
 */
static int load_task(load_args_t* largs)
{
	uint32_t i, offset, idx;
	uint32_t addr, npages, flags, stack = 0;
	elf_header_t header;
	elf_program_header_t prog_header;
	vfs_node_t* node;

	if (!largs)
		return -EINVAL;

	node = largs->node;
	if (!node)
		return -EINVAL;

	// never validate uninitialized stack content: clear the header and
	// treat an empty read like an invalid executable (read_fs() returning
	// 0 is handled as failure for the program headers below as well)
	memset(&header, 0x00, sizeof(elf_header_t));
	if (read_fs(node, (uint8_t*)&header, sizeof(elf_header_t), 0) == 0)
		goto invalid;

	if (BUILTIN_EXPECT(header.ident.magic != ELF_MAGIC, 0))
		goto invalid;

	if (BUILTIN_EXPECT(header.type != ELF_ET_EXEC, 0))
		goto invalid;

	if (BUILTIN_EXPECT(header.machine != ELF_EM_386, 0))
		goto invalid;

	if (BUILTIN_EXPECT(header.ident._class != ELF_CLASS_32, 0))
		goto invalid;

	if (BUILTIN_EXPECT(header.ident.data != ELF_DATA_2LSB, 0))
		goto invalid;

	// the entry point has to be located above the kernel space
	if (header.entry <= KERNEL_SPACE)
		goto invalid;

	// interpret program header table
	for (i=0; i<header.ph_entry_count; i++) {
		if (read_fs(node, (uint8_t*)&prog_header, sizeof(elf_program_header_t), header.ph_offset+i*header.ph_entry_size) == 0) {
			kprintf("Could not read programm header!\n");
			continue;
		}

		switch(prog_header.type)
		{
		case ELF_PT_LOAD: // load program segment
			if (!prog_header.virt_addr)
				continue;

			// The file image must fit into the memory image of the
			// segment, otherwise read_fs() below would write beyond
			// the pages mapped for it.
			if (BUILTIN_EXPECT(prog_header.file_size > prog_header.mem_size, 0))
				goto invalid;

			npages = (prog_header.mem_size / PAGE_SIZE);
			if (prog_header.mem_size % PAGE_SIZE)
				npages++;

			addr = get_pages(npages);
			flags = MAP_USER_SPACE;
			if (prog_header.flags & PF_X)
				flags |= MAP_CODE;

			// map page frames in the address space of the current task;
			// without a mapping the segment must not be touched
			if (!map_region(prog_header.virt_addr, addr, npages, flags)) {
				kprintf("Could not map 0x%x at 0x%x\n", addr, prog_header.virt_addr);
				return -ENOMEM;
			}

			// clear pages
			memset((void*) prog_header.virt_addr, 0, npages*PAGE_SIZE);

			// load program
			read_fs(node, (uint8_t*)prog_header.virt_addr, prog_header.file_size, prog_header.offset);

			flags = VMA_CACHEABLE;
			if (prog_header.flags & PF_R)
				flags |= VMA_READ;
			if (prog_header.flags & PF_W)
				flags |= VMA_WRITE;
			if (prog_header.flags & PF_X)
				flags |= VMA_EXECUTE;
			vma_add(per_core(current_task), prog_header.virt_addr, prog_header.virt_addr+npages*PAGE_SIZE-1, flags);

			// segments without write permission are adjusted after loading
			if (!(prog_header.flags & PF_W))
				change_page_permissions(prog_header.virt_addr, prog_header.virt_addr+npages*PAGE_SIZE-1, flags);
			break;

		case ELF_PT_GNU_STACK: // Indicates stack executability
			// create user-level stack
			npages = DEFAULT_STACK_SIZE / PAGE_SIZE;
			if (DEFAULT_STACK_SIZE % PAGE_SIZE)
				npages++;

			addr = get_pages(npages);
			stack = header.entry*2; // virtual address of the stack

			if (!map_region(stack, addr, npages, MAP_USER_SPACE)) {
				kprintf("Could not map stack at 0x%x\n", stack);
				return -ENOMEM;
			}
			memset((void*) stack, 0, npages*PAGE_SIZE);

			// create vma regions for the user-level stack
			flags = VMA_CACHEABLE;
			if (prog_header.flags & PF_R)
				flags |= VMA_READ;
			if (prog_header.flags & PF_W)
				flags |= VMA_WRITE;
			if (prog_header.flags & PF_X)
				flags |= VMA_EXECUTE;
			vma_add(per_core(current_task), stack, stack+npages*PAGE_SIZE-1, flags);
			break;

		default: // all other program header types are ignored
			break;
		}
	}

	// TODO: interpret section header table

	if (BUILTIN_EXPECT(!stack, 0)) {
		kprintf("Stack is missing!\n");
		return -ENOMEM;
	}

	// push strings on the stack
	offset = DEFAULT_STACK_SIZE-8;
	memset((void*) (stack+offset), 0, 4);
	offset -= MAX_ARGS;
	memcpy((void*) (stack+offset), largs->buffer, MAX_ARGS);
	idx = offset; // stack offset of the first string

	// push argv on the stack
	offset -= largs->argc * sizeof(char*);
	for(i=0; i<largs->argc; i++) {
		((char**) (stack+offset))[i] = (char*) (stack+idx);

		// skip to the start of the next string
		while(((char*) stack)[idx] != '\0')
			idx++;
		idx++;
	}

	// push env on the stack (NULL-terminated pointer array)
	offset -= (largs->envc+1) * sizeof(char*);
	for(i=0; i<largs->envc; i++) {
		((char**) (stack+offset))[i] = (char*) (stack+idx);

		while(((char*) stack)[idx] != '\0')
			idx++;
		idx++;
	}
	((char**) (stack+offset))[largs->envc] = NULL;

	// push pointer to env
	offset -= sizeof(char**);
	if (!(largs->envc))
		*((char***) (stack+offset)) = NULL;
	else
		*((char***) (stack+offset)) = (char**) (stack + offset + sizeof(char**));

	// push pointer to argv
	offset -= sizeof(char**);
	*((char***) (stack+offset)) = (char**) (stack + offset + 2*sizeof(char**) + (largs->envc+1) * sizeof(char*));

	// push argc on the stack
	offset -= sizeof(int);
	*((int*) (stack+offset)) = largs->argc;

	// everything is copied to the user stack => the arguments are obsolete
	kfree(largs, sizeof(load_args_t));

	jump_to_user_code(header.entry, stack+offset);

	return 0;

invalid:
	kprintf("Invalid executable!\n");

	return -EINVAL;
}
/** @brief Entry point of tasks created by create_user_task()
 *
 * Adapts load_task() to the entry point signature expected by
 * create_task().
 *
 * load_task() releases the argument structure only on its way to the
 * user code. If loading fails, the structure is released here, otherwise
 * it would be leaked because nobody else holds a reference to it.
 *
 * @param arg Pointer to the load_args_t structure of the new task
 * @return Result of load_task()
 */
static int STDCALL user_entry(void* arg)
{
	int ret;

	ret = load_task((load_args_t*) arg);

	// a non-zero result means that load_task() bailed out before it
	// released its arguments
	if (ret && arg)
		kfree(arg, sizeof(load_args_t));

	return ret;
}
/** @brief Convenience variant of the task creation functions: just pass
 * the name of an executable
 *
 * Looks up the executable, packs the argument strings into a freshly
 * allocated load_args_t and creates a task which loads the executable
 * via user_entry().
 *
 * The string buffer is cleared before it is filled, because load_task()
 * copies all MAX_ARGS bytes of it onto the user-level stack; without
 * clearing, stale kernel heap content behind the last string would become
 * visible to the user program.
 *
 * @param id Pointer to the tid_t which shall be filled with the new id
 * @param fname Executable's path and filename
 * @param argv NULL-terminated arguments list (at least one entry)
 * @return
 * - 0 on success
 * - -ENOMEM (-12) or -EINVAL (-22) on failure
 */
int create_user_task(tid_t* id, const char* fname, char** argv)
{
	vfs_node_t* node;
	int argc = 0;
	size_t i, buffer_size = 0;
	load_args_t* load_args = NULL;
	char *dest, *src;

	node = findnode_fs((char*) fname);
	if (!node || !(node->type == FS_FILE))
		return -EINVAL;

	// determine buffer size of argv
	if (argv) {
		while (argv[argc]) {
			buffer_size += (strlen(argv[argc]) + 1);
			argc++;
		}
	}

	if (argc <= 0)
		return -EINVAL;
	if (buffer_size >= MAX_ARGS)
		return -EINVAL;

	load_args = kmalloc(sizeof(load_args_t));
	if (BUILTIN_EXPECT(!load_args, 0))
		return -ENOMEM;

	load_args->node = node;
	load_args->argc = argc;
	load_args->envc = 0;

	// do not hand uninitialized kernel memory to the user program
	memset(load_args->buffer, 0x00, MAX_ARGS);

	// pack the strings back to back, each including its terminator
	dest = load_args->buffer;
	for (i=0; i<argc; i++) {
		src = argv[i];
		while ((*dest++ = *src++) != 0);
	}

	// From here on load_args belongs to the new task.
	// NOTE(review): if create_task() fails, load_args is not released
	// here; whether that is safe to add depends on create_task() never
	// scheduling a task for which it reports an error - confirm.
	return create_task(id, user_entry, load_args);
}
/** @brief Used by the execve system call
 *
 * Packs the argument and environment strings into a load_args_t,
 * releases all memory regions of the calling task and loads the new
 * executable directly into the current task.
 *
 * The string buffer is cleared before it is filled, because load_task()
 * copies all MAX_ARGS bytes of it onto the user-level stack; without
 * clearing, stale kernel heap content behind the last string would become
 * visible to the user program.
 *
 * @param fname Executable's path and filename
 * @param argv NULL-terminated arguments list (at least one entry)
 * @param env NULL-terminated environment list (may be NULL)
 * @return
 * - -EINVAL (-22) or -ENOMEM (-12) on failure
 * - otherwise the result of load_task()
 */
int sys_execve(const char* fname, char** argv, char** env)
{
	vfs_node_t* node;
	vma_t* tmp;
	size_t i, buffer_size = 0;
	load_args_t* load_args = NULL;
	char *dest, *src;
	int ret, argc = 0;
	int envc = 0;

	node = findnode_fs((char*) fname);
	if (!node || !(node->type == FS_FILE))
		return -EINVAL;

	// determine total buffer size of argv and env
	if (argv) {
		while (argv[argc]) {
			buffer_size += (strlen(argv[argc]) + 1);
			argc++;
		}
	}

	if (env) {
		while (env[envc]) {
			buffer_size += (strlen(env[envc]) + 1);
			envc++;
		}
	}

	if (argc <= 0)
		return -EINVAL;
	if (buffer_size >= MAX_ARGS)
		return -EINVAL;

	load_args = kmalloc(sizeof(load_args_t));
	if (BUILTIN_EXPECT(!load_args, 0))
		return -ENOMEM;

	load_args->node = node;
	load_args->argc = argc;
	load_args->envc = envc;

	// do not hand uninitialized kernel memory to the user program
	memset(load_args->buffer, 0x00, MAX_ARGS);

	// pack the strings back to back: first argv, then env
	dest = load_args->buffer;
	for (i=0; i<argc; i++) {
		src = argv[i];
		while ((*dest++ = *src++) != 0);
	}

	for (i=0; i<envc; i++) {
		src = env[i];
		while ((*dest++ = *src++) != 0);
	}

	// The strings are copied by now, so the old program may vanish.
	// NOTE(review): the old image is removed before the new executable
	// has been validated by load_task() - confirm that returning an
	// error to the caller afterwards is intended.
	spinlock_lock(&(per_core(current_task)->vma_lock));

	// remove old program
	while((tmp = per_core(current_task)->vma_list) != NULL) {
		kfree((void*) tmp->start, tmp->end - tmp->start + 1);
		per_core(current_task)->vma_list = tmp->next;
		kfree((void*) tmp, sizeof(vma_t));
	}

	spinlock_unlock(&(per_core(current_task)->vma_lock));

	/*
	 * we use a trap gate to enter the kernel
	 * => eflags are not changed
	 * => interrupts are enabled
	 * => we could directly load the new task
	 */
	ret = load_task(load_args);

	// only reached if load_task() came back, i.e. did not jump to the
	// user code
	kfree(load_args, sizeof(load_args_t));

	return ret;
}
/** @brief Wait for the return value of another task
 *
 * Fetches the next message from the inbox of the calling task.
 *
 * @param result If not NULL, receives the result stored in the message
 * @return The id stored in the fetched message, or -EINVAL (-22) if the
 * caller is an idle task
 */
tid_t wait(int32_t* result)
{
	task_t* self = per_core(current_task);
	wait_msg_t msg = { -1, -1};

	/*
	 * an idle task has to stay runnable all the time,
	 * consequently it must not wait for another task
	 */
	if (BUILTIN_EXPECT(self->status == TASK_IDLE, 0))
		return -EINVAL;

	mailbox_wait_msg_fetch(&self->inbox, &msg);

	if (result != NULL)
		*result = msg.result;

	return msg.id;
}
/** @brief Wakeup a blocked task
 *
 * Sets a task whose status is TASK_BLOCKED back to TASK_READY.
 *
 * @param id The task's id, i.e. its index in the task table
 * @return
 * - 0 on success
 * - -EINVAL (-22) on failure (id out of range or task not blocked)
 */
int wakeup_task(tid_t id)
{
	int ret = -EINVAL;

	// the id is used as array index => reject everything outside of the
	// task table (the cast also catches negative values of signed ids)
	if (BUILTIN_EXPECT((unsigned int) id >= MAX_TASKS, 0))
		return -EINVAL;

	/* avoid nested locking */
	spinlock_irqsave_lock(&table_lock);

	if (task_table[id].status != TASK_BLOCKED) {
		kprintf("Task %d is not blocked!\n", id);
	} else {
		task_table[id].status = TASK_READY;
		ret = 0;
	}

	spinlock_irqsave_unlock(&table_lock);

	return ret;
}
/** @brief Block a running or ready task.
 *
 * Sets a task whose status is TASK_RUNNING or TASK_READY to
 * TASK_BLOCKED. Only the status is changed here.
 *
 * @param id The task's id, i.e. its index in the task table
 * @return
 * - 0 on success
 * - -EINVAL (-22) on failure (id out of range or task in another state)
 */
int block_task(tid_t id)
{
	int ret = -EINVAL;

	// the id is used as array index => reject everything outside of the
	// task table (the cast also catches negative values of signed ids)
	if (BUILTIN_EXPECT((unsigned int) id >= MAX_TASKS, 0))
		return -EINVAL;

	spinlock_irqsave_lock(&table_lock);

	if ((task_table[id].status == TASK_RUNNING) || (task_table[id].status == TASK_READY)) {
		task_table[id].status = TASK_BLOCKED;
		ret = 0;
	} else kprintf("Unable to block task %d!\n", id);

	spinlock_irqsave_unlock(&table_lock);

	return ret;
}
/** @brief _The_ scheduler procedure
*
* Manages scheduling - right now this is just a round robin scheduler.
*
* Selects the next task by updating per_core(current_task):
* -# the first TASK_READY entry found after the current task's slot,
* -# otherwise the current task itself, if it is still running or idle,
* -# otherwise the first entry with status TASK_IDLE.
*
* If none of these exists, per_core(current_task) is left unchanged.
*/
void scheduler(void)
{
unsigned int i;
unsigned int new_id;
// the task table is only locked here in multi-core configurations
#if MAX_CORES > 1
spinlock_irqsave_lock(&table_lock);
#endif
/* signalize that this task could be reused */
if (per_core(current_task)->status == TASK_FINISHED)
per_core(current_task)->status = TASK_INVALID;
// Round robin: start with the slot behind the current task and wrap
// around at the end of the table; i only counts the MAX_TASKS-1 visited
// slots, so the slot of the current task itself is not inspected.
for(i=1, new_id=(per_core(current_task)->id + 1) % MAX_TASKS;
i<MAX_TASKS; i++, new_id=(new_id+1) % MAX_TASKS)
{
if (task_table[new_id].status == TASK_READY) {
// a preempted (still running) task stays schedulable; any other
// status of the outgoing task is left untouched
if (per_core(current_task)->status == TASK_RUNNING)
per_core(current_task)->status = TASK_READY;
task_table[new_id].status = TASK_RUNNING;
// keep the FPU context of the outgoing task, if it has used the FPU
if (per_core(current_task)->fpu_used)
save_fpu_state(&(per_core(current_task)->fpu));
per_core(current_task) = task_table+new_id;
goto get_task_out;
}
}
// no other task is ready: a running or idle task simply continues
if ((per_core(current_task)->status == TASK_RUNNING) || (per_core(current_task)->status == TASK_IDLE))
goto get_task_out;
/*
* we switch to the idle task, if the current task terminates
* and no other is ready
*/
// NOTE(review): unlike the path above, this path does not call
// save_fpu_state() for the outgoing task - confirm that this is intended
// for tasks which leave the CPU without being finished.
for(i=0; i<MAX_TASKS; i++) {
if (task_table[i].status == TASK_IDLE) {
per_core(current_task) = task_table+i;
goto get_task_out;
}
}
// common exit: release the lock if it was taken above
get_task_out:
#if MAX_CORES > 1
spinlock_irqsave_unlock(&table_lock);
#else
return;
#endif
}