/*
* Copyright 2010 Stefan Lankes, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
/**
* @author Stefan Lankes
* @file kernel/tasks.c
* @brief Implementations of task loading, killing, scheduling.
*
* This file contains the implementations of the functions used to start,
* wake up, block, and schedule tasks.
*/
#include <metalsvm/stdio.h>
#include <metalsvm/stdlib.h>
#include <metalsvm/string.h>
#include <metalsvm/errno.h>
#include <metalsvm/mmu.h>
#include <metalsvm/page.h>
#include <metalsvm/tasks.h>
#include <metalsvm/processor.h>
#include <metalsvm/spinlock.h>
#include <metalsvm/mailbox.h>
#include <metalsvm/syscall.h>
#include <metalsvm/fs.h>
#include <metalsvm/time.h>
#include <asm/apic.h>
#include <asm/elf.h>
/** @brief Array of task structures
*
* A task's id will be its position in this array.
*/
static task_t task_table[MAX_TASKS] = { \
[0] = {0, TASK_IDLE, NULL, NULL, 0, 0, 0, NULL, NULL, 0, ATOMIC_INIT(0), SPINLOCK_IRQSAVE_INIT, NULL, SPINLOCK_INIT, NULL, NULL, 0, 0, 0, 0}, \
[1 ... MAX_TASKS-1] = {0, TASK_INVALID, NULL, NULL, 0, 0, 0, NULL, NULL, 0, ATOMIC_INIT(0), SPINLOCK_IRQSAVE_INIT, NULL, SPINLOCK_INIT, NULL, NULL, 0, 0, 0, 0}};
static spinlock_irqsave_t table_lock = SPINLOCK_IRQSAVE_INIT;
#ifndef CONFIG_TICKLESS
#if MAX_CORES > 1
static runqueue_t runqueues[MAX_CORES] = { \
[0] = {task_table+0, NULL, 0, {[0 ... 2] = 0}, TIMER_FREQ/5, TIMER_FREQ/2, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_IRQSAVE_INIT}, \
[1 ... MAX_CORES-1] = {NULL, NULL, 0, {[0 ... 2] = 0}, TIMER_FREQ/5, TIMER_FREQ/2, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_IRQSAVE_INIT}};
#else
static runqueue_t runqueues[1] = { \
[0] = {task_table+0, NULL, 0, {[0 ... 2] = 0}, TIMER_FREQ/5, TIMER_FREQ/2, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_IRQSAVE_INIT}};
#endif
#else
#if MAX_CORES > 1
static runqueue_t runqueues[MAX_CORES] = { \
[0] = {task_table+0, NULL, 0, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_IRQSAVE_INIT}, \
[1 ... MAX_CORES-1] = {NULL, NULL, 0, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_IRQSAVE_INIT}};
#else
static runqueue_t runqueues[1] = { \
[0] = {task_table+0, NULL, 0, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_IRQSAVE_INIT}};
#endif
#endif
#if MAX_CORES > 1
extern atomic_int32_t cpu_online;
#endif
DEFINE_PER_CORE(task_t*, current_task, task_table+0);
extern const void boot_stack;
/** @brief helper function for the assembly code to determine the current task
* @return Pointer to the task_t structure of current task
*/
task_t* get_current_task(void) {
return per_core(current_task);
}
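/** @brief Reschedule if a task with a higher priority than the current one is ready
*
* Does nothing while interrupts are disabled, because the caller is then
* still inside a critical section.
*/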
void check_scheduling(void) {
if (!is_irq_enabled())
return;
if (msb(runqueues[CORE_ID].prio_bitmap) > per_core(current_task)->prio)
reschedule();
}
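/** @brief Determine the highest priority for which a ready task exists
*
* @return Most significant bit set in the current core's prio_bitmap; if no
* task is ready, the returned value is >= sizeof(size_t)*8
*/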
uint32_t get_highest_priority(void)
{
return msb(runqueues[CORE_ID].prio_bitmap);
}
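/** @brief Initialize the multitasking subsystem
*
* Completes the statically initialized idle task of the boot processor
* (task 0) by setting up its mailbox, page directory, flags, priority and stack.
*
* @return 0 on success, non-zero otherwise
*/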
int multitasking_init(void) {
if (BUILTIN_EXPECT(task_table[0].status != TASK_IDLE, 0)) {
kputs("Task 0 is not an idle task\n");
return -ENOMEM;
}
mailbox_wait_msg_init(&task_table[0].inbox);
memset(task_table[0].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
task_table[0].pgd = get_boot_pgd();
task_table[0].flags = TASK_DEFAULT_FLAGS;
task_table[0].prio = IDLE_PRIO;
task_table[0].stack = (void*) &boot_stack;
return 0;
}
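/** @brief Initialize the idle task of an application core
*
* Only available if MAX_CORES > 1: sets up task_table[id] as the idle task of
* core id and registers it in that core's runqueue.
*
* @param id Id of the core and of its idle task
* @return Initial kernel stack pointer for the idle task on success, -EINVAL otherwise
*/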
size_t get_idle_task(uint32_t id)
{
#if MAX_CORES > 1
if (BUILTIN_EXPECT((id >= MAX_TASKS) || (task_table[id].status != TASK_INVALID), 0))
return -EINVAL;
task_table[id].id = id;
task_table[id].last_stack_pointer = NULL;
task_table[id].stack = (void*) ((size_t)&boot_stack + id * KERNEL_STACK_SIZE);
task_table[id].status = TASK_IDLE;
task_table[id].prio = IDLE_PRIO;
task_table[id].flags = TASK_DEFAULT_FLAGS;
task_table[id].last_core = id;
atomic_int32_set(&task_table[id].user_usage, 0);
mailbox_wait_msg_init(&task_table[id].inbox);
memset(task_table[id].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
task_table[id].pgd = get_boot_pgd();
current_task[id].var = task_table+id;
runqueues[id].idle = task_table+id;
return (size_t) task_table[id].stack + KERNEL_STACK_SIZE - 16;
#else
return -EINVAL;
#endif
}
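/** @brief Complete a task switch on the current core
*
* If the previous task has been marked invalid, its kernel stack is released;
* otherwise the task is appended to the tail of its priority queue again.
*/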
void finish_task_switch(void)
{
uint8_t prio;
uint32_t core_id = CORE_ID;
task_t* old;
spinlock_irqsave_lock(&runqueues[core_id].lock);
if ((old = runqueues[core_id].old_task) != NULL) {
if (old->status == TASK_INVALID) {
destroy_stack(old);
old->stack = NULL;
old->last_stack_pointer = NULL;
runqueues[core_id].old_task = NULL;
} else {
prio = old->prio;
if (!runqueues[core_id].queue[prio-1].first) {
old->next = old->prev = NULL;
runqueues[core_id].queue[prio-1].first = runqueues[core_id].queue[prio-1].last = old;
} else {
old->next = NULL;
old->prev = runqueues[core_id].queue[prio-1].last;
runqueues[core_id].queue[prio-1].last->next = old;
runqueues[core_id].queue[prio-1].last = old;
}
runqueues[core_id].old_task = NULL;
runqueues[core_id].prio_bitmap |= (1 << prio);
}
}
spinlock_irqsave_unlock(&runqueues[core_id].lock);
}
/** @brief Wake up all tasks which are waiting for a message from the current one
*
* @param result The current task's return value
*/
static void wakeup_blocked_tasks(int result)
{
task_t* curr_task = per_core(current_task);
wait_msg_t tmp = { curr_task->id, result };
unsigned int i;
spinlock_irqsave_lock(&table_lock);
/* wake up blocked tasks */
for(i=0; i<MAX_TASKS; i++) {
if (curr_task->outbox[i]) {
//kprintf("Wake up blocked task %d\n", i);
mailbox_wait_msg_post(curr_task->outbox[i], tmp);
curr_task->outbox[i] = NULL;
}
}
spinlock_irqsave_unlock(&table_lock);
}
/** @brief Common exit path, called by leave_kernel_task, sys_exit and abort
* to tear down the current task. */
static void NORETURN do_exit(int arg) {
vma_t* tmp;
task_t* curr_task = per_core(current_task);
uint32_t flags, core_id, fd;
int status;
if(curr_task->fildes_table) {
for (fd = 0; fd < NR_OPEN; fd++) {
if(curr_task->fildes_table[fd] != NULL) {
/*
* Remove the descriptor from the per-process object
* reference table. If this is not the last reference to the
* underlying object, only the reference count is decreased
* and the object itself is left untouched.
*/
if (curr_task->fildes_table[fd]->count == 1) {
/* try to close the file */
status = close_fs(curr_task->fildes_table[fd]);
/* the close operation failed => report the error code */
if (BUILTIN_EXPECT(status < 0, 0))
kprintf("Task %u was not able to close file descriptor %i. close_fs returned %d\n", curr_task->id, fd, -status);
kfree(curr_task->fildes_table[fd], sizeof(fildes_t));
curr_task->fildes_table[fd] = NULL;
} else {
curr_task->fildes_table[fd]->count--;
curr_task->fildes_table[fd] = NULL;
}
}
}
// finally, the descriptor table itself has to be released
kfree(curr_task->fildes_table, sizeof(filp_t)*NR_OPEN);
}
kprintf("Terminate task: %u, return value %d\n", curr_task->id, arg);
wakeup_blocked_tasks(arg);
//vma_dump(curr_task);
spinlock_lock(&curr_task->vma_lock);
// remove memory regions
while((tmp = curr_task->vma_list) != NULL) {
kfree((void*) tmp->start, tmp->end - tmp->start + 1);
curr_task->vma_list = tmp->next;
kfree((void*) tmp, sizeof(vma_t));
}
spinlock_unlock(&curr_task->vma_lock);
drop_pgd(); // delete page directory and its page tables
#if 0
if (atomic_int32_read(&curr_task->user_usage))
kprintf("Memory leak! Task %d did not release %d pages\n",
curr_task->id, atomic_int32_read(&curr_task->user_usage));
#endif
curr_task->status = TASK_FINISHED;
// decrease the number of active tasks
flags = irq_nested_disable();
core_id = CORE_ID;
spinlock_irqsave_lock(&runqueues[core_id].lock);
runqueues[core_id].nr_tasks--;
spinlock_irqsave_unlock(&runqueues[core_id].lock);
irq_nested_enable(flags);
reschedule();
kprintf("Kernel panic: scheduler on core %d found no valid task\n", CORE_ID);
while(1) {
HALT;
}
}
/** @brief Exit path for kernel-level tasks: fetches the return value and terminates the task */
void NORETURN leave_kernel_task(void) {
int result;
result = get_return_value();
do_exit(result);
}
/** @brief Exit handler invoked by the exit system call */
void NORETURN sys_exit(int arg) {
do_exit(arg);
}
/** @brief Aborting a task is like exiting it with result -1 */
void NORETURN abort(void) {
do_exit(-1);
}
/** @brief Create a task with a specific entry point
*
* @param id Pointer to a tid_t variable where the new task's id shall be stored
* @param ep Pointer to the function the task shall start with
* @param arg Arguments list
* @param prio Desired priority of the new task
* @param core_id Start the new task on the core with this id
*
* @return
* - 0 on success
* - -ENOMEM (-12) or -EINVAL (-22) on failure
*/
static int create_task(tid_t* id, entry_point_t ep, void* arg, uint8_t prio, uint32_t core_id)
{
task_t* curr_task;
int ret = -ENOMEM;
uint32_t i;
if (BUILTIN_EXPECT(!ep, 0))
return -EINVAL;
if (BUILTIN_EXPECT(prio == IDLE_PRIO, 0))
return -EINVAL;
if (BUILTIN_EXPECT(prio > MAX_PRIO, 0))
return -EINVAL;
spinlock_irqsave_lock(&table_lock);
#if MAX_CORES > 1
if (core_id >= atomic_int32_read(&cpu_online))
#else
if (core_id > 0)
#endif
{
core_id = CORE_ID;
kprintf("Inavlid core id! Set id to %u!\n", core_id);
}
curr_task = per_core(current_task);
for(i=0; i<MAX_TASKS; i++) {
if (task_table[i].status == TASK_INVALID) {
atomic_int32_set(&task_table[i].user_usage, 0);
ret = create_pgd(task_table+i, 0);
if (ret < 0) {
ret = -ENOMEM;
goto create_task_out;
}
task_table[i].id = i;
task_table[i].status = TASK_READY;
task_table[i].last_stack_pointer = NULL;
task_table[i].stack = create_stack();
task_table[i].flags = TASK_DEFAULT_FLAGS;
task_table[i].prio = prio;
task_table[i].last_core = 0;
spinlock_init(&task_table[i].vma_lock);
task_table[i].vma_list = NULL;
task_table[i].fildes_table = NULL;
mailbox_wait_msg_init(&task_table[i].inbox);
memset(task_table[i].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
task_table[i].outbox[curr_task->id] = &curr_task->inbox;
if (id)
*id = i;
ret = create_default_frame(task_table+i, ep, arg);
task_table[i].start_heap = 0;
task_table[i].end_heap = 0;
task_table[i].lwip_err = 0;
task_table[i].start_tick = get_clock_tick();
// add task in the runqueue
spinlock_irqsave_lock(&runqueues[core_id].lock);
runqueues[core_id].prio_bitmap |= (1 << prio);
runqueues[core_id].nr_tasks++;
if (!runqueues[core_id].queue[prio-1].first) {
task_table[i].next = task_table[i].prev = NULL;
runqueues[core_id].queue[prio-1].first = task_table+i;
runqueues[core_id].queue[prio-1].last = task_table+i;
} else {
task_table[i].prev = runqueues[core_id].queue[prio-1].last;
task_table[i].next = NULL;
runqueues[core_id].queue[prio-1].last->next = task_table+i;
runqueues[core_id].queue[prio-1].last = task_table+i;
}
spinlock_irqsave_unlock(&runqueues[core_id].lock);
break;
}
}
create_task_out:
spinlock_irqsave_unlock(&table_lock);
return ret;
}
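/** @brief Implementation of the fork system call
*
* Duplicates the current task: a new page directory is created via
* create_pgd(..., 1), the VMA list and the file descriptor table are copied,
* and the FPU state is duplicated. The child is enqueued on the current core.
*
* @return
* - id of the child task in the parent
* - 0 in the child
* - -ENOMEM (-12) on failure
*/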
int sys_fork(void)
{
int ret = -ENOMEM;
unsigned int i, core_id, fd_i;
task_t* parent_task = per_core(current_task);
vma_t** child;
vma_t* parent;
vma_t* tmp;
spinlock_lock(&parent_task->vma_lock);
spinlock_irqsave_lock(&table_lock);
core_id = CORE_ID;
for(i=0; i<MAX_TASKS; i++) {
if (task_table[i].status == TASK_INVALID) {
atomic_int32_set(&task_table[i].user_usage, 0);
ret = create_pgd(task_table+i, 1);
if (ret < 0) {
ret = -ENOMEM;
goto create_task_out;
}
task_table[i].id = i;
task_table[i].last_stack_pointer = NULL;
task_table[i].stack = create_stack();
spinlock_init(&task_table[i].vma_lock);
// copy VMA list
child = &task_table[i].vma_list;
parent = parent_task->vma_list;
tmp = NULL;
while(parent) {
*child = (vma_t*) kmalloc(sizeof(vma_t));
if (BUILTIN_EXPECT(!*child, 0))
break;
(*child)->start = parent->start;
(*child)->end = parent->end;
(*child)->type = parent->type;
(*child)->prev = tmp;
(*child)->next = NULL;
parent = parent->next;
tmp = *child;
child = &((*child)->next);
}
/* init fildes_table */
task_table[i].fildes_table = kmalloc(sizeof(filp_t)*NR_OPEN);
memcpy(task_table[i].fildes_table, parent_task->fildes_table, sizeof(filp_t)*NR_OPEN);
for (fd_i = 0; fd_i < NR_OPEN; fd_i++)
if ((task_table[i].fildes_table[fd_i]) != NULL)
task_table[i].fildes_table[fd_i]->count++;
mailbox_wait_msg_init(&task_table[i].inbox);
memset(task_table[i].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
task_table[i].outbox[parent_task->id] = &parent_task->inbox;
task_table[i].flags = parent_task->flags;
memcpy(&(task_table[i].fpu), &(parent_task->fpu), sizeof(union fpu_state));
task_table[i].start_tick = get_clock_tick();
task_table[i].start_heap = 0;
task_table[i].end_heap = 0;
task_table[i].lwip_err = 0;
task_table[i].prio = parent_task->prio;
task_table[i].last_core = parent_task->last_core;
// add task in the runqueue
spinlock_irqsave_lock(&runqueues[core_id].lock);
runqueues[core_id].prio_bitmap |= (1 << parent_task->prio);
runqueues[core_id].nr_tasks++;
if (!runqueues[core_id].queue[parent_task->prio-1].first) {
task_table[i].next = task_table[i].prev = NULL;
runqueues[core_id].queue[parent_task->prio-1].first = task_table+i;
runqueues[core_id].queue[parent_task->prio-1].last = task_table+i;
} else {
task_table[i].prev = runqueues[core_id].queue[parent_task->prio-1].last;
task_table[i].next = NULL;
runqueues[core_id].queue[parent_task->prio-1].last->next = task_table+i;
runqueues[core_id].queue[parent_task->prio-1].last = task_table+i;
}
spinlock_irqsave_unlock(&runqueues[core_id].lock);
ret = arch_fork(task_table+i);
if (parent_task != per_core(current_task)) {
// Oh, the current task is the new child task!
// Leave the function without releasing the locks
// because the locks are already released
// by the parent task!
return 0;
}
if (!ret) {
task_table[i].status = TASK_READY;
ret = i;
}
break;
}
}
create_task_out:
spinlock_irqsave_unlock(&table_lock);
spinlock_unlock(&parent_task->vma_lock);
return ret;
}
/** @brief Structure which keeps all
* relevant data for a new kernel task to start */
typedef struct {
/// entry point of the kernel task
entry_point_t func;
/// arguments
void* args;
} kernel_args_t;
/** @brief Trampoline used to adapt create_task() to kernel tasks
* which expect a start function and an argument */
static int kernel_entry(void* args)
{
int ret;
kernel_args_t* kernel_args = (kernel_args_t*) args;
if (BUILTIN_EXPECT(!kernel_args, 0))
return -EINVAL;
ret = kernel_args->func(kernel_args->args);
kfree(kernel_args, sizeof(kernel_args_t));
return ret;
}
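/** @brief Create a kernel-level task on a specific core
*
* Wraps the entry point and its argument into a kernel_args_t structure and
* starts the task via create_task() with kernel_entry() as trampoline.
* Priorities above MAX_PRIO fall back to NORMAL_PRIO.
*
* @return
* - 0 on success
* - -ENOMEM (-12) or -EINVAL (-22) on failure
*/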
int create_kernel_task_on_core(tid_t* id, entry_point_t ep, void* args, uint8_t prio, uint32_t core_id)
{
kernel_args_t* kernel_args;
kernel_args = kmalloc(sizeof(kernel_args_t));
if (BUILTIN_EXPECT(!kernel_args, 0))
return -ENOMEM;
kernel_args->func = ep;
kernel_args->args = args;
if (prio > MAX_PRIO)
prio = NORMAL_PRIO;
return create_task(id, kernel_entry, kernel_args, prio, core_id);
}
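/*
* Minimal usage sketch (hypothetical example task, not part of this file):
*
* static int my_thread(void* arg) {
* kprintf("hello from task %u\n", per_core(current_task)->id);
* return 0;
* }
*
* tid_t id;
* create_kernel_task_on_core(&id, my_thread, NULL, NORMAL_PRIO, CORE_ID);
*/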
#define MAX_ARGS (PAGE_SIZE - 2*sizeof(int) - sizeof(vfs_node_t*))
/** @brief Structure which keeps all
* relevant data for a new user task to start */
typedef struct {
/// Points to the node with the executable in the file system
vfs_node_t* node;
/// Argument count
int argc;
/// Environment var count
int envc;
/// Buffer for env and argv values
char buffer[MAX_ARGS];
} load_args_t;
/** @brief Internal function that loads a task from a load_args_t structure
* holding all the information needed to launch it.
*
* This is where the actual ELF loading and stack setup is done.
*/
static int load_task(load_args_t* largs)
{
uint32_t i, offset, idx, fd_i;
uint32_t addr, npages, flags;
size_t stack = 0;
elf_header_t header;
elf_program_header_t prog_header;
//elf_section_header_t sec_header;
///!!! kfree is missing!
fildes_t *file = kmalloc(sizeof(fildes_t));
if (BUILTIN_EXPECT(!file, 0))
return -ENOMEM;
file->offset = 0;
file->flags = 0;
//TODO: init the whole fildes_t struct!
task_t* curr_task = per_core(current_task);
int err;
if (!largs)
return -EINVAL;
file->node = largs->node;
if (!file->node)
return -EINVAL;
/* init fildes_table */
spinlock_irqsave_lock(&table_lock);
if (!task_table[curr_task->id].fildes_table) {
task_table[curr_task->id].fildes_table = kmalloc(sizeof(filp_t)*NR_OPEN);
if (BUILTIN_EXPECT(!task_table[curr_task->id].fildes_table, 0)) {
spinlock_irqsave_unlock(&table_lock);
return -ENOMEM;
}
memset(task_table[curr_task->id].fildes_table, 0x00, sizeof(filp_t)*NR_OPEN);
for (fd_i = 0; fd_i < 3; fd_i++) {
task_table[curr_task->id].fildes_table[fd_i] = kmalloc(sizeof(fildes_t));
task_table[curr_task->id].fildes_table[fd_i]->count = 1;
}
task_table[curr_task->id].fildes_table[0]->node = findnode_fs("/dev/stdin");
task_table[curr_task->id].fildes_table[1]->node = findnode_fs("/dev/stdout");
task_table[curr_task->id].fildes_table[2]->node = findnode_fs("/dev/stderr");
}
spinlock_irqsave_unlock(&table_lock);
err = read_fs(file, (uint8_t*)&header, sizeof(elf_header_t));
if (err < 0) {
kprintf("read_fs failed: %d\n", err);
return err;
}
if (BUILTIN_EXPECT(header.ident.magic != ELF_MAGIC, 0))
goto invalid;
if (BUILTIN_EXPECT(header.type != ELF_ET_EXEC, 0))
goto invalid;
#ifdef CONFIG_X86_32
if (BUILTIN_EXPECT(header.machine != ELF_EM_386, 0))
goto invalid;
if (BUILTIN_EXPECT(header.ident._class != ELF_CLASS_32, 0))
goto invalid;
#else
if (BUILTIN_EXPECT(header.machine != ELF_EM_X86_64, 0))
goto invalid;
if (BUILTIN_EXPECT(header.ident._class != ELF_CLASS_64, 0))
goto invalid;
#endif
if (BUILTIN_EXPECT(header.ident.data != ELF_DATA_2LSB, 0))
goto invalid;
if (header.entry <= KERNEL_SPACE)
goto invalid;
// interpret program header table
for (i=0; i<header.ph_entry_count; i++) {
file->offset = header.ph_offset+i*header.ph_entry_size;
if (read_fs(file, (uint8_t*)&prog_header, sizeof(elf_program_header_t)) == 0) {
kprintf("Could not read programm header!\n");
continue;
}
switch(prog_header.type)
{
case ELF_PT_LOAD: // load program segment
if (!prog_header.virt_addr)
continue;
npages = (prog_header.mem_size >> PAGE_SHIFT);
if (prog_header.mem_size & (PAGE_SIZE-1))
npages++;
addr = get_pages(npages);
flags = MAP_USER_SPACE;
if (prog_header.flags & PF_X)
flags |= MAP_CODE;
// map page frames in the address space of the current task
if (!map_region(prog_header.virt_addr, addr, npages, flags)) {
kprintf("Could not map 0x%x at 0x%x (%u pages)\n", addr, prog_header.virt_addr, npages);
return -ENOMEM;
}
// clear pages
memset((void*) prog_header.virt_addr, 0x00, npages*PAGE_SIZE);
// set starting point of the heap
if (curr_task->start_heap < prog_header.virt_addr+prog_header.mem_size)
curr_task->start_heap = curr_task->end_heap = prog_header.virt_addr+prog_header.mem_size;
// load program
file->offset = prog_header.offset;
read_fs(file, (uint8_t*)prog_header.virt_addr, prog_header.file_size);
flags = VMA_CACHEABLE;
if (prog_header.flags & PF_R)
flags |= VMA_READ;
if (prog_header.flags & PF_W)
flags |= VMA_WRITE;
if (prog_header.flags & PF_X)
flags |= VMA_EXECUTE;
vma_add(curr_task, prog_header.virt_addr, prog_header.virt_addr+npages*PAGE_SIZE-1, flags);
if (!(prog_header.flags & PF_W))
change_page_permissions(prog_header.virt_addr, prog_header.virt_addr+npages*PAGE_SIZE-1, flags);
break;
case ELF_PT_GNU_STACK: // Indicates stack executability
// create user-level stack
npages = DEFAULT_STACK_SIZE >> PAGE_SHIFT;
if (DEFAULT_STACK_SIZE & (PAGE_SIZE-1))
npages++;
addr = get_pages(npages);
stack = header.entry*2; // virtual address of the stack
if (!map_region(stack, addr, npages, MAP_USER_SPACE)) {
kprintf("Could not map stack at 0x%x\n", stack);
return -ENOMEM;
}
memset((void*) stack, 0x00, npages*PAGE_SIZE);
// create vma regions for the user-level stack
flags = VMA_CACHEABLE;
if (prog_header.flags & PF_R)
flags |= VMA_READ;
if (prog_header.flags & PF_W)
flags |= VMA_WRITE;
if (prog_header.flags & PF_X)
flags |= VMA_EXECUTE;
vma_add(curr_task, stack, stack+npages*PAGE_SIZE-1, flags);
break;
}
}
#if 0
// interpret section header table
for (i=0; i<header.sh_entry_count; i++) {
file.offset = header.sh_offset+i*header.sh_entry_size;
if (read_fs(&file, (uint8_t*)&sec_header, sizeof(elf_section_header_t)) == 0) {
kprintf("Could not read section header!\n");
continue;
}
// TODO: interpret section header
}
#endif
if (BUILTIN_EXPECT(!stack, 0)) {
kprintf("Stack is missing!\n");
return -ENOMEM;
}
// push strings on the stack
offset = DEFAULT_STACK_SIZE-8;
memset((void*) (stack+offset), 0, 4);
offset -= MAX_ARGS;
memcpy((void*) (stack+offset), largs->buffer, MAX_ARGS);
idx = offset;
// push argv on the stack
offset -= largs->argc * sizeof(char*);
for(i=0; i<largs->argc; i++) {
((char**) (stack+offset))[i] = (char*) (stack+idx);
while(((char*) stack)[idx] != '\0')
idx++;
idx++;
}
// push env on the stack
offset -= (largs->envc+1) * sizeof(char*);
for(i=0; i<largs->envc; i++) {
((char**) (stack+offset))[i] = (char*) (stack+idx);
while(((char*) stack)[idx] != '\0')
idx++;
idx++;
}
((char**) (stack+offset))[largs->envc] = NULL;
// push pointer to env
offset -= sizeof(char**);
if (!(largs->envc))
*((char***) (stack+offset)) = NULL;
else
*((char***) (stack+offset)) = (char**) (stack + offset + sizeof(char**));
// push pointer to argv
offset -= sizeof(char**);
*((char***) (stack+offset)) = (char**) (stack + offset + 2*sizeof(char**) + (largs->envc+1) * sizeof(char*));
// push argc on the stack
offset -= sizeof(int);
*((int*) (stack+offset)) = largs->argc;
kfree(largs, sizeof(load_args_t));
// clear fpu state
curr_task->flags &= ~(TASK_FPU_USED|TASK_FPU_INIT);
jump_to_user_code(header.entry, stack+offset);
return 0;
invalid:
kprintf("Invalid executable!\n");
kprintf("magic number 0x%x\n", (uint32_t) header.ident.magic);
kprintf("header type 0x%x\n", (uint32_t) header.type);
kprintf("machine type 0x%x\n", (uint32_t) header.machine);
kprintf("elf ident class 0x%x\n", (uint32_t) header.ident._class);
kprintf("elf identdata !0x%x\n", header.ident.data);
kprintf("program entry point 0x%x\n", (size_t) header.entry);
return -EINVAL;
}
/** @brief Trampoline used to adapt create_task() to user tasks
* which are described by a load_args_t structure */
static int user_entry(void* arg)
{
int ret;
if (BUILTIN_EXPECT(!arg, 0))
return -EINVAL;
ret = load_task((load_args_t*) arg);
kfree(arg, sizeof(load_args_t));
return ret;
}
/** @brief Convenience variant of the create_user_task functions: just call it with an executable's name
*
* @param id Pointer to the tid_t variable which shall be filled with the new task's id
* @param fname Executable's path and filename
* @param argv Arguments list
* @param core_id Start the new task on the core with this id
*
* @return
* - 0 on success
* - -ENOMEM (-12) or -EINVAL (-22) on failure
*/
int create_user_task_on_core(tid_t* id, const char* fname, char** argv, uint32_t core_id)
{
#ifdef CONFIG_X86_32
vfs_node_t* node;
int argc = 0;
size_t i, buffer_size = 0;
load_args_t* load_args = NULL;
char *dest, *src;
node = findnode_fs((char*) fname);
if (!node || !(node->type == FS_FILE))
return -EINVAL;
// determine buffer size of argv
if (argv) {
while (argv[argc]) {
buffer_size += (strlen(argv[argc]) + 1);
argc++;
}
}
if (argc <= 0)
return -EINVAL;
if (buffer_size >= MAX_ARGS)
return -EINVAL;
load_args = kmalloc(sizeof(load_args_t));
if (BUILTIN_EXPECT(!load_args, 0))
return -ENOMEM;
load_args->node = node;
load_args->argc = argc;
load_args->envc = 0;
dest = load_args->buffer;
for (i=0; i<argc; i++) {
src = argv[i];
while ((*dest++ = *src++) != 0);
}
/* create new task */
return create_task(id, user_entry, load_args, NORMAL_PRIO, core_id);
#else
return -EINVAL;
#endif
}
/** @brief Implementation of the execve system call */
int sys_execve(const char* fname, char** argv, char** env)
{
vfs_node_t* node;
vma_t* tmp;
size_t i, buffer_size = 0;
load_args_t* load_args = NULL;
char *dest, *src;
int ret, argc = 0;
int envc = 0;
task_t* curr_task = per_core(current_task);
node = findnode_fs((char*) fname);
if (!node || !(node->type == FS_FILE))
return -EINVAL;
// determine total buffer size of argv and env
if (argv) {
while (argv[argc]) {
buffer_size += (strlen(argv[argc]) + 1);
argc++;
}
}
if (env) {
while (env[envc]) {
buffer_size += (strlen(env[envc]) + 1);
envc++;
}
}
if (argc <= 0)
return -EINVAL;
if (buffer_size >= MAX_ARGS)
return -EINVAL;
load_args = kmalloc(sizeof(load_args_t));
if (BUILTIN_EXPECT(!load_args, 0))
return -ENOMEM;
load_args->node = node;
load_args->argc = argc;
load_args->envc = envc;
dest = load_args->buffer;
for (i=0; i<argc; i++) {
src = argv[i];
while ((*dest++ = *src++) != 0);
}
for (i=0; i<envc; i++) {
src = env[i];
while ((*dest++ = *src++) != 0);
}
spinlock_lock(&curr_task->vma_lock);
// remove old program
while((tmp = curr_task->vma_list) != NULL) {
kfree((void*) tmp->start, tmp->end - tmp->start + 1);
curr_task->vma_list = tmp->next;
kfree((void*) tmp, sizeof(vma_t));
}
spinlock_unlock(&curr_task->vma_lock);
/*
* we use a trap gate to enter the kernel
* => eflags are not changed
* => interrupts are enabled
* => we could directly load the new task
*/
ret = load_task(load_args);
kfree(load_args, sizeof(load_args_t));
return ret;
}
/** @brief Wait for the termination of a task created by the calling task
* @param result Filled with the terminated task's return value (may be NULL)
* @return Id of the terminated task */
tid_t wait(int32_t* result)
{
task_t* curr_task = per_core(current_task);
wait_msg_t tmp = { -1, -1};
/*
* idle tasks are not allowed to wait for another task
* they should always run...
*/
if (BUILTIN_EXPECT(curr_task->status == TASK_IDLE, 0))
return -EINVAL;
mailbox_wait_msg_fetch(&curr_task->inbox, &tmp, 0);
if (result)
*result = tmp.result;
return tmp.id;
}
/** @brief Wake up a blocked task
* @param id The task's tid_t structure
* @return
* - 0 on success
* - -EINVAL (-22) on failure
*/
int wakeup_task(tid_t id)
{
task_t* task;
uint32_t core_id, prio;
uint32_t flags;
int ret = -EINVAL;
if (BUILTIN_EXPECT(id >= MAX_TASKS, 0))
return ret;
flags = irq_nested_disable();
task = task_table + id;
prio = task->prio;
core_id = task->last_core;
if (task->status == TASK_BLOCKED) {
task->status = TASK_READY;
ret = 0;
spinlock_irqsave_lock(&runqueues[core_id].lock);
// increase the number of ready tasks
runqueues[core_id].nr_tasks++;
// do we need to remove from timer queue?
if (task->flags & TASK_TIMER) {
task->flags &= ~TASK_TIMER;
if (task->prev)
task->prev->next = task->next;
if (task->next)
task->next->prev = task->prev;
if (runqueues[core_id].timers.first == task)
runqueues[core_id].timers.first = task->next;
if (runqueues[core_id].timers.last == task)
runqueues[core_id].timers.last = task->prev;
}
// add task to the runqueue
if (!runqueues[core_id].queue[prio-1].last) {
runqueues[core_id].queue[prio-1].last = runqueues[core_id].queue[prio-1].first = task;
task->next = task->prev = NULL;
runqueues[core_id].prio_bitmap |= (1 << prio);
} else {
task->prev = runqueues[core_id].queue[prio-1].last;
task->next = NULL;
runqueues[core_id].queue[prio-1].last->next = task;
runqueues[core_id].queue[prio-1].last = task;
}
spinlock_irqsave_unlock(&runqueues[core_id].lock);
}
irq_nested_enable(flags);
return ret;
}
/** @brief Block current task
*
* The current task's status will be changed to TASK_BLOCKED
*
* @return
* - 0 on success
* - -EINVAL (-22) on failure
*/
int block_current_task(void)
{
task_t* curr_task;
tid_t id;
uint32_t core_id, prio;
uint32_t flags;
int ret = -EINVAL;
flags = irq_nested_disable();
curr_task = per_core(current_task);
id = curr_task->id;
prio = curr_task->prio;
core_id = CORE_ID;
if (task_table[id].status == TASK_RUNNING) {
task_table[id].status = TASK_BLOCKED;
ret = 0;
spinlock_irqsave_lock(&runqueues[core_id].lock);
// reduce the number of ready tasks
runqueues[core_id].nr_tasks--;
// remove task from queue
if (task_table[id].prev)
task_table[id].prev->next = task_table[id].next;
if (task_table[id].next)
task_table[id].next->prev = task_table[id].prev;
if (runqueues[core_id].queue[prio-1].first == task_table+id)
runqueues[core_id].queue[prio-1].first = task_table[id].next;
if (runqueues[core_id].queue[prio-1].last == task_table+id) {
runqueues[core_id].queue[prio-1].last = task_table[id].prev;
if (!runqueues[core_id].queue[prio-1].last)
runqueues[core_id].queue[prio-1].last = runqueues[core_id].queue[prio-1].first;
}
// No valid task in queue => update prio_bitmap
if (!runqueues[core_id].queue[prio-1].first)
runqueues[core_id].prio_bitmap &= ~(1 << prio);
spinlock_irqsave_unlock(&runqueues[core_id].lock);
}
irq_nested_enable(flags);
return ret;
}
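/** @brief Block the current task until a given point in time
*
* The task is removed from its runqueue, marked with TASK_TIMER and inserted
* into the core's timer queue, which is kept sorted by timeout.
*
* @param deadline Absolute clock tick at which the task becomes ready again
* @return
* - 0 on success
* - -EINVAL (-22) if the current task is not in the TASK_RUNNING state
*/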
int set_timer(uint64_t deadline)
{
task_t* curr_task;
task_t* tmp;
uint32_t core_id, prio;
uint32_t flags;
int ret = -EINVAL;
flags = irq_nested_disable();
curr_task = per_core(current_task);
prio = curr_task->prio;
core_id = CORE_ID;
if (curr_task->status == TASK_RUNNING) {
curr_task->status = TASK_BLOCKED;
curr_task->timeout = deadline;
curr_task->flags |= TASK_TIMER;
ret = 0;
spinlock_irqsave_lock(&runqueues[core_id].lock);
// reduce the number of ready tasks
runqueues[core_id].nr_tasks--;
// remove task from queue
if (curr_task->prev)
curr_task->prev->next = curr_task->next;
if (curr_task->next)
curr_task->next->prev = curr_task->prev;
if (runqueues[core_id].queue[prio-1].first == curr_task)
runqueues[core_id].queue[prio-1].first = curr_task->next;
if (runqueues[core_id].queue[prio-1].last == curr_task) {
runqueues[core_id].queue[prio-1].last = curr_task->prev;
if (!runqueues[core_id].queue[prio-1].last)
runqueues[core_id].queue[prio-1].last = runqueues[core_id].queue[prio-1].first;
}
// No valid task in queue => update prio_bitmap
if (!runqueues[core_id].queue[prio-1].first)
runqueues[core_id].prio_bitmap &= ~(1 << prio);
// add task to the timer queue
tmp = runqueues[core_id].timers.first;
if (!tmp) {
runqueues[core_id].timers.first = runqueues[core_id].timers.last = curr_task;
curr_task->prev = curr_task->next = NULL;
} else {
while(tmp && (deadline >= tmp->timeout))
tmp = tmp->next;
if (!tmp) {
curr_task->next = NULL;
curr_task->prev = runqueues[core_id].timers.last;
if (runqueues[core_id].timers.last)
runqueues[core_id].timers.last->next = curr_task;
runqueues[core_id].timers.last = curr_task;
// obsolete lines...
//if (!runqueues[core_id].timers.first)
// runqueues[core_id].timers.first = curr_task;
} else {
curr_task->prev = tmp->prev;
curr_task->next = tmp;
tmp->prev = curr_task;
if (curr_task->prev)
curr_task->prev->next = curr_task;
if (runqueues[core_id].timers.first == tmp)
runqueues[core_id].timers.first = curr_task;
}
}
spinlock_irqsave_unlock(&runqueues[core_id].lock);
} else kprintf("Task is already blocked. No timer will be set!\n");
irq_nested_enable(flags);
return ret;
}
#ifndef CONFIG_TICKLESS
/* the load is computed as a fixed-point value */
#define FSHIFT 11 /* nr of bits of precision */
#define FIXED_1 (1<<FSHIFT) /* 1.0 as fixed-point */
#define EXP_1 1884 /* 1/exp(5sec/1min) */
#define EXP_5 2014 /* 1/exp(5sec/5min) */
#define EXP_15 2037 /* 1/exp(5sec/15min) */
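/*
* Each core keeps three exponentially decaying averages of its number of
* ready tasks (load[0..2]). Whenever load_counter expires (every TIMER_FREQ/5
* ticks), they are updated as
*
* load = (load * EXP + nr_tasks * FIXED_1 * (FIXED_1 - EXP)) >> FSHIFT
*
* with EXP = EXP_1, EXP_5 or EXP_15, respectively.
*/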
void update_load(void)
{
uint32_t core_id = CORE_ID;
runqueues[core_id].balance_counter--;
runqueues[core_id].load_counter--;
if (runqueues[core_id].load_counter <= 0) {
runqueues[core_id].load_counter += TIMER_FREQ/5;
spinlock_irqsave_lock(&runqueues[core_id].lock);
runqueues[core_id].load[0] *= EXP_1;
runqueues[core_id].load[0] += (runqueues[core_id].nr_tasks * FIXED_1) * (FIXED_1 - EXP_1);
runqueues[core_id].load[0] >>= FSHIFT;
runqueues[core_id].load[1] *= EXP_5;
runqueues[core_id].load[1] += (runqueues[core_id].nr_tasks * FIXED_1) * (FIXED_1 - EXP_5);
runqueues[core_id].load[1] >>= FSHIFT;
runqueues[core_id].load[2] *= EXP_15;
runqueues[core_id].load[2] += (runqueues[core_id].nr_tasks * FIXED_1) * (FIXED_1 - EXP_15);
runqueues[core_id].load[2] >>= FSHIFT;
spinlock_irqsave_unlock(&runqueues[core_id].lock);
//kprintf("load of core %u: %u, %u, %u, %u\n", core_id, runqueues[core_id].load[0], runqueues[core_id].load[1], runqueues[core_id].load[2], runqueues[core_id].nr_tasks);
}
}
void dump_load(void)
{
uint32_t i;
#if MAX_CORES > 1
uint32_t ncores = atomic_int32_read(&cpu_online);
#else
uint32_t ncores = 1;
#endif
for(i=0; i<ncores; i++)
{
kprintf("Load average of core %u: %u.%u, %u.%u, %u.%u\n",
i, runqueues[i].load[0] >> FSHIFT,
((runqueues[i].load[0] & ((1 << FSHIFT) - 1)) * 100) / (1 << FSHIFT),
runqueues[i].load[1] >> FSHIFT,
((runqueues[i].load[1] & ((1 << FSHIFT) - 1)) * 100) / (1 << FSHIFT),
runqueues[i].load[2] >> FSHIFT,
((runqueues[i].load[2] & ((1 << FSHIFT) - 1)) * 100) / (1 << FSHIFT));
}
}
#if MAX_CORES > 1
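/** @brief Simple load balancing by task stealing
*
* If another core's load average is higher than the local one, the last task
* of that core's lowest-priority non-empty queue is moved to the local
* runqueue. The balance_counter limits how often a core tries to steal.
*/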
void load_balancing(void)
{
#if 1
uint32_t i, core_id = CORE_ID;
uint32_t prio;
task_t* task;
for(i=0; (i<atomic_int32_read(&cpu_online)) && (runqueues[core_id].balance_counter <= 0); i++)
{
if (i == core_id)
continue;
if ((runqueues[i].load[0] >> (FSHIFT-1)) > (runqueues[core_id].load[0] >> (FSHIFT-1))) {
//kprintf("Try to steal a task from core %u (load %u) to %u (load %u)\n", i, runqueues[i].load[0], core_id, runqueues[core_id].load[0]);
//kprintf("Task on core %u: %u, core %u, %u\n", i, runqueues[i].nr_tasks, core_id, runqueues[i].nr_tasks);
spinlock_irqsave_lock(&runqueues[i].lock);
prio = lsb(runqueues[i].prio_bitmap);
if (prio < sizeof(size_t)*8) {
// steal a ready task
task = runqueues[i].queue[prio-1].last;
kprintf("Core %u steals the task %d form %u with prio %u\n", core_id, task->id, i, prio);
// remove last element from queue i
if (task->prev)
task->prev->next = NULL;
if (runqueues[i].queue[prio-1].first == task) {
runqueues[i].queue[prio-1].first = runqueues[i].queue[prio-1].last = NULL;
runqueues[i].prio_bitmap &= ~(1 << prio);
} else runqueues[i].queue[prio-1].last = task->prev;
// update task counters
runqueues[i].nr_tasks--;
spinlock_irqsave_unlock(&runqueues[i].lock);
// add task at the end of queue core_id
spinlock_irqsave_lock(&runqueues[core_id].lock);
if (!runqueues[core_id].queue[prio-1].last) {
runqueues[core_id].queue[prio-1].first = runqueues[core_id].queue[prio-1].last = task;
task->next = task->prev = NULL;
} else {
runqueues[core_id].queue[prio-1].last->next = task;
task->prev = runqueues[core_id].queue[prio-1].last;
runqueues[core_id].queue[prio-1].last = task;
task->next = NULL;
}
runqueues[core_id].prio_bitmap |= (1 << prio);
// update task counters
runqueues[core_id].nr_tasks++;
runqueues[core_id].balance_counter = TIMER_FREQ/2;
spinlock_irqsave_unlock(&runqueues[core_id].lock);
} else {
#if 1
spinlock_irqsave_unlock(&runqueues[i].lock);
#else
task_t* tmp;
// steal a blocked task
task = runqueues[i].timers.first;
if (!task) { // Oops, found no valid task to steal
spinlock_irqsave_unlock(&runqueues[i].lock);
goto no_task_found;
}
kprintf("Core %u steals the blocked task %d from %u with prio %u\n", core_id, task->id, i, task->prio);
// remove first timer from queue i
if (runqueues[i].timers.first == runqueues[i].timers.last)
runqueues[i].timers.first = runqueues[i].timers.last = NULL;
else
runqueues[i].timers.first = runqueues[i].timers.first->next;
spinlock_irqsave_unlock(&runqueues[i].lock);
spinlock_irqsave_lock(&runqueues[core_id].lock);
// add timer to queue core_id
tmp = runqueues[core_id].timers.first;
while(tmp && (task->timeout >= tmp->timeout))
tmp = tmp->next;
if (!tmp) {
task->next = NULL;
task->prev = runqueues[core_id].timers.last;
if (runqueues[core_id].timers.last)
runqueues[core_id].timers.last->next = task;
runqueues[core_id].timers.last = task;
if (!runqueues[core_id].timers.first)
runqueues[core_id].timers.first = task;
} else {
task->prev = tmp->prev;
task->next = tmp;
tmp->prev = task;
if (task->prev)
task->prev->next = task;
if (runqueues[core_id].timers.first == tmp)
runqueues[core_id].timers.first = task;
}
// => reschedule on the new core
task->last_core = CORE_ID;
// update task counters
runqueues[core_id].balance_counter = TIMER_FREQ/2;
spinlock_irqsave_lock(&runqueues[core_id].lock);
#endif
}
}
//no_task_found:
}
if (runqueues[core_id].balance_counter <= 0)
runqueues[core_id].balance_counter = TIMER_FREQ/2;
#endif
}
#endif
#endif // CONFIG_TICKLESS
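/** @brief Wake up all tasks whose timer deadline has expired
*
* Walks the current core's timer queue (sorted by timeout) and moves every
* task whose deadline lies in the past back into its priority runqueue.
*/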
void check_timers(void)
{
uint32_t core_id = CORE_ID;
uint32_t prio;
uint64_t current_tick;
spinlock_irqsave_lock(&runqueues[core_id].lock);
// check timers
current_tick = get_clock_tick();
while (runqueues[core_id].timers.first && runqueues[core_id].timers.first->timeout <= current_tick)
{
task_t* task = runqueues[core_id].timers.first;
// remove timer from queue
runqueues[core_id].timers.first = runqueues[core_id].timers.first->next;
if (runqueues[core_id].timers.first)
runqueues[core_id].timers.first->prev = NULL;
else
runqueues[core_id].timers.last = NULL;
task->flags &= ~TASK_TIMER;
// wakeup task
if (task->status == TASK_BLOCKED) {
task->status = TASK_READY;
prio = task->prio;
// increase the number of ready tasks
runqueues[core_id].nr_tasks++;
// add task to the runqueue
if (!runqueues[core_id].queue[prio-1].first) {
runqueues[core_id].queue[prio-1].last = runqueues[core_id].queue[prio-1].first = task;
task->next = task->prev = NULL;
runqueues[core_id].prio_bitmap |= (1 << prio);
} else {
task->prev = runqueues[core_id].queue[prio-1].last;
task->next = NULL;
runqueues[core_id].queue[prio-1].last->next = task;
runqueues[core_id].queue[prio-1].last = task;
}
}
}
spinlock_irqsave_unlock(&runqueues[core_id].lock);
}
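/** @brief Determine the next task to run on the current core
*
* Picks the ready task with the highest priority. If no task is ready and the
* current task cannot continue, the core's idle task is selected. Typically
* called with interrupts disabled (see reschedule()).
*
* @return Pointer to the location where the outgoing task's stack pointer is
* saved if a context switch is required, NULL otherwise
*/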
size_t** scheduler(void)
{
task_t* orig_task;
task_t* curr_task;
uint32_t core_id = CORE_ID;
uint32_t prio;
orig_task = curr_task = per_core(current_task);
curr_task->last_core = core_id;
spinlock_irqsave_lock(&runqueues[core_id].lock);
/* signals that this task slot can be reused */
if (curr_task->status == TASK_FINISHED) {
curr_task->status = TASK_INVALID;
runqueues[core_id].old_task = curr_task;
} else runqueues[core_id].old_task = NULL; // reset old task
prio = msb(runqueues[core_id].prio_bitmap); // determines highest priority
#ifndef CONFIG_TICKLESS
#if MAX_CORES > 1
if (prio >= sizeof(size_t)*8) {
// trigger load balancing
runqueues[core_id].balance_counter -= TIMER_FREQ/20;
load_balancing();
prio = msb(runqueues[core_id].prio_bitmap); // retry...
}
#endif
#endif
if (prio >= sizeof(size_t)*8) {
if ((curr_task->status == TASK_RUNNING) || (curr_task->status == TASK_IDLE))
goto get_task_out;
curr_task = per_core(current_task) = runqueues[core_id].idle;
} else {
// Does the current task have a higher priority? => no task switch
if ((curr_task->prio > prio) && (curr_task->status == TASK_RUNNING))
goto get_task_out;
if (curr_task->status == TASK_RUNNING) {
curr_task->status = TASK_READY;
runqueues[core_id].old_task = curr_task;
}
curr_task = per_core(current_task) = runqueues[core_id].queue[prio-1].first;
if (BUILTIN_EXPECT(curr_task->status == TASK_INVALID, 0)) {
pushbg(COL_RED);
kprintf("Upps!!!!!!! Got invalid task %d, orig task %d\n", curr_task->id, orig_task->id);
popbg();
}
curr_task->status = TASK_RUNNING;
// remove new task from queue
runqueues[core_id].queue[prio-1].first = curr_task->next;
if (!curr_task->next) {
runqueues[core_id].queue[prio-1].last = NULL;
runqueues[core_id].prio_bitmap &= ~(1 << prio);
}
curr_task->next = curr_task->prev = NULL;
}
get_task_out:
spinlock_irqsave_unlock(&runqueues[core_id].lock);
if (curr_task != orig_task) {
/* if the original task is using the FPU, we need to save the FPU context */
if ((orig_task->flags & TASK_FPU_USED) && (orig_task->status == TASK_READY)) {
save_fpu_state(&(orig_task->fpu));
orig_task->flags &= ~TASK_FPU_USED;
}
//kprintf("schedule from %u to %u with prio %u on core %u\n", orig_task->id, curr_task->id, (uint32_t)curr_task->prio, CORE_ID);
return (size_t**) &(orig_task->last_stack_pointer);
}
return NULL;
}
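/** @brief Trigger a rescheduling on the current core
*
* Disables interrupts, runs the scheduler and switches the context if a
* different task was selected.
*/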
void reschedule(void)
{
size_t** stack;
uint32_t flags = irq_nested_disable();
if ((stack = scheduler()))
switch_context(stack);
irq_nested_enable(flags);
}