From 4c9855c83a773efd5ea88c6474ece5cca52df916 Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Wed, 17 Aug 2011 13:51:19 +0200 Subject: [PATCH 1/7] redesign of the scheduler, defining of a runqueue per core => Currently, we work stealing isn't supported --- arch/x86/include/asm/processor.h | 16 ++ arch/x86/kernel/timer.c | 7 +- include/metalsvm/fs.h | 6 +- include/metalsvm/semaphore.h | 7 +- include/metalsvm/tasks.h | 21 +- include/metalsvm/tasks_types.h | 45 +++- kernel/client.c | 2 +- kernel/main.c | 3 +- kernel/server.c | 4 +- kernel/tasks.c | 400 +++++++++++++++++++++++-------- kernel/tests.c | 14 +- lwip/src/arch/sys_arch.c | 2 +- lwip/src/include/lwipopts.h | 14 ++ 13 files changed, 400 insertions(+), 141 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index da3b3556..182db412 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -272,6 +272,22 @@ static inline uint32_t read_eflags(void) return result; } +/** @brief search the first bit, which is set + * + * @param i source operand + * @return first bit, which is set in the source operand + */ +static inline uint32_t last_set(uint32_t i) +{ + uint32_t ret; + + if (!i) + return 0; + asm volatile ("bsr %0, %1" : "=r"(ret) : "r"(i)); + + return ret; +} + /** @brief Read extended instruction pointer * @return The EIP's value */ diff --git a/arch/x86/kernel/timer.c b/arch/x86/kernel/timer.c index 8a806eb6..ffc7bf00 100644 --- a/arch/x86/kernel/timer.c +++ b/arch/x86/kernel/timer.c @@ -104,12 +104,7 @@ int timer_wait(unsigned int ticks) check_workqueues(); if (timer_ticks < eticks) { - uint32_t flags = irq_nested_disable(); - curr_task->timeout = eticks; - curr_task->flags |= TASK_TIMER_USED; - curr_task->status = TASK_BLOCKED; - irq_nested_enable(flags); - + set_timer(eticks); reschedule(); } } diff --git a/include/metalsvm/fs.h b/include/metalsvm/fs.h index 56eba752..10b8412a 100644 --- a/include/metalsvm/fs.h +++ b/include/metalsvm/fs.h @@ -76,11 +76,11 @@ typedef struct block_list { } block_list_t; typedef struct vfs_node { - /// The permissions mask. + /// The permissions mask. uint32_t mask; - /// The owning user. + /// The owning user. uint32_t uid; - /// The owning group. + /// The owning group. uint32_t gid; /// Includes the node type. See #defines above. uint32_t type; diff --git a/include/metalsvm/semaphore.h b/include/metalsvm/semaphore.h index 19ad8e38..c0dc0f81 100644 --- a/include/metalsvm/semaphore.h +++ b/include/metalsvm/semaphore.h @@ -124,7 +124,7 @@ next_try1: } else { s->queue[s->pos] = curr_task->id; s->pos = (s->pos + 1) % MAX_TASKS; - curr_task->status = TASK_BLOCKED; + block_current_task(); spinlock_irqsave_unlock(&s->lock); reschedule(); NOP2; @@ -152,11 +152,10 @@ next_try2: } s->queue[s->pos] = curr_task->id; s->pos = (s->pos + 1) % MAX_TASKS; - curr_task->timeout = deadline; - curr_task->flags |= TASK_TIMER_USED; - curr_task->status = TASK_BLOCKED; + set_timer(deadline); spinlock_irqsave_unlock(&s->lock); reschedule(); + NOP2; goto next_try2; } } diff --git a/include/metalsvm/tasks.h b/include/metalsvm/tasks.h index 0b0bcdac..3be44077 100644 --- a/include/metalsvm/tasks.h +++ b/include/metalsvm/tasks.h @@ -65,7 +65,7 @@ int multitasking_init(void); * - 0 on success * - -EINVAL (-22) on failure */ -int create_kernel_task(tid_t* id, entry_point_t ep, void* arg); +int create_kernel_task(tid_t* id, entry_point_t ep, void* arg, uint8_t prio); /** @brief Create a user level task. * @@ -100,6 +100,25 @@ void scheduler(void); */ int wakeup_task(tid_t); +/** @brief Block current task + * + * The current task's status will be changed to TASK_BLOCKED + * + * @return + * - 0 on success + * - -EINVAL (-22) on failure + */ +int block_current_task(void); + +/** @brief Block current task until timer expires + * + * @param deadline Clock tick, when the timer expires + * @return + * - 0 on success + * - -EINVAL (-22) on failure + */ +int set_timer(uint64_t deadline); + /** @brief Abort current task */ void NORETURN abort(void); diff --git a/include/metalsvm/tasks_types.h b/include/metalsvm/tasks_types.h index 3379fccb..214df8e7 100644 --- a/include/metalsvm/tasks_types.h +++ b/include/metalsvm/tasks_types.h @@ -40,6 +40,13 @@ extern "C" { #endif +#define MAX_PRIO 32 +#define REALTIME_PRIO 31 +#define HIGH_PRIO 16 +#define NORMAL_PRIO 8 +#define LOW_PRIO 1 +#define IDLE_PRIO 0 + #define TASK_INVALID 0 #define TASK_READY 1 #define TASK_RUNNING 2 @@ -50,8 +57,6 @@ extern "C" { #define TASK_DEFAULT_FLAGS 0 #define TASK_FPU_INIT (1 << 0) #define TASK_FPU_USED (1 << 1) -#define TASK_TIMER_USED (1 << 2) -#define TASK_SWITCH_IN_PROGRESS (1 << 3) typedef int (*entry_point_t)(void*); typedef int (STDCALL *internal_entry_point_t)(void*); @@ -64,11 +69,15 @@ typedef struct task { /// Task status (INVALID, READY, RUNNING, ...) uint32_t status; /// Additional status flags. For instance, to signalize the using of the FPU - uint32_t flags; - /// Number of used time slices - uint32_t time_slices; + uint8_t flags; + /// Task priority + uint8_t prio; /// timeout for a blocked task uint64_t timeout; + /// next task in the queue + struct task* next; + /// previous task in the queue + struct task* prev; /// Usage in number of pages atomic_int32_t user_usage; /// Avoids concurrent access to the page directory @@ -82,13 +91,11 @@ typedef struct task { /// starting time/tick of the task uint64_t start_tick; /// Start address of the heap - uint32_t start_heap; + size_t start_heap; /// End address of the heap - uint32_t end_heap; -#ifdef CONFIG_LWIP + size_t end_heap; /// LwIP error code int lwip_err; -#endif /// Mail inbox mailbox_wait_msg_t inbox; /// Mail outbox array @@ -97,6 +104,26 @@ typedef struct task { union fpu_state fpu; } task_t; +typedef struct { + task_t* first; + task_t* last; +} task_list_t; + +typedef struct { + /// idle task + task_t* idle __attribute__ ((aligned (CACHE_LINE))); + /// previous task + task_t* old_task; + /// indicates the used priority queues + uint32_t prio_bitmap; + /// a queue for each priority + task_list_t queue[MAX_PRIO]; + /// a queue for timers + task_list_t timers; + /// lock for this runqueue + spinlock_t lock; +} runqueue_t; + #ifdef __cplusplus } #endif diff --git a/kernel/client.c b/kernel/client.c index 386e69a7..0c3982fb 100644 --- a/kernel/client.c +++ b/kernel/client.c @@ -42,7 +42,7 @@ int cli_ConnectTo(Client* cli,char * pAdresse,unsigned short Port,int webAdresse if (connect(cli->sSocket,(const struct sockaddr*)&cli->adAddr, sizeof(cli->adAddr))==0) { - create_kernel_task(&cli->bThread,cli_WaitForPacket,cli); + create_kernel_task(&cli->bThread,cli_WaitForPacket,cli, NORMAL_PRIO); if (cli->_OnConnect != 0) { diff --git a/kernel/main.c b/kernel/main.c index f89d2bc7..b35c947b 100644 --- a/kernel/main.c +++ b/kernel/main.c @@ -89,8 +89,7 @@ int main(void) kprintf("Current available memory: %u MBytes\n", atomic_int32_read(&total_available_pages)/((1024*1024)/PAGE_SIZE)); sleep(5); - create_kernel_task(NULL, initd, NULL); - per_core(current_task)->time_slices = 0; // reset the number of time slices + create_kernel_task(NULL, initd, NULL, NORMAL_PRIO); reschedule(); while(1) { diff --git a/kernel/server.c b/kernel/server.c index bd5c441a..1d988eaa 100644 --- a/kernel/server.c +++ b/kernel/server.c @@ -78,7 +78,7 @@ void* srv_WaitForConnection(Server* srv) t = (ServerThreadArgs*) kmalloc(sizeof(ServerThreadArgs)); t->ID = i; t->srv = srv; - create_kernel_task(&srv->bThreads[i],srv_WaitForPacket,t); + create_kernel_task(&srv->bThreads[i],srv_WaitForPacket,t, NORMAL_PRIO); break; } @@ -175,7 +175,7 @@ int server_init(Server* srv, unsigned short Port, unsigned int dwMaxConnections) bind( srv->sSocket,(const struct sockaddr *) &srv->adAddr, sizeof(srv->adAddr)); // Der Server an die Adresse binden; listen(srv->sSocket,srv->dwMaximumConnections); // Den Server in listenig State versetzen - create_kernel_task(&srv->bThread_listen,srv_WaitForConnection,srv); + create_kernel_task(&srv->bThread_listen,srv_WaitForConnection,srv, NORMAL_PRIO); // sConnections[0] = accept(sSocket,(struct sockaddr*)&tmpAddr,&tmpAddrLen); // t.ID = 0; // bthread_create(&bThreads[0],NULL,(start_routine) srv_WaitForPacket,&t); diff --git a/kernel/tasks.c b/kernel/tasks.c index 2dec1348..cf16ef40 100644 --- a/kernel/tasks.c +++ b/kernel/tasks.c @@ -47,14 +47,14 @@ * A task's id will be its position in this array. */ static task_t task_table[MAX_TASKS] = { \ - [0] = {0, TASK_IDLE, 0, 0, 0, ATOMIC_INIT(0), SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, 0, 0, 0, 0}, \ - [1 ... MAX_TASKS-1] = {0, TASK_INVALID, 0, 0, 0, ATOMIC_INIT(0), SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, 0, 0, 0, 0}}; + [0] = {0, TASK_IDLE, 0, 0, 0, NULL, NULL, ATOMIC_INIT(0), SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, 0, 0, 0, 0}, \ + [1 ... MAX_TASKS-1] = {0, TASK_INVALID, 0, 0, 0, NULL, NULL, ATOMIC_INIT(0), SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, 0, 0, 0, 0}}; static spinlock_irqsave_t table_lock = SPINLOCK_IRQSAVE_INIT; +static runqueue_t runqueues[MAX_CORES] = { \ + [0] = {task_table+0, NULL, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_INIT}, \ + [1 ... MAX_CORES-1] = {NULL, NULL, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_INIT}}; DEFINE_PER_CORE(task_t*, current_task, task_table+0); -#if MAX_CORES > 1 -DEFINE_PER_CORE_STATIC(task_t*, old_task, NULL); -#endif /** @brief helper function for the assembly code to determine the current task * @return Pointer to the task_t structure of current task @@ -65,6 +65,7 @@ task_t* get_current_task(void) { int dump_scheduling_statistics(void) { +#if 0 uint32_t i; uint32_t id = 0; @@ -77,7 +78,7 @@ int dump_scheduling_statistics(void) id++; } } - +#endif return 0; } @@ -91,6 +92,7 @@ int multitasking_init(void) { memset(task_table[0].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS); task_table[0].pgd = get_boot_pgd(); task_table[0].flags = TASK_DEFAULT_FLAGS; + task_table[0].prio = IDLE_PRIO; return 0; } @@ -103,13 +105,14 @@ size_t get_idle_task(uint32_t id) task_table[id].id = id; task_table[id].status = TASK_IDLE; + task_table[id].prio = IDLE_PRIO; task_table[id].flags = TASK_DEFAULT_FLAGS; - task_table[id].time_slices = 0; atomic_int32_set(&task_table[id].user_usage, 0); mailbox_wait_msg_init(&task_table[id].inbox); memset(task_table[id].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS); task_table[id].pgd = get_boot_pgd(); current_task[id].var = task_table+id; + runqueues[id].idle = task_table+id; return get_stack(id); #else @@ -117,6 +120,32 @@ size_t get_idle_task(uint32_t id) #endif } +static void finish_task_switch(void) +{ + uint8_t prio; + uint32_t core_id = CORE_ID; + task_t* old; + + spinlock_lock(&runqueues[core_id].lock); + if ((old = runqueues[core_id].old_task) != NULL) { + prio = old->prio; + if (!runqueues[core_id].queue[prio].first) { + old->prev = NULL; + runqueues[core_id].queue[prio].first = runqueues[core_id].queue[prio].last = old; + } else { + old->prev = runqueues[core_id].queue[prio].last; + runqueues[core_id].queue[prio].last->next = old; + runqueues[core_id].queue[prio].last = old; + } + runqueues[core_id].old_task = NULL; + runqueues[core_id].prio_bitmap |= (1 << prio); + old->next = NULL; + } + spinlock_unlock(&runqueues[core_id].lock); + + irq_enable(); +} + /** @brief Wakeup tasks which are waiting for a message from the current one * * @param result Current task's resulting return value @@ -203,14 +232,18 @@ void NORETURN abort(void) { * - 0 on success * - -ENOMEM (-12) or -EINVAL (-22) on failure */ -static int create_task(tid_t* id, internal_entry_point_t ep, void* arg) +static int create_task(tid_t* id, internal_entry_point_t ep, void* arg, uint8_t prio) { task_t* curr_task; int ret = -ENOMEM; - unsigned int i; + unsigned int i, core_id = CORE_ID; if (BUILTIN_EXPECT(!ep, 0)) return -EINVAL; + if (BUILTIN_EXPECT(prio == IDLE_PRIO, 0)) + return -EINVAL; + if (BUILTIN_EXPECT(prio >= MAX_PRIO, 0)) + return -EINVAL; spinlock_irqsave_lock(&table_lock); @@ -229,7 +262,7 @@ static int create_task(tid_t* id, internal_entry_point_t ep, void* arg) task_table[i].id = i; task_table[i].status = TASK_READY; task_table[i].flags = TASK_DEFAULT_FLAGS; - task_table[i].time_slices = 0; + task_table[i].prio = prio; spinlock_init(&task_table[i].vma_lock); task_table[i].vma_list = NULL; mailbox_wait_msg_init(&task_table[i].inbox); @@ -245,6 +278,22 @@ static int create_task(tid_t* id, internal_entry_point_t ep, void* arg) task_table[i].end_heap = 0; task_table[i].lwip_err = 0; task_table[i].start_tick = get_clock_tick(); + + // add task in the runqueue + spinlock_lock(&runqueues[core_id].lock); + runqueues[core_id].prio_bitmap |= (1 << prio); + if (!runqueues[core_id].queue[prio].first) { + task_table[i].prev = NULL; + runqueues[core_id].queue[prio].first = task_table+i; + runqueues[core_id].queue[prio].last = task_table+i; + task_table[i].next = NULL; + } else { + task_table[i].prev = runqueues[core_id].queue[prio].last; + runqueues[core_id].queue[prio].last->next = task_table+i; + runqueues[core_id].queue[prio].last = task_table+i; + task_table[i].next = NULL; + } + spinlock_unlock(&runqueues[core_id].lock); break; } } @@ -258,7 +307,7 @@ create_task_out: int sys_fork(void) { int ret = -ENOMEM; - unsigned int i; + unsigned int i, core_id = CORE_ID; task_t* parent_task = per_core(current_task); vma_t** child; vma_t* parent; @@ -304,12 +353,29 @@ int sys_fork(void) mailbox_wait_msg_init(&task_table[i].inbox); memset(task_table[i].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS); task_table[i].outbox[parent_task->id] = &parent_task->inbox; - task_table[i].flags = parent_task->flags & ~TASK_SWITCH_IN_PROGRESS; + task_table[i].flags = parent_task->flags; memcpy(&(task_table[i].fpu), &(parent_task->fpu), sizeof(union fpu_state)); task_table[i].start_tick = get_clock_tick(); task_table[i].start_heap = 0; task_table[i].end_heap = 0; task_table[i].lwip_err = 0; + task_table[i].prio = parent_task->prio; + + // add task in the runqueue + spinlock_lock(&runqueues[core_id].lock); + runqueues[core_id].prio_bitmap |= (1 << parent_task->prio); + if (!runqueues[core_id].queue[parent_task->prio].first) { + task_table[i].prev = NULL; + runqueues[core_id].queue[parent_task->prio].first = task_table+i; + runqueues[core_id].queue[parent_task->prio].last = task_table+i; + task_table[i].next = NULL; + } else { + task_table[i].prev = runqueues[core_id].queue[parent_task->prio].last; + runqueues[core_id].queue[parent_task->prio].last->next = task_table+i; + runqueues[core_id].queue[parent_task->prio].last = task_table+i; + task_table[i].next = NULL; + } + spinlock_unlock(&runqueues[core_id].lock); ret = arch_fork(task_table+i); @@ -318,13 +384,7 @@ int sys_fork(void) // Leave the function without releasing the locks // because the locks are already released // by the parent task! -#if MAX_CORES > 1 - task_t* old = per_core(old_task); - - if (old) - old->flags &= ~TASK_SWITCH_IN_PROGRESS; -#endif - irq_enable(); + finish_task_switch(); return 0; } @@ -358,13 +418,8 @@ static int STDCALL kernel_entry(void* args) { int ret; kernel_args_t* kernel_args = (kernel_args_t*) args; -#if MAX_CORES > 1 - task_t* old = per_core(old_task); - if (old) - old->flags &= ~TASK_SWITCH_IN_PROGRESS; -#endif - irq_enable(); + finish_task_switch(); if (BUILTIN_EXPECT(!kernel_args, 0)) return -EINVAL; @@ -376,7 +431,7 @@ static int STDCALL kernel_entry(void* args) return ret; } -int create_kernel_task(tid_t* id, entry_point_t ep, void* args) +int create_kernel_task(tid_t* id, entry_point_t ep, void* args, uint8_t prio) { kernel_args_t* kernel_args; @@ -387,7 +442,10 @@ int create_kernel_task(tid_t* id, entry_point_t ep, void* args) kernel_args->func = ep; kernel_args->args = args; - return create_task(id, kernel_entry, kernel_args); + if (prio >= MAX_PRIO) + prio = NORMAL_PRIO; + + return create_task(id, kernel_entry, kernel_args, prio); } #define MAX_ARGS (PAGE_SIZE - 2*sizeof(int) - sizeof(vfs_node_t*)) @@ -616,13 +674,8 @@ invalid: static int STDCALL user_entry(void* arg) { int ret; -#if MAX_CORES > 1 - task_t* old = per_core(old_task); - if (old) - old->flags &= ~TASK_SWITCH_IN_PROGRESS; -#endif - irq_enable(); + finish_task_switch(); if (BUILTIN_EXPECT(!arg, 0)) return -EINVAL; @@ -680,7 +733,7 @@ int create_user_task(tid_t* id, const char* fname, char** argv) while ((*dest++ = *src++) != 0); } - return create_task(id, user_entry, load_args); + return create_task(id, user_entry, load_args, NORMAL_PRIO); } /** @brief Used by the execve-Systemcall */ @@ -791,54 +844,182 @@ tid_t wait(int32_t* result) */ int wakeup_task(tid_t id) { + task_t* task; + uint32_t core_id, prio; + uint32_t flags; int ret = -EINVAL; - spinlock_irqsave_lock(&table_lock); + flags = irq_nested_disable(); + + core_id = CORE_ID; + task = task_table + id; + prio = task->prio; if (task_table[id].status == TASK_BLOCKED) { task_table[id].status = TASK_READY; ret = 0; + + spinlock_lock(&runqueues[core_id].lock); + // add task to the runqueue + if (!runqueues[core_id].queue[prio].last) { + runqueues[core_id].queue[prio].last = runqueues[core_id].queue[prio].first = task; + task->next = task->prev = NULL; + runqueues[core_id].prio_bitmap |= (1 << prio); + } else { + task->prev = runqueues[core_id].queue[prio].last; + task->next = NULL; + runqueues[core_id].queue[prio].last->next = task; + runqueues[core_id].queue[prio].last = task; + } + spinlock_unlock(&runqueues[core_id].lock); } - spinlock_irqsave_unlock(&table_lock); + irq_nested_enable(flags); return ret; } -/* - * we use this struct to guarantee that the id - * has its own cache line - */ -typedef struct { - uint32_t id __attribute__ ((aligned (CACHE_LINE))); - uint8_t gap[CACHE_LINE-sizeof(uint32_t)]; -} last_id_t; - -/** @brief _The_ scheduler procedure +/** @brief Block current task * - * Manages scheduling - right now this is just a round robin scheduler. + * The current task's status will be changed to TASK_BLOCKED + * + * @return + * - 0 on success + * - -EINVAL (-22) on failure */ -void scheduler(void) +int block_current_task(void) +{ + task_t* curr_task; + tid_t id; + uint32_t core_id, prio; + uint32_t flags; + int ret = -EINVAL; + + flags = irq_nested_disable(); + + curr_task = per_core(current_task); + id = curr_task->id; + prio = curr_task->prio; + core_id = CORE_ID; + + if (task_table[id].status == TASK_RUNNING) { + task_table[id].status = TASK_BLOCKED; + ret = 0; + + spinlock_lock(&runqueues[core_id].lock); + + // remove task from queue + if (task_table[id].prev) + task_table[id].prev->next = task_table[id].next; + if (task_table[id].next) + task_table[id].next->prev = task_table[id].prev; + if (runqueues[core_id].queue[prio].first == task_table+id) + runqueues[core_id].queue[prio].first = task_table[id].next; + if (runqueues[core_id].queue[prio].last == task_table+id) { + runqueues[core_id].queue[prio].last = task_table[id].prev; + if (!runqueues[core_id].queue[prio].last) + runqueues[core_id].queue[prio].last = runqueues[core_id].queue[prio].first; + } + + // No valid task in queue => update prio_bitmap + if (!runqueues[core_id].queue[prio].first) + runqueues[core_id].prio_bitmap &= ~(1 << prio); + + spinlock_unlock(&runqueues[core_id].lock); + } + + irq_nested_enable(flags); + + return ret; +} + +int set_timer(uint64_t deadline) +{ + task_t* curr_task; + task_t* tmp; + uint32_t core_id, prio; + uint32_t flags; + int ret = -EINVAL; + + flags = irq_nested_disable(); + + curr_task = per_core(current_task); + prio = curr_task->prio; + core_id = CORE_ID; + + if (curr_task->status == TASK_RUNNING) { + curr_task->status = TASK_BLOCKED; + curr_task->timeout = deadline; + ret = 0; + + spinlock_lock(&runqueues[core_id].lock); + + // remove task from queue + if (curr_task->prev) + curr_task->prev->next = curr_task->next; + if (curr_task->next) + curr_task->next->prev = curr_task->prev; + if (runqueues[core_id].queue[prio].first == curr_task) + runqueues[core_id].queue[prio].first = curr_task->next; + if (runqueues[core_id].queue[prio].last == curr_task) { + runqueues[core_id].queue[prio].last = curr_task->prev; + if (!runqueues[core_id].queue[prio].last) + runqueues[core_id].queue[prio].last = runqueues[core_id].queue[prio].first; + } + + // No valid task in queue => update prio_bitmap + if (!runqueues[core_id].queue[prio].first) + runqueues[core_id].prio_bitmap &= ~(1 << prio); + + // add task to the timer queue + tmp = runqueues[core_id].timers.first; + if (!tmp) { + runqueues[core_id].timers.first = runqueues[core_id].timers.last = curr_task; + curr_task->prev = curr_task->next = NULL; + } else { + while(tmp && (deadline >= tmp->timeout)) + tmp = tmp->next; + + if (!tmp) { + curr_task->next = NULL; + curr_task->prev = runqueues[core_id].timers.last; + if (runqueues[core_id].timers.last) + runqueues[core_id].timers.last->next = curr_task; + runqueues[core_id].timers.last = curr_task; + if (!runqueues[core_id].timers.first) + runqueues[core_id].timers.first = curr_task; + } else { + curr_task->prev = tmp->prev; + curr_task->next = tmp; + tmp->prev = curr_task; + if (curr_task->prev) + curr_task->prev->next = curr_task; + if (runqueues[core_id].timers.first == tmp) + runqueues[core_id].timers.first = curr_task; + } + } + + spinlock_unlock(&runqueues[core_id].lock); + } else kprintf("Task is already blocked. No timer will be set!\n"); + + irq_nested_enable(flags); + + return ret; +} + +void scheduler(void) { task_t* orig_task; task_t* curr_task; - uint32_t i; - uint32_t new_id; + uint32_t core_id = CORE_ID; + uint32_t prio; uint64_t current_tick; - static last_id_t last_id = { 0 }; -#if MAX_CORES > 1 - spinlock_irqsave_lock(&table_lock); -#endif - current_tick = get_clock_tick(); orig_task = curr_task = per_core(current_task); - /* increase the number of used time slices */ - curr_task->time_slices++; - /* signalizes that this task could be reused */ if (curr_task->status == TASK_FINISHED) - curr_task->status = TASK_INVALID; + curr_task->status = TASK_INVALID; /* if the task is using the FPU, we need to save the FPU context */ if (curr_task->flags & TASK_FPU_USED) { @@ -846,64 +1027,73 @@ void scheduler(void) curr_task->flags &= ~TASK_FPU_USED; } - for(i=0, new_id=(last_id.id + 1) % MAX_TASKS; - itimeout <= current_tick) { - if (task_table[new_id].flags & TASK_TIMER_USED) { - if (task_table[new_id].status != TASK_BLOCKED) - task_table[new_id].flags &= ~TASK_TIMER_USED; - if ((task_table[new_id].status == TASK_BLOCKED) && (current_tick >= task_table[new_id].timeout)) { - task_table[new_id].flags &= ~TASK_TIMER_USED; - task_table[new_id].status = TASK_READY; - } - } + task_t* task = runqueues[core_id].timers.first; - if ((task_table[new_id].status == TASK_READY) && !(task_table[new_id].flags & TASK_SWITCH_IN_PROGRESS)) { - if (curr_task->status == TASK_RUNNING) { - curr_task->status = TASK_READY; -#if MAX_CORES > 1 - curr_task->flags |= TASK_SWITCH_IN_PROGRESS; - per_core(old_task) = curr_task; -#endif - } -#if MAX_CORES > 1 - else per_core(old_task) = NULL; -#endif - task_table[new_id].status = TASK_RUNNING; - curr_task = per_core(current_task) = task_table+new_id; - last_id.id = new_id; + // remove timer from queue + runqueues[core_id].timers.first = runqueues[core_id].timers.first->next; + if (!runqueues[core_id].timers.first) + runqueues[core_id].timers.last = NULL; - goto get_task_out; + // wakeup task + if (task->status == TASK_BLOCKED) { + task->status = TASK_READY; + prio = task->prio; + + // add task to the runqueue + if (!runqueues[core_id].queue[prio].first) { + runqueues[core_id].queue[prio].last = runqueues[core_id].queue[prio].first = task; + task->next = task->prev = NULL; + runqueues[core_id].prio_bitmap |= (1 << prio); + } else { + task->prev = runqueues[core_id].queue[prio].last; + task->next = NULL; + runqueues[core_id].queue[prio].last->next = task; + runqueues[core_id].queue[prio].last = task; + } } } -#if MAX_CORES > 1 - per_core(old_task) = NULL; -#endif + runqueues[core_id].old_task = NULL; // reset old task + prio = last_set(runqueues[core_id].prio_bitmap); // determines highest priority - if ((curr_task->status == TASK_RUNNING) || (curr_task->status == TASK_IDLE)) - goto get_task_out; + if (!prio) { + if ((curr_task->status == TASK_RUNNING) || (curr_task->status == TASK_IDLE)) + goto get_task_out; + curr_task = per_core(current_task) = runqueues[core_id].idle; + } else { + // Does the current task have an higher priority? => no task switch + if ((curr_task->prio > prio) && (curr_task->status == TASK_RUNNING)) + goto get_task_out; - /* - * we switch to the idle task, if the current task terminates - * and no other is ready - */ - new_id = CORE_ID; - curr_task = per_core(current_task) = task_table+CORE_ID; + if (curr_task->status == TASK_RUNNING) { + curr_task->status = TASK_READY; + runqueues[core_id].old_task = curr_task; + } + + curr_task = per_core(current_task) = runqueues[core_id].queue[prio].first; + curr_task->status = TASK_RUNNING; + + // remove new task from queue + runqueues[core_id].queue[prio].first = curr_task->next; + if (!curr_task->next) { + runqueues[core_id].queue[prio].last = NULL; + runqueues[core_id].prio_bitmap &= ~(1 << prio); + } + } get_task_out: -#if MAX_CORES > 1 - spinlock_irqsave_unlock(&table_lock); -#endif + spinlock_unlock(&runqueues[core_id].lock); if (curr_task != orig_task) { - //kprintf("schedule from %d to %d on core %d\n", orig_task->id, curr_task->id, smp_id()); - switch_task(new_id); -#if MAX_CORES > 1 - orig_task= per_core(old_task); - if (orig_task) - orig_task->flags &= ~TASK_SWITCH_IN_PROGRESS; -#endif + kprintf("schedule from %u to %u with prio %u on core %u\n", + orig_task->id, curr_task->id, (uint32_t)curr_task->prio, CORE_ID); + switch_task(curr_task->id); } } diff --git a/kernel/tests.c b/kernel/tests.c index 5b67a06d..d04cf938 100644 --- a/kernel/tests.c +++ b/kernel/tests.c @@ -118,7 +118,7 @@ static int join_test(void* arg) tid_t id, ret; int result = -1234; - create_kernel_task(&id, foo, "Hello from foo2"); + create_kernel_task(&id, foo, "Hello from foo2", HIGH_PRIO); kprintf("Wait for child %u\n", id); do { @@ -273,16 +273,16 @@ int test_init(void) // create_kernel_task(NULL,client_task,NULL); #endif - create_kernel_task(NULL, foo, "Hello from foo1"); - create_kernel_task(NULL, join_test, NULL); - //create_kernel_task(NULL, producer, NULL); - //create_kernel_task(NULL, consumer, NULL); - //create_kernel_task(NULL, mail_ping, NULL); + create_kernel_task(NULL, foo, "Hello from foo1", NORMAL_PRIO); + create_kernel_task(NULL, join_test, NULL, NORMAL_PRIO); + //create_kernel_task(NULL, producer, , NORMAL_PRIO); + //create_kernel_task(NULL, consumer, NULL, NORMAL_PRIO); + //create_kernel_task(NULL, mail_ping, NULL, NORMAL_PRIO); //create_user_task(NULL, "/bin/hello", argv); create_user_task(NULL, "/bin/tests", argv); //create_user_task(NULL, "/bin/jacobi", argv); //create_user_task(NULL, "/bin/jacobi", argv); - create_user_task(NULL, "/bin/server", server_argv); + //create_user_task(NULL, "/bin/server", server_argv); //sleep(5); //create_user_task(NULL, "/bin/client", client_argv); diff --git a/lwip/src/arch/sys_arch.c b/lwip/src/arch/sys_arch.c index b46f0fd7..781e3bdd 100644 --- a/lwip/src/arch/sys_arch.c +++ b/lwip/src/arch/sys_arch.c @@ -85,7 +85,7 @@ sys_thread_t sys_thread_new(const char *name, lwip_thread_fn thread, void *arg, { sys_thread_t tmp; - create_kernel_task(&tmp, thread, arg); + create_kernel_task(&tmp, thread, arg, prio); kprintf("Created LWIP task %s with id %u\n", name, tmp); return tmp; diff --git a/lwip/src/include/lwipopts.h b/lwip/src/include/lwipopts.h index fb9ebf6e..4ecdc944 100644 --- a/lwip/src/include/lwipopts.h +++ b/lwip/src/include/lwipopts.h @@ -104,6 +104,20 @@ */ #define IP_FORWARD 1 +/** + * TCPIP_THREAD_PRIO: The priority assigned to the main tcpip thread. + * The priority value itself is platform-dependent, but is passed to + * sys_thread_new() when the thread is created. + */ +#define TCPIP_THREAD_PRIO HIGH_PRIO + +/** + * DEFAULT_THREAD_PRIO: The priority assigned to any other lwIP thread. + * The priority value itself is platform-dependent, but is passed to + * sys_thread_new() when the thread is created. + */ +#define DEFAULT_THREAD_PRIO NORMAL_PRIO + /* DEBUG options */ #define LWIP_DEBUG 1 #define DHCP_DEBUG LWIP_DBG_OFF From 296e8e98f4437cfd4c0665fc39c13344b92d48c0 Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Wed, 17 Aug 2011 14:49:12 +0200 Subject: [PATCH 2/7] the idle priority doesn't longer possess an own runqueue only the idle task uses this priority class --- include/metalsvm/tasks_types.h | 2 +- kernel/tasks.c | 99 ++++++++++++++++++---------------- 2 files changed, 54 insertions(+), 47 deletions(-) diff --git a/include/metalsvm/tasks_types.h b/include/metalsvm/tasks_types.h index 214df8e7..784167a7 100644 --- a/include/metalsvm/tasks_types.h +++ b/include/metalsvm/tasks_types.h @@ -40,7 +40,7 @@ extern "C" { #endif -#define MAX_PRIO 32 +#define MAX_PRIO 31 #define REALTIME_PRIO 31 #define HIGH_PRIO 16 #define NORMAL_PRIO 8 diff --git a/kernel/tasks.c b/kernel/tasks.c index cf16ef40..115f5be9 100644 --- a/kernel/tasks.c +++ b/kernel/tasks.c @@ -129,13 +129,13 @@ static void finish_task_switch(void) spinlock_lock(&runqueues[core_id].lock); if ((old = runqueues[core_id].old_task) != NULL) { prio = old->prio; - if (!runqueues[core_id].queue[prio].first) { + if (!runqueues[core_id].queue[prio-1].first) { old->prev = NULL; - runqueues[core_id].queue[prio].first = runqueues[core_id].queue[prio].last = old; + runqueues[core_id].queue[prio-1].first = runqueues[core_id].queue[prio-1].last = old; } else { - old->prev = runqueues[core_id].queue[prio].last; - runqueues[core_id].queue[prio].last->next = old; - runqueues[core_id].queue[prio].last = old; + old->prev = runqueues[core_id].queue[prio-1].last; + runqueues[core_id].queue[prio-1].last->next = old; + runqueues[core_id].queue[prio-1].last = old; } runqueues[core_id].old_task = NULL; runqueues[core_id].prio_bitmap |= (1 << prio); @@ -242,7 +242,7 @@ static int create_task(tid_t* id, internal_entry_point_t ep, void* arg, uint8_t return -EINVAL; if (BUILTIN_EXPECT(prio == IDLE_PRIO, 0)) return -EINVAL; - if (BUILTIN_EXPECT(prio >= MAX_PRIO, 0)) + if (BUILTIN_EXPECT(prio > MAX_PRIO, 0)) return -EINVAL; spinlock_irqsave_lock(&table_lock); @@ -282,15 +282,22 @@ static int create_task(tid_t* id, internal_entry_point_t ep, void* arg, uint8_t // add task in the runqueue spinlock_lock(&runqueues[core_id].lock); runqueues[core_id].prio_bitmap |= (1 << prio); - if (!runqueues[core_id].queue[prio].first) { + kprintf("prio %d %p\n", prio, runqueues[core_id].queue[prio-1].first); + if (!runqueues[core_id].queue[prio-1].first) { task_table[i].prev = NULL; - runqueues[core_id].queue[prio].first = task_table+i; - runqueues[core_id].queue[prio].last = task_table+i; + kputs("A"); + runqueues[core_id].queue[prio-1].first = task_table+i; + kputs("B"); + runqueues[core_id].queue[prio-1].last = task_table+i; + kputs("C"); task_table[i].next = NULL; } else { - task_table[i].prev = runqueues[core_id].queue[prio].last; - runqueues[core_id].queue[prio].last->next = task_table+i; - runqueues[core_id].queue[prio].last = task_table+i; + kputs("D"); + task_table[i].prev = runqueues[core_id].queue[prio-1].last; + kputs("E"); + runqueues[core_id].queue[prio-1].last->next = task_table+i; + kputs("F"); + runqueues[core_id].queue[prio-1].last = task_table+i; task_table[i].next = NULL; } spinlock_unlock(&runqueues[core_id].lock); @@ -364,15 +371,15 @@ int sys_fork(void) // add task in the runqueue spinlock_lock(&runqueues[core_id].lock); runqueues[core_id].prio_bitmap |= (1 << parent_task->prio); - if (!runqueues[core_id].queue[parent_task->prio].first) { + if (!runqueues[core_id].queue[parent_task->prio-1].first) { task_table[i].prev = NULL; - runqueues[core_id].queue[parent_task->prio].first = task_table+i; - runqueues[core_id].queue[parent_task->prio].last = task_table+i; + runqueues[core_id].queue[parent_task->prio-1].first = task_table+i; + runqueues[core_id].queue[parent_task->prio-1].last = task_table+i; task_table[i].next = NULL; } else { - task_table[i].prev = runqueues[core_id].queue[parent_task->prio].last; - runqueues[core_id].queue[parent_task->prio].last->next = task_table+i; - runqueues[core_id].queue[parent_task->prio].last = task_table+i; + task_table[i].prev = runqueues[core_id].queue[parent_task->prio-1].last; + runqueues[core_id].queue[parent_task->prio-1].last->next = task_table+i; + runqueues[core_id].queue[parent_task->prio-1].last = task_table+i; task_table[i].next = NULL; } spinlock_unlock(&runqueues[core_id].lock); @@ -442,7 +449,7 @@ int create_kernel_task(tid_t* id, entry_point_t ep, void* args, uint8_t prio) kernel_args->func = ep; kernel_args->args = args; - if (prio >= MAX_PRIO) + if (prio > MAX_PRIO) prio = NORMAL_PRIO; return create_task(id, kernel_entry, kernel_args, prio); @@ -861,15 +868,15 @@ int wakeup_task(tid_t id) spinlock_lock(&runqueues[core_id].lock); // add task to the runqueue - if (!runqueues[core_id].queue[prio].last) { - runqueues[core_id].queue[prio].last = runqueues[core_id].queue[prio].first = task; + if (!runqueues[core_id].queue[prio-1].last) { + runqueues[core_id].queue[prio-1].last = runqueues[core_id].queue[prio-1].first = task; task->next = task->prev = NULL; runqueues[core_id].prio_bitmap |= (1 << prio); } else { - task->prev = runqueues[core_id].queue[prio].last; + task->prev = runqueues[core_id].queue[prio-1].last; task->next = NULL; - runqueues[core_id].queue[prio].last->next = task; - runqueues[core_id].queue[prio].last = task; + runqueues[core_id].queue[prio-1].last->next = task; + runqueues[core_id].queue[prio-1].last = task; } spinlock_unlock(&runqueues[core_id].lock); } @@ -913,16 +920,16 @@ int block_current_task(void) task_table[id].prev->next = task_table[id].next; if (task_table[id].next) task_table[id].next->prev = task_table[id].prev; - if (runqueues[core_id].queue[prio].first == task_table+id) - runqueues[core_id].queue[prio].first = task_table[id].next; - if (runqueues[core_id].queue[prio].last == task_table+id) { - runqueues[core_id].queue[prio].last = task_table[id].prev; - if (!runqueues[core_id].queue[prio].last) - runqueues[core_id].queue[prio].last = runqueues[core_id].queue[prio].first; + if (runqueues[core_id].queue[prio-1].first == task_table+id) + runqueues[core_id].queue[prio-1].first = task_table[id].next; + if (runqueues[core_id].queue[prio-1].last == task_table+id) { + runqueues[core_id].queue[prio-1].last = task_table[id].prev; + if (!runqueues[core_id].queue[prio-1].last) + runqueues[core_id].queue[prio-1].last = runqueues[core_id].queue[prio-1].first; } // No valid task in queue => update prio_bitmap - if (!runqueues[core_id].queue[prio].first) + if (!runqueues[core_id].queue[prio-1].first) runqueues[core_id].prio_bitmap &= ~(1 << prio); spinlock_unlock(&runqueues[core_id].lock); @@ -959,16 +966,16 @@ int set_timer(uint64_t deadline) curr_task->prev->next = curr_task->next; if (curr_task->next) curr_task->next->prev = curr_task->prev; - if (runqueues[core_id].queue[prio].first == curr_task) - runqueues[core_id].queue[prio].first = curr_task->next; - if (runqueues[core_id].queue[prio].last == curr_task) { - runqueues[core_id].queue[prio].last = curr_task->prev; - if (!runqueues[core_id].queue[prio].last) - runqueues[core_id].queue[prio].last = runqueues[core_id].queue[prio].first; + if (runqueues[core_id].queue[prio-1].first == curr_task) + runqueues[core_id].queue[prio-1].first = curr_task->next; + if (runqueues[core_id].queue[prio-1].last == curr_task) { + runqueues[core_id].queue[prio-1].last = curr_task->prev; + if (!runqueues[core_id].queue[prio-1].last) + runqueues[core_id].queue[prio-1].last = runqueues[core_id].queue[prio-1].first; } // No valid task in queue => update prio_bitmap - if (!runqueues[core_id].queue[prio].first) + if (!runqueues[core_id].queue[prio-1].first) runqueues[core_id].prio_bitmap &= ~(1 << prio); // add task to the timer queue @@ -1046,15 +1053,15 @@ void scheduler(void) prio = task->prio; // add task to the runqueue - if (!runqueues[core_id].queue[prio].first) { - runqueues[core_id].queue[prio].last = runqueues[core_id].queue[prio].first = task; + if (!runqueues[core_id].queue[prio-1].first) { + runqueues[core_id].queue[prio-1].last = runqueues[core_id].queue[prio-1].first = task; task->next = task->prev = NULL; runqueues[core_id].prio_bitmap |= (1 << prio); } else { - task->prev = runqueues[core_id].queue[prio].last; + task->prev = runqueues[core_id].queue[prio-1].last; task->next = NULL; - runqueues[core_id].queue[prio].last->next = task; - runqueues[core_id].queue[prio].last = task; + runqueues[core_id].queue[prio-1].last->next = task; + runqueues[core_id].queue[prio-1].last = task; } } } @@ -1076,13 +1083,13 @@ void scheduler(void) runqueues[core_id].old_task = curr_task; } - curr_task = per_core(current_task) = runqueues[core_id].queue[prio].first; + curr_task = per_core(current_task) = runqueues[core_id].queue[prio-1].first; curr_task->status = TASK_RUNNING; // remove new task from queue - runqueues[core_id].queue[prio].first = curr_task->next; + runqueues[core_id].queue[prio-1].first = curr_task->next; if (!curr_task->next) { - runqueues[core_id].queue[prio].last = NULL; + runqueues[core_id].queue[prio-1].last = NULL; runqueues[core_id].prio_bitmap &= ~(1 << prio); } } From 6c1553ce0ec1ecebf063781f79ac9942543e1455 Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Wed, 17 Aug 2011 14:58:51 +0200 Subject: [PATCH 3/7] fix conflict --- include/metalsvm/tasks_types.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/include/metalsvm/tasks_types.h b/include/metalsvm/tasks_types.h index 926b66cd..784167a7 100644 --- a/include/metalsvm/tasks_types.h +++ b/include/metalsvm/tasks_types.h @@ -93,11 +93,7 @@ typedef struct task { /// Start address of the heap size_t start_heap; /// End address of the heap -<<<<<<< HEAD size_t end_heap; -======= - uint32_t end_heap; ->>>>>>> master /// LwIP error code int lwip_err; /// Mail inbox From 93257508ee71ee59e42c1e5f6ac0fe9388bf1dc4 Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Wed, 17 Aug 2011 15:09:59 +0200 Subject: [PATCH 4/7] wakeup a blocked task on the core, which the task used during the last time slice --- include/metalsvm/tasks_types.h | 2 ++ kernel/tasks.c | 17 +++++++---------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/include/metalsvm/tasks_types.h b/include/metalsvm/tasks_types.h index 784167a7..3981f15c 100644 --- a/include/metalsvm/tasks_types.h +++ b/include/metalsvm/tasks_types.h @@ -78,6 +78,8 @@ typedef struct task { struct task* next; /// previous task in the queue struct task* prev; + /// last core id on which the task was running + uint32_t last_core; /// Usage in number of pages atomic_int32_t user_usage; /// Avoids concurrent access to the page directory diff --git a/kernel/tasks.c b/kernel/tasks.c index 115f5be9..c6874153 100644 --- a/kernel/tasks.c +++ b/kernel/tasks.c @@ -47,8 +47,8 @@ * A task's id will be its position in this array. */ static task_t task_table[MAX_TASKS] = { \ - [0] = {0, TASK_IDLE, 0, 0, 0, NULL, NULL, ATOMIC_INIT(0), SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, 0, 0, 0, 0}, \ - [1 ... MAX_TASKS-1] = {0, TASK_INVALID, 0, 0, 0, NULL, NULL, ATOMIC_INIT(0), SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, 0, 0, 0, 0}}; + [0] = {0, TASK_IDLE, 0, 0, 0, NULL, NULL, 0, ATOMIC_INIT(0), SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, 0, 0, 0, 0}, \ + [1 ... MAX_TASKS-1] = {0, TASK_INVALID, 0, 0, 0, NULL, NULL, 0, ATOMIC_INIT(0), SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, 0, 0, 0, 0}}; static spinlock_irqsave_t table_lock = SPINLOCK_IRQSAVE_INIT; static runqueue_t runqueues[MAX_CORES] = { \ [0] = {task_table+0, NULL, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_INIT}, \ @@ -107,6 +107,7 @@ size_t get_idle_task(uint32_t id) task_table[id].status = TASK_IDLE; task_table[id].prio = IDLE_PRIO; task_table[id].flags = TASK_DEFAULT_FLAGS; + task_table[id].last_core = id; atomic_int32_set(&task_table[id].user_usage, 0); mailbox_wait_msg_init(&task_table[id].inbox); memset(task_table[id].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS); @@ -263,6 +264,7 @@ static int create_task(tid_t* id, internal_entry_point_t ep, void* arg, uint8_t task_table[i].status = TASK_READY; task_table[i].flags = TASK_DEFAULT_FLAGS; task_table[i].prio = prio; + task_table[i].last_core = 0; spinlock_init(&task_table[i].vma_lock); task_table[i].vma_list = NULL; mailbox_wait_msg_init(&task_table[i].inbox); @@ -282,21 +284,14 @@ static int create_task(tid_t* id, internal_entry_point_t ep, void* arg, uint8_t // add task in the runqueue spinlock_lock(&runqueues[core_id].lock); runqueues[core_id].prio_bitmap |= (1 << prio); - kprintf("prio %d %p\n", prio, runqueues[core_id].queue[prio-1].first); if (!runqueues[core_id].queue[prio-1].first) { task_table[i].prev = NULL; - kputs("A"); runqueues[core_id].queue[prio-1].first = task_table+i; - kputs("B"); runqueues[core_id].queue[prio-1].last = task_table+i; - kputs("C"); task_table[i].next = NULL; } else { - kputs("D"); task_table[i].prev = runqueues[core_id].queue[prio-1].last; - kputs("E"); runqueues[core_id].queue[prio-1].last->next = task_table+i; - kputs("F"); runqueues[core_id].queue[prio-1].last = task_table+i; task_table[i].next = NULL; } @@ -367,6 +362,7 @@ int sys_fork(void) task_table[i].end_heap = 0; task_table[i].lwip_err = 0; task_table[i].prio = parent_task->prio; + task_table[i].last_core = parent_task->last_core; // add task in the runqueue spinlock_lock(&runqueues[core_id].lock); @@ -858,9 +854,9 @@ int wakeup_task(tid_t id) flags = irq_nested_disable(); - core_id = CORE_ID; task = task_table + id; prio = task->prio; + core_id = task->last_core; if (task_table[id].status == TASK_BLOCKED) { task_table[id].status = TASK_READY; @@ -1023,6 +1019,7 @@ void scheduler(void) uint64_t current_tick; orig_task = curr_task = per_core(current_task); + curr_task->last_core = core_id; /* signalizes that this task could be reused */ if (curr_task->status == TASK_FINISHED) From 0ba7e146abf895978916ab4ce6a0e46fd54790fc Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Thu, 18 Aug 2011 12:15:05 +0200 Subject: [PATCH 5/7] fix bug: use AT&T instead of Intel style --- arch/x86/include/asm/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 182db412..fb2f93bb 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -283,7 +283,7 @@ static inline uint32_t last_set(uint32_t i) if (!i) return 0; - asm volatile ("bsr %0, %1" : "=r"(ret) : "r"(i)); + asm volatile ("bsr %1, %0" : "=r"(ret) : "r"(i) : "flags"); return ret; } From 35621d72d131f8cefb04c84bc0f2c663835cf562 Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Thu, 18 Aug 2011 12:16:31 +0200 Subject: [PATCH 6/7] first try to realize task stealing --- arch/x86/kernel/apic.c | 2 +- arch/x86/kernel/timer.c | 13 ++- arch/x86/mm/page.c | 1 + include/metalsvm/tasks.h | 16 +++ include/metalsvm/tasks_types.h | 8 ++ kernel/tasks.c | 184 ++++++++++++++++++++++++++++----- kernel/tests.c | 2 +- 7 files changed, 197 insertions(+), 29 deletions(-) diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 379a181b..da4c65f4 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -60,7 +60,7 @@ static uint32_t ncores = 1; static uint8_t irq_redirect[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF}; #if MAX_CORES > 1 static uint8_t boot_code[] = { 0xFA, 0x0F, 0x01, 0x16, 0x3B, 0x70, 0x0F, 0x20, 0xC0, 0x0C, 0x01, 0x0F, 0x22, 0xC0, 0x66, 0xEA, 0x16, 0x70, 0x00, 0x00, 0x08, 0x00, 0x31, 0xC0, 0x66, 0xB8, 0x10, 0x00, 0x8E, 0xD8, 0x8E, 0xC0, 0x8E, 0xE0, 0x8E, 0xE8, 0x8E, 0xD0, 0xBC, 0xEF, 0xBE, 0xAD, 0xDE, 0x68, 0xAD, 0xDE, 0xAD, 0xDE, 0x6A, 0x00, 0xEA, 0xDE, 0xC0, 0xAD, 0xDE, 0x08, 0x00, 0xEB, 0xFE, 0x17, 0x00, 0x41, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x9A, 0xCF, 0x00, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x92, 0xCF, 0x00}; -static atomic_int32_t cpu_online = ATOMIC_INIT(1); +atomic_int32_t cpu_online = ATOMIC_INIT(1); #endif static uint8_t initialized = 0; spinlock_t bootlock = SPINLOCK_INIT; diff --git a/arch/x86/kernel/timer.c b/arch/x86/kernel/timer.c index ffc7bf00..af70abfd 100644 --- a/arch/x86/kernel/timer.c +++ b/arch/x86/kernel/timer.c @@ -36,6 +36,10 @@ */ static volatile uint64_t timer_ticks = 0; +#if MAX_CORES > 1 +extern atomic_int32_t cpu_online; +#endif + uint64_t get_clock_tick(void) { return timer_ticks; @@ -61,8 +65,6 @@ int sys_times(struct tms* buffer, clock_t* clock) */ static void timer_handler(struct state *s) { - uint32_t i; - /* Increment our 'tick counter' */ #if MAX_CORES > 1 if (smp_id() == 0) @@ -78,6 +80,13 @@ static void timer_handler(struct state *s) vga_puts("One second has passed\n"); }*/ } + + update_load(); + +#if MAX_CORES > 1 + if ((atomic_int32_read(&cpu_online) > 1) && (timer_ticks % (TIMER_FREQ/5) == 0)) + load_balancing(); +#endif } int timer_wait(unsigned int ticks) diff --git a/arch/x86/mm/page.c b/arch/x86/mm/page.c index 6eadd3d1..a99a576a 100644 --- a/arch/x86/mm/page.c +++ b/arch/x86/mm/page.c @@ -615,6 +615,7 @@ default_handler: kprintf("Register state: eax = 0x%x, ebx = 0x%x, ecx = 0x%x, edx = 0x%x, edi = 0x%x, esi = 0x%x, ebp = 0x%x, esp = 0x%x\n", s->eax, s->ebx, s->ecx, s->edx, s->edi, s->esi, s->ebp, s->esp); + while(1); irq_enable(); abort(); } diff --git a/include/metalsvm/tasks.h b/include/metalsvm/tasks.h index 3be44077..be3fdd07 100644 --- a/include/metalsvm/tasks.h +++ b/include/metalsvm/tasks.h @@ -85,6 +85,22 @@ int create_user_task(tid_t* id, const char* fame, char** argv); */ tid_t wait(int32_t* result); +/** @brief Update the load of the current core + * + * This function is called from the timer interrupt + * and updates the load of the current core + */ +void update_load(void); + +#if MAX_CORES > 1 +/** @brief Load balancer + * + * This load balancer is called from the timer interrupt + * and steals tasks from other cores + */ +void load_balancing(void); +#endif + /** @brief Task switcher * * Timer-interrupted use of this function for task switching */ diff --git a/include/metalsvm/tasks_types.h b/include/metalsvm/tasks_types.h index 3981f15c..fc1072de 100644 --- a/include/metalsvm/tasks_types.h +++ b/include/metalsvm/tasks_types.h @@ -116,6 +116,14 @@ typedef struct { task_t* idle __attribute__ ((aligned (CACHE_LINE))); /// previous task task_t* old_task; + /// total number of tasks in the queue + uint32_t nr_tasks; + // current load = average number of tasks in the queue (1-minute average) + uint32_t load; + // help counter to determine the the cpu load + int32_t load_counter; + // help counter to avoid "over balancing" + int32_t balance_counter; /// indicates the used priority queues uint32_t prio_bitmap; /// a queue for each priority diff --git a/kernel/tasks.c b/kernel/tasks.c index c6874153..9939651a 100644 --- a/kernel/tasks.c +++ b/kernel/tasks.c @@ -51,8 +51,8 @@ static task_t task_table[MAX_TASKS] = { \ [1 ... MAX_TASKS-1] = {0, TASK_INVALID, 0, 0, 0, NULL, NULL, 0, ATOMIC_INIT(0), SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, 0, 0, 0, 0}}; static spinlock_irqsave_t table_lock = SPINLOCK_IRQSAVE_INIT; static runqueue_t runqueues[MAX_CORES] = { \ - [0] = {task_table+0, NULL, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_INIT}, \ - [1 ... MAX_CORES-1] = {NULL, NULL, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_INIT}}; + [0] = {task_table+0, NULL, 0, 0, 0, 0, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_INIT}, \ + [1 ... MAX_CORES-1] = {NULL, NULL, 0, 0, 0, 0, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_INIT}}; DEFINE_PER_CORE(task_t*, current_task, task_table+0); @@ -63,25 +63,6 @@ task_t* get_current_task(void) { return per_core(current_task); } -int dump_scheduling_statistics(void) -{ -#if 0 - uint32_t i; - uint32_t id = 0; - - kprintf("Scheduling statistics:\n"); - kprintf("======================\n"); - kprintf("total ticks:\t%llu\n", get_clock_tick()); - for(i=0; iid, arg); @@ -198,6 +180,15 @@ static void NORETURN do_exit(int arg) { kprintf("Memory leak! Task %d did not release %d pages\n", curr_task->id, atomic_int32_read(&curr_task->user_usage)); curr_task->status = TASK_FINISHED; + + // decrease the number of active tasks + flags = irq_nested_disable(); + core_id = CORE_ID; + spinlock_lock(&runqueues[core_id].lock); + runqueues[core_id].nr_tasks--; + spinlock_unlock(&runqueues[core_id].lock); + irq_nested_enable(flags); + reschedule(); kprintf("Kernel panic: scheduler on core %d found no valid task\n", CORE_ID); @@ -237,7 +228,7 @@ static int create_task(tid_t* id, internal_entry_point_t ep, void* arg, uint8_t { task_t* curr_task; int ret = -ENOMEM; - unsigned int i, core_id = CORE_ID; + unsigned int i, core_id; if (BUILTIN_EXPECT(!ep, 0)) return -EINVAL; @@ -248,6 +239,7 @@ static int create_task(tid_t* id, internal_entry_point_t ep, void* arg, uint8_t spinlock_irqsave_lock(&table_lock); + core_id = CORE_ID; curr_task = per_core(current_task); for(i=0; ivma_lock); spinlock_irqsave_lock(&table_lock); + core_id = CORE_ID; + for(i=0; iprio); + runqueues[core_id].nr_tasks++; if (!runqueues[core_id].queue[parent_task->prio-1].first) { task_table[i].prev = NULL; runqueues[core_id].queue[parent_task->prio-1].first = task_table+i; @@ -1010,6 +1006,133 @@ int set_timer(uint64_t deadline) return ret; } +#define FSHIFT 21 /* nr of bits of precision (e.g. 11) */ +#define FIXED_1 (1< 0) + runqueues[core_id].balance_counter--; + if (runqueues[core_id].load_counter < 0) { + runqueues[core_id].load_counter += 5*TIMER_FREQ; + + spinlock_lock(&runqueues[core_id].lock); + runqueues[core_id].load *= EXP; + runqueues[core_id].load += runqueues[core_id].nr_tasks*(FIXED_1-EXP); + runqueues[core_id].load >>= FSHIFT; + spinlock_unlock(&runqueues[core_id].lock); + + //kprintf("load of core %u: %u, %u\n", core_id, runqueues[core_id].load, runqueues[core_id].nr_tasks); + } +} + +#if MAX_CORES > 1 +extern atomic_int32_t cpu_online; + +void load_balancing(void) +{ + uint32_t i, core_id = CORE_ID; + uint32_t prio; + task_t* task; + + spinlock_lock(&runqueues[core_id].lock); + for(i=0; (i runqueues[core_id].load) { + kprintf("Try to steal a task from core %u (load %u) to %u (load %u)\n", i, runqueues[i].load, core_id, runqueues[core_id].load); + kprintf("Task on core %u: %u, core %u, %u\n", i, runqueues[i].nr_tasks, core_id, runqueues[i].nr_tasks); + + prio = last_set(runqueues[i].prio_bitmap); + if (prio) { + // steal a ready task + task = runqueues[i].queue[prio-1].last; + kprintf("Try to steal a ready task %d\n", task->id); + + // remove last element from queue i + if (task->prev) + task->prev->next = NULL; + runqueues[i].queue[prio-1].last = task->prev; + if (!runqueues[i].queue[prio-1].last) + runqueues[i].queue[prio-1].first = NULL; + + // add task at the end of queue core_id + if (!runqueues[core_id].queue[prio-1].last) { + runqueues[core_id].queue[prio-1].first = runqueues[core_id].queue[prio-1].last = task; + task->next = task->prev = NULL; + } else { + runqueues[core_id].queue[prio-1].last->next = task; + task->prev = runqueues[core_id].queue[prio-1].last; + runqueues[core_id].queue[prio-1].last = task; + task->next = NULL; + } + + // update task counters + runqueues[core_id].nr_tasks++; + runqueues[i].nr_tasks--; + runqueues[core_id].balance_counter = 5*TIMER_FREQ; + } else { + task_t* tmp; + + // steal a blocked task + task = runqueues[i].timers.first; + if (!task) // Ups, found no valid task to steal + goto no_task_found; + + kprintf("Try to steal blocked task %d\n", task->id); + + // remove first timer from queue i + if (runqueues[i].timers.first == runqueues[i].timers.last) + runqueues[i].timers.first = runqueues[i].timers.last = NULL; + else + runqueues[i].timers.first = runqueues[i].timers.first->next; + + // add timer to queue core_id + tmp = runqueues[core_id].timers.first; + while(tmp && (task->timeout >= tmp->timeout)) + tmp = tmp->next; + + if (!tmp) { + task->next = NULL; + task->prev = runqueues[core_id].timers.last; + if (runqueues[core_id].timers.last) + runqueues[core_id].timers.last->next = task; + runqueues[core_id].timers.last = task; + if (!runqueues[core_id].timers.first) + runqueues[core_id].timers.first = task; + } else { + task->prev = tmp->prev; + task->next = tmp; + tmp->prev = task; + if (task->prev) + task->prev->next = task; + if (runqueues[core_id].timers.first == tmp) + runqueues[core_id].timers.first = task; + } + + // => reschedule on the new core + task->last_core = CORE_ID; + + // update task counters + runqueues[core_id].nr_tasks++; + runqueues[i].nr_tasks--; + runqueues[core_id].balance_counter = 5*TIMER_FREQ; + } + } +no_task_found: + spinlock_unlock(&runqueues[i].lock); + } + spinlock_unlock(&runqueues[core_id].lock); +} +#endif + void scheduler(void) { task_t* orig_task; @@ -1065,6 +1188,17 @@ void scheduler(void) runqueues[core_id].old_task = NULL; // reset old task prio = last_set(runqueues[core_id].prio_bitmap); // determines highest priority +#if MAX_CORES > 1 + /*if (!prio) { + load_balancing(); + prio = last_set(runqueues[core_id].prio_bitmap); // retry... + }*/ +#endif + + if (BUILTIN_EXPECT(prio > MAX_PRIO, 0)) { + kprintf("Invalid priority %u by bitmap 0x%x\n", prio, runqueues[core_id].prio_bitmap); + prio = 0; + } if (!prio) { if ((curr_task->status == TASK_RUNNING) || (curr_task->status == TASK_IDLE)) @@ -1095,8 +1229,8 @@ get_task_out: spinlock_unlock(&runqueues[core_id].lock); if (curr_task != orig_task) { - kprintf("schedule from %u to %u with prio %u on core %u\n", - orig_task->id, curr_task->id, (uint32_t)curr_task->prio, CORE_ID); + //kprintf("schedule from %u to %u with prio %u on core %u\n", + // orig_task->id, curr_task->id, (uint32_t)curr_task->prio, CORE_ID); switch_task(curr_task->id); } } diff --git a/kernel/tests.c b/kernel/tests.c index 281329d5..8f8860a4 100644 --- a/kernel/tests.c +++ b/kernel/tests.c @@ -91,7 +91,7 @@ static int foo(void* arg) if (!arg) return 0; - for(i=0; i<5; i++) { + for(i=0; i<20; i++) { kprintf("Message from core %d: %s\n", smp_id(), (char*) arg); sleep(1); } From 1e1e77351a9c5e2b6a9ae6657d8964fe53f35cd5 Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Wed, 24 Aug 2011 09:32:36 +0200 Subject: [PATCH 7/7] first approach to determine the cpu load --- kernel/tasks.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/kernel/tasks.c b/kernel/tasks.c index 9939651a..57fccdba 100644 --- a/kernel/tasks.c +++ b/kernel/tasks.c @@ -50,9 +50,14 @@ static task_t task_table[MAX_TASKS] = { \ [0] = {0, TASK_IDLE, 0, 0, 0, NULL, NULL, 0, ATOMIC_INIT(0), SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, 0, 0, 0, 0}, \ [1 ... MAX_TASKS-1] = {0, TASK_INVALID, 0, 0, 0, NULL, NULL, 0, ATOMIC_INIT(0), SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, 0, 0, 0, 0}}; static spinlock_irqsave_t table_lock = SPINLOCK_IRQSAVE_INIT; +#if MAX_CORES > 1 static runqueue_t runqueues[MAX_CORES] = { \ [0] = {task_table+0, NULL, 0, 0, 0, 0, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_INIT}, \ [1 ... MAX_CORES-1] = {NULL, NULL, 0, 0, 0, 0, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_INIT}}; +#else +static runqueue_t runqueues[1] = { \ + [0] = {task_table+0, NULL, 0, 0, 0, 0, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_INIT}}; +#endif DEFINE_PER_CORE(task_t*, current_task, task_table+0); @@ -859,6 +864,9 @@ int wakeup_task(tid_t id) ret = 0; spinlock_lock(&runqueues[core_id].lock); + // increase the number of ready tasks + runqueues[core_id].nr_tasks++; + // add task to the runqueue if (!runqueues[core_id].queue[prio-1].last) { runqueues[core_id].queue[prio-1].last = runqueues[core_id].queue[prio-1].first = task; @@ -906,6 +914,8 @@ int block_current_task(void) ret = 0; spinlock_lock(&runqueues[core_id].lock); + // reduce the number of ready tasks + runqueues[core_id].nr_tasks--; // remove task from queue if (task_table[id].prev) @@ -953,6 +963,9 @@ int set_timer(uint64_t deadline) spinlock_lock(&runqueues[core_id].lock); + // reduce the number of ready tasks + runqueues[core_id].nr_tasks--; + // remove task from queue if (curr_task->prev) curr_task->prev->next = curr_task->next; @@ -1035,6 +1048,7 @@ extern atomic_int32_t cpu_online; void load_balancing(void) { +#if 0 uint32_t i, core_id = CORE_ID; uint32_t prio; task_t* task; @@ -1130,6 +1144,7 @@ no_task_found: spinlock_unlock(&runqueues[i].lock); } spinlock_unlock(&runqueues[core_id].lock); +#endif } #endif @@ -1172,6 +1187,9 @@ void scheduler(void) task->status = TASK_READY; prio = task->prio; + // increase the number of ready tasks + runqueues[core_id].nr_tasks++; + // add task to the runqueue if (!runqueues[core_id].queue[prio-1].first) { runqueues[core_id].queue[prio-1].last = runqueues[core_id].queue[prio-1].first = task; @@ -1189,10 +1207,10 @@ void scheduler(void) runqueues[core_id].old_task = NULL; // reset old task prio = last_set(runqueues[core_id].prio_bitmap); // determines highest priority #if MAX_CORES > 1 - /*if (!prio) { + if (!prio) { load_balancing(); prio = last_set(runqueues[core_id].prio_bitmap); // retry... - }*/ + } #endif if (BUILTIN_EXPECT(prio > MAX_PRIO, 0)) {