diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 379a181b..da4c65f4 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -60,7 +60,7 @@ static uint32_t ncores = 1;
 static uint8_t irq_redirect[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF};
 #if MAX_CORES > 1
 static uint8_t boot_code[] = { 0xFA, 0x0F, 0x01, 0x16, 0x3B, 0x70, 0x0F, 0x20, 0xC0, 0x0C, 0x01, 0x0F, 0x22, 0xC0, 0x66, 0xEA, 0x16, 0x70, 0x00, 0x00, 0x08, 0x00, 0x31, 0xC0, 0x66, 0xB8, 0x10, 0x00, 0x8E, 0xD8, 0x8E, 0xC0, 0x8E, 0xE0, 0x8E, 0xE8, 0x8E, 0xD0, 0xBC, 0xEF, 0xBE, 0xAD, 0xDE, 0x68, 0xAD, 0xDE, 0xAD, 0xDE, 0x6A, 0x00, 0xEA, 0xDE, 0xC0, 0xAD, 0xDE, 0x08, 0x00, 0xEB, 0xFE, 0x17, 0x00, 0x41, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x9A, 0xCF, 0x00, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x92, 0xCF, 0x00};
-static atomic_int32_t cpu_online = ATOMIC_INIT(1);
+atomic_int32_t cpu_online = ATOMIC_INIT(1);
 #endif
 static uint8_t initialized = 0;
 spinlock_t bootlock = SPINLOCK_INIT;
diff --git a/arch/x86/kernel/timer.c b/arch/x86/kernel/timer.c
index ffc7bf00..af70abfd 100644
--- a/arch/x86/kernel/timer.c
+++ b/arch/x86/kernel/timer.c
@@ -36,6 +36,10 @@
  */
 static volatile uint64_t timer_ticks = 0;
 
+#if MAX_CORES > 1
+extern atomic_int32_t cpu_online;
+#endif
+
 uint64_t get_clock_tick(void)
 {
 	return timer_ticks;
@@ -61,8 +65,6 @@ int sys_times(struct tms* buffer, clock_t* clock)
  */
 static void timer_handler(struct state *s)
 {
-	uint32_t i;
-
 	/* Increment our 'tick counter' */
 #if MAX_CORES > 1
 	if (smp_id() == 0)
@@ -78,6 +80,13 @@ static void timer_handler(struct state *s)
 			vga_puts("One second has passed\n");
 		}*/
 	}
+
+	update_load();
+
+#if MAX_CORES > 1
+	if ((atomic_int32_read(&cpu_online) > 1) && (timer_ticks % (TIMER_FREQ/5) == 0))
+		load_balancing();
+#endif
 }
 
 int timer_wait(unsigned int ticks)
diff --git a/arch/x86/mm/page.c b/arch/x86/mm/page.c
index 6eadd3d1..a99a576a 100644
--- a/arch/x86/mm/page.c
+++ b/arch/x86/mm/page.c
@@ -615,6 +615,7 @@ default_handler:
 	kprintf("Register state: eax = 0x%x, ebx = 0x%x, ecx = 0x%x, edx = 0x%x, edi = 0x%x, esi = 0x%x, ebp = 0x%x, esp = 0x%x\n",
 		s->eax, s->ebx, s->ecx, s->edx, s->edi, s->esi, s->ebp, s->esp);
 
+	while(1);
 	irq_enable();
 	abort();
 }
diff --git a/include/metalsvm/tasks.h b/include/metalsvm/tasks.h
index 3be44077..be3fdd07 100644
--- a/include/metalsvm/tasks.h
+++ b/include/metalsvm/tasks.h
@@ -85,6 +85,22 @@ int create_user_task(tid_t* id, const char* fame, char** argv);
  */
 tid_t wait(int32_t* result);
 
+/** @brief Update the load of the current core
+ *
+ * This function is called from the timer interrupt
+ * and updates the load of the current core.
+ */
+void update_load(void);
+
+#if MAX_CORES > 1
+/** @brief Load balancer
+ *
+ * The load balancer is called from the timer interrupt
+ * and steals tasks from overloaded cores.
+ */
+void load_balancing(void);
+#endif
+
 /** @brief Task switcher
  *
  * Timer-interrupted use of this function for task switching */
diff --git a/include/metalsvm/tasks_types.h b/include/metalsvm/tasks_types.h
index 3981f15c..fc1072de 100644
--- a/include/metalsvm/tasks_types.h
+++ b/include/metalsvm/tasks_types.h
@@ -116,6 +116,14 @@ typedef struct {
 	task_t* idle __attribute__ ((aligned (CACHE_LINE)));
 	/// previous task
 	task_t* old_task;
+	/// total number of tasks in the queue
+	uint32_t nr_tasks;
+	/// current load = average number of tasks in the queue (1-minute average)
+	uint32_t load;
+	/// help counter to determine the cpu load
+	int32_t load_counter;
+	/// help counter to avoid "over balancing"
+	int32_t balance_counter;
 	/// indicates the used priority queues
 	uint32_t prio_bitmap;
 	/// a queue for each priority
diff --git a/kernel/tasks.c b/kernel/tasks.c
index c6874153..9939651a 100644
--- a/kernel/tasks.c
+++ b/kernel/tasks.c
@@ -51,8 +51,8 @@ static task_t task_table[MAX_TASKS] = { \
 	[1 ... MAX_TASKS-1] = {0, TASK_INVALID, 0, 0, 0, NULL, NULL, 0, ATOMIC_INIT(0), SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, 0, 0, 0, 0}};
 static spinlock_irqsave_t table_lock = SPINLOCK_IRQSAVE_INIT;
 static runqueue_t runqueues[MAX_CORES] = { \
-	[0] = {task_table+0, NULL, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_INIT}, \
-	[1 ... MAX_CORES-1] = {NULL, NULL, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_INIT}};
+	[0] = {task_table+0, NULL, 0, 0, 0, 0, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_INIT}, \
+	[1 ... MAX_CORES-1] = {NULL, NULL, 0, 0, 0, 0, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_INIT}};
 
 DEFINE_PER_CORE(task_t*, current_task, task_table+0);
 
@@ -63,25 +63,6 @@ task_t* get_current_task(void) {
 	return per_core(current_task);
 }
 
-int dump_scheduling_statistics(void)
-{
-#if 0
-	uint32_t i;
-	uint32_t id = 0;
-
-	kprintf("Scheduling statistics:\n");
-	kprintf("======================\n");
-	kprintf("total ticks:\t%llu\n", get_clock_tick());
-	for(i=0; i<MAX_CORES; i++) {
-		if (runqueues[i].idle) {
-			kprintf("core %u:\t%llu idle ticks\n", id, runqueues[i].idle->idle_ticks);
-			id++;
-		}
-	}
-#endif
-
-	return 0;
-}
 
 int multitasking_init(void) {
 	if (BUILTIN_EXPECT(task_table[0].status != TASK_IDLE, 0)) {
@@ -172,3 +153,4 @@ static void NORETURN do_exit(int arg) {
 	task_t* curr_task = per_core(current_task);
+	uint32_t flags, core_id;
 
 	kprintf("Terminate task: %u, return value %d\n", curr_task->id, arg);
@@ -198,6 +180,15 @@ static void NORETURN do_exit(int arg) {
 		kprintf("Memory leak! Task %d did not release %d pages\n",
 			curr_task->id, atomic_int32_read(&curr_task->user_usage));
 	curr_task->status = TASK_FINISHED;
+
+	// decrease the number of active tasks
+	flags = irq_nested_disable();
+	core_id = CORE_ID;
+	spinlock_lock(&runqueues[core_id].lock);
+	runqueues[core_id].nr_tasks--;
+	spinlock_unlock(&runqueues[core_id].lock);
+	irq_nested_enable(flags);
+
 	reschedule();
 
 	kprintf("Kernel panic: scheduler on core %d found no valid task\n", CORE_ID);
@@ -237,7 +228,7 @@ static int create_task(tid_t* id, internal_entry_point_t ep, void* arg, uint8_t
 {
 	task_t* curr_task;
 	int ret = -ENOMEM;
-	unsigned int i, core_id = CORE_ID;
+	unsigned int i, core_id;
 
 	if (BUILTIN_EXPECT(!ep, 0))
 		return -EINVAL;
@@ -248,6 +239,7 @@ static int create_task(tid_t* id, internal_entry_point_t ep, void* arg, uint8_t
 
 	spinlock_irqsave_lock(&table_lock);
 
+	core_id = CORE_ID;
 	curr_task = per_core(current_task);
 
 	for(i=0; i<MAX_TASKS; i++) {
@@ -283,4 +275,6 @@ int sys_fork(void)
 	spinlock_lock(&parent_task->vma_lock);
 	spinlock_irqsave_lock(&table_lock);
 
+	core_id = CORE_ID;
+
 	for(i=0; i<MAX_TASKS; i++) {
@@ -309,6 +303,8 @@ int sys_fork(void)
 		spinlock_lock(&runqueues[core_id].lock);
+		// add the new task to the parent's runqueue
 		runqueues[core_id].prio_bitmap |= (1 << parent_task->prio);
+		runqueues[core_id].nr_tasks++;
 		if (!runqueues[core_id].queue[parent_task->prio-1].first) {
 			task_table[i].prev = NULL;
 			runqueues[core_id].queue[parent_task->prio-1].first = task_table+i;
@@ -1010,6 +1006,133 @@ int set_timer(uint64_t deadline)
 	return ret;
 }
 
+#define FSHIFT		21		/* nr of bits of precision (e.g. 11) */
+#define FIXED_1		(1<<FSHIFT)	/* 1.0 as fixed-point */
+#define EXP		...		/* 1/exp(5sec/1min) as fixed-point */
+
+void update_load(void)
+{
+	uint32_t core_id = CORE_ID;
+
+	runqueues[core_id].load_counter--;
+	if (runqueues[core_id].balance_counter > 0)
+		runqueues[core_id].balance_counter--;
+	if (runqueues[core_id].load_counter < 0) {
+		runqueues[core_id].load_counter += 5*TIMER_FREQ;
+
+		spinlock_lock(&runqueues[core_id].lock);
+		runqueues[core_id].load *= EXP;
+		runqueues[core_id].load += runqueues[core_id].nr_tasks*(FIXED_1-EXP);
+		runqueues[core_id].load >>= FSHIFT;
+		spinlock_unlock(&runqueues[core_id].lock);
+
+		//kprintf("load of core %u: %u, %u\n", core_id, runqueues[core_id].load, runqueues[core_id].nr_tasks);
+	}
+}
+
+#if MAX_CORES > 1
+extern atomic_int32_t cpu_online;
+
+void load_balancing(void)
+{
+	uint32_t i, core_id = CORE_ID;
+	uint32_t prio;
+	task_t* task;
+
+	spinlock_lock(&runqueues[core_id].lock);
+	for(i=0; (i<atomic_int32_read(&cpu_online)) && (i<MAX_CORES); i++) {
+		if (i == core_id)
+			continue;
+
+		spinlock_lock(&runqueues[i].lock);
+		if (runqueues[i].load > runqueues[core_id].load) {
+			kprintf("Try to steal a task from core %u (load %u) to %u (load %u)\n", i, runqueues[i].load, core_id, runqueues[core_id].load);
+			kprintf("Tasks on core %u: %u, on core %u: %u\n", i, runqueues[i].nr_tasks, core_id, runqueues[core_id].nr_tasks);
+
+			prio = last_set(runqueues[i].prio_bitmap);
+			if (prio) {
+				// steal a ready task
+				task = runqueues[i].queue[prio-1].last;
+				kprintf("Try to steal a ready task %d\n", task->id);
+
+				// remove the last element from queue i
+				if (task->prev)
+					task->prev->next = NULL;
+				runqueues[i].queue[prio-1].last = task->prev;
+				if (!runqueues[i].queue[prio-1].last)
+					runqueues[i].queue[prio-1].first = NULL;
+
+				// add the task at the end of queue core_id
+				if (!runqueues[core_id].queue[prio-1].last) {
+					runqueues[core_id].queue[prio-1].first = runqueues[core_id].queue[prio-1].last = task;
+					task->next = task->prev = NULL;
+				} else {
+					runqueues[core_id].queue[prio-1].last->next = task;
+					task->prev = runqueues[core_id].queue[prio-1].last;
+					runqueues[core_id].queue[prio-1].last = task;
+					task->next = NULL;
+				}
+
+				// keep the priority bitmaps in sync with the queues
+				runqueues[core_id].prio_bitmap |= (1 << prio);
+				if (!runqueues[i].queue[prio-1].first)
+					runqueues[i].prio_bitmap &= ~(1 << prio);
+
+				// update the task counters
+				runqueues[core_id].nr_tasks++;
+				runqueues[i].nr_tasks--;
+				runqueues[core_id].balance_counter = 5*TIMER_FREQ;
+			} else {
+				task_t* tmp;
+
+				// steal a blocked task
+				task = runqueues[i].timers.first;
+				if (!task)	// Oops, found no valid task to steal
+					goto no_task_found;
+
+				kprintf("Try to steal blocked task %d\n", task->id);
+
+				// remove the first timer from queue i
+				if (runqueues[i].timers.first == runqueues[i].timers.last)
+					runqueues[i].timers.first = runqueues[i].timers.last = NULL;
+				else
+					runqueues[i].timers.first = runqueues[i].timers.first->next;
+
+				// add the timer to queue core_id, sorted by timeout
+				tmp = runqueues[core_id].timers.first;
+				while(tmp && (task->timeout >= tmp->timeout))
+					tmp = tmp->next;
+
+				if (!tmp) {
+					task->next = NULL;
+					task->prev = runqueues[core_id].timers.last;
+					if (runqueues[core_id].timers.last)
+						runqueues[core_id].timers.last->next = task;
+					runqueues[core_id].timers.last = task;
+					if (!runqueues[core_id].timers.first)
+						runqueues[core_id].timers.first = task;
+				} else {
+					task->prev = tmp->prev;
+					task->next = tmp;
+					tmp->prev = task;
+					if (task->prev)
+						task->prev->next = task;
+					if (runqueues[core_id].timers.first == tmp)
+						runqueues[core_id].timers.first = task;
+				}
+
+				// => reschedule on the new core
+				task->last_core = CORE_ID;
+
+				// update the task counters
+				runqueues[core_id].nr_tasks++;
+				runqueues[i].nr_tasks--;
+				runqueues[core_id].balance_counter = 5*TIMER_FREQ;
+			}
+		}
+no_task_found:
+		spinlock_unlock(&runqueues[i].lock);
+	}
+	spinlock_unlock(&runqueues[core_id].lock);
+}
+#endif
+
 void scheduler(void)
 {
 	task_t* orig_task;
@@ -1065,6 +1188,17 @@ void scheduler(void)
 
 	runqueues[core_id].old_task = NULL; // reset old task
 	prio = last_set(runqueues[core_id].prio_bitmap); // determines highest priority
+#if MAX_CORES > 1
+	/*if (!prio) {
+		load_balancing();
+		prio = last_set(runqueues[core_id].prio_bitmap); // retry...
+	}*/
+#endif
+
+	if (BUILTIN_EXPECT(prio > MAX_PRIO, 0)) {
+		kprintf("Invalid priority %u by bitmap 0x%x\n", prio, runqueues[core_id].prio_bitmap);
+		prio = 0;
+	}
 
 	if (!prio) {
 		if ((curr_task->status == TASK_RUNNING) || (curr_task->status == TASK_IDLE))
@@ -1095,8 +1229,8 @@ get_task_out:
 	spinlock_unlock(&runqueues[core_id].lock);
 
 	if (curr_task != orig_task) {
-		kprintf("schedule from %u to %u with prio %u on core %u\n",
-			orig_task->id, curr_task->id, (uint32_t)curr_task->prio, CORE_ID);
+		//kprintf("schedule from %u to %u with prio %u on core %u\n",
+		//	orig_task->id, curr_task->id, (uint32_t)curr_task->prio, CORE_ID);
 
 		switch_task(curr_task->id);
 	}
diff --git a/kernel/tests.c b/kernel/tests.c
index 281329d5..8f8860a4 100644
--- a/kernel/tests.c
+++ b/kernel/tests.c
@@ -91,7 +91,7 @@ static int foo(void* arg)
 	if (!arg)
 		return 0;
 
-	for(i=0; i<5; i++) {
+	for(i=0; i<20; i++) {
 		kprintf("Message from core %d: %s\n", smp_id(), (char*) arg);
 		sleep(1);
 	}
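
---

Notes (sketches under stated assumptions; not part of the patch):

1) update_load() maintains a per-core 1-minute load average in fixed-point
arithmetic and refreshes it every 5 seconds (load_counter is reloaded with
5*TIMER_FREQ ticks). The standalone sketch below demonstrates the decay step.
Two assumptions: it uses Linux's classic loadavg constants (FSHIFT = 11,
EXP = 1884 ~= FIXED_1/e^(5s/60s)), because the patch's EXP value for
FSHIFT = 21 is not visible here, and it keeps the average scaled by FIXED_1
so the fractional part stays observable, whereas the patch stores a plain
task count.

	#include <stdint.h>
	#include <stdio.h>

	#define FSHIFT	11			/* bits of fixed-point precision */
	#define FIXED_1	(1 << FSHIFT)		/* 1.0 as fixed-point */
	#define EXP	1884			/* 1/exp(5sec/1min) as fixed-point */

	/* One 5-second step: load' = load*e + nr_tasks*(1-e), in fixed point.
	 * 'load' stays scaled by FIXED_1, so load >> FSHIFT is the average
	 * number of runnable tasks. */
	static uint32_t calc_load(uint32_t load, uint32_t nr_tasks)
	{
		load *= EXP;				/* decay the old average */
		load += nr_tasks * FIXED_1 * (FIXED_1 - EXP);	/* blend in the sample */
		return load >> FSHIFT;
	}

	int main(void)
	{
		uint32_t load = 0, step;

		/* three runnable tasks for one minute (12 samples, 5s apart) */
		for (step = 1; step <= 12; step++) {
			load = calc_load(load, 3);
			printf("after %2us: load = %u.%02u\n", 5 * step,
			       load >> FSHIFT,
			       ((load & (FIXED_1 - 1)) * 100) >> FSHIFT);
		}
		return 0;
	}

Because the patch shifts the scale back down on every step, its load field
holds whole tasks only; the comparison runqueues[i].load >
runqueues[core_id].load therefore reacts to differences of at least one
task, which appears to be the intended granularity for stealing.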
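2) The ready-task branch of load_balancing() is a tail-steal on a doubly
linked queue: unlink the victim's last task and append it behind the thief's
last task. The sketch below isolates just that pointer surgery; task_t and
task_queue_t are simplified stand-ins, not the kernel's types.

	#include <stddef.h>
	#include <stdio.h>

	typedef struct task {
		int id;
		struct task *prev, *next;
	} task_t;

	typedef struct {
		task_t *first, *last;
	} task_queue_t;

	/* Unlink the tail of 'victim' and append it to 'thief', mirroring
	 * the queue updates in load_balancing(). Returns the stolen task,
	 * or NULL when the victim's queue is empty. */
	static task_t* steal_last(task_queue_t* victim, task_queue_t* thief)
	{
		task_t* task = victim->last;

		if (!task)
			return NULL;

		/* remove the last element from the victim's queue */
		if (task->prev)
			task->prev->next = NULL;
		victim->last = task->prev;
		if (!victim->last)
			victim->first = NULL;

		/* append the task at the end of the thief's queue */
		task->prev = thief->last;
		task->next = NULL;
		if (thief->last)
			thief->last->next = task;
		else
			thief->first = task;
		thief->last = task;

		return task;
	}

	int main(void)
	{
		task_t a = { 1, NULL, NULL }, b = { 2, NULL, NULL };
		task_queue_t victim = { &a, &b }, thief = { NULL, NULL };

		a.next = &b;
		b.prev = &a;

		task_t* stolen = steal_last(&victim, &thief);
		printf("stole task %d; victim's last is now task %d\n",
		       stolen->id, victim.last->id);
		return 0;
	}

In the kernel both runqueue locks are held while this runs, and
balance_counter is reloaded with 5*TIMER_FREQ so the thief backs off for
about five seconds before stealing again. Note that the balancer always
locks runqueues[core_id] first and runqueues[i] second; two cores balancing
each other concurrently could therefore deadlock unless a global lock order
or a trylock is introduced.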
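3) Both the scheduler and the balancer pick a priority with last_set() on
prio_bitmap. The patch does not show that helper, so the following is only a
plausible x86 definition (an assumption, not MetalSVM's actual code): a
bsr-based most-significant-bit scan returning 0 for an empty bitmap, which
matches the queue[prio-1] indexing as long as priority bit 0 is never set.

	#include <stdint.h>

	static inline uint32_t last_set(uint32_t i)
	{
		uint32_t ret;

		if (!i)
			return 0;	/* empty bitmap: no runnable priority */

		/* index of the most-significant set bit */
		asm volatile ("bsr %1, %0" : "=r" (ret) : "r" (i) : "cc");

		return ret;
	}

Under that reading, the new BUILTIN_EXPECT(prio > MAX_PRIO, 0) guard in
scheduler() can only fire when prio_bitmap is corrupted (for example by an
unsynchronized update from another core), so falling back to prio = 0, i.e.
the idle path, is a safe default.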