diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index ad9e9f36..08f925c3 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -268,7 +268,7 @@ static inline void tlb_flush(void) static inline uint32_t read_eflags(void) { uint32_t result; - asm volatile ("pushf; pop $0" : "=r"(result)); + asm volatile ("pushf; pop %0" : "=r"(result)); return result; } diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index da4c65f4..ca860d4f 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -320,6 +320,8 @@ void smp_start(uint32_t id) // enable additional cpu features cpu_detection(); + kprintf("CR0 of core %u: 0x%x\n", apic_cpu_id(), read_cr0()); + smp_main(); // idle loop @@ -355,6 +357,8 @@ int smp_init(void) if (ncores <= 1) return -EINVAL; + kprintf("CR0 of core %u: 0x%x\n", apic_cpu_id(), read_cr0()); + for(i=1; (i 1 - if ((atomic_int32_read(&cpu_online) > 1) && (timer_ticks % (TIMER_FREQ/5) == 0)) + if (atomic_int32_read(&cpu_online) > 1) load_balancing(); #endif } diff --git a/include/metalsvm/tasks.h b/include/metalsvm/tasks.h index 3f4589cb..55056cbd 100644 --- a/include/metalsvm/tasks.h +++ b/include/metalsvm/tasks.h @@ -92,6 +92,11 @@ tid_t wait(int32_t* result); */ void update_load(void); +/** @brief Print the current cpu load + * + */ +void dump_load(void); + #if MAX_CORES > 1 /** @brief Load balancer * @@ -103,7 +108,8 @@ void load_balancing(void); /** @brief Task switcher * - * Timer-interrupted use of this function for task switching */ + * Timer-interrupted use of this function for task switching + */ void scheduler(void); /** @brief Wake up a blocked task diff --git a/include/metalsvm/tasks_types.h b/include/metalsvm/tasks_types.h index 6813896e..3e0253cd 100644 --- a/include/metalsvm/tasks_types.h +++ b/include/metalsvm/tasks_types.h @@ -123,7 +123,7 @@ typedef struct { /// total number of tasks in the queue uint32_t nr_tasks; // current load = average number of tasks in the queue (1-minute average) - uint32_t load; + uint32_t load[3]; // help counter to determine the the cpu load int32_t load_counter; // help counter to avoid "over balancing" diff --git a/kernel/tasks.c b/kernel/tasks.c index 039d7ad1..db30571f 100644 --- a/kernel/tasks.c +++ b/kernel/tasks.c @@ -53,11 +53,11 @@ static task_t task_table[MAX_TASKS] = { \ static spinlock_irqsave_t table_lock = SPINLOCK_IRQSAVE_INIT; #if MAX_CORES > 1 static runqueue_t runqueues[MAX_CORES] = { \ - [0] = {task_table+0, NULL, 0, 0, 0, 0, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_IRQSAVE_INIT}, \ - [1 ... MAX_CORES-1] = {NULL, NULL, 0, 0, 0, 0, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_IRQSAVE_INIT}}; + [0] = {task_table+0, NULL, 0, {[0 ... 2] = 0}, TIMER_FREQ/5, TIMER_FREQ/2, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_IRQSAVE_INIT}, \ + [1 ... MAX_CORES-1] = {NULL, NULL, 0, {[0 ... 2] = 0}, TIMER_FREQ/5, TIMER_FREQ/2, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_IRQSAVE_INIT}}; #else static runqueue_t runqueues[1] = { \ - [0] = {task_table+0, NULL, 0, 0, 0, 0, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_IRQSAVE_INIT}}; + [0] = {task_table+0, NULL, 0, {[0 ... 2] = 0}, TIMER_FREQ/5, TIMER_FREQ/2, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_IRQSAVE_INIT}}; #endif DEFINE_PER_CORE(task_t*, current_task, task_table+0); @@ -1079,63 +1079,96 @@ int set_timer(uint64_t deadline) return ret; } -#define FSHIFT 21 /* nr of bits of precision (e.g. 11) */ -#define FIXED_1 (1< 0) - runqueues[core_id].balance_counter--; - if (runqueues[core_id].load_counter < 0) { - runqueues[core_id].load_counter += 5*TIMER_FREQ; + + if (runqueues[core_id].load_counter <= 0) { + runqueues[core_id].load_counter += TIMER_FREQ/5; spinlock_irqsave_lock(&runqueues[core_id].lock); - runqueues[core_id].load *= EXP; - runqueues[core_id].load += runqueues[core_id].nr_tasks*(FIXED_1-EXP); - runqueues[core_id].load >>= FSHIFT; + runqueues[core_id].load[0] *= EXP_1; + runqueues[core_id].load[0] += (runqueues[core_id].nr_tasks *FIXED_1) * (FIXED_1 - EXP_1); + runqueues[core_id].load[0] >>= FSHIFT; + runqueues[core_id].load[1] *= EXP_5; + runqueues[core_id].load[1] += (runqueues[core_id].nr_tasks *FIXED_1) * (FIXED_1 - EXP_5); + runqueues[core_id].load[1] >>= FSHIFT; + runqueues[core_id].load[2] *= EXP_15; + runqueues[core_id].load[2] += (runqueues[core_id].nr_tasks *FIXED_1) * (FIXED_1 - EXP_15); + runqueues[core_id].load[2] >>= FSHIFT; spinlock_irqsave_unlock(&runqueues[core_id].lock); - //kprintf("load of core %u: %u, %u\n", core_id, runqueues[core_id].load, runqueues[core_id].nr_tasks); + //kprintf("load of core %u: %u, %u, %u, %u\n", core_id, runqueues[core_id].load[0], runqueues[core_id].load[1], runqueues[core_id].load[2], runqueues[core_id].nr_tasks); } } #if MAX_CORES > 1 extern atomic_int32_t cpu_online; +#endif +void dump_load(void) +{ + uint32_t i; +#if MAX_CORES > 1 + uint32_t ncores = atomic_int32_read(&cpu_online); +#else + uint32_t ncores = 1; +#endif + + for(i=0; i> FSHIFT, + ((runqueues[i].load[0] & ((1 << FSHIFT) - 1)) * 100) / (1 << FSHIFT), + runqueues[i].load[1] >> FSHIFT, + ((runqueues[i].load[1] & ((1 << FSHIFT) - 1)) * 100) / (1 << FSHIFT), + runqueues[i].load[2] >> FSHIFT, + ((runqueues[i].load[2] & ((1 << FSHIFT) - 1)) * 100) / (1 << FSHIFT)); + } +} + +#if MAX_CORES > 1 void load_balancing(void) { -#if 0 +#if 1 uint32_t i, core_id = CORE_ID; uint32_t prio; task_t* task; - spinlock_lock(&runqueues[core_id].lock); + spinlock_irqsave_lock(&runqueues[core_id].lock); for(i=0; (i runqueues[core_id].load) { - kprintf("Try to steal a task from core %u (load %u) to %u (load %u)\n", i, runqueues[i].load, core_id, runqueues[core_id].load); - kprintf("Task on core %u: %u, core %u, %u\n", i, runqueues[i].nr_tasks, core_id, runqueues[i].nr_tasks); + spinlock_irqsave_lock(&runqueues[i].lock); + if ((runqueues[i].load[0] >> (FSHIFT-1)) > (runqueues[core_id].load[0] >> (FSHIFT-1))) { + //kprintf("Try to steal a task from core %u (load %u) to %u (load %u)\n", i, runqueues[i].load[0], core_id, runqueues[core_id].load[0]); + //kprintf("Task on core %u: %u, core %u, %u\n", i, runqueues[i].nr_tasks, core_id, runqueues[i].nr_tasks); - prio = last_set(runqueues[i].prio_bitmap); - if (prio) { + prio = lsb(runqueues[i].prio_bitmap); + if (prio < sizeof(size_t)*8) { // steal a ready task task = runqueues[i].queue[prio-1].last; - kprintf("Try to steal a ready task %d\n", task->id); + kprintf("Core %u steals the task %d form %u with prio %u\n", core_id, task->id, i, prio); // remove last element from queue i if (task->prev) task->prev->next = NULL; - runqueues[i].queue[prio-1].last = task->prev; - if (!runqueues[i].queue[prio-1].last) - runqueues[i].queue[prio-1].first = NULL; + if (runqueues[i].queue[prio-1].first == task) { + runqueues[i].queue[prio-1].first = runqueues[i].queue[prio-1].last = NULL; + runqueues[i].prio_bitmap &= ~(1 << prio); + } else runqueues[i].queue[prio-1].last = task->prev; // add task at the end of queue core_id if (!runqueues[core_id].queue[prio-1].last) { @@ -1147,12 +1180,13 @@ void load_balancing(void) runqueues[core_id].queue[prio-1].last = task; task->next = NULL; } + runqueues[core_id].prio_bitmap |= (1 << prio); // update task counters runqueues[core_id].nr_tasks++; runqueues[i].nr_tasks--; - runqueues[core_id].balance_counter = 5*TIMER_FREQ; - } else { + runqueues[core_id].balance_counter = TIMER_FREQ/2; + } /*else { task_t* tmp; // steal a blocked task @@ -1160,7 +1194,7 @@ void load_balancing(void) if (!task) // Ups, found no valid task to steal goto no_task_found; - kprintf("Try to steal blocked task %d\n", task->id); + kprintf("Core %u steals the blocked task %d from %u with prio %u\n", core_id, task->id, i, task->prio); // remove first timer from queue i if (runqueues[i].timers.first == runqueues[i].timers.last) @@ -1195,15 +1229,17 @@ void load_balancing(void) task->last_core = CORE_ID; // update task counters - runqueues[core_id].nr_tasks++; - runqueues[i].nr_tasks--; - runqueues[core_id].balance_counter = 5*TIMER_FREQ; - } + runqueues[core_id].balance_counter = TIMER_FREQ/2; + }*/ } -no_task_found: - spinlock_unlock(&runqueues[i].lock); +//no_task_found: + spinlock_irqsave_unlock(&runqueues[i].lock); } - spinlock_unlock(&runqueues[core_id].lock); + + if (runqueues[core_id].balance_counter <= 0) + runqueues[core_id].balance_counter = TIMER_FREQ/2; + + spinlock_irqsave_unlock(&runqueues[core_id].lock); #endif } #endif @@ -1271,6 +1307,8 @@ void scheduler(void) prio = msb(runqueues[core_id].prio_bitmap); // determines highest priority #if MAX_CORES > 1 if (prio >= sizeof(size_t)*8) { + // push load balancing + runqueues[core_id].balance_counter -= TIMER_FREQ/20; load_balancing(); prio = msb(runqueues[core_id].prio_bitmap); // retry... } diff --git a/kernel/tests.c b/kernel/tests.c index e8390fa6..c01d806b 100644 --- a/kernel/tests.c +++ b/kernel/tests.c @@ -87,7 +87,7 @@ static int foo(void* arg) return 0; for(i=0; i<5; i++) { - kprintf("Message from core %d: %s\n", smp_id(), (char*) arg); + kprintf("%s\n", (char*) arg); sleep(1); } @@ -262,7 +262,7 @@ static int join_test(void* arg) tid_t id, ret; int result = -1234; - create_kernel_task(&id, foo, "Hello from foo2", HIGH_PRIO); + create_kernel_task(&id, foo, "Hello from foo2", HIGH_PRIO-1); kprintf("Wait for child %u\n", id); do { @@ -278,7 +278,7 @@ int test_init(void) { // char* argv[] = {"/bin/mshell", NULL}; char* argv[] = {"/bin/tests", NULL}; -// char* server_argv[] = {"/bin/server", "6789", NULL}; + char* server_argv[] = {"/bin/server", "6789", NULL}; // char* client_argv[] = {"/bin/client", "127.0.0.1", "6789", NULL}; sem_init(&producing, 1);