From 17106b7a7445c6be223cf414995075106dc3ed06 Mon Sep 17 00:00:00 2001 From: Jacek Galowicz Date: Mon, 9 Apr 2012 20:01:39 +0200 Subject: [PATCH 01/12] Added a context switch measurement scenario from the operating systems lecture --- apps/tests.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/apps/tests.c b/apps/tests.c index d6b8a5eb..8d69133a 100644 --- a/apps/tests.c +++ b/apps/tests.c @@ -301,6 +301,26 @@ static int pi(void* arg) return 0; } +static int measure_ctx_switch(void) +{ + uint64_t max = 0; + uint64_t start, t1, t2; + uint64_t freq = get_cpu_frequency(); + + start = t1 = rdtsc(); + do { + asm volatile ("cpuid"); + t2 = rdtsc(); + if (t2 - t1 > max) + max = t2 - t1; + t1 = t2; + } while(t2-start < 10*freq); + + kprintf("maximum gap: %llu ticks\n", max); + + return 0; + } + int test_init(void) { // char* argv[] = {"/bin/mshell", NULL}; @@ -312,8 +332,9 @@ int test_init(void) sem_init(&consuming, 0); mailbox_int32_init(&mbox); - create_kernel_task(NULL, foo, "Hello from foo1", NORMAL_PRIO); - create_kernel_task(NULL, join_test, NULL, NORMAL_PRIO); + create_kernel_task(NULL, measure_ctx_switch, NULL, NORMAL_PRIO); + //create_kernel_task(NULL, foo, "Hello from foo1", NORMAL_PRIO); + //create_kernel_task(NULL, join_test, NULL, NORMAL_PRIO); //create_kernel_task(NULL, producer, , NORMAL_PRIO); //create_kernel_task(NULL, consumer, NULL, NORMAL_PRIO); //create_kernel_task(NULL, mail_ping, NULL, NORMAL_PRIO); @@ -322,7 +343,7 @@ int test_init(void) //create_kernel_task(NULL, pi, NULL, NORMAL_PRIO); //create_kernel_task(NULL, laplace, NULL, NORMAL_PRIO); //create_user_task(NULL, "/bin/hello", argv); - create_user_task(NULL, "/bin/tests", argv); + //create_user_task(NULL, "/bin/tests", argv); //create_user_task(NULL, "/bin/jacobi", argv); //create_user_task(NULL, "/bin/mshell", argv); //create_user_task(NULL, "/bin/jacobi", argv); From f4950981b20b0b74166c56932728785c33f225f6 Mon Sep 17 00:00:00 2001 From: Jacek 
Galowicz Date: Mon, 9 Apr 2012 20:02:21 +0200 Subject: [PATCH 02/12] Added a define constant for task switching modes. --- include/metalsvm/config.h.example | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/metalsvm/config.h.example b/include/metalsvm/config.h.example index 90f8b7c8..32ed2475 100644 --- a/include/metalsvm/config.h.example +++ b/include/metalsvm/config.h.example @@ -42,6 +42,8 @@ extern "C" { #define VIDEO_MEM_ADDR 0xB8000 // the video memora address #define SMP_SETUP_ADDR 0x07000 +#define SW_TASK_SWITCH + #define BYTE_ORDER LITTLE_ENDIAN /* From 49533df72ec8ed97083fba80561fdfb9e04e8409 Mon Sep 17 00:00:00 2001 From: Jacek Galowicz Date: Wed, 11 Apr 2012 08:41:58 +0200 Subject: [PATCH 03/12] Made the interrupt entry also save the registers DS to GS. This may be removed in the future for speed purposes. --- arch/x86/include/asm/stddef.h | 12 ++++++++++++ arch/x86/kernel/entry.asm | 9 +++++++++ 2 files changed, 21 insertions(+) diff --git a/arch/x86/include/asm/stddef.h b/arch/x86/include/asm/stddef.h index f3ef4468..581c72ab 100644 --- a/arch/x86/include/asm/stddef.h +++ b/arch/x86/include/asm/stddef.h @@ -70,6 +70,18 @@ typedef unsigned int wint_t; * All the interrupt handler routines use this type for their only parameter. 
*/ struct state { + /// GS register + unsigned short gs; + unsigned short __gs; + /// FS register + unsigned short fs; + unsigned short __fs; + /// ES register + unsigned short es; + unsigned short __es; + /// ds register + unsigned short ds; + unsigned short __ds; /// EDI register unsigned int edi; /// ESI register diff --git a/arch/x86/kernel/entry.asm b/arch/x86/kernel/entry.asm index 8d34dff9..8f9a3973 100644 --- a/arch/x86/kernel/entry.asm +++ b/arch/x86/kernel/entry.asm @@ -767,13 +767,22 @@ extern irq_handler common_stub: pusha + push ds + push es + push fs + push gs ; use the same handler for interrupts and exceptions push esp call irq_handler add esp, 4 + pop gs + pop fs + pop es + pop gs popa + add esp, 8 iret From f4a7d6bd3a86b4c504acc716ecc39e8743a2048e Mon Sep 17 00:00:00 2001 From: Jacek Galowicz Date: Sat, 14 Apr 2012 00:34:09 +0200 Subject: [PATCH 04/12] The context is now switched by software. - Cleanup to be done - Arch_fork does NOT work in this state - Multiprocessing support not yet implemented --- apps/tests.c | 3 +- arch/x86/kernel/entry.asm | 35 +++++++++++++- arch/x86/kernel/gdt.c | 84 ++++++++++++++++++++++++++++++++++ include/metalsvm/tasks_types.h | 3 ++ kernel/tasks.c | 15 +++++- 5 files changed, 137 insertions(+), 3 deletions(-) diff --git a/apps/tests.c b/apps/tests.c index 8d69133a..7f27eac3 100644 --- a/apps/tests.c +++ b/apps/tests.c @@ -333,7 +333,8 @@ int test_init(void) mailbox_int32_init(&mbox); create_kernel_task(NULL, measure_ctx_switch, NULL, NORMAL_PRIO); - //create_kernel_task(NULL, foo, "Hello from foo1", NORMAL_PRIO); + create_kernel_task(NULL, foo, "Hello from foo1", NORMAL_PRIO); + create_kernel_task(NULL, foo, "Hello from foo2", NORMAL_PRIO); //create_kernel_task(NULL, join_test, NULL, NORMAL_PRIO); //create_kernel_task(NULL, producer, , NORMAL_PRIO); //create_kernel_task(NULL, consumer, NULL, NORMAL_PRIO); diff --git a/arch/x86/kernel/entry.asm b/arch/x86/kernel/entry.asm index 8f9a3973..a0fe2694 100644 --- 
a/arch/x86/kernel/entry.asm +++ b/arch/x86/kernel/entry.asm @@ -507,6 +507,39 @@ hack: jmp 0x00 : 0xDEADBEAF ret +global sw_switch_context +sw_switch_context: + ;pushf + push DWORD 0x8 + push DWORD [esp+4] + push DWORD 0 + push DWORD 0xc0edbabe + pusha + push ds + push es + push fs + push gs + pushf + pop eax + mov [esp+64], eax + + mov ecx, [esp+68] + mov [ecx], esp + mov ecx, [esp+72] + mov esp, [ecx] + +sw_rollback: + + pop gs + pop fs + pop es + pop ds + popa + + add esp, 8 + iret + + ; 32: IRQ0 irq0: ; irq0 - irq15 are registered as "Interrupt Gate" @@ -780,7 +813,7 @@ common_stub: pop gs pop fs pop es - pop gs + pop ds popa add esp, 8 diff --git a/arch/x86/kernel/gdt.c b/arch/x86/kernel/gdt.c index 12631a65..ad5dd26d 100644 --- a/arch/x86/kernel/gdt.c +++ b/arch/x86/kernel/gdt.c @@ -27,7 +27,11 @@ #include gdt_ptr_t gp; +#ifdef SW_TASK_SWITCH +static tss_t task_state_segments[MAX_CORES] __attribute__ ((aligned (PAGE_SIZE))); +#else static tss_t task_state_segments[MAX_TASKS] __attribute__ ((aligned (PAGE_SIZE))); +#endif static unsigned char kstacks[MAX_TASKS][KERNEL_STACK_SIZE] __attribute__ ((aligned (PAGE_SIZE))) = {[0 ... MAX_TASKS-1][0 ... KERNEL_STACK_SIZE-1] = 0xCD}; uint32_t default_stack_pointer = (uint32_t) kstacks[0] + KERNEL_STACK_SIZE - sizeof(size_t); // currently, our kernel has full access to the ioports @@ -71,6 +75,7 @@ int register_task(task_t* task) { int arch_fork(task_t* task) { +#ifndef SW_TASK_SWITCH uint16_t cs = 0x08; uint16_t ds = 0x10; uint32_t id; @@ -120,6 +125,7 @@ int arch_fork(task_t* task) asm volatile ("pushf; pop %%eax" : "=a"(task_state_segments[id].eflags)); // This will be the entry point for the new task. 
asm volatile ("call read_eip" : "=a"(task_state_segments[id].eip)); +#endif return 0; } @@ -130,10 +136,40 @@ int create_default_frame(task_t* task, internal_entry_point_t ep, void* arg) uint16_t ds = 0x10; uint32_t id; +#ifdef SW_TASK_SWITCH + uint32_t *stack; + struct state *stptr; + uint32_t short_state_size = sizeof(struct state)/sizeof(uint32_t) -2; +#endif + if (BUILTIN_EXPECT(!task, 0)) return -EINVAL; id = task->id; +#ifdef SW_TASK_SWITCH + memset(kstacks[id], 0xCD, KERNEL_STACK_SIZE); + + stack = kstacks[id] +KERNEL_STACK_SIZE -sizeof(uint32_t); + *stack-- = 0xDEADBEEF; + *stack-- = arg; + *stack = leave_kernel_task; + stack -= short_state_size; + + stptr = stack; + memset(stptr, 0x00, short_state_size*sizeof(uint32_t)); + stptr->gs = stptr->fs = stptr->es = stptr->ds = ds; + stptr->esp = stack +short_state_size; + stptr->int_no = 0xB16B00B5; + stptr->error = 0xC03DB4B3; + + stptr->eip = ep; + stptr->cs = cs; + stptr->eflags = 0x1002; + //stptr->ss = ds; + //stptr->useresp = kstacks[id] +KERNEL_STACK_SIZE - 3*sizeof(uint32_t); + + task->stack = stack; +#else /* reset buffers */ memset(task_state_segments+id, 0x00, sizeof(tss_t)); memset(kstacks[id], 0xCD, KERNEL_STACK_SIZE); @@ -161,9 +197,48 @@ int create_default_frame(task_t* task, internal_entry_point_t ep, void* arg) /* setup for the kernel stack frame */ task_state_segments[id].ss0 = 0x10; task_state_segments[id].esp0 = (uint32_t) kstacks[id] + KERNEL_STACK_SIZE - sizeof(size_t); +#endif + return 0; +} + +#ifdef SW_TASK_SWITCH +int create_default_tss(int id) +{ + uint16_t cs = 0x08; + uint16_t ds = 0x10; + + /* reset buffers */ + memset(task_state_segments+id, 0x00, sizeof(tss_t)); + + /* set default values of all registers */ + task_state_segments[id].cs = cs; + task_state_segments[id].ss = ds; + task_state_segments[id].ds = ds; + task_state_segments[id].fs = ds; + task_state_segments[id].gs = ds; + task_state_segments[id].es = ds; + task_state_segments[id].eflags = 0x1002; // 0x1202; + 
//task_state_segments[id].cr3 = (uint32_t) (virt_to_phys((size_t)task->pgd)); + //task_state_segments[id].eip = (uint32_t) ep; + task_state_segments[id].esp = (uint32_t) kstacks[id] + KERNEL_STACK_SIZE - sizeof(size_t); + + /* build default stack frame */ + *((size_t*)task_state_segments[id].esp) = 0xDEADBEAF; /* dead-end */ + /* + task_state_segments[id].ebp = task_state_segments[id].esp; + task_state_segments[id].esp -= sizeof(size_t); + *((size_t*)task_state_segments[id].esp) = (size_t) arg; + task_state_segments[id].esp -= sizeof(size_t); + *((size_t*)task_state_segments[id].esp) = (size_t) leave_kernel_task; + */ + + /* setup for the kernel stack frame */ + task_state_segments[id].ss0 = 0x10; + task_state_segments[id].esp0 = (uint32_t) kstacks[id] + KERNEL_STACK_SIZE - sizeof(size_t); return 0; } +#endif /* Setup a descriptor in the Global Descriptor Table */ static void gdt_set_gate(int num, unsigned long base, unsigned long limit, @@ -203,7 +278,11 @@ void gdt_install(void) { unsigned int i; +#ifdef SW_TASK_SWITCH + memset(task_state_segments, 0x00, MAX_CORES*sizeof(tss_t)); +#else memset(task_state_segments, 0x00, MAX_TASKS*sizeof(tss_t)); +#endif /* Setup the GDT pointer and limit */ gp.limit = (sizeof(gdt_entry_t) * GDT_ENTRIES) - 1; @@ -247,7 +326,12 @@ void gdt_install(void) /* * Create TSS for each task at ring0 (we use these segments for task switching) */ +#ifdef SW_TASK_SWITCH + for(i=0; i 1 @@ -1374,14 +1380,21 @@ get_task_out: //kprintf("schedule from %u to %u with prio %u on core %u\n", // orig_task->id, curr_task->id, (uint32_t)curr_task->prio, CORE_ID); +#ifndef SW_TASK_SWITCH switch_task(curr_task->id); +#else + write_cr3(virt_to_phys((size_t)curr_task->pgd)); +#endif finish_task_switch(0); +#ifdef SW_TASK_SWITCH + sw_switch_context(&orig_task->stack, &curr_task->stack); +#endif } } void reschedule(void) { uint32_t flags = irq_nested_disable(); - scheduler(); + scheduler(); irq_nested_enable(flags); } From 
43fc6c94e59aaa79166f075ba9f7ee44c3c12657 Mon Sep 17 00:00:00 2001 From: Jacek Galowicz Date: Sat, 14 Apr 2012 09:18:36 +0200 Subject: [PATCH 05/12] Removed DS, ES, FS and GS registers from task switch routines --- arch/x86/include/asm/stddef.h | 12 ------------ arch/x86/kernel/entry.asm | 24 +++--------------------- arch/x86/kernel/gdt.c | 1 - 3 files changed, 3 insertions(+), 34 deletions(-) diff --git a/arch/x86/include/asm/stddef.h b/arch/x86/include/asm/stddef.h index 581c72ab..f3ef4468 100644 --- a/arch/x86/include/asm/stddef.h +++ b/arch/x86/include/asm/stddef.h @@ -70,18 +70,6 @@ typedef unsigned int wint_t; * All the interrupt handler routines use this type for their only parameter. */ struct state { - /// GS register - unsigned short gs; - unsigned short __gs; - /// FS register - unsigned short fs; - unsigned short __fs; - /// ES register - unsigned short es; - unsigned short __es; - /// ds register - unsigned short ds; - unsigned short __ds; /// EDI register unsigned int edi; /// ESI register diff --git a/arch/x86/kernel/entry.asm b/arch/x86/kernel/entry.asm index a0fe2694..637d5cdc 100644 --- a/arch/x86/kernel/entry.asm +++ b/arch/x86/kernel/entry.asm @@ -515,25 +515,16 @@ sw_switch_context: push DWORD 0 push DWORD 0xc0edbabe pusha - push ds - push es - push fs - push gs pushf pop eax - mov [esp+64], eax + mov [esp+48], eax - mov ecx, [esp+68] + mov ecx, [esp+52] mov [ecx], esp - mov ecx, [esp+72] + mov ecx, [esp+56] mov esp, [ecx] sw_rollback: - - pop gs - pop fs - pop es - pop ds popa add esp, 8 @@ -800,22 +791,13 @@ extern irq_handler common_stub: pusha - push ds - push es - push fs - push gs ; use the same handler for interrupts and exceptions push esp call irq_handler add esp, 4 - pop gs - pop fs - pop es - pop ds popa - add esp, 8 iret diff --git a/arch/x86/kernel/gdt.c b/arch/x86/kernel/gdt.c index ad5dd26d..5378515d 100644 --- a/arch/x86/kernel/gdt.c +++ b/arch/x86/kernel/gdt.c @@ -157,7 +157,6 @@ int create_default_frame(task_t* task, 
internal_entry_point_t ep, void* arg) stptr = stack; memset(stptr, 0x00, short_state_size*sizeof(uint32_t)); - stptr->gs = stptr->fs = stptr->es = stptr->ds = ds; stptr->esp = stack +short_state_size; stptr->int_no = 0xB16B00B5; stptr->error = 0xC03DB4B3; From 0984eb593f3d1a4c7dfdc6861fd2b7c42b766a55 Mon Sep 17 00:00:00 2001 From: Jacek Galowicz Date: Sat, 14 Apr 2012 09:57:18 +0200 Subject: [PATCH 06/12] Commented everything --- apps/tests.c | 4 ++-- arch/x86/kernel/entry.asm | 28 +++++++++++++++++++--------- arch/x86/kernel/gdt.c | 25 +++++++++++++++++++++++-- kernel/tasks.c | 3 +-- 4 files changed, 45 insertions(+), 15 deletions(-) diff --git a/apps/tests.c b/apps/tests.c index 7f27eac3..26c1aba8 100644 --- a/apps/tests.c +++ b/apps/tests.c @@ -333,8 +333,8 @@ int test_init(void) mailbox_int32_init(&mbox); create_kernel_task(NULL, measure_ctx_switch, NULL, NORMAL_PRIO); - create_kernel_task(NULL, foo, "Hello from foo1", NORMAL_PRIO); - create_kernel_task(NULL, foo, "Hello from foo2", NORMAL_PRIO); + //create_kernel_task(NULL, foo, "Hello from foo1", NORMAL_PRIO); + //create_kernel_task(NULL, foo, "Hello from foo2", NORMAL_PRIO); //create_kernel_task(NULL, join_test, NULL, NORMAL_PRIO); //create_kernel_task(NULL, producer, , NORMAL_PRIO); //create_kernel_task(NULL, consumer, NULL, NORMAL_PRIO); diff --git a/arch/x86/kernel/entry.asm b/arch/x86/kernel/entry.asm index 637d5cdc..b1082278 100644 --- a/arch/x86/kernel/entry.asm +++ b/arch/x86/kernel/entry.asm @@ -507,22 +507,32 @@ hack: jmp 0x00 : 0xDEADBEAF ret +; This procedure is used by scheduler() to switch tasks. +; It is the software-equivalent to the hw-procedure switch_task from above. 
+; Call it in C with the following arguments: +; sw_switch_context(&old_tasks_stack_pointer, &new_tasks_stack_pointer) global sw_switch_context sw_switch_context: - ;pushf - push DWORD 0x8 - push DWORD [esp+4] - push DWORD 0 - push DWORD 0xc0edbabe - pusha + ; The stack layout looks like this: + ; [new stack pointer] + ; [old stack pointer] + ;pushf ; [this procedure's return address] overwritten by: EFLAGS (*1) + push DWORD 0x8 ; CS + push DWORD [esp+4] ; EIP + push DWORD 0 ; Interrupt number + push DWORD 0xc0edbabe ; Error code + pusha ; Registers... + ; ---- This will be popped off by iret later. + pushf pop eax - mov [esp+48], eax + mov [esp+48], eax ; Move EFLAGS to position (*1) by overwriting + ; the return address of sw_switch_context() mov ecx, [esp+52] - mov [ecx], esp + mov [ecx], esp ; Save stack position in old task structure mov ecx, [esp+56] - mov esp, [ecx] + mov esp, [ecx] ; Load new stack sw_rollback: popa diff --git a/arch/x86/kernel/gdt.c b/arch/x86/kernel/gdt.c index 5378515d..1ec3c24f 100644 --- a/arch/x86/kernel/gdt.c +++ b/arch/x86/kernel/gdt.c @@ -149,10 +149,29 @@ int create_default_frame(task_t* task, internal_entry_point_t ep, void* arg) #ifdef SW_TASK_SWITCH memset(kstacks[id], 0xCD, KERNEL_STACK_SIZE); + /* The difference between setting up a task for SW-task-switching + * and not for HW-task-switching is setting up a stack and not a TSS. + * This is the stack which will be activated and popped off for iret later. + */ stack = kstacks[id] +KERNEL_STACK_SIZE -sizeof(uint32_t); + + /* The next three things on the stack are a marker for debugging purposes, ... */ *stack-- = 0xDEADBEEF; + /* the first-function-to-be-called's arguments, ... */ *stack-- = arg; + /* and the "caller" we shall return to. + * This procedure cleans the task after exit. */ *stack = leave_kernel_task; + + /* Next bunch on the stack is the initial register state. + * The stack must look like the stack of a task which was + * scheduled away previously. 
*/ + + /* short_state_size was introduced because the convenient "struct state" + * is used for filling the stack with initial values. But the problem is that + * "iret" will not remove the last two entries from the stack, since we're + * "returning" from kernel space to kernel space. Therefore it is shortened + * by its last two entries. */ stack -= short_state_size; stptr = stack; @@ -161,12 +180,14 @@ int create_default_frame(task_t* task, internal_entry_point_t ep, void* arg) stptr->int_no = 0xB16B00B5; stptr->error = 0xC03DB4B3; + /* The instruction pointer shall be set on the first function to be called + * after IRETing */ stptr->eip = ep; stptr->cs = cs; stptr->eflags = 0x1002; - //stptr->ss = ds; - //stptr->useresp = kstacks[id] +KERNEL_STACK_SIZE - 3*sizeof(uint32_t); + /* Set the task's stack pointer entry to the stack we have crafted right now. + * This is the pointer which will be used by sw_switch_task(old_task, new_task) later.*/ task->stack = stack; #else /* reset buffers */ diff --git a/kernel/tasks.c b/kernel/tasks.c index 34bc7cb6..c7c560df 100644 --- a/kernel/tasks.c +++ b/kernel/tasks.c @@ -1382,11 +1382,10 @@ get_task_out: // orig_task->id, curr_task->id, (uint32_t)curr_task->prio, CORE_ID); #ifndef SW_TASK_SWITCH switch_task(curr_task->id); -#else - write_cr3(virt_to_phys((size_t)curr_task->pgd)); #endif finish_task_switch(0); #ifdef SW_TASK_SWITCH + write_cr3(virt_to_phys((size_t)curr_task->pgd)); sw_switch_context(&orig_task->stack, &curr_task->stack); #endif } From 9b36a72f5f70ad456b2e0159261c6410a972ee13 Mon Sep 17 00:00:00 2001 From: Jacek Galowicz Date: Sun, 15 Apr 2012 19:40:51 +0200 Subject: [PATCH 07/12] Wrote a benchmark for measuring the context switch time. 
--- apps/tests.c | 72 ++++++++++++++++++++++++++++++++++++++++---------- kernel/tasks.c | 3 +-- 2 files changed, 59 insertions(+), 16 deletions(-) diff --git a/apps/tests.c b/apps/tests.c index 26c1aba8..c84d117c 100644 --- a/apps/tests.c +++ b/apps/tests.c @@ -301,22 +301,65 @@ static int pi(void* arg) return 0; } -static int measure_ctx_switch(void) -{ - uint64_t max = 0; - uint64_t start, t1, t2; - uint64_t freq = get_cpu_frequency(); +#define REPS 10000 - start = t1 = rdtsc(); - do { - asm volatile ("cpuid"); - t2 = rdtsc(); - if (t2 - t1 > max) - max = t2 - t1; - t1 = t2; - } while(t2-start < 10*freq); +volatile uint64_t t1, t2; +volatile int stop = !!0; +volatile int sid = 0; + +static int measure_ctx_switch(void* arg) +{ + int id = !!(int)arg; + int oid = !id; + uint64_t freq = get_cpu_frequency() *1000 *1000; + uint64_t diff, min = (uint64_t)-1, max = 0, avg = 0; + int i; + uint32_t a=0,b,c,d; + + // Size of a timeslice in ticks + uint64_t timeslice = freq / TIMER_FREQ; + + kprintf("ID: %d, ", id); +#ifdef SW_TASK_SWITCH + kprintf("Measuring SW task switch.\n"); +#else + kprintf("Measuring HW task switch.\n"); +#endif + + for (i=0; i < REPS && stop == 0; i++) { + while(id == sid && stop == 0) { + t2 = rdtsc(); + cpuid(0,&a,&b,&c,&d); + } + + cpuid(0,&a,&b,&c,&d); + diff = rdtsc() -t2; + + // The last measurement is garbage + if (stop) break; + // The first ones are garbage, too + if (i < 5) goto next_try; + if (diff >= timeslice) { + i--; + goto next_try; + } + + kprintf("%i: diff= %llu, i= %i\n", id, diff, i); + if (diff > max) max = diff; + if (diff < min) min = diff; + avg += diff; + +next_try: + sid = id; + } + avg /= i-5; + + stop = 1; kprintf("maximum gap: %llu ticks\n", max); + kprintf("minimum gap: %llu ticks\n", min); + kprintf("average gap: %llu ticks\n", avg); + kprintf("Timeslice size: %llu ticks\n", timeslice); return 0; } @@ -332,7 +375,8 @@ int test_init(void) sem_init(&consuming, 0); mailbox_int32_init(&mbox); - create_kernel_task(NULL, 
measure_ctx_switch, NULL, NORMAL_PRIO); + create_kernel_task(NULL, measure_ctx_switch, (int)0, NORMAL_PRIO); + create_kernel_task(NULL, measure_ctx_switch, (int)1, NORMAL_PRIO); //create_kernel_task(NULL, foo, "Hello from foo1", NORMAL_PRIO); //create_kernel_task(NULL, foo, "Hello from foo2", NORMAL_PRIO); //create_kernel_task(NULL, join_test, NULL, NORMAL_PRIO); diff --git a/kernel/tasks.c b/kernel/tasks.c index c7c560df..430b4cee 100644 --- a/kernel/tasks.c +++ b/kernel/tasks.c @@ -1378,8 +1378,7 @@ get_task_out: orig_task->flags &= ~TASK_FPU_USED; } - //kprintf("schedule from %u to %u with prio %u on core %u\n", - // orig_task->id, curr_task->id, (uint32_t)curr_task->prio, CORE_ID); + //kprintf("schedule from %u to %u with prio %u on core %u\n", orig_task->id, curr_task->id, (uint32_t)curr_task->prio, CORE_ID); #ifndef SW_TASK_SWITCH switch_task(curr_task->id); #endif From b9b962ea7347f822c2b3c18104ab5f20203b1f22 Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Mon, 21 May 2012 15:04:05 +0200 Subject: [PATCH 08/12] revise software multitasking => remove hardware multitasking --- Makefile.example | 14 +- apps/tests.c | 14 +- arch/x86/include/asm/tasks.h | 7 + arch/x86/kernel/entry.asm | 96 ++++++------- arch/x86/kernel/gdt.c | 200 ++++++++-------------------- arch/x86/kernel/irq.c | 15 ++- include/metalsvm/config.h.example | 2 - include/metalsvm/config.inc.example | 4 + include/metalsvm/tasks.h | 6 +- include/metalsvm/tasks_types.h | 23 ++-- kernel/init.c | 2 +- kernel/tasks.c | 41 ++---- 12 files changed, 168 insertions(+), 256 deletions(-) create mode 100644 include/metalsvm/config.inc.example diff --git a/Makefile.example b/Makefile.example index 365fdb54..1219a476 100644 --- a/Makefile.example +++ b/Makefile.example @@ -1,4 +1,4 @@ -TOPDIR := $(shell pwd) +TOPDIR = $(shell pwd) ARCH = x86 NAME = metalsvm LWIPDIRS = lwip/src/arch lwip/src/api lwip/src/core lwip/src/core/ipv4 lwip/src/netif @@ -31,14 +31,15 @@ READELF_FOR_TARGET = $(CROSSCOMPREFIX)readelf 
MAKE = make RM = rm -rf NASM = nasm +QEMU = qemu-system-i386 EMU = qemu GDB = gdb -NASMFLAGS = -felf32 -g +NASMFLAGS = -felf32 -g -i$(TOPDIR)/include/metalsvm/ INCLUDE = -I$(TOPDIR)/include -I$(TOPDIR)/arch/$(ARCH)/include -I$(TOPDIR)/lwip/src/include -I$(TOPDIR)/lwip/src/include/ipv4 -I$(TOPDIR)/drivers # Compiler options for final code CFLAGS = -g -m32 -march=i586 -Wall -O2 -fno-builtin -fstrength-reduce -fomit-frame-pointer -finline-functions -nostdinc $(INCLUDE) $(STACKPROT) -# Compiler options for debugging +# Compiler options for debuging #CFLAGS = -g -O -m32 -march=i586 -Wall -fno-builtin -DWITH_FRAME_POINTER -nostdinc $(INCLUDE) $(STACKPROT) ARFLAGS = rsv LDFLAGS = -T link.ld -z max-page-size=4096 --defsym __BUILD_DATE=$(shell date +'%Y%m%d') --defsym __BUILD_TIME=$(shell date +'%H%M%S') @@ -69,6 +70,7 @@ newlib: RANLIB_FOR_TARGET=$(RANLIB_FOR_TARGET) \ STRIP_FOR_TARGET=$(STRIP_FOR_TARGET) \ READELF_FOR_TARGET=$(READELF_FOR_TARGET) -C newlib + tools: $(MAKE) -C tools @@ -80,10 +82,10 @@ $(NAME).elf: $Q$(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $(NAME).elf qemu: newlib tools $(NAME).elf - qemu -monitor stdio -smp 2 -net nic,model=rtl8139 -net user,hostfwd=tcp::12345-:4711 -net dump -kernel metalsvm.elf -initrd tools/initrd.img + $(QEMU) -monitor stdio -smp 2 -net nic,model=rtl8139 -net user,hostfwd=tcp::12345-:4711 -net dump -kernel metalsvm.elf -initrd tools/initrd.img qemudbg: newlib tools $(NAME).elf - qemu -S -s -smp 2 -net nic,model=rtl8139 -net user,hostfwd=tcp::12345-:4711 -net dump -kernel metalsvm.elf -initrd tools/initrd.img + $(QEMU) -s -S -smp 2 -net nic,model=rtl8139 -net user,hostfwd=tcp::12345-:4711 -net dump -kernel metalsvm.elf -initrd tools/initrd.img gdb: $(NAME).elf make qemudbg > /dev/null & @@ -105,7 +107,7 @@ veryclean: clean @echo [CC] $@ $Q$(CC_FOR_TARGET) -c -D__KERNEL__ $(CFLAGS) -o $@ $< @echo [DEP] $*.dep - $Q$(CPP_FOR_TARGET) -MF $*.dep -MT $*.o -MM $(CFLAGS) $< + $Q$(CPP_FOR_TARGET) -MF $*.dep -MT $*.o -MM -D__KERNEL__ $(CFLAGS) $< 
%.o : %.asm diff --git a/apps/tests.c b/apps/tests.c index 7f877f88..e1711dee 100644 --- a/apps/tests.c +++ b/apps/tests.c @@ -432,11 +432,7 @@ static int measure_ctx_switch(void* arg) uint64_t timeslice = freq / TIMER_FREQ; kprintf("ID: %d, ", id); -#ifdef SW_TASK_SWITCH - kprintf("Measuring SW task switch.\n"); -#else - kprintf("Measuring HW task switch.\n"); -#endif + kprintf("Measuring SW task switching.\n"); for (i=0; i < REPS && stop == 0; i++) { while(id == sid && stop == 0) { @@ -487,9 +483,9 @@ int test_init(void) //sem_init(&consuming, 0); //mailbox_int32_init(&mbox); - create_kernel_task(NULL, measure_ctx_switch, (int)0, NORMAL_PRIO); - create_kernel_task(NULL, measure_ctx_switch, (int)1, NORMAL_PRIO); - //create_kernel_task(NULL, foo, "Hello from foo1", NORMAL_PRIO); + //create_kernel_task(NULL, measure_ctx_switch, (int)0, NORMAL_PRIO); + //create_kernel_task(NULL, measure_ctx_switch, (int)1, NORMAL_PRIO); + create_kernel_task(NULL, foo, "Hello from foo1", NORMAL_PRIO); //create_kernel_task(NULL, foo, "Hello from foo2", NORMAL_PRIO); //create_kernel_task(NULL, join_test, NULL, NORMAL_PRIO); //create_kernel_task(NULL, producer, , NORMAL_PRIO); @@ -502,7 +498,7 @@ int test_init(void) //create_kernel_task(NULL, laplace, NULL, NORMAL_PRIO); //create_kernel_task(NULL, jacobi, NULL, NORMAL_PRIO); //create_user_task(NULL, "/bin/hello", argv); - //create_user_task(NULL, "/bin/tests", argv); + create_user_task(NULL, "/bin/tests", argv); //create_user_task(NULL, "/bin/jacobi", argv); //create_user_task(NULL, "/bin/mshell", argv); //create_user_task(NULL, "/bin/jacobi", argv); diff --git a/arch/x86/include/asm/tasks.h b/arch/x86/include/asm/tasks.h index 9f45b691..e3ac8ad2 100644 --- a/arch/x86/include/asm/tasks.h +++ b/arch/x86/include/asm/tasks.h @@ -55,6 +55,13 @@ int arch_fork(task_t* task); */ void switch_task(uint32_t id); +/** + * @brief Switch to current task + * + * @param stack Pointer to the old stack pointer + */ +void switch_context(size_t** stack); + 
/** @brief Setup a default frame for a new task * * @param task Pointer to the task structure diff --git a/arch/x86/kernel/entry.asm b/arch/x86/kernel/entry.asm index b1082278..f3c6a6d4 100644 --- a/arch/x86/kernel/entry.asm +++ b/arch/x86/kernel/entry.asm @@ -21,6 +21,8 @@ ; perhaps setting up the GDT and segments. Please note that interrupts ; are disabled at this point: More on interrupts later! +%include "config.inc" + [BITS 32] ; We use a special name to map this section at the begin of our kernel ; => Multiboot needs its magic number at the begin of the kernel @@ -496,51 +498,6 @@ global apic_lint1 global apic_error global apic_svr -global switch_task -switch_task: - mov eax, [esp+4] - add ax, WORD 5 - mov bx, WORD 8 - mul bx - mov [hack+5], ax -hack: - jmp 0x00 : 0xDEADBEAF - ret - -; This procedure is used by scheduler() to switch tasks. -; It is the software-equivalent to the hw-procedure switch_task from above. -; Call it in C with the following arguments: -; sw_switch_context(&old_tasks_stack_pointer, &new_tasks_stack_pointer) -global sw_switch_context -sw_switch_context: - ; The stack layout looks like this: - ; [new stack pointer] - ; [old stack pointer] - ;pushf ; [this procedure's return address] overwritten by: EFLAGS (*1) - push DWORD 0x8 ; CS - push DWORD [esp+4] ; EIP - push DWORD 0 ; Interrupt number - push DWORD 0xc0edbabe ; Error code - pusha ; Registers... - ; ---- This will be popped off by iret later. 
- - pushf - pop eax - mov [esp+48], eax ; Move EFLAGS to position (*1) by overwriting - ; the return address of sw_switch_context() - - mov ecx, [esp+52] - mov [ecx], esp ; Save stack position in old task structure - mov ecx, [esp+56] - mov esp, [ecx] ; Load new stack - -sw_rollback: - popa - - add esp, 8 - iret - - ; 32: IRQ0 irq0: ; irq0 - irq15 are registered as "Interrupt Gate" @@ -798,7 +755,31 @@ apic_svr: jmp common_stub extern irq_handler +extern get_current_stack +extern finish_task_switch +extern apic_cpu_id +extern task_state_segments +global switch_context +ALIGN 4 +switch_context: + ; create on the stack a pseudo interrupt + ; afterwards, we switch to the task with iret + mov eax, [esp+4] ; on the stack is already the address to store the old esp + pushf ; EFLAGS + push DWORD 0x8 ; CS + push DWORD rollback ; EIP + push DWORD 0 ; Interrupt number + push DWORD 0xc0edbabe ; Error code + pusha ; Registers... + + jmp common_switch + +ALIGN 4 +rollback: + ret + +ALIGN 4 common_stub: pusha @@ -807,6 +788,31 @@ common_stub: call irq_handler add esp, 4 + cmp eax, 0 + je no_context_switch + +common_switch: + mov [eax], esp ; store old esp + call get_current_stack ; get new esp + xchg eax, esp + + ; determine TSS +%if MAX_CORES > 1 + call apic_cpu_id +%else + xor eax, eax +%endif + mov ecx, DWORD 0x68 + mul ecx + add eax, task_state_segments + add eax, DWORD 4 + ; set esp0 in TSS + mov [eax], esp + + ; call cleanup code + call finish_task_switch + +no_context_switch: popa add esp, 8 iret diff --git a/arch/x86/kernel/gdt.c b/arch/x86/kernel/gdt.c index 1ec3c24f..62982b0a 100644 --- a/arch/x86/kernel/gdt.c +++ b/arch/x86/kernel/gdt.c @@ -27,11 +27,7 @@ #include gdt_ptr_t gp; -#ifdef SW_TASK_SWITCH -static tss_t task_state_segments[MAX_CORES] __attribute__ ((aligned (PAGE_SIZE))); -#else -static tss_t task_state_segments[MAX_TASKS] __attribute__ ((aligned (PAGE_SIZE))); -#endif +tss_t task_state_segments[MAX_CORES] __attribute__ ((aligned (PAGE_SIZE))); static 
unsigned char kstacks[MAX_TASKS][KERNEL_STACK_SIZE] __attribute__ ((aligned (PAGE_SIZE))) = {[0 ... MAX_TASKS-1][0 ... KERNEL_STACK_SIZE-1] = 0xCD}; uint32_t default_stack_pointer = (uint32_t) kstacks[0] + KERNEL_STACK_SIZE - sizeof(size_t); // currently, our kernel has full access to the ioports @@ -49,6 +45,15 @@ extern void gdt_flush(void); */ extern void tss_switch(uint32_t id); +size_t* get_current_stack(void) +{ + task_t* curr_task = per_core(current_task); + + write_cr3(virt_to_phys((size_t)curr_task->pgd)); + + return curr_task->stack; +} + size_t get_stack(uint32_t id) { if (BUILTIN_EXPECT(id >= MAX_TASKS, 0)) @@ -58,27 +63,18 @@ size_t get_stack(uint32_t id) int register_task(task_t* task) { uint16_t sel; - uint32_t id = task->id; - if (BUILTIN_EXPECT(!task, 0)) - return -EINVAL; - - sel = (task->id+5) << 3; + sel = (CORE_ID+5) << 3; asm volatile ("mov %0, %%ax; ltr %%ax" : : "ir"(sel) : "%eax"); - // initialize the static elements of a TSS - task_state_segments[id].cr3 = (uint32_t) (task->pgd); - task_state_segments[id].ss0 = 0x10; - return 0; } int arch_fork(task_t* task) { -#ifndef SW_TASK_SWITCH uint16_t cs = 0x08; - uint16_t ds = 0x10; - uint32_t id; + uint32_t id, esp; + struct state* state; task_t* curr_task = per_core(current_task); if (BUILTIN_EXPECT(!task, 0)) @@ -88,44 +84,37 @@ int arch_fork(task_t* task) // copy kernel stack of the current task memcpy(kstacks[id], kstacks[curr_task->id], KERNEL_STACK_SIZE); - // reset TSS - memset(task_state_segments+id, 0x00, sizeof(tss_t)); + asm volatile ("mov %%esp, %0" : "=r"(esp)); + esp -= (uint32_t) kstacks[curr_task->id]; + esp += (uint32_t) kstacks[id]; - // set default values of all registers - task_state_segments[id].cs = cs; - task_state_segments[id].ss = ds; - task_state_segments[id].ds = ds; - task_state_segments[id].fs = ds; - task_state_segments[id].gs = ds; - task_state_segments[id].es = ds; - task_state_segments[id].cr3 = (uint32_t) (virt_to_phys((size_t)task->pgd)); - 
task_state_segments[id].ss0 = ds; - task_state_segments[id].esp0 = (uint32_t) kstacks[id] + KERNEL_STACK_SIZE - sizeof(size_t); - - // save curret task context - asm volatile("mov %%esp, %0" : "=r"(task_state_segments[id].esp)); - task_state_segments[id].esp -= (uint32_t) kstacks[curr_task->id]; - task_state_segments[id].esp += (uint32_t) kstacks[id]; + state = (struct state*) (esp - sizeof(struct state) + 2*sizeof(size_t)); asm volatile ("pusha"); - asm volatile ("pop %0" : "=r"(task_state_segments[id].edi)); - asm volatile ("pop %0" : "=r"(task_state_segments[id].esi)); - asm volatile ("pop %0" : "=r"(task_state_segments[id].ebp)); + asm volatile ("pop %0" : "=r"(state->edi)); + asm volatile ("pop %0" : "=r"(state->esi)); + asm volatile ("pop %0" : "=r"(state->ebp)); #ifdef WITH_FRAME_POINTER - task_state_segments[id].ebp -= (uint32_t) kstacks[curr_task->id]; - task_state_segments[id].ebp += (uint32_t) kstacks[id]; + state->ebp -= (uint32_t) kstacks[curr_task->id]; + state->ebp += (uint32_t) kstacks[id]; #endif asm volatile ("add $4, %%esp" ::: "%esp"); - asm volatile ("pop %0" : "=r"(task_state_segments[id].ebx)); - asm volatile ("pop %0" : "=r"(task_state_segments[id].edx)); - asm volatile ("pop %0" : "=r"(task_state_segments[id].ecx)); - asm volatile ("pop %0" : "=r"(task_state_segments[id].eax)); + state->esp = (uint32_t) state; + task->stack = (size_t*) state; + asm volatile ("pop %0" : "=r"(state->ebx)); + asm volatile ("pop %0" : "=r"(state->edx)); + asm volatile ("pop %0" : "=r"(state->ecx)); + asm volatile ("pop %0" : "=r"(state->eax)); + state->int_no = 0xB16B00B5; + state->error = 0xC03DB4B3; + state->cs = cs; // store the current EFLAGS - asm volatile ("pushf; pop %%eax" : "=a"(task_state_segments[id].eflags)); + asm volatile ("pushf; pop %%eax" : "=a"(state->eflags)); + // enable interrupts + state->eflags |= (1 << 9); // This will be the entry point for the new task. 
- asm volatile ("call read_eip" : "=a"(task_state_segments[id].eip)); -#endif + asm volatile ("call read_eip" : "=a"(state->eip)); return 0; } @@ -133,132 +122,56 @@ int arch_fork(task_t* task) int create_default_frame(task_t* task, internal_entry_point_t ep, void* arg) { uint16_t cs = 0x08; - uint16_t ds = 0x10; uint32_t id; - -#ifdef SW_TASK_SWITCH uint32_t *stack; struct state *stptr; - uint32_t short_state_size = sizeof(struct state)/sizeof(uint32_t) -2; -#endif if (BUILTIN_EXPECT(!task, 0)) return -EINVAL; id = task->id; -#ifdef SW_TASK_SWITCH memset(kstacks[id], 0xCD, KERNEL_STACK_SIZE); /* The difference between setting up a task for SW-task-switching * and not for HW-task-switching is setting up a stack and not a TSS. * This is the stack which will be activated and popped off for iret later. */ - stack = kstacks[id] +KERNEL_STACK_SIZE -sizeof(uint32_t); + stack = (uint32_t*) (kstacks[id] + KERNEL_STACK_SIZE - sizeof(uint32_t)); /* The next three things on the stack are a marker for debugging purposes, ... */ *stack-- = 0xDEADBEEF; /* the first-function-to-be-called's arguments, ... */ - *stack-- = arg; + *stack-- = (size_t) arg; /* and the "caller" we shall return to. * This procedure cleans the task after exit. */ - *stack = leave_kernel_task; + *stack = (size_t) leave_kernel_task; /* Next bunch on the stack is the initial register state. * The stack must look like the stack of a task which was * scheduled away previously. */ - /* short_state_size was introduced because the convenient "struct state" - * is used for filling the stack with initial values. But the problem is that - * "iret" will not remove the last two entries from the stack, since we're - * "returning" from kernel space to kernel space. Therefore it is shortened - * by its last two entries. 
*/ - stack -= short_state_size; + stack = (uint32_t*) ((size_t) stack - sizeof(struct state) + 2*sizeof(size_t)); - stptr = stack; - memset(stptr, 0x00, short_state_size*sizeof(uint32_t)); - stptr->esp = stack +short_state_size; + stptr = (struct state *) stack; + memset(stptr, 0x00, sizeof(struct state) - 2*sizeof(size_t)); + stptr->esp = (size_t)stack + sizeof(struct state) - 2*sizeof(size_t); stptr->int_no = 0xB16B00B5; stptr->error = 0xC03DB4B3; /* The instruction pointer shall be set on the first function to be called * after IRETing */ - stptr->eip = ep; + stptr->eip = (uint32_t)ep; stptr->cs = cs; - stptr->eflags = 0x1002; + stptr->eflags = 0x1202; + // creating a kernel task does not change the privilege level (kernel space to kernel space) + // => iret pops neither useresp nor ss, so they are not required - /* Set the task's stack pointer entry to the stack we have crafted right now. - * This is the pointer which will be used by sw_switch_task(old_task, new_task) later.*/ - task->stack = stack; -#else - /* reset buffers */ - memset(task_state_segments+id, 0x00, sizeof(tss_t)); - memset(kstacks[id], 0xCD, KERNEL_STACK_SIZE); - /* set default values of all registers */ - task_state_segments[id].cs = cs; - task_state_segments[id].ss = ds; - task_state_segments[id].ds = ds; - task_state_segments[id].fs = ds; - task_state_segments[id].gs = ds; - task_state_segments[id].es = ds; - task_state_segments[id].eflags = 0x1002; // 0x1202; - task_state_segments[id].cr3 = (uint32_t) (virt_to_phys((size_t)task->pgd)); - task_state_segments[id].eip = (uint32_t) ep; - task_state_segments[id].esp = (uint32_t) kstacks[id] + KERNEL_STACK_SIZE - sizeof(size_t); - - /* build default stack frame */ - *((size_t*)task_state_segments[id].esp) = 0xDEADBEAF; /* dead-end */ - task_state_segments[id].ebp = task_state_segments[id].esp; - task_state_segments[id].esp -= sizeof(size_t); - *((size_t*)task_state_segments[id].esp) = (size_t) arg; - task_state_segments[id].esp -= sizeof(size_t); - *((size_t*)task_state_segments[id].esp) = (size_t) 
leave_kernel_task; - - /* setup for the kernel stack frame */ - task_state_segments[id].ss0 = 0x10; - task_state_segments[id].esp0 = (uint32_t) kstacks[id] + KERNEL_STACK_SIZE - sizeof(size_t); -#endif - return 0; -} - -#ifdef SW_TASK_SWITCH -int create_default_tss(int id) -{ - uint16_t cs = 0x08; - uint16_t ds = 0x10; - - /* reset buffers */ - memset(task_state_segments+id, 0x00, sizeof(tss_t)); - - /* set default values of all registers */ - task_state_segments[id].cs = cs; - task_state_segments[id].ss = ds; - task_state_segments[id].ds = ds; - task_state_segments[id].fs = ds; - task_state_segments[id].gs = ds; - task_state_segments[id].es = ds; - task_state_segments[id].eflags = 0x1002; // 0x1202; - //task_state_segments[id].cr3 = (uint32_t) (virt_to_phys((size_t)task->pgd)); - //task_state_segments[id].eip = (uint32_t) ep; - task_state_segments[id].esp = (uint32_t) kstacks[id] + KERNEL_STACK_SIZE - sizeof(size_t); - - /* build default stack frame */ - *((size_t*)task_state_segments[id].esp) = 0xDEADBEAF; /* dead-end */ - /* - task_state_segments[id].ebp = task_state_segments[id].esp; - task_state_segments[id].esp -= sizeof(size_t); - *((size_t*)task_state_segments[id].esp) = (size_t) arg; - task_state_segments[id].esp -= sizeof(size_t); - *((size_t*)task_state_segments[id].esp) = (size_t) leave_kernel_task; - */ - - /* setup for the kernel stack frame */ - task_state_segments[id].ss0 = 0x10; - task_state_segments[id].esp0 = (uint32_t) kstacks[id] + KERNEL_STACK_SIZE - sizeof(size_t); + /* Set the task's stack pointer entry to the stack we have crafted right now. 
*/ + task->stack = (size_t*)stack; return 0; } -#endif /* Setup a descriptor in the Global Descriptor Table */ static void gdt_set_gate(int num, unsigned long base, unsigned long limit, @@ -298,11 +211,7 @@ void gdt_install(void) { unsigned int i; -#ifdef SW_TASK_SWITCH memset(task_state_segments, 0x00, MAX_CORES*sizeof(tss_t)); -#else - memset(task_state_segments, 0x00, MAX_TASKS*sizeof(tss_t)); -#endif /* Setup the GDT pointer and limit */ gp.limit = (sizeof(gdt_entry_t) * GDT_ENTRIES) - 1; @@ -346,15 +255,14 @@ void gdt_install(void) /* * Create TSS for each task at ring0 (we use these segments for task switching) */ -#ifdef SW_TASK_SWITCH for(i=0; i #include #include +#include #include #include #include @@ -224,13 +225,15 @@ int irq_init(void) * controller (an IRQ from 8 to 15) gets an interrupt, you need to * acknowledge the interrupt at BOTH controllers, otherwise, you * only send an EOI command to the first controller. If you don't send - * an EOI, it won't raise any more IRQs.\n - * \n + * an EOI, it won't raise any more IRQs. + * * Note: If we enabled the APIC, we also disabled the PIC. Afterwards, * we get no interrupts between 0 and 15. */ -void irq_handler(struct state *s) +size_t** irq_handler(struct state *s) { + size_t** ret = NULL; + /* This is a blank function pointer */ void (*handler) (struct state * s); @@ -276,7 +279,9 @@ void irq_handler(struct state *s) leave_handler: // timer interrupt? 
if ((s->int_no == 32) || (s->int_no == 123)) - scheduler(); // switch to a new task + ret = scheduler(); // switch to a new task else if ((s->int_no >= 32) && (get_highest_priority() > per_core(current_task)->prio)) - scheduler(); + ret = scheduler(); + + return ret; } diff --git a/include/metalsvm/config.h.example b/include/metalsvm/config.h.example index fd940c97..d0c33b47 100644 --- a/include/metalsvm/config.h.example +++ b/include/metalsvm/config.h.example @@ -42,8 +42,6 @@ extern "C" { #define VIDEO_MEM_ADDR 0xB8000 // the video memora address #define SMP_SETUP_ADDR 0x07000 -#define SW_TASK_SWITCH - #define BYTE_ORDER LITTLE_ENDIAN /* diff --git a/include/metalsvm/config.inc.example b/include/metalsvm/config.inc.example new file mode 100644 index 00000000..2b919abc --- /dev/null +++ b/include/metalsvm/config.inc.example @@ -0,0 +1,4 @@ +; config macros for the assembler code + +; define the maximum number of core +%define MAX_CORES 1 diff --git a/include/metalsvm/tasks.h b/include/metalsvm/tasks.h index 63c8b3f4..4fb842bd 100644 --- a/include/metalsvm/tasks.h +++ b/include/metalsvm/tasks.h @@ -110,8 +110,12 @@ void load_balancing(void); /** @brief Task switcher * * Timer-interrupted use of this function for task switching + * + * @return + * - 0 no context switch + * - !0 address of the old stack pointer */ -void scheduler(void); +size_t** scheduler(void); /** @brief Wake up a blocked task * diff --git a/include/metalsvm/tasks_types.h b/include/metalsvm/tasks_types.h index c59b16fb..2962d2f3 100644 --- a/include/metalsvm/tasks_types.h +++ b/include/metalsvm/tasks_types.h @@ -67,13 +67,12 @@ struct page_dir; /** @brief The task_t structure */ typedef struct task { -#ifdef SW_TASK_SWITCH - uint32_t stack; -#endif /// Task id = position in the task table tid_t id; /// Task status (INVALID, READY, RUNNING, ...) uint32_t status; + /// copy of the stack pointer before a context switch + size_t* stack; /// Additional status flags. 
For instance, to signalize the using of the FPU uint8_t flags; /// Task priority @@ -86,29 +85,29 @@ typedef struct task { struct task* prev; /// last core id on which the task was running uint32_t last_core; - /// Usage in number of pages + /// usage in number of pages atomic_int32_t user_usage; - /// Avoids concurrent access to the page directory + /// avoids concurrent access to the page directory spinlock_t pgd_lock; /// pointer to the page directory struct page_dir* pgd; - /// Lock for the VMA_list + /// lock for the VMA_list spinlock_t vma_lock; - /// List of VMAs + /// list of VMAs vma_t* vma_list; - /// Filedescriptor table + /// filedescriptor table filp_t* fildes_table; /// starting time/tick of the task uint64_t start_tick; - /// Start address of the heap + /// start address of the heap size_t start_heap; - /// End address of the heap + /// end address of the heap size_t end_heap; /// LwIP error code int lwip_err; - /// Mail inbox + /// mail inbox mailbox_wait_msg_t inbox; - /// Mail outbox array + /// mail outbox array mailbox_wait_msg_t* outbox[MAX_TASKS]; /// FPU state union fpu_state fpu; diff --git a/kernel/init.c b/kernel/init.c index 853a690e..e702a5c8 100644 --- a/kernel/init.c +++ b/kernel/init.c @@ -253,7 +253,7 @@ int initd(void* arg) #endif // start echo, netio and rlogind - //echo_init(); + echo_init(); create_user_task(&id, "/bin/rlogind", argv); kprintf("Create rlogind with id %u\n", id); //netio_init(); diff --git a/kernel/tasks.c b/kernel/tasks.c index 430b4cee..fdd97394 100644 --- a/kernel/tasks.c +++ b/kernel/tasks.c @@ -46,16 +46,9 @@ * * A task's id will be its position in this array. */ -#ifdef SW_TASK_SWITCH static task_t task_table[MAX_TASKS] = { \ - [0] = {0, 0, TASK_IDLE, 0, 0, 0, NULL, NULL, 0, ATOMIC_INIT(0), SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, NULL, 0, 0, 0, 0}, \ - [1 ... 
MAX_TASKS-1] = {0, 0, TASK_INVALID, 0, 0, 0, NULL, NULL, 0, ATOMIC_INIT(0), SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, NULL, 0, 0, 0, 0}}; -#else -static task_t task_table[MAX_TASKS] = { \ - [0] = {0, TASK_IDLE, 0, 0, 0, NULL, NULL, 0, ATOMIC_INIT(0), SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, NULL, 0, 0, 0, 0}, \ - [1 ... MAX_TASKS-1] = {0, TASK_INVALID, 0, 0, 0, NULL, NULL, 0, ATOMIC_INIT(0), SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, NULL, 0, 0, 0, 0}}; -#endif - + [0] = {0, TASK_IDLE, NULL, 0, 0, 0, NULL, NULL, 0, ATOMIC_INIT(0), SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, NULL, 0, 0, 0, 0}, \ + [1 ... MAX_TASKS-1] = {0, TASK_INVALID, NULL, 0, 0, 0, NULL, NULL, 0, ATOMIC_INIT(0), SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, NULL, 0, 0, 0, 0}}; static spinlock_irqsave_t table_lock = SPINLOCK_IRQSAVE_INIT; #if MAX_CORES > 1 static runqueue_t runqueues[MAX_CORES] = { \ @@ -126,7 +119,7 @@ size_t get_idle_task(uint32_t id) #endif } -static void finish_task_switch(uint32_t irq) +void finish_task_switch(void) { uint8_t prio; uint32_t core_id = CORE_ID; @@ -148,9 +141,6 @@ static void finish_task_switch(uint32_t irq) runqueues[core_id].prio_bitmap |= (1 << prio); } spinlock_irqsave_unlock(&runqueues[core_id].lock); - - if (irq) - irq_enable(); } /** @brief Wakeup tasks which are waiting for a message from the current one @@ -220,7 +210,7 @@ static void NORETURN do_exit(int arg) { irq_nested_enable(flags); reschedule(); - + kprintf("Kernel panic: scheduler on core %d found no valid task\n", CORE_ID); while(1) { HALT; @@ -421,7 +411,6 @@ int sys_fork(void) // Leave the function without releasing the locks // because the locks are already released // by the parent task! 
- finish_task_switch(1); return 0; } @@ -456,8 +445,6 @@ static int STDCALL kernel_entry(void* args) int ret; kernel_args_t* kernel_args = (kernel_args_t*) args; - finish_task_switch(1); - if (BUILTIN_EXPECT(!kernel_args, 0)) return -EINVAL; @@ -738,8 +725,6 @@ static int STDCALL user_entry(void* arg) { int ret; - finish_task_switch(1); - if (BUILTIN_EXPECT(!arg, 0)) return -EINVAL; @@ -1273,7 +1258,7 @@ void load_balancing(void) } #endif -void scheduler(void) +size_t** scheduler(void) { task_t* orig_task; task_t* curr_task; @@ -1379,20 +1364,18 @@ get_task_out: } //kprintf("schedule from %u to %u with prio %u on core %u\n", orig_task->id, curr_task->id, (uint32_t)curr_task->prio, CORE_ID); -#ifndef SW_TASK_SWITCH - switch_task(curr_task->id); -#endif - finish_task_switch(0); -#ifdef SW_TASK_SWITCH - write_cr3(virt_to_phys((size_t)curr_task->pgd)); - sw_switch_context(&orig_task->stack, &curr_task->stack); -#endif + + return (size_t**) &(orig_task->stack); } + + return NULL; } void reschedule(void) { + size_t** stack; uint32_t flags = irq_nested_disable(); - scheduler(); + if ((stack = scheduler())) + switch_context(stack); irq_nested_enable(flags); } From 50b8068a782c93ff92db7caab6ba1f4c9a2b2a66 Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Mon, 21 May 2012 12:16:19 -0700 Subject: [PATCH 09/12] fix bug in inline assembler --- arch/x86/kernel/gdt.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/gdt.c b/arch/x86/kernel/gdt.c index 62982b0a..8d8f680f 100644 --- a/arch/x86/kernel/gdt.c +++ b/arch/x86/kernel/gdt.c @@ -89,23 +89,23 @@ int arch_fork(task_t* task) esp += (uint32_t) kstacks[id]; state = (struct state*) (esp - sizeof(struct state) + 2*sizeof(size_t)); + memset(state, 0x00, sizeof(struct state) - 2*sizeof(size_t)); - asm volatile ("pusha"); - asm volatile ("pop %0" : "=r"(state->edi)); + asm volatile ("pusha; pop %0" : "=r"(state->edi)); asm volatile ("pop %0" : "=r"(state->esi)); asm volatile ("pop 
%0" : "=r"(state->ebp)); -#ifdef WITH_FRAME_POINTER - state->ebp -= (uint32_t) kstacks[curr_task->id]; - state->ebp += (uint32_t) kstacks[id]; -#endif asm volatile ("add $4, %%esp" ::: "%esp"); - state->esp = (uint32_t) state; - task->stack = (size_t*) state; asm volatile ("pop %0" : "=r"(state->ebx)); asm volatile ("pop %0" : "=r"(state->edx)); asm volatile ("pop %0" : "=r"(state->ecx)); asm volatile ("pop %0" : "=r"(state->eax)); +#ifdef WITH_FRAME_POINTER + state->ebp -= (uint32_t) kstacks[curr_task->id]; + state->ebp += (uint32_t) kstacks[id]; +#endif + state->esp = (uint32_t) state; + task->stack = (size_t*) state; state->int_no = 0xB16B00B5; state->error = 0xC03DB4B3; state->cs = cs; From f8058bbee2a0fa85bf69266656284817b761c9ab Mon Sep 17 00:00:00 2001 From: Jacek Galowicz Date: Tue, 22 May 2012 13:38:41 +0200 Subject: [PATCH 10/12] Made the makescript generate config.inc automatically. --- Makefile.example | 6 +++++- include/metalsvm/config.inc.example | 4 ---- 2 files changed, 5 insertions(+), 5 deletions(-) delete mode 100644 include/metalsvm/config.inc.example diff --git a/Makefile.example b/Makefile.example index 1219a476..ab12c773 100644 --- a/Makefile.example +++ b/Makefile.example @@ -109,8 +109,12 @@ veryclean: clean @echo [DEP] $*.dep $Q$(CPP_FOR_TARGET) -MF $*.dep -MT $*.o -MM -D__KERNEL__ $(CFLAGS) $< +include/metalsvm/config.inc: include/metalsvm/config.h + @echo "; This file is generated automatically from the config.h file." > include/metalsvm/config.inc + @echo "; Before editing this, you should consider editing config.h." 
>> include/metalsvm/config.inc + @awk '/^#define MAX_CORES/{ print "%define MAX_CORES", $$3 }' include/metalsvm/config.h >> include/metalsvm/config.inc -%.o : %.asm +%.o : %.asm include/metalsvm/config.inc @echo [ASM] $@ $Q$(NASM) $(NASMFLAGS) -o $@ $< diff --git a/include/metalsvm/config.inc.example b/include/metalsvm/config.inc.example deleted file mode 100644 index 2b919abc..00000000 --- a/include/metalsvm/config.inc.example +++ /dev/null @@ -1,4 +0,0 @@ -; config macros for the assembler code - -; define the maximum number of core -%define MAX_CORES 1 From 181ae30e2cb97be94106f4fbe844dedd36ad6b9d Mon Sep 17 00:00:00 2001 From: Jacek Galowicz Date: Tue, 22 May 2012 13:44:33 +0200 Subject: [PATCH 11/12] Changed the .gitignore file to make git ignore config.inc --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 2ecbcd97..cae77b40 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,7 @@ tags Makefile include/metalsvm/config.h +include/metalsvm/config.inc tools/make_initrd newlib/examples/hello newlib/examples/jacobi From fc0c903de31f3a1451e8727640f986cd45cc3ff4 Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Wed, 23 May 2012 11:19:46 -0700 Subject: [PATCH 12/12] reduce overhead to determine TSS --- arch/x86/kernel/entry.asm | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/entry.asm b/arch/x86/kernel/entry.asm index f3c6a6d4..c9c0f4dc 100644 --- a/arch/x86/kernel/entry.asm +++ b/arch/x86/kernel/entry.asm @@ -799,15 +799,14 @@ common_switch: ; determine TSS %if MAX_CORES > 1 call apic_cpu_id + mov ecx, DWORD 0x68 + mul ecx %else xor eax, eax %endif - mov ecx, DWORD 0x68 - mul ecx add eax, task_state_segments - add eax, DWORD 4 ; set esp0 in TSS - mov [eax], esp + mov [eax+4], esp ; call cleanup code call finish_task_switch