diff --git a/hermit/arch/x86/include/asm/stddef.h b/hermit/arch/x86/include/asm/stddef.h index b95f5f1c7..e2d616705 100644 --- a/hermit/arch/x86/include/asm/stddef.h +++ b/hermit/arch/x86/include/asm/stddef.h @@ -40,6 +40,53 @@ extern "C" { #endif
+/*
+ * Accessors for per-core variables. The variable's symbol is used as an
+ * offset relative to the GS segment, so each core reaches its own copy
+ * once its GS base has been programmed. Only 4- and 8-byte objects can be
+ * moved with a single movl/movq; every other size is rejected at compile
+ * time instead of silently yielding an uninitialized value (read) or
+ * dropping the store (write).
+ */
+#define __per_core_check_size(var) \
+	((void) sizeof(char[(sizeof(var) == 4 || sizeof(var) == 8) ? 1 : -1]))
+
+/* Read this core's copy of `var` and yield it as the macro's value. */
+#define per_core(var) ({ \
+	typeof(var) ptr; \
+	__per_core_check_size(var); \
+	switch (sizeof(var)) { \
+	case 4: \
+		asm volatile ("movl %%gs:(" #var "), %0" : "=r"(ptr)); \
+		break; \
+	case 8: \
+		asm volatile ("movq %%gs:(" #var "), %0" : "=r"(ptr)); \
+		break; \
+	} \
+	ptr; })
+
+/*
+ * Store `value` into this core's copy of `var`.
+ *
+ * `value` is first converted to the type of `var` so that the register
+ * operand always has the width the mov instruction expects (e.g. an int
+ * literal stored into an 8-byte variable). The "memory" clobber tells the
+ * compiler that memory changes here, so it does not move or cache other
+ * memory accesses across the store.
+ */
+#define per_core_set(var, value) ({ \
+	typeof(var) __pc_val = (typeof(var)) (value); \
+	__per_core_check_size(var); \
+	switch (sizeof(var)) { \
+	case 4: \
+		asm volatile ("movl %0, %%gs:(" #var ")" :: "r"(__pc_val) : "memory"); \
+		break; \
+	case 8: \
+		asm volatile ("movq %0, %%gs:(" #var ")" :: "r"(__pc_val) : "memory"); \
+		break; \
+	} \
+	})
+
 #if __SIZEOF_POINTER__ == 4 #define KERNEL_SPACE (1UL << 30) /* 1 GiB */ diff --git a/hermit/arch/x86/kernel/entry.asm b/hermit/arch/x86/kernel/entry.asm index f03e0a152..4fd1fe550 100644 --- a/hermit/arch/x86/kernel/entry.asm +++ b/hermit/arch/x86/kernel/entry.asm @@ -525,16 +525,23 @@ extern irq_handler extern get_current_stack extern finish_task_switch extern syscall_handler -extern get_kernel_stack +extern kernel_stack global isrsyscall align 8 ; used to realize system calls isrsyscall: - ; IF flag is already cleared => see processor.c + ; IF flag is already cleared ; cli - ; save space for caller's red zone - sub rsp, 128 + ; only called from user space => get kernel-level selector + swapgs + ; get kernel stack + xchg rsp, [gs:kernel_stack] + + ; push old rsp and restore [gs:kernel_stack] + push QWORD [gs:kernel_stack] + mov QWORD [gs:kernel_stack], rsp + ; save registers accross function call push r8 push r9 @@ -546,27 +553,7 @@ isrsyscall: push rsi ; push system call number - push rax - - ; get kernel stack - call get_kernel_stack - - ; restore registers - mov r8, [rsp+64] - mov r9, [rsp+56] - mov r10, [rsp+48] - mov r11, [rsp+40] - mov rdx, [rsp+32] - ; see below - ; mov rcx, [rsp+24] - mov rdi, [rsp+16] - mov rsi, [rsp+8] 
- - xchg rsp, rax ; => rax contains pointer to the kernel stack - push rax ; store user-level stack pointer - - ; restore system call number - mov rax, [rax+0] + ; push rax ; syscall stores in rcx the return address ; => using of r10 for the temporary storage of the 4th argument @@ -578,12 +565,8 @@ isrsyscall: call [rax*8+syscall_table] cli - ; restore user-level stack pointer - pop r10 - mov rsp, r10 - ; restore registers - add rsp, 8 ; ignore old value of rax + ;add rsp, 8 ; ignore old value of rax pop rsi pop rdi pop rcx @@ -592,8 +575,12 @@ isrsyscall: pop r10 pop r9 pop r8 - ; remove red zone - add rsp, 128 + + ; restore user-level stack + mov rsp, [rsp] + + ; set user-level selector + swapgs ; EFLAGS (and IF flag) will be restored by sysret ; sti o64 sysret @@ -607,7 +594,7 @@ switch_context: push QWORD 0x10 ; SS push rsp ; RSP add QWORD [rsp], 0x08 ; => value of rsp before the creation of a pseudo interrupt - pushfq ; RFLAGS + push QWORD 0x1202 ; RFLAGS push QWORD 0x08 ; CS push QWORD rollback ; RIP push QWORD 0x00 ; Interrupt number @@ -637,6 +624,11 @@ rollback: align 8 common_stub: + ; do we interrupt user-level code? + cmp QWORD [rsp+24], 0x08 + je kernel_space1 + swapgs ; set GS to the kernel selector +kernel_space1: push rax push rcx push rdx @@ -662,13 +654,13 @@ common_stub: je no_context_switch common_switch: - mov [rax], rsp ; store old rsp + mov QWORD [rax], rsp ; store old rsp call get_current_stack ; get new rsp - xchg rax, rsp + mov rsp, rax ; set task switched flag mov rax, cr0 - or eax, 8 + or rax, 8 mov cr0, rax ; call cleanup code @@ -692,6 +684,11 @@ no_context_switch: pop rcx pop rax +; do we interrupt user-level code? 
+ cmp QWORD [rsp+24], 0x08 + je kernel_space2 + swapgs ; set GS to the user-level selector +kernel_space2: add rsp, 16 iretq diff --git a/hermit/arch/x86/kernel/gdt.c b/hermit/arch/x86/kernel/gdt.c index 7b93aaa22..0bbf5f941 100644 --- a/hermit/arch/x86/kernel/gdt.c +++ b/hermit/arch/x86/kernel/gdt.c @@ -35,10 +35,10 @@ #include #include -gdt_ptr_t gp; -static tss_t task_state_segments[MAX_CORES] __attribute__ ((aligned (PAGE_SIZE))); +gdt_ptr_t gp; +tss_t task_state_segments[MAX_CORES] __attribute__ ((aligned (PAGE_SIZE))); // currently, our kernel has full access to the ioports -static gdt_entry_t gdt[GDT_ENTRIES] = {[0 ... GDT_ENTRIES-1] = {0, 0, 0, 0, 0, 0}}; +static gdt_entry_t gdt[GDT_ENTRIES] = {[0 ... GDT_ENTRIES-1] = {0, 0, 0, 0, 0, 0}}; /* * This is defined in entry.asm. We use this to properly reload @@ -48,13 +48,6 @@ extern void gdt_flush(void); extern const void boot_stack; -size_t get_kernel_stack(void) -{ - task_t* curr_task = per_core(current_task); - - return (size_t) curr_task->stack + KERNEL_STACK_SIZE - 0x10; // => stack is 16byte aligned -} - /* Setup a descriptor in the Global Descriptor Table */ void gdt_set_gate(int num, unsigned long base, unsigned long limit, unsigned char access, unsigned char gran) diff --git a/hermit/arch/x86/kernel/processor.c b/hermit/arch/x86/kernel/processor.c index fd41d861e..24c0a666f 100644 --- a/hermit/arch/x86/kernel/processor.c +++ b/hermit/arch/x86/kernel/processor.c @@ -32,6 +32,14 @@ #include #include +/* + * * Note that linker symbols are not variables, they have no memory allocated for + * * maintaining a value, rather their address is their value. 
+ * */ +extern const void percore_start; +extern const void percore_end0; +extern const void percore_end; + extern void isrsyscall(void); cpu_info_t cpu_info = { 0, 0, 0, 0, 0}; @@ -219,12 +227,21 @@ int cpu_detection(void) { wrmsr(MSR_STAR, (0x1BULL << 48) | (0x08ULL << 32)); wrmsr(MSR_LSTAR, (size_t) &isrsyscall); // clear IF flag during an interrupt - wrmsr(MSR_SYSCALL_MASK, (1 << 9)); + wrmsr(MSR_SYSCALL_MASK, EFLAGS_TF|EFLAGS_DF|EFLAGS_IF|EFLAGS_AC|EFLAGS_NT); } else kputs("Processor doesn't support syscalls\n"); if (has_nx()) wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_NXE); + wrmsr(MSR_FS_BASE, 0); + wrmsr(MSR_GS_BASE, apic_cpu_id() * ((size_t) &percore_end0 - (size_t) &percore_start)); + wrmsr(MSR_KERNEL_GS_BASE, 0); + + kprintf("Core %d set per_core offset to 0x%x\n", apic_cpu_id(), rdmsr(MSR_GS_BASE)); + + /* set core id to apic_cpu_id */ + per_core_set(__core_id, apic_cpu_id()); + if (first_time && has_sse()) wmb = sfence; diff --git a/hermit/arch/x86/kernel/tasks.c b/hermit/arch/x86/kernel/tasks.c index d32bc5c4c..8fdcc4e2b 100644 --- a/hermit/arch/x86/kernel/tasks.c +++ b/hermit/arch/x86/kernel/tasks.c @@ -36,11 +36,18 @@ #include #include #include +#include + +extern tss_t task_state_segments[MAX_CORES] __attribute__ ((aligned (PAGE_SIZE))); size_t* get_current_stack(void) { + uint32_t core_id = CORE_ID; task_t* curr_task = per_core(current_task); + per_core_set(kernel_stack, curr_task->stack + KERNEL_STACK_SIZE - 0x10); + task_state_segments[core_id].rsp0 = (size_t) curr_task->stack + KERNEL_STACK_SIZE - 0x10; + // use new page table write_cr3(curr_task->page_map); @@ -340,6 +347,8 @@ static int load_task(load_args_t* largs) //vma_dump(); + asm volatile ("swapgs"); + jump_to_user_code(header.entry, stack+offset); return 0; diff --git a/hermit/include/hermit/stddef.h b/hermit/include/hermit/stddef.h index 1330cd2d3..0d4c460f1 100644 --- a/hermit/include/hermit/stddef.h +++ b/hermit/include/hermit/stddef.h @@ -47,43 +47,22 @@ extern "C" { /// represents a 
task identifier typedef unsigned int tid_t; -#if MAX_CORES == 1 -#define per_core(name) name -#define DECLARE_PER_CORE(type, name) extern type name; -#define DEFINE_PER_CORE(type, name, def_value) type name = def_value; -#define DEFINE_PER_CORE_STATIC(type, name, def_value) static type name = def_value; -#define CORE_ID 0 -#else -#define per_core(name) (*__get_percore_##name()) -#define DECLARE_PER_CORE(type, name) \ - typedef struct { type var __attribute__ ((aligned (CACHE_LINE))); } aligned_##name;\ - extern aligned_##name name[MAX_CORES];\ - inline static type* __get_percore_##name(void) {\ - type* ret; \ - uint8_t flags = irq_nested_disable(); \ - ret = &(name[smp_id()].var); \ - irq_nested_enable(flags);\ - return ret; \ - } -#define DEFINE_PER_CORE(type, name, def_value) \ - aligned_##name name[MAX_CORES] = {[0 ... MAX_CORES-1] = {def_value}}; -#define DEFINE_PER_CORE_STATIC(type, name, def_value) \ - typedef struct { type var __attribute__ ((aligned (CACHE_LINE))); } aligned_##name;\ - static aligned_##name name[MAX_CORES] = {[0 ... 
MAX_CORES-1] = {def_value}}; \ - inline static type* __get_percore_##name(void) {\ - type* ret; \ - uint8_t flags = irq_nested_disable(); \ - ret = &(name[smp_id()].var); \ - irq_nested_enable(flags);\ - return ret; \ - } -#define CORE_ID smp_id() -#endif +#define DECLARE_PER_CORE(type, name) extern type name __attribute__ ((section (".percore"))); +#define DEFINE_PER_CORE(type, name, def_value) type name __attribute__ ((section (".percore"))) = def_value; +#define DEFINE_PER_CORE_STATIC(type, name, def_value) static type name __attribute__ ((section (".percore"))) = def_value; /* needed to find the task, which is currently running on this core */ struct task; DECLARE_PER_CORE(struct task*, current_task); +/* allows fast access to the kernel stack */ +DECLARE_PER_CORE(char*, kernel_stack); + +/* allows fast access to the core id */ +DECLARE_PER_CORE(uint32_t, __core_id); + +#define CORE_ID per_core(__core_id) + #ifdef __cplusplus } #endif diff --git a/hermit/kernel/main.c b/hermit/kernel/main.c index d918480a8..6838b2f86 100644 --- a/hermit/kernel/main.c +++ b/hermit/kernel/main.c @@ -58,6 +58,9 @@ extern const void kernel_start; extern const void kernel_end; extern const void bss_start; extern const void bss_end; +extern const void percore_start; +extern const void percore_end0; +extern const void percore_end; extern char __BUILD_DATE; /* Page frame counters */ @@ -81,9 +84,16 @@ static int foo(void* arg) static int hermit_init(void) { + uint32_t i; + size_t sz = (size_t) &percore_end0 - (size_t) &percore_start; + // initialize .bss section memset((void*)&bss_start, 0x00, ((size_t) &bss_end - (size_t) &bss_start)); + // initialize .percore section => copy first section to all other sections + for(i=1; i MAX_PRIO) { if ((curr_task->status == TASK_RUNNING) || (curr_task->status == TASK_IDLE)) goto get_task_out; - curr_task = per_core(current_task) = readyqueues[core_id].idle; + curr_task = readyqueues[core_id].idle; + per_core_set(current_task, curr_task); } else { 
// Does the current task have an higher priority? => no task switch if ((curr_task->prio > prio) && (curr_task->status == TASK_RUNNING)) @@ -605,7 +612,8 @@ size_t** scheduler(void) readyqueues[core_id].old_task = curr_task; } - curr_task = per_core(current_task) = readyqueues[core_id].queue[prio-1].first; + curr_task = readyqueues[core_id].queue[prio-1].first; + per_core_set(current_task, curr_task); if (BUILTIN_EXPECT(curr_task->status == TASK_INVALID, 0)) { kprintf("Upps!!!!!!! Got invalid task %d, orig task %d\n", curr_task->id, orig_task->id); } diff --git a/hermit/link.ld b/hermit/link.ld index 4f3307165..85d8c7d16 100644 --- a/hermit/link.ld +++ b/hermit/link.ld @@ -2,6 +2,7 @@ OUTPUT_FORMAT("elf64-x86-64") OUTPUT_ARCH("i386:x86-64") ENTRY(start) phys = (2 << 20); +cores = 8; SECTIONS { @@ -31,5 +32,12 @@ SECTIONS *(.bss) } bss_end = .; - kernel_end = .; + .percore ALIGN(4096) : AT(ADDR(.percore)) { + percore_start = .; + *(.percore) + . = ALIGN(64); + percore_end0 = .; + } + percore_end = percore_start + cores * SIZEOF(.percore); + kernel_end = percore_end; }