mirror of https://github.com/hermitcore/libhermit.git
synced 2025-03-09 00:00:03 +01:00

usage of GS selector to determine "per core data"
=> redesign of the system call interface => set RSP0 in TSS by a context switch

parent 50bdff1a4f
commit fed1d028a2
9 changed files with 128 additions and 83 deletions
@@ -40,6 +40,28 @@
extern "C" {
#endif

#define per_core(var) ({ \
	typeof(var) ptr; \
	switch (sizeof(var)) { \
	case 4: \
		asm volatile ("movl %%gs:(" #var "), %0" : "=r"(ptr)); \
		break; \
	case 8: \
		asm volatile ("movq %%gs:(" #var "), %0" : "=r"(ptr)); \
		break; \
	} \
	ptr; })

#define per_core_set(var, value) ({ \
	switch (sizeof(var)) { \
	case 4: \
		asm volatile ("movl %0, %%gs:(" #var ")" :: "r"(value)); \
		break; \
	case 8: \
		asm volatile ("movq %0, %%gs:(" #var ")" :: "r"(value)); \
		break; \
	} \
	})
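A minimal usage sketch of the two macros above, assuming a variable placed in the new .percore section (the DECLARE_PER_CORE/DEFINE_PER_CORE machinery appears further down in this commit); __core_id is taken from this commit, the function name is illustrative:

/* declared/defined elsewhere via DECLARE_PER_CORE/DEFINE_PER_CORE */
static void percore_example(void)
{
	uint32_t id = per_core(__core_id);	/* GS-relative load of this core's copy  */
	per_core_set(__core_id, id);		/* GS-relative store back to this core's copy */
}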

#if __SIZEOF_POINTER__ == 4

#define KERNEL_SPACE (1UL << 30) /* 1 GiB */
@@ -525,16 +525,23 @@ extern irq_handler
extern get_current_stack
extern finish_task_switch
extern syscall_handler
extern get_kernel_stack
extern kernel_stack

global isrsyscall
align 8
; used to realize system calls
isrsyscall:
	; IF flag is already cleared => see processor.c
	; IF flag is already cleared
	; cli
	; save space for caller's red zone
	sub rsp, 128
	; only called from user space => get kernel-level selector
	swapgs
	; get kernel stack
	xchg rsp, [gs:kernel_stack]

	; push old rsp and restore [gs:kernel_stack]
	push QWORD [gs:kernel_stack]
	mov QWORD [gs:kernel_stack], rsp

	; save registers across function call
	push r8
	push r9
@@ -546,27 +553,7 @@ isrsyscall:
	push rsi

	; push system call number
	push rax

	; get kernel stack
	call get_kernel_stack

	; restore registers
	mov r8, [rsp+64]
	mov r9, [rsp+56]
	mov r10, [rsp+48]
	mov r11, [rsp+40]
	mov rdx, [rsp+32]
	; see below
	; mov rcx, [rsp+24]
	mov rdi, [rsp+16]
	mov rsi, [rsp+8]

	xchg rsp, rax ; => rax contains pointer to the kernel stack
	push rax ; store user-level stack pointer

	; restore system call number
	mov rax, [rax+0]
	; push rax

	; syscall stores the return address in rcx
	; => use r10 as temporary storage for the 4th argument

@@ -578,12 +565,8 @@ isrsyscall:
	call [rax*8+syscall_table]
	cli

	; restore user-level stack pointer
	pop r10
	mov rsp, r10

	; restore registers
	add rsp, 8 ; ignore old value of rax
	;add rsp, 8 ; ignore old value of rax
	pop rsi
	pop rdi
	pop rcx

@@ -592,8 +575,12 @@ isrsyscall:
	pop r10
	pop r9
	pop r8
	; remove red zone
	add rsp, 128

	; restore user-level stack
	mov rsp, [rsp]

	; set user-level selector
	swapgs
	; EFLAGS (and IF flag) will be restored by sysret
	; sti
	o64 sysret
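For orientation, a hypothetical user-space wrapper matching the calling convention visible in isrsyscall above (number in rax; arguments in rdi/rsi/rdx/r10/r8/r9; rcx and r11 are clobbered by the syscall instruction). The name hermit_syscall3 is illustrative and not part of this commit:

static inline long hermit_syscall3(long nr, long arg0, long arg1, long arg2)
{
	long ret;

	/* syscall overwrites rcx (return RIP) and r11 (saved RFLAGS) */
	asm volatile ("syscall"
		: "=a"(ret)
		: "a"(nr), "D"(arg0), "S"(arg1), "d"(arg2)
		: "rcx", "r11", "memory");
	return ret;
}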
@@ -607,7 +594,7 @@ switch_context:
	push QWORD 0x10 ; SS
	push rsp ; RSP
	add QWORD [rsp], 0x08 ; => value of rsp before the creation of a pseudo interrupt
	pushfq ; RFLAGS
	push QWORD 0x1202 ; RFLAGS
	push QWORD 0x08 ; CS
	push QWORD rollback ; RIP
	push QWORD 0x00 ; Interrupt number

@@ -637,6 +624,11 @@ rollback:

align 8
common_stub:
	; do we interrupt user-level code?
	cmp QWORD [rsp+24], 0x08
	je kernel_space1
	swapgs ; set GS to the kernel selector
kernel_space1:
	push rax
	push rcx
	push rdx

@@ -662,13 +654,13 @@ common_stub:
	je no_context_switch

common_switch:
	mov [rax], rsp ; store old rsp
	mov QWORD [rax], rsp ; store old rsp
	call get_current_stack ; get new rsp
	xchg rax, rsp
	mov rsp, rax

	; set task switched flag
	mov rax, cr0
	or eax, 8
	or rax, 8
	mov cr0, rax

	; call cleanup code

@@ -692,6 +684,11 @@ no_context_switch:
	pop rcx
	pop rax

	; do we interrupt user-level code?
	cmp QWORD [rsp+24], 0x08
	je kernel_space2
	swapgs ; set GS to the user-level selector
kernel_space2:
	add rsp, 16
	iretq
@@ -35,10 +35,10 @@
#include <asm/tss.h>
#include <asm/page.h>

gdt_ptr_t gp;
static tss_t task_state_segments[MAX_CORES] __attribute__ ((aligned (PAGE_SIZE)));
gdt_ptr_t gp;
tss_t task_state_segments[MAX_CORES] __attribute__ ((aligned (PAGE_SIZE)));
// currently, our kernel has full access to the ioports
static gdt_entry_t gdt[GDT_ENTRIES] = {[0 ... GDT_ENTRIES-1] = {0, 0, 0, 0, 0, 0}};
static gdt_entry_t gdt[GDT_ENTRIES] = {[0 ... GDT_ENTRIES-1] = {0, 0, 0, 0, 0, 0}};

/*
 * This is defined in entry.asm. We use this to properly reload

@@ -48,13 +48,6 @@ extern void gdt_flush(void);

extern const void boot_stack;

size_t get_kernel_stack(void)
{
	task_t* curr_task = per_core(current_task);

	return (size_t) curr_task->stack + KERNEL_STACK_SIZE - 0x10; // => stack is 16-byte aligned
}

/* Setup a descriptor in the Global Descriptor Table */
void gdt_set_gate(int num, unsigned long base, unsigned long limit,
	unsigned char access, unsigned char gran)
@@ -32,6 +32,14 @@
#include <hermit/processor.h>
#include <hermit/tasks.h>

/*
 * Note that linker symbols are not variables, they have no memory allocated for
 * maintaining a value, rather their address is their value.
 */
extern const void percore_start;
extern const void percore_end0;
extern const void percore_end;

extern void isrsyscall(void);

cpu_info_t cpu_info = { 0, 0, 0, 0, 0};

@@ -219,12 +227,21 @@ int cpu_detection(void) {
		wrmsr(MSR_STAR, (0x1BULL << 48) | (0x08ULL << 32));
		wrmsr(MSR_LSTAR, (size_t) &isrsyscall);
		// clear IF flag during an interrupt
		wrmsr(MSR_SYSCALL_MASK, (1 << 9));
		wrmsr(MSR_SYSCALL_MASK, EFLAGS_TF|EFLAGS_DF|EFLAGS_IF|EFLAGS_AC|EFLAGS_NT);
	} else kputs("Processor doesn't support syscalls\n");

	if (has_nx())
		wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_NXE);

	wrmsr(MSR_FS_BASE, 0);
	wrmsr(MSR_GS_BASE, apic_cpu_id() * ((size_t) &percore_end0 - (size_t) &percore_start));
	wrmsr(MSR_KERNEL_GS_BASE, 0);

	kprintf("Core %d set per_core offset to 0x%x\n", apic_cpu_id(), rdmsr(MSR_GS_BASE));

	/* set core id to apic_cpu_id */
	per_core_set(__core_id, apic_cpu_id());

	if (first_time && has_sse())
		wmb = sfence;
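A sketch of the address arithmetic this sets up, assuming the symbols above: the GS base on core i is i times the size of one .percore copy, and a GS-relative access adds that base to the symbol's link-time address, so it lands in core i's replica. The helper percore_addr is illustrative only, not part of this commit:

static inline void* percore_addr(void* var, uint32_t core)
{
	size_t sz = (size_t) &percore_end0 - (size_t) &percore_start;

	/* gs:(var) on core `core` resolves to core*sz + &var */
	return (void*) ((size_t) var + core * sz);
}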
@@ -36,11 +36,18 @@
#include <hermit/vma.h>
#include <asm/elf.h>
#include <asm/page.h>
#include <asm/tss.h>

extern tss_t task_state_segments[MAX_CORES] __attribute__ ((aligned (PAGE_SIZE)));

size_t* get_current_stack(void)
{
	uint32_t core_id = CORE_ID;
	task_t* curr_task = per_core(current_task);

	per_core_set(kernel_stack, curr_task->stack + KERNEL_STACK_SIZE - 0x10);
	task_state_segments[core_id].rsp0 = (size_t) curr_task->stack + KERNEL_STACK_SIZE - 0x10;

	// use new page table
	write_cr3(curr_task->page_map);
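The rsp0 field written here is the hardware-defined slot in the 64-bit TSS that the CPU loads as the stack pointer on a ring 3 to ring 0 transition; refreshing it together with the per-core kernel_stack on every context switch is the "set RSP0 in TSS" part of this commit (it replaces the removed get_kernel_stack() helper). For reference, the start of the architectural x86-64 TSS layout, a sketch of what <asm/tss.h> presumably defines rather than a quote from it:

typedef struct {
	uint32_t reserved0;
	uint64_t rsp0;	/* stack pointer loaded on a ring 3 -> ring 0 interrupt */
	uint64_t rsp1;
	uint64_t rsp2;
	/* ... ist1-ist7 and the I/O map base follow ... */
} __attribute__ ((packed)) tss_sketch_t;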
@@ -340,6 +347,8 @@ static int load_task(load_args_t* largs)

	//vma_dump();

	asm volatile ("swapgs");

	jump_to_user_code(header.entry, stack+offset);

	return 0;
@@ -47,43 +47,22 @@ extern "C" {
/// represents a task identifier
typedef unsigned int tid_t;

#if MAX_CORES == 1
#define per_core(name) name
#define DECLARE_PER_CORE(type, name) extern type name;
#define DEFINE_PER_CORE(type, name, def_value) type name = def_value;
#define DEFINE_PER_CORE_STATIC(type, name, def_value) static type name = def_value;
#define CORE_ID 0
#else
#define per_core(name) (*__get_percore_##name())
#define DECLARE_PER_CORE(type, name) \
	typedef struct { type var __attribute__ ((aligned (CACHE_LINE))); } aligned_##name;\
	extern aligned_##name name[MAX_CORES];\
	inline static type* __get_percore_##name(void) {\
		type* ret; \
		uint8_t flags = irq_nested_disable(); \
		ret = &(name[smp_id()].var); \
		irq_nested_enable(flags);\
		return ret; \
	}
#define DEFINE_PER_CORE(type, name, def_value) \
	aligned_##name name[MAX_CORES] = {[0 ... MAX_CORES-1] = {def_value}};
#define DEFINE_PER_CORE_STATIC(type, name, def_value) \
	typedef struct { type var __attribute__ ((aligned (CACHE_LINE))); } aligned_##name;\
	static aligned_##name name[MAX_CORES] = {[0 ... MAX_CORES-1] = {def_value}}; \
	inline static type* __get_percore_##name(void) {\
		type* ret; \
		uint8_t flags = irq_nested_disable(); \
		ret = &(name[smp_id()].var); \
		irq_nested_enable(flags);\
		return ret; \
	}
#define CORE_ID smp_id()
#endif
#define DECLARE_PER_CORE(type, name) extern type name __attribute__ ((section (".percore")));
#define DEFINE_PER_CORE(type, name, def_value) type name __attribute__ ((section (".percore"))) = def_value;
#define DEFINE_PER_CORE_STATIC(type, name, def_value) static type name __attribute__ ((section (".percore"))) = def_value;

/* needed to find the task that is currently running on this core */
struct task;
DECLARE_PER_CORE(struct task*, current_task);

/* allows fast access to the kernel stack */
DECLARE_PER_CORE(char*, kernel_stack);

/* allows fast access to the core id */
DECLARE_PER_CORE(uint32_t, __core_id);

#define CORE_ID per_core(__core_id)

#ifdef __cplusplus
}
#endif
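The replacement macros are now identical for single-core and SMP builds: a per-core variable is an ordinary object in the .percore section, replicated once per core at boot and addressed through GS. A hypothetical module would use them as below; the counter name is illustrative, while current_task/kernel_stack above show the real usage:

/* in a header */
DECLARE_PER_CORE(uint32_t, irq_counter);

/* in exactly one .c file */
DEFINE_PER_CORE(uint32_t, irq_counter, 0);

/* reads and writes must go through the GS-based accessors */
static void count_irq(void)
{
	per_core_set(irq_counter, per_core(irq_counter) + 1);
}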
@@ -58,6 +58,9 @@ extern const void kernel_start;
extern const void kernel_end;
extern const void bss_start;
extern const void bss_end;
extern const void percore_start;
extern const void percore_end0;
extern const void percore_end;
extern char __BUILD_DATE;

/* Page frame counters */

@@ -81,9 +84,16 @@ static int foo(void* arg)

static int hermit_init(void)
{
	uint32_t i;
	size_t sz = (size_t) &percore_end0 - (size_t) &percore_start;

	// initialize .bss section
	memset((void*)&bss_start, 0x00, ((size_t) &bss_end - (size_t) &bss_start));

	// initialize .percore section => copy first section to all other sections
	for(i=1; i<MAX_CORES; i++)
		memcpy((char*) &percore_start + i*sz, (char*) &percore_start, sz);

	koutput_init();
	system_init();
	irq_init();

@@ -165,6 +175,8 @@ int main(void)

	kprintf("This is Hermit %s, build date %u\n", VERSION, &__BUILD_DATE);
	kprintf("Kernel starts at %p and ends at %p\n", &kernel_start, &kernel_end);
	kprintf("Per core data starts at %p and ends at %p\n", &percore_start, &percore_end);
	kprintf("Per core size 0x%llx\n", (size_t) &percore_end0 - (size_t) &percore_start);
	kprintf("Processor frequency: %u MHz\n", get_cpu_frequency());
	kprintf("Total memory: %lu KiB\n", atomic_int32_read(&total_pages) * PAGE_SIZE / 1024);
	kprintf("Current allocated memory: %lu KiB\n", atomic_int32_read(&total_allocated_pages) * PAGE_SIZE / 1024);
@@ -55,6 +55,8 @@ static readyqueues_t readyqueues[1] = {[0 .
#endif

DEFINE_PER_CORE(task_t*, current_task, task_table+0);
DEFINE_PER_CORE(char*, kernel_stack, NULL);
DEFINE_PER_CORE(uint32_t, __core_id, 0);
extern const void boot_stack;

/** @brief helper function for the assembly code to determine the current task

@@ -89,6 +91,8 @@ int multitasking_init(void)

	task_table[0].prio = IDLE_PRIO;
	task_table[0].stack = (char*) ((size_t)&boot_stack + core_id * KERNEL_STACK_SIZE);
	per_core_set(kernel_stack, task_table[0].stack + KERNEL_STACK_SIZE - 0x10);
	per_core_set(current_task, task_table+0);
	task_table[0].page_map = read_cr3();

	readyqueues[core_id].idle = task_table+0;

@@ -136,6 +140,7 @@ int set_idle_task(void)
	task_table[i].last_core = core_id;
	task_table[i].last_stack_pointer = NULL;
	task_table[i].stack = (char*) ((size_t)&boot_stack + core_id * KERNEL_STACK_SIZE);
	per_core_set(kernel_stack, task_table[i].stack + KERNEL_STACK_SIZE - 0x10);
	task_table[i].prio = IDLE_PRIO;
	spinlock_init(&task_table[i].vma_lock);
	task_table[i].vma_list = NULL;

@@ -143,7 +148,8 @@ int set_idle_task(void)
	spinlock_irqsave_init(&task_table[i].page_lock);
	atomic_int32_set(&task_table[i].user_usage, 0);
	task_table[i].page_map = read_cr3();
	per_core(current_task) = readyqueues[core_id].idle = task_table+i;
	readyqueues[core_id].idle = task_table+i;
	per_core_set(current_task, readyqueues[core_id].idle);
	ret = 0;

	break;

@@ -594,7 +600,8 @@ size_t** scheduler(void)
	if (prio > MAX_PRIO) {
		if ((curr_task->status == TASK_RUNNING) || (curr_task->status == TASK_IDLE))
			goto get_task_out;
		curr_task = per_core(current_task) = readyqueues[core_id].idle;
		curr_task = readyqueues[core_id].idle;
		per_core_set(current_task, curr_task);
	} else {
		// Does the current task have a higher priority? => no task switch
		if ((curr_task->prio > prio) && (curr_task->status == TASK_RUNNING))

@@ -605,7 +612,8 @@ size_t** scheduler(void)
		readyqueues[core_id].old_task = curr_task;
	}

	curr_task = per_core(current_task) = readyqueues[core_id].queue[prio-1].first;
	curr_task = readyqueues[core_id].queue[prio-1].first;
	per_core_set(current_task, curr_task);
	if (BUILTIN_EXPECT(curr_task->status == TASK_INVALID, 0)) {
		kprintf("Upps!!!!!!! Got invalid task %d, orig task %d\n", curr_task->id, orig_task->id);
	}
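The scheduler changes above follow directly from the macro redesign: the old per_core(name) expanded to an lvalue, (*__get_percore_##name()), so chained assignments such as curr_task = per_core(current_task) = ... were legal; the new per_core(name) is a GS-relative load that yields a plain value, so every write has to be rewritten as an explicit per_core_set, as done here.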
@@ -2,6 +2,7 @@ OUTPUT_FORMAT("elf64-x86-64")
OUTPUT_ARCH("i386:x86-64")
ENTRY(start)
phys = (2 << 20);
cores = 8;

SECTIONS
{

@@ -31,5 +32,12 @@ SECTIONS
	*(.bss)
	}
	bss_end = .;
	kernel_end = .;
	.percore ALIGN(4096) : AT(ADDR(.percore)) {
		percore_start = .;
		*(.percore)
		. = ALIGN(64);
		percore_end0 = .;
	}
	percore_end = percore_start + cores * SIZEOF(.percore);
	kernel_end = percore_end;
}
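The output section itself holds only core 0's copy; percore_end reserves address space for all replicas, which hermit_init() fills by duplicating copy 0 (see main.c above). The resulting layout, as a sketch assuming cores = 8:

/*
 * percore_start + 0*sz   core 0 copy (as linked)
 * percore_start + 1*sz   core 1 copy (filled at boot)
 *   ...
 * percore_start + 7*sz   core 7 copy
 * percore_end            = kernel_end, first byte after the copies
 *
 * with sz = SIZEOF(.percore) = percore_end0 - percore_start
 */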