/* * Copyright 2010 Stefan Lankes, Chair for Operating Systems, * RWTH Aachen University * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * This file is part of MetalSVM. */ #include #include #include #include #include #include #include #include gdt_ptr_t gp; tss_t task_state_segments[MAX_CORES] __attribute__ ((aligned (PAGE_SIZE))); static unsigned char kstacks[MAX_TASKS][KERNEL_STACK_SIZE] __attribute__ ((aligned (PAGE_SIZE))) = {[0 ... MAX_TASKS-1][0 ... KERNEL_STACK_SIZE-1] = 0xCD}; size_t default_stack_pointer = (size_t) kstacks[0] + KERNEL_STACK_SIZE - sizeof(size_t); // currently, our kernel has full access to the ioports static gdt_entry_t gdt[GDT_ENTRIES] = {[0 ... GDT_ENTRIES-1] = {0, 0, 0, 0, 0, 0}}; /* * This is defined in entry.asm. We use this to properly reload * the new segment registers */ extern void gdt_flush(void); /* * This is defined in entry.asm. We use this for a * hardware-based task switch. */ extern void tss_switch(uint32_t id); size_t* get_current_stack(void) { task_t* curr_task = per_core(current_task); write_cr3(virt_to_phys((size_t)curr_task->pgd)); return curr_task->stack; } size_t get_stack(uint32_t id) { if (BUILTIN_EXPECT(id >= MAX_TASKS, 0)) return -EINVAL; return (size_t) kstacks[id] + KERNEL_STACK_SIZE - sizeof(size_t); } int register_task(task_t* task) { uint16_t sel; sel = (CORE_ID+5) << 3; asm volatile ("mov %0, %%ax; ltr %%ax" : : "ir"(sel) : "%eax"); return 0; } int arch_fork(task_t* task) { uint16_t cs = 0x08; uint32_t id, esp; struct state* state; task_t* curr_task = per_core(current_task); if (BUILTIN_EXPECT(!task, 0)) return -EINVAL; id = task->id; // copy kernel stack of the current task memcpy(kstacks[id], kstacks[curr_task->id], KERNEL_STACK_SIZE); #ifdef CONFIG_X86_32 asm volatile ("mov %%esp, %0" : "=r"(esp)); esp -= (uint32_t) kstacks[curr_task->id]; esp += (uint32_t) kstacks[id]; state = (struct state*) (esp - sizeof(struct state) + 2*sizeof(size_t)); memset(state, 0x00, sizeof(struct state) - 2*sizeof(size_t)); asm volatile ("pusha; pop %0" : "=r"(state->edi)); asm volatile ("pop %0" : "=r"(state->esi)); asm volatile ("pop %0" : "=r"(state->ebp)); asm volatile ("add $4, %%esp" ::: "%esp"); asm volatile ("pop %0" : "=r"(state->ebx)); asm volatile ("pop %0" : "=r"(state->edx)); asm volatile ("pop %0" : "=r"(state->ecx)); asm volatile ("pop %0" : "=r"(state->eax)); #ifdef WITH_FRAME_POINTER state->ebp -= (uint32_t) kstacks[curr_task->id]; state->ebp += (uint32_t) kstacks[id]; #endif state->esp = (uint32_t) state; task->stack = (size_t*) state; state->int_no = 0xB16B00B5; state->error = 0xC03DB4B3; state->cs = cs; // store the current EFLAGS asm volatile ("pushf; pop %%eax" : "=a"(state->eflags)); // enable interrupts state->eflags |= (1 << 9); // This will be the entry point for the new task. asm volatile ("call read_eip" : "=a"(state->eip)); #else #warning Currently, not supported! return -1; #endif return 0; } int create_default_frame(task_t* task, entry_point_t ep, void* arg) { uint16_t cs = 0x08; uint32_t id; size_t *stack; struct state *stptr; size_t state_size; if (BUILTIN_EXPECT(!task, 0)) return -EINVAL; id = task->id; memset(kstacks[id], 0xCD, KERNEL_STACK_SIZE); /* The difference between setting up a task for SW-task-switching * and not for HW-task-switching is setting up a stack and not a TSS. * This is the stack which will be activated and popped off for iret later. */ stack = (size_t*) (kstacks[id] + KERNEL_STACK_SIZE - sizeof(size_t)); /* The next three things on the stack are a marker for debugging purposes, ... */ *stack-- = 0xDEADBEEF; #ifdef CONFIG_X86_32 /* the first-function-to-be-called's arguments, ... */ *stack-- = (size_t) arg; #endif /* and the "caller" we shall return to. * This procedure cleans the task after exit. */ *stack = (size_t) leave_kernel_task; /* Next bunch on the stack is the initial register state. * The stack must look like the stack of a task which was * scheduled away previously. */ /* In 64bit mode, he stack pointer (SS:RSP) is pushed unconditionally on interrupts. * In legacy modes, this push is conditional and based on a change in current privilege level (CPL).*/ #ifdef CONFIG_X86_32 state_size = sizeof(struct state) - 2*sizeof(size_t); #else state_size = sizeof(struct state); #endif stack = (size_t*) ((size_t) stack - state_size); stptr = (struct state *) stack; memset(stptr, 0x00, state_size); #ifdef CONFIG_X86_32 stptr->esp = (size_t)stack + state_size; #else stptr->rsp = (size_t)stack + state_size; /* the first-function-to-be-called's arguments, ... */ stptr->rdi = (size_t) arg; #endif stptr->int_no = 0xB16B00B5; stptr->error = 0xC03DB4B3; /* The instruction pointer shall be set on the first function to be called * after IRETing */ #ifdef CONFIG_X86_32 stptr->eip = (size_t)ep; #else stptr->rip = (size_t)ep; #endif stptr->cs = cs; #ifdef CONFIG_X86_32 stptr->eflags = 0x1202; // the creation of a kernel tasks didn't change the IOPL level // => useresp & ss is not required #else stptr->rflags = 0x1202; stptr->ss = 0x10; stptr->userrsp = stptr->rsp; #endif /* Set the task's stack pointer entry to the stack we have crafted right now. */ task->stack = (size_t*)stack; return 0; } /** @brief Configures GDT descriptor with chosen attributes * * Just feed this function with address, limit and the flags * you have seen in gdt.h */ static void gdt_set_gate(int num, unsigned long base, unsigned long limit, unsigned char access, unsigned char gran) { /* Setup the descriptor base address */ gdt[num].base_low = (base & 0xFFFF); gdt[num].base_middle = (base >> 16) & 0xFF; gdt[num].base_high = (base >> 24) & 0xFF; /* Setup the descriptor limits */ gdt[num].limit_low = (limit & 0xFFFF); gdt[num].granularity = ((limit >> 16) & 0x0F); /* Finally, set up the granularity and access flags */ gdt[num].granularity |= (gran & 0xF0); gdt[num].access = access; } /* * This will setup the special GDT * pointer, set up the entries in our GDT, and then * finally call gdt_flush() in our assembler file in order * to tell the processor where the new GDT is and update the * new segment registers */ void gdt_install(void) { unsigned int i, mode; memset(task_state_segments, 0x00, MAX_CORES*sizeof(tss_t)); #ifdef CONFIG_X86_32 mode = GDT_FLAG_32_BIT; #elif defined(CONFIG_X86_64) mode = GDT_FLAG_64_BIT; #else #error invalid mode #endif /* Setup the GDT pointer and limit */ gp.limit = (sizeof(gdt_entry_t) * GDT_ENTRIES) - 1; gp.base = (size_t) &gdt; /* Our NULL descriptor */ gdt_set_gate(0, 0, 0, 0, 0); /* * The second entry is our Code Segment. The base address * is 0, the limit is 4 GByte, it uses 4KByte granularity, * uses 32-bit opcodes, and is a Code Segment descriptor. */ gdt_set_gate(1, 0, 0xFFFFFFFF, GDT_FLAG_RING0 | GDT_FLAG_SEGMENT | GDT_FLAG_CODESEG | GDT_FLAG_PRESENT, GDT_FLAG_4K_GRAN | mode); /* * The third entry is our Data Segment. It's EXACTLY the * same as our code segment, but the descriptor type in * this entry's access byte says it's a Data Segment */ gdt_set_gate(2, 0, 0xFFFFFFFF, GDT_FLAG_RING0 | GDT_FLAG_SEGMENT | GDT_FLAG_DATASEG | GDT_FLAG_PRESENT, GDT_FLAG_4K_GRAN | mode); /* * Create code segement for userspace applications (ring 3) */ gdt_set_gate(3, 0, 0xFFFFFFFF, GDT_FLAG_RING3 | GDT_FLAG_SEGMENT | GDT_FLAG_CODESEG | GDT_FLAG_PRESENT, GDT_FLAG_4K_GRAN | mode); /* * Create data segement for userspace applications (ring 3) */ gdt_set_gate(4, 0, 0xFFFFFFFF, GDT_FLAG_RING3 | GDT_FLAG_SEGMENT | GDT_FLAG_DATASEG | GDT_FLAG_PRESENT, GDT_FLAG_4K_GRAN | mode); /* * Create TSS for each task at ring0 (we use these segments for task switching) */ for(i=0; i