diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 5246fc4f..faea9d96 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -228,7 +228,7 @@ static inline void tlb_flush(void) static inline uint32_t read_eflags(void) { uint32_t result; - asm volatile ("pushf; popl %%eax" : "=a"(result) :: "memory"); + asm volatile ("pushf; pop %%eax" : "=a"(result)); return result; } diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 151b9143..d35ae989 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -56,6 +56,7 @@ static apic_mp_t* apic_mp = NULL; static apic_config_table_t* apic_config = NULL; static uint32_t lapic = 0; static volatile ioapic_t* ioapic = NULL; +static uint32_t icr = 0; static uint32_t ncores = 1; static uint8_t irq_redirect[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF}; #if MAX_CORES > 1 @@ -256,19 +257,28 @@ int smp_init(void) static int lapic_reset(void) { - uint32_t max_lvt = apic_lvt_entries(); + uint32_t max_lvt; + + if (!lapic) + return -ENXIO; + + max_lvt = apic_lvt_entries(); lapic_write(APIC_SVR, 0x17F); // enable the apic and connect to the idt entry 127 lapic_write(APIC_TPR, 0x00); // allow all interrupts - lapic_write(APIC_LVT_T, 0x10000); // disable timer interrupt + if (icr) { + lapic_write(APIC_DCR, 0xB); // set it to 1 clock increments + lapic_write(APIC_LVT_T, 0x2007B); // connects the timer to 123 and enables it + lapic_write(APIC_ICR, icr); + } else + lapic_write(APIC_LVT_T, 0x10000); // disable timer interrupt if (max_lvt >= 4) - lapic_write(APIC_LVT_TSR, 0x10000); // disable thermal sensor interrupt + lapic_write(APIC_LVT_TSR, 0x10000); // disable thermal sensor interrupt if (max_lvt >= 5) - lapic_write(APIC_LVT_PMC, 0x10000); // disable performance counter interrupt + lapic_write(APIC_LVT_PMC, 0x10000); // disable performance counter interrupt lapic_write(APIC_LINT0, 0x7C); // connect LINT0 to idt entry 124 lapic_write(APIC_LINT1, 0x7D); // connect LINT1 to idt entry 125 lapic_write(APIC_LVT_ER, 0x7E); // connect error to idt entry 126 - lapic_write(APIC_ID, 0x00); // reset boot processor id return 0; } @@ -328,13 +338,10 @@ int apic_calibration(void) HALT; diff = 0xFFFFFFFFUL - lapic_read(APIC_CCR); - diff = diff / 3; + icr = diff / 3; flags = irq_nested_disable(); lapic_reset(); - lapic_write(APIC_DCR, 0xB); // set it to 1 clock increments - lapic_write(APIC_LVT_T, 0x2007B); // connects the timer to 123 and enables it - lapic_write(APIC_ICR, diff); irq_nested_enable(flags); // Now, MetalSVM is able to use the APIC => Therefore, we disable the PIC @@ -353,26 +360,23 @@ int apic_calibration(void) lapic_write(APIC_ICR, 0xFFFFFFFFUL); /* wait 3 time slices to determine a ICR */ - rmb(); + mb(); start = rdtsc(); do { - rmb(); + mb(); end = rdtsc(); ticks = end > start ? 
end - start : start - end; } while(ticks*TIMER_FREQ < 3*RC_REFCLOCKMHZ*1000000UL); diff = 0xFFFFFFFFUL - lapic_read(APIC_CCR); - diff = diff / 3; + icr = diff / 3; lapic_reset(); - lapic_write(APIC_DCR, 0xB); // set it to 1 clock increments - lapic_write(APIC_LVT_T, 0x2007B); // connects the timer to 123 and enables it - lapic_write(APIC_ICR, diff); irq_nested_enable(flags); #endif - kprintf("APIC calibration determines an ICR of 0x%x\n", diff); + kprintf("APIC calibration determines an ICR of 0x%x\n", icr); flags = irq_nested_disable(); #if MAX_CORES > 1 @@ -568,6 +572,7 @@ int apic_init(void) // set APIC error handler irq_install_handler(126, apic_err_handler); + // initialize local apic ret = lapic_reset(); if (!ret) return ret; diff --git a/arch/x86/kernel/entry.asm b/arch/x86/kernel/entry.asm index f24b33dd..1a325dba 100644 --- a/arch/x86/kernel/entry.asm +++ b/arch/x86/kernel/entry.asm @@ -145,7 +145,6 @@ global isr29 global isr30 global isr31 global isrsyscall -global jump_to_child ; 0: Divide By Zero Exception isr0: @@ -433,6 +432,10 @@ extern syscall_handler ; used to realize system calls isrsyscall: + push ds + push fs + push gs + push es push ebp push edi push esi @@ -449,17 +452,10 @@ isrsyscall: pop esi pop edi pop ebp - iret - -jump_to_child: - add esp, 4 - mov eax, 0 ; child got always zero as return value - pop ebx - pop ecx - pop edx - pop esi - pop edi - pop ebp + pop es + pop gs + pop fs + pop ds iret global irq0 diff --git a/arch/x86/kernel/gdt.c b/arch/x86/kernel/gdt.c index b4fd61ae..30e165c7 100644 --- a/arch/x86/kernel/gdt.c +++ b/arch/x86/kernel/gdt.c @@ -68,9 +68,6 @@ int arch_fork(task_t* task) memcpy(task_state_segments+id, task_state_segments+curr_task->id, sizeof(tss_t)); memcpy(kstacks[id], kstacks[curr_task->id], KERNEL_STACK_SIZE); task_state_segments[id].cr3 = (uint32_t) (virt_to_phys((size_t)task->pgd)); - task_state_segments[id].eflags = read_eflags(); - // the parent task will enable the IF flag - task_state_segments[id].eflags |= (1 << 9); task_state_segments[id].esp0 = (uint32_t) kstacks[id] + KERNEL_STACK_SIZE - sizeof(size_t); asm volatile("mov %%esp, %0" : "=r"(task_state_segments[id].esp)); @@ -91,6 +88,9 @@ int arch_fork(task_t* task) asm volatile ("pop %0" : "=r"(task_state_segments[id].ecx)); asm volatile ("pop %0" : "=r"(task_state_segments[id].eax)); + // store current EFLAGS and set IF flag + // => the parent task will enable the interrupt handling + task_state_segments[id].eflags = read_eflags() | (1 << 9); // This will be the entry point for the new task. 
task_state_segments[id].eip = read_eip(); diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 5c1cb621..247a55ea 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -238,9 +238,11 @@ void irq_handler(struct state *s) * Find out if we have a custom handler to run for this * IRQ and then finally, run it */ - handler = irq_routines[s->int_no]; - if (handler) - handler(s); + if (BUILTIN_EXPECT(s->int_no < MAX_HANDLERS, 1)) { + handler = irq_routines[s->int_no]; + if (handler) + handler(s); + } else kprintf("Invalid interrupt number %d\n", s->int_no); /* * If the IDT entry that was invoked was greater-than-or-equal to 48, diff --git a/arch/x86/kernel/processor.c b/arch/x86/kernel/processor.c index 2b1c84bf..aa4988f4 100644 --- a/arch/x86/kernel/processor.c +++ b/arch/x86/kernel/processor.c @@ -49,12 +49,12 @@ uint32_t detect_cpu_frequency(void) while((ticks = get_clock_tick()) - old == 0) HALT; - rmb(); + mb(); start = rdtsc(); /* wait a second to determine the frequency */ while(get_clock_tick() - ticks < TIMER_FREQ) HALT; - rmb(); + mb(); end = rdtsc(); diff = end > start ? end - start : start - end; @@ -78,7 +78,7 @@ void udelay(uint32_t usecs) uint64_t deadline = get_cpu_frequency() * usecs; do { - rmb(); + mb(); end = rdtsc(); diff = end > start ? end - start : start - end; } while(diff < deadline); diff --git a/arch/x86/kernel/schedule.asm b/arch/x86/kernel/schedule.asm deleted file mode 100644 index 0a4b0286..00000000 --- a/arch/x86/kernel/schedule.asm +++ /dev/null @@ -1,147 +0,0 @@ -; -; Copyright 2010 Stefan Lankes, Chair for Operating Systems, -; RWTH Aachen University -; -; Licensed under the Apache License, Version 2.0 (the "License"); -; you may not use this file except in compliance with the License. -; You may obtain a copy of the License at -; -; http://www.apache.org/licenses/LICENSE-2.0 -; -; Unless required by applicable law or agreed to in writing, software -; distributed under the License is distributed on an "AS IS" BASIS, -; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -; See the License for the specific language governing permissions and -; limitations under the License. -; -; This file is part of metalsvm. - -[BITS 32] -SECTION .code -extern get_new_task -extern current_task -;extern task_table - -; C prototyp: -; -; typedef struct { -; unsigned char* top; -; unsigned int ip; -; tid_t id; -; ... -; } task_t; - -; After an interrupt, the original return address has to be pushed -; on the stack because we want to comeback... 
-global schedule_entry -schedule_entry: - cli - sub esp, 4 ; add some space for the return address, - ; which is stored in current_task->ip - - pushfd - pusha - push ds - push es - push fs - push gs - - mov ax, 0x10 - mov ds, ax - mov es, ax - mov fs, ax - mov gs, ax - - ;mov eax, DWORD [current_id] - ;shl eax, 0x4 ; 16 = sizeof(task_t) - ;mov edx, DWORD [eax+task_table+4] - ;mov [esp+13*4], edx - ;mov DWORD [eax+task_table+4], 0 - mov eax, DWORD [current_task] - mov edx, [eax+4] - mov DWORD [esp+13*4], edx - mov DWORD [eax+4], 0x00 - - jmp L1 - -; Scheduler, which switches to the new tasks -global schedule -schedule: - ret - - cli - ; return address is already on the stack (via call schedule) - pop eax - pushfd - push 0x08 - push eax - push byte 0 - push byte 32 - pusha - push ds - push es - push fs - push gs - - mov ax, 0x10 - mov ds, ax - mov es, ax - mov fs, ax - mov gs, ax - - mov eax, DWORD [current_task] - mov [eax], esp - - ;call get_new_task - - mov DWORD [current_task], eax - mov esp, [eax] - - pop gs - pop fs - pop es - pop ds - popa - add esp, 8 - xor eax, eax ; return value is 0 - iret - - cli - pushfd - pusha - push ds - push es - push fs - push gs - - mov ax, 0x10 - mov ds, ax - mov es, ax - mov fs, ax - mov gs, ax - - ; store current esp - ;mov eax, DWORD [current_id] - ;shl eax, 0x4 ; 16 = sizeof(task_t) -L1: - ;mov [eax+task_table], esp - mov eax, DWORD [current_task] - mov [eax], esp - - ;call get_new_task - - ; set current task and restore esp - ;mov DWORD [current_id], eax - ;shl eax, 0x4 ; 16 = sizeof(task_t) - ;mov esp, [eax+task_table] - mov DWORD [current_task], eax - mov esp, [eax] - - pop gs - pop fs - pop es - pop ds - popa - popfd - sti - ret diff --git a/include/metalsvm/spinlock.h b/include/metalsvm/spinlock.h index 4cdff155..701762aa 100644 --- a/include/metalsvm/spinlock.h +++ b/include/metalsvm/spinlock.h @@ -32,6 +32,7 @@ #include #include #include +#include #ifdef __cplusplus extern "C" { @@ -42,6 +43,9 @@ extern "C" { * Initialize each spinlock before use! * * @param s Pointer to the spinlock structure to initialize. + * @return + * - 0 on success + * - -EINVAL (-22) on failure */ inline static int spinlock_init(spinlock_t* s) { if (BUILTIN_EXPECT(!s, 0)) @@ -56,18 +60,24 @@ inline static int spinlock_init(spinlock_t* s) { } /** @brief Destroy spinlock after use - * @return 0 in any case + * @return + * - 0 on success + * - -EINVAL (-22) on failure */ inline static int spinlock_destroy(spinlock_t* s) { + if (BUILTIN_EXPECT(!s, 0)) + return -EINVAL; + s->owner = MAX_TASKS; s->counter = 0; + return 0; } /** @brief Lock spinlock at entry of critical section * @return * - 0 on success - * - -EINVAL (-22) on failure) + * - -EINVAL (-22) on failure */ inline static int spinlock_lock(spinlock_t* s) { int32_t ticket; @@ -81,8 +91,9 @@ inline static int spinlock_lock(spinlock_t* s) { } ticket = atomic_int32_inc(&s->queue); - while(atomic_int32_read(&s->dequeue) != ticket) - ; + while(atomic_int32_read(&s->dequeue) != ticket) { + NOP1; + } s->owner = per_core(current_task)->id; s->counter = 1; @@ -107,57 +118,94 @@ inline static int spinlock_unlock(spinlock_t* s) { return 0; } -/** @brief Lock spinlock at entry of critical section and disable interrupts +/** @brief Initialization of a irqsave spinlock + * + * Initialize each irqsave spinlock before use! 
* * @return * - 0 on success * - -EINVAL (-22) on failure */ -inline static int spinlock_lock_irqsave(spinlock_t* s) { - uint32_t flags; - int ret; - +inline static int spinlock_irqsave_init(spinlock_irqsave_t* s) { if (BUILTIN_EXPECT(!s, 0)) return -EINVAL; - - flags = irq_nested_disable(); - ret = spinlock_lock(s); - if (ret) { - // ret can only be !0 in case of an error (-EINVAL) - // re-enable interrupts and pass error value - irq_nested_enable(flags); - return ret; - } + atomic_int32_set(&s->queue, 0); + atomic_int32_set(&s->dequeue, 1); + s->flags = 0; + s->coreid = (uint32_t)-1; + s->counter = 0; - if (!ret && (s->counter == 1)) - s->flags = flags; - - return ret; + return 0; } -/** @brief Unlock spinlock on exit of critical section and re-enable interrupts +/** @brief Destroy irqsave spinlock after use * @return * - 0 on success * - -EINVAL (-22) on failure */ -inline static int spinlock_unlock_irqsave(spinlock_t* s) { - int ret, restore = 0; - uint32_t flags = 0; +inline static int spinlock_irqsave_destroy(spinlock_irqsave_t* s) { + if (BUILTIN_EXPECT(!s, 0)) + return -EINVAL; + + s->flags = 0; + s->coreid = (uint32_t)-1; + s->counter = 0; + + return 0; +} + +/** @brief Unlock an irqsave spinlock on exit of critical section + * @return + * - 0 on success + * - -EINVAL (-22) on failure + */ +inline static int spinlock_irqsave_lock(spinlock_irqsave_t* s) { + uint32_t flags; + int32_t ticket; if (BUILTIN_EXPECT(!s, 0)) return -EINVAL; - if (s->counter == 1) { - restore = 1; - flags = s->flags; + flags = irq_nested_disable(); + if (s->coreid == CORE_ID) { + s->counter++; + return 0; } - ret = spinlock_unlock(s); - if (!ret && restore) - irq_nested_enable(flags); + ticket = atomic_int32_inc(&s->queue); + while (atomic_int32_read(&s->dequeue) != ticket) { + NOP1; + } - return ret; + s->coreid = CORE_ID; + s->flags = flags; + s->counter = 1; + + return 0; +} + +/** @brief Unlock irqsave spinlock on exit of critical section and re-enable interrupts + * @return + * - 0 on success + * - -EINVAL (-22) on failure + */ +inline static int spinlock_irqsave_unlock(spinlock_irqsave_t* s) { + uint32_t flags; + + if (BUILTIN_EXPECT(!s, 0)) + return -EINVAL; + + s->counter--; + if (!s->counter) { + flags = s->flags; + s->coreid = (uint32_t) -1; + s->flags = 0; + atomic_int32_inc(&s->dequeue); + irq_nested_enable(flags); + } + + return 0; } #ifdef __cplusplus diff --git a/include/metalsvm/spinlock_types.h b/include/metalsvm/spinlock_types.h index 334d19e7..9715a305 100644 --- a/include/metalsvm/spinlock_types.h +++ b/include/metalsvm/spinlock_types.h @@ -44,12 +44,25 @@ typedef struct spinlock { tid_t owner; /// Internal counter var uint32_t counter; - /// Flags - uint32_t flags; } spinlock_t; +typedef struct spinlock_irqsave { + /// Internal queue + atomic_int32_t queue; + /// Internal dequeue + atomic_int32_t dequeue; + /// Interrupt flag + uint32_t flags; + /// Core Id of the lock owner + uint32_t coreid; + /// Internal counter var + uint32_t counter; +} spinlock_irqsave_t; + /// Macro for spinlock initialization -#define SPINLOCK_INIT { ATOMIC_INIT(0), ATOMIC_INIT(1), MAX_TASKS, 0, 0 } +#define SPINLOCK_INIT { ATOMIC_INIT(0), ATOMIC_INIT(1), MAX_TASKS, 0} +/// Macro for irqsave spinlock initialization +#define SPINLOCK_IRQSAVE_INIT { ATOMIC_INIT(0), ATOMIC_INIT(1), 0, (uint32_t)-1, 0} #ifdef __cplusplus } diff --git a/include/metalsvm/stddef.h b/include/metalsvm/stddef.h index 2dfc9ba6..07d3ed27 100644 --- a/include/metalsvm/stddef.h +++ b/include/metalsvm/stddef.h @@ -35,6 +35,7 @@ typedef 
unsigned int tid_t; #define per_core(name) name #define DECLARE_PER_CORE(type, name) extern type name; #define DEFINE_PER_CORE(type, name, def_value) type name = def_value; +#define CORE_ID 0 #else #define per_core(name) name[LOGICAL_CPUID].var #define DECLARE_PER_CORE(type, name) \ @@ -42,6 +43,7 @@ typedef unsigned int tid_t; extern aligned_##name name[MAX_CORES]; #define DEFINE_PER_CORE(type, name, def_value) \ aligned_##name name[MAX_CORES] = {[0 ... MAX_CORES-1] = {def_value}}; +#define CORE_ID LOGICAL_CPUID #endif /* needed to find the task, which is currently running on this core */ diff --git a/kernel/tasks.c b/kernel/tasks.c index 9ee46fa8..37d5ecd7 100644 --- a/kernel/tasks.c +++ b/kernel/tasks.c @@ -38,6 +38,7 @@ #include #include #include +#include #include DEFINE_PER_CORE(task_t*, current_task, NULL); @@ -46,10 +47,9 @@ DEFINE_PER_CORE(task_t*, current_task, NULL); * * A task's id will be its position in this array. */ -static task_t task_table[MAX_TASKS] = {[0 ... MAX_TASKS-1] = \ - {0, TASK_INVALID, ATOMIC_INIT(0), \ - SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL}}; -static spinlock_t table_lock = SPINLOCK_INIT; +static task_t task_table[MAX_TASKS] = {[0 ... MAX_TASKS-1] = {0, TASK_INVALID, ATOMIC_INIT(0), \ + SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL}}; +static spinlock_irqsave_t table_lock = SPINLOCK_IRQSAVE_INIT; /** @brief helper function for the assembly code to determine the current task * @return Pointer to the task_t structure of current task @@ -82,7 +82,7 @@ static void wakeup_blocked_tasks(int result) wait_msg_t tmp = { per_core(current_task)->id, result }; unsigned int i; - spinlock_lock_irqsave(&table_lock); + spinlock_irqsave_lock(&table_lock); /* wake up blocked tasks */ for(i=0; ivma_lock); + spinlock_irqsave_lock(&table_lock); for(i=0; ivma_lock); child = &task_table[i].vma_list; parent = per_core(current_task)->vma_list; tmp = NULL; @@ -246,7 +246,6 @@ int sys_fork(void) tmp = *child; child = &((*child)->next); } - spinlock_unlock(&per_core(current_task)->vma_lock); mailbox_wait_msg_init(&task_table[i].inbox); memset(task_table[i].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS); @@ -254,8 +253,13 @@ int sys_fork(void) ret = arch_fork(task_table+i); - if (parent_task != per_core(current_task)) - return 0; // Oh, the new child! => leave function + if (parent_task != per_core(current_task)) { + // Oh, the current task is the new child task! + // Leave the function without releasing the locks + // because the locks are already released + // by the parent task! 
+ return 0; + } if (!ret) { task_table[i].status = TASK_READY; @@ -266,7 +270,8 @@ int sys_fork(void) } create_task_out: - spinlock_unlock_irqsave(&table_lock); + spinlock_irqsave_unlock(&table_lock); + spinlock_unlock(&per_core(current_task)->vma_lock); return ret; } @@ -642,7 +647,7 @@ int wakeup_task(tid_t id) int ret = -EINVAL; /* avoid nested locking */ - spinlock_lock_irqsave(&table_lock); + spinlock_irqsave_lock(&table_lock); if (task_table[id].status != TASK_BLOCKED) { kprintf("Task %d is not blocked!\n", id); @@ -651,7 +656,7 @@ int wakeup_task(tid_t id) ret = 0; } - spinlock_unlock_irqsave(&table_lock); + spinlock_irqsave_unlock(&table_lock); return ret; } @@ -666,14 +671,14 @@ int block_task(tid_t id) { int ret = -EINVAL; - spinlock_lock_irqsave(&table_lock); + spinlock_irqsave_lock(&table_lock); if ((task_table[id].status == TASK_RUNNING) || (task_table[id].status == TASK_READY)) { task_table[id].status = TASK_BLOCKED; ret = 0; } else kprintf("Unable to block task %d!\n", id); - spinlock_unlock_irqsave(&table_lock); + spinlock_irqsave_unlock(&table_lock); return ret; } @@ -687,7 +692,9 @@ void scheduler(void) unsigned int i; unsigned int new_id; - spinlock_lock(&table_lock); +#if MAX_CORES > 1 + spinlock_irqsave_lock(&table_lock); +#endif /* signalize that this task could be reused */ if (per_core(current_task)->status == TASK_FINISHED) @@ -721,5 +728,7 @@ void scheduler(void) } get_task_out: - spinlock_unlock(&table_lock); +#if MAX_CORES > 1 + spinlock_irqsave_unlock(&table_lock); +#endif } diff --git a/newlib/examples/tests.c b/newlib/examples/tests.c index 7288ed82..e2bc27d3 100644 --- a/newlib/examples/tests.c +++ b/newlib/examples/tests.c @@ -34,7 +34,7 @@ int main(int argc, char** argv) printf("Create child process...\n"); - pid = 42; //fork(); + pid = fork(); if (pid == 0) { // child char* newargs[] = {"/bin/hello", "one", "two", "three", NULL}; char* newenv[] = {"USER=root", "PATH=/bin:/sbin:/usr/bin", "PWD=/", "TEMP=/tmp", NULL};
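
Note on the new irqsave spinlock API (usage sketch only, not part of the patch): the patch drops spinlock_lock_irqsave()/spinlock_unlock_irqsave() in favour of a dedicated spinlock_irqsave_t that keeps the saved EFLAGS and the owning core id inside the lock itself. A minimal caller could look like the sketch below; the header path and the shared counter are assumptions made purely for illustration.

    /* sketch only: the include path and the counter are illustrative assumptions */
    #include <metalsvm/spinlock.h>

    static spinlock_irqsave_t counter_lock = SPINLOCK_IRQSAVE_INIT;
    static uint32_t shared_counter = 0;

    void increment_shared_counter(void)
    {
            /* disables interrupts on this core, then spins on the ticket */
            spinlock_irqsave_lock(&counter_lock);
            shared_counter++;
            /* restores the saved EFLAGS once the last nested lock is released */
            spinlock_irqsave_unlock(&counter_lock);
    }

The lock is recursive on the owning core: a repeated spinlock_irqsave_lock() from the same core only increments the internal counter, and interrupts are re-enabled only when the matching outermost spinlock_irqsave_unlock() brings that counter back to zero.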