diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index bb526e73..4a79d9ec 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -25,11 +25,21 @@ extern "C" { #endif inline static void irq_disable(void) { - asm volatile("cli": : : "memory"); + asm volatile("cli" ::: "memory"); +} + +inline static uint32_t irq_nested_disable(void) { + uint32_t flags; + asm volatile("pushf; cli; popl %0": "=r"(flags) : : "memory"); + return flags; } inline static void irq_enable(void) { - asm volatile ("sti": : : "memory"); + asm volatile("sti" ::: "memory"); +} + +inline static void irq_nested_enable(uint32_t flags) { + asm volatile("pushl %0; popf" : : "r"(flags) : "memory"); } #ifdef __cplusplus diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index b1f53301..95658800 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -96,9 +96,9 @@ size_t map_region(size_t viraddr, size_t phyaddr, uint32_t pages, uint32_t type) int arch_paging_init(void); /* - * Setup a kernel task with a valid entry to the kernel's page directory + * Returns the page directory of the boot task */ -int get_boot_pgd(task_t* task); +page_dir_t* get_boot_pgd(void); /* * Setup a new page directory for a new user-level task diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index c90d7189..4bc5d7df 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -241,6 +241,7 @@ int smp_init(void) int apic_calibration(void) { uint8_t i; + uint32_t flags; #ifndef CONFIG_ROCKCREEK uint64_t ticks, old; @@ -319,7 +320,7 @@ int apic_calibration(void) kprintf("APIC calibration determines an ICR of 0x%x\n", diff / 3); - irq_disable(); + flags = irq_nested_disable(); #if MAX_CORES > 1 //smp_init(); #endif @@ -332,7 +333,7 @@ int apic_calibration(void) ioapic_inton(i, apic_processors[boot_processor]->id); } initialized = 1; - irq_enable(); + irq_nested_enable(flags); return 0; } diff --git 
a/arch/x86/mm/page.c b/arch/x86/mm/page.c index 6dd698b7..166075b4 100644 --- a/arch/x86/mm/page.c +++ b/arch/x86/mm/page.c @@ -51,26 +51,25 @@ extern const void kernel_end; // boot task's page directory and page directory lock static page_dir_t boot_pgd = {{[0 ... 1023] = 0}}; +//static spinlock_t boot_lock = SPINLOCK_INIT; static int paging_enabled = 0; -int get_boot_pgd(task_t* task) +page_dir_t* get_boot_pgd(void) { - if (BUILTIN_EXPECT(!task, 0)) - return -EINVAL; - - task->pgd = &boot_pgd; - - return 0; + return &boot_pgd; } /* - * TODO: We create a full copy of the current. Copy-On-Access will be the better solution. + * TODO: We create a full copy of the current task. Copy-On-Access will be the better solution. + * + * No PGD locking is needed because only create_pgd uses this function and already holds the + * PGD lock. */ inline static size_t copy_page_table(uint32_t pgd_index, page_table_t* pgt, int* counter) { uint32_t i; page_table_t* new_pgt; - size_t viraddr, phyaddr; + size_t phyaddr; task_t* curr_task = per_core(current_task); if (BUILTIN_EXPECT(!pgt, 0)) @@ -161,6 +160,8 @@ int create_pgd(task_t* task, int copy) task->pgd = pgd; if (copy) { + spinlock_lock(&curr_task->pgd_lock); + for (i=KERNEL_SPACE/(1024*PAGE_SIZE); i<1024; i++) { if (!(curr_task->pgd->entries[i])) continue; @@ -169,6 +170,8 @@ int create_pgd(task_t* task, int copy) if (phyaddr) pgd->entries[i] = (phyaddr & 0xFFFFF000) | (curr_task->pgd->entries[i] & 0xFFF); } + + spinlock_unlock(&curr_task->pgd_lock); } // frees the virtual regions, because only the new child task need access to the new pgd and pgt @@ -180,27 +183,33 @@ int create_pgd(task_t* task, int copy) return counter; } +/* + * drops all page frames and the PGD of a user task + */ int drop_pgd(void) { uint32_t i; page_dir_t* pgd = per_core(current_task)->pgd; size_t phy_pgd = virt_to_phys((size_t) pgd); + task_t* task = per_core(current_task); if (BUILTIN_EXPECT(pgd == &boot_pgd, 0)) return -EINVAL; - 
spinlock_lock(&(per_core(current_task)->pgd_lock)); + spinlock_lock(&task->pgd_lock); for(i=KERNEL_SPACE/(1024*PAGE_SIZE); i<1024; i++) - if (pgd->entries[i] & 0xFFFFF000) + if (pgd->entries[i] & 0xFFFFF000) { put_page(pgd->entries[i] & 0xFFFFF000); + pgd->entries[i] = 0; + } // freeing the page directory put_page(phy_pgd); - per_core(current_task)->pgd = NULL; + task->pgd = NULL; - spinlock_unlock(&(per_core(current_task)->pgd_lock)); + spinlock_unlock(&task->pgd_lock); return 0; } @@ -218,7 +227,7 @@ size_t virt_to_phys(size_t viraddr) if (BUILTIN_EXPECT(!task || !task->pgd, 0)) return 0; - spinlock_lock(&(per_core(current_task)->pgd_lock)); + spinlock_lock(&task->pgd_lock); index1 = viraddr >> 22; index2 = (viraddr >> 12) & 0x3FF; @@ -235,7 +244,7 @@ size_t virt_to_phys(size_t viraddr) out: //kprintf("vir %p to phy %p\n", viraddr, ret); - spinlock_unlock(&(per_core(current_task)->pgd_lock)); + spinlock_unlock(&task->pgd_lock); return ret; } @@ -330,7 +339,7 @@ size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flag tlb_flush_one_page(viraddr); } - + spinlock_unlock(&task->pgd_lock); return ret; @@ -343,6 +352,7 @@ int change_page_permissions(size_t start, size_t end, uint32_t flags) size_t phyaddr; page_table_t* pgt; page_dir_t* pgd; + task_t* task = per_core(current_task); if (BUILTIN_EXPECT(!paging_enabled, 0)) return -EINVAL; @@ -351,7 +361,7 @@ int change_page_permissions(size_t start, size_t end, uint32_t flags) if (BUILTIN_EXPECT(!pgd, 0)) return -EINVAL; - spinlock_lock(&(per_core(current_task)->pgd_lock)); + spinlock_lock(&task->pgd_lock); while (viraddr < end) { @@ -380,7 +390,7 @@ int change_page_permissions(size_t start, size_t end, uint32_t flags) } } - spinlock_unlock(&(per_core(current_task)->pgd_lock)); + spinlock_unlock(&task->pgd_lock); return 0; } @@ -397,14 +407,11 @@ size_t vm_alloc(uint32_t npages, uint32_t flags) size_t viraddr, i, ret = 0; size_t start, end; page_table_t* pgt; - uint32_t has_lock; if 
(BUILTIN_EXPECT(!task || !task->pgd || !paging_enabled, 0)) return 0; - has_lock = spinlock_has_lock(&task->pgd_lock); - if (!has_lock) - spinlock_lock(&task->pgd_lock); + spinlock_lock(&task->pgd_lock); if (flags & MAP_KERNEL_SPACE) { start = (((size_t) &kernel_end) + PAGE_SIZE) & 0xFFFFF000; @@ -438,8 +445,7 @@ size_t vm_alloc(uint32_t npages, uint32_t flags) if ((j >= npages) && (viraddr < end)) ret = viraddr; - if (!has_lock) - spinlock_unlock(&task->pgd_lock); + spinlock_unlock(&task->pgd_lock); return ret; } @@ -448,15 +454,13 @@ int vm_free(size_t viraddr, uint32_t npages) { task_t* task = per_core(current_task); uint32_t i; - uint32_t index1, index2, has_lock; + uint32_t index1, index2; page_table_t* pgt; if (BUILTIN_EXPECT(!task || !task->pgd || !paging_enabled, 0)) return -EINVAL; - has_lock = spinlock_has_lock(&task->pgd_lock); - if (!has_lock) - spinlock_lock(&task->pgd_lock); + spinlock_lock(&task->pgd_lock); for(i=0; ientries[index2] = 0; } - if (!has_lock) - spinlock_unlock(&task->pgd_lock); + spinlock_unlock(&task->pgd_lock); return 0; } int print_paging_tree(size_t viraddr) { + task_t* task = per_core(current_task); uint32_t index1, index2; page_dir_t* pgd = NULL; page_table_t* pgt = NULL; @@ -487,8 +491,10 @@ int print_paging_tree(size_t viraddr) index1 = viraddr >> 22; index2 = (viraddr >> 12) & 0x3FF; + spinlock_lock(&task->pgd_lock); + kprintf("Paging dump of address 0x%x\n", viraddr); - pgd = per_core(current_task)->pgd; + pgd = task->pgd; kprintf("\tPage directory entry %u: ", index1); if (pgd) { kprintf("0x%0x\n", pgd->entries[index1]); @@ -506,6 +512,8 @@ int print_paging_tree(size_t viraddr) else kputs("invalid page table\n"); + spinlock_unlock(&task->pgd_lock); + return 0; } diff --git a/include/metalsvm/spinlock.h b/include/metalsvm/spinlock.h index ae4b44ac..e6fcd15f 100644 --- a/include/metalsvm/spinlock.h +++ b/include/metalsvm/spinlock.h @@ -1,6 +1,6 @@ /* - * Copyright 2010 Stefan Lankes, Chair for Operating Systems, - * RWTH Aachen 
University + * Copyright 2010-2011 Stefan Lankes, Chair for Operating Systems, + * RWTH Aachen University * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -32,17 +33,19 @@ extern "C" { inline static int spinlock_init(spinlock_t* s) { if (BUILTIN_EXPECT(!s, 0)) - return -1; + return -EINVAL; atomic_int32_set(&s->queue, 0); atomic_int32_set(&s->dequeue, 1); s->owner = MAX_TASKS; + s->counter = 0; return 0; } inline static int spinlock_destroy(spinlock_t* s) { s->owner = MAX_TASKS; + s->counter = 0; return 0; } @@ -50,48 +53,73 @@ inline static int spinlock_lock(spinlock_t* s) { int32_t ticket; if (BUILTIN_EXPECT(!s, 0)) - return -1; + return -EINVAL; + + if (s->owner == per_core(current_task)->id) { + s->counter++; + return 0; + } ticket = atomic_int32_inc(&s->queue); while(atomic_int32_read(&s->dequeue) != ticket) ; s->owner = per_core(current_task)->id; + s->counter = 1; return 0; } inline static int spinlock_unlock(spinlock_t* s) { if (BUILTIN_EXPECT(!s, 0)) - return -1; + return -EINVAL; - s->owner = MAX_TASKS; - atomic_int32_inc(&s->dequeue); + s->counter--; + if (!s->counter) { + s->owner = MAX_TASKS; + atomic_int32_inc(&s->dequeue); + } return 0; } inline static int spinlock_lock_irqsave(spinlock_t* s) { - if (BUILTIN_EXPECT(!s, 0)) - return -1; - - irq_disable(); - return spinlock_lock(s); -} - -inline static int spinlock_unlock_irqsave(spinlock_t* s) { + uint32_t flags; int ret; if (BUILTIN_EXPECT(!s, 0)) - return -1; + return -EINVAL; + + flags = irq_nested_disable(); + ret = spinlock_lock(s); - ret = spinlock_unlock(s); - irq_enable(); + if (ret) { + irq_nested_enable(flags); + return ret; + } + if (!ret && (s->counter == 1)) + s->flags = flags; + return ret; } -inline static int spinlock_has_lock(spinlock_t* s) { - return (s->owner == per_core(current_task)->id); +inline static int 
spinlock_unlock_irqsave(spinlock_t* s) { + int ret, restore = 0; + uint32_t flags = 0; + + if (BUILTIN_EXPECT(!s, 0)) + return -EINVAL; + + if (s->counter == 1) { + restore = 1; + flags = s->flags; + } + + ret = spinlock_unlock(s); + if (!ret && restore) + irq_nested_enable(flags); + + return ret; } #ifdef __cplusplus diff --git a/include/metalsvm/spinlock_types.h b/include/metalsvm/spinlock_types.h index a7bd82b6..bac2f5df 100644 --- a/include/metalsvm/spinlock_types.h +++ b/include/metalsvm/spinlock_types.h @@ -30,9 +30,11 @@ extern "C" { typedef struct spinlock { atomic_int32_t queue, dequeue; tid_t owner; + uint32_t counter; + uint32_t flags; } spinlock_t; -#define SPINLOCK_INIT { ATOMIC_INIT(0), ATOMIC_INIT(1), MAX_TASKS } +#define SPINLOCK_INIT { ATOMIC_INIT(0), ATOMIC_INIT(1), MAX_TASKS, 0, 0 } #ifdef __cplusplus } diff --git a/kernel/tasks.c b/kernel/tasks.c index 203e3bce..88e1cd98 100644 --- a/kernel/tasks.c +++ b/kernel/tasks.c @@ -51,7 +51,7 @@ int multitasking_init(void) { mailbox_wait_msg_init(&task_table[0].inbox); memset(task_table[0].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS); per_core(current_task) = task_table+0; - get_boot_pgd(task_table+0); + per_core(current_task)->pgd = get_boot_pgd(); return 0; } @@ -402,11 +402,9 @@ tid_t wait(int32_t* result) int wakeup_task(tid_t id) { int ret = -EINVAL; - int need_lock = !spinlock_has_lock(&table_lock); /* avoid nested locking */ - if (need_lock) - spinlock_lock_irqsave(&table_lock); + spinlock_lock_irqsave(&table_lock); if (task_table[id].status != TASK_BLOCKED) { kprintf("Task %d is not blocked!\n", id); @@ -415,8 +413,7 @@ int wakeup_task(tid_t id) ret = 0; } - if (need_lock) - spinlock_unlock_irqsave(&table_lock); + spinlock_unlock_irqsave(&table_lock); return ret; } @@ -424,19 +421,15 @@ int wakeup_task(tid_t id) int block_task(tid_t id) { int ret = -EINVAL; - int need_lock = !spinlock_has_lock(&table_lock); - /* avoid nested locking */ - if (need_lock) - 
spinlock_lock_irqsave(&table_lock); + spinlock_lock_irqsave(&table_lock); if ((task_table[id].status == TASK_RUNNING) || (task_table[id].status == TASK_READY)) { task_table[id].status = TASK_BLOCKED; ret = 0; } else kprintf("Unable to block task %d!\n", id); - if (need_lock) - spinlock_unlock_irqsave(&table_lock); + spinlock_unlock_irqsave(&table_lock); return ret; }