From 5ce3ed93380701554f8785c73455092ee24a7d66 Mon Sep 17 00:00:00 2001
From: Stefan Lankes
Date: Wed, 2 Mar 2011 13:49:36 +0100
Subject: [PATCH] First steps to support the system calls "fork" and "wait"

- Currently, the system call "fork" doesn't work and has a memory leak
- However, it is a good starting point for further developments.

---
 arch/x86/include/asm/page.h      |    2 +-
 arch/x86/include/asm/processor.h |   15 ++-
 arch/x86/include/asm/string.h    |   26 -----
 arch/x86/include/asm/tasks.h     |   11 +++
 arch/x86/kernel/entry.asm        |   12 +--
 arch/x86/kernel/gdt.c            |   40 +++++++-
 arch/x86/mm/page.c               |   84 +++++++++++++++-
 include/metalsvm/syscall.h       |    1 +
 include/metalsvm/tasks.h         |    4 +-
 include/metalsvm/tasks_types.h   |    7 +-
 kernel/syscall.c                 |    6 ++
 kernel/tasks.c                   |  161 ++++++++++++++++++-------------
 kernel/tests.c                   |   20 ++--
 13 files changed, 268 insertions(+), 121 deletions(-)

diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index 76f8c970..b1f53301 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -103,7 +103,7 @@ int get_boot_pgd(task_t* task);
 /*
  * Setup a new page directory for a new user-level task
  */
-int create_pgd(task_t* task);
+int create_pgd(task_t* task, int copy);
 
 /*
  * Delete page directory and its page tables
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index f21b200f..95d8a91b 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -96,6 +96,12 @@ static inline void write_cr0(uint32_t val) {
 	asm volatile("mov %0, %%cr0" : : "r"(val));
 }
 
+static inline uint32_t read_cr2(void) {
+	uint32_t val;
+	asm volatile("mov %%cr2, %0" : "=r"(val));
+	return val;
+}
+
 static inline uint32_t read_cr3(void) {
 	uint32_t val;
 	asm volatile("mov %%cr3, %0" : "=r"(val));
@@ -119,7 +125,14 @@ static inline void tlb_flush(void)
 	write_cr3(val);
 }
 
-void read_eip(void);
+static inline uint32_t read_eflags(void)
+{
+	uint32_t result;
+	asm volatile ("pushf; popl %%eax" : "=a"(result) :: "memory");
+	return result;
+}
+
+uint32_t read_eip(void);
 
 /*
  * invalidate (not flush!) lines in L1 that map to MPB lines
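The prototype of read_eip() changes from void to uint32_t because arch_fork() (below, in gdt.c) stores its return value as the child's entry point. The implementation itself is not part of this hunk; purely as a point of reference, one common ia32 way to read the current instruction pointer looks roughly like the sketch below. This is an assumption for illustration, not MetalSVM's actual routine.

    /* Hypothetical sketch (ia32, GCC inline asm) of an EIP-reading helper;
     * MetalSVM's real read_eip() lives in its assembly sources. */
    #include <stdint.h>

    static inline uint32_t read_eip_sketch(void)
    {
        uint32_t eip;

        /* "call 1f" pushes the address of the next instruction; "pop %0" retrieves it. */
        asm volatile ("call 1f; 1: pop %0" : "=r"(eip));

        return eip;
    }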
diff --git a/arch/x86/include/asm/string.h b/arch/x86/include/asm/string.h
index 84d4d0af..0eba11ae 100644
--- a/arch/x86/include/asm/string.h
+++ b/arch/x86/include/asm/string.h
@@ -19,7 +19,6 @@ extern "C" {
 #endif
 
 #ifdef HAVE_ARCH_MEMCPY
-#if 0
 inline static void *memcpy(void *dest, const void *src, size_t count)
 {
 	int32_t i, j, k;
@@ -37,31 +36,6 @@ inline static void *memcpy(void *dest, const void *src, size_t count)
 
 	return dest;
 }
-#else
-inline static void *memcpy(void *dest, const void *src, size_t count)
-{
-	int32_t h, i, j, k, l, m;
-
-	if (BUILTIN_EXPECT(!dest || !src, 0))
-		return dest;
-
-	asm volatile (
-		"cld;\n\t"
-		"1: cmpl $0, %%eax ; je 2f\n\t"
-		"movl (%%edi), %%edx\n\t"
-		"movl $8, %%ecx\n\t"
-		"rep ; movsl\n\t"
-		"dec %%eax ; jmp 1b\n\t"
-		"2: movl (%%edi), %%edx\n\t"
-		"movl %%ebx, %%ecx\n\t"
-		"andl $31, %%ecx\n\t"
-		"rep ; movsb\n\t"
-		: "=&a"(h), "=&D"(i), "=&S"(j), "=&b"(k), "=&c"(l), "=&d"(m)
-		: "0"(count/32), "1"(dest), "2"(src), "3"(count) : "memory");
-
-	return dest;
-}
-#endif
 #endif
 
 #ifdef HAVE_ARCH_MEMSET
diff --git a/arch/x86/include/asm/tasks.h b/arch/x86/include/asm/tasks.h
index 0480fb72..0efecc53 100644
--- a/arch/x86/include/asm/tasks.h
+++ b/arch/x86/include/asm/tasks.h
@@ -24,6 +24,11 @@
 #include
 #include
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int arch_fork(task_t* task);
 int create_default_frame(task_t* task, entry_point_t ep, void* arg);
 int register_task(task_t* task);
 void reschedule(void);
@@ -37,4 +42,10 @@ static inline int jump_to_user_code(uint32_t ep, uint32_t stack)
 	return 0;
 }
 
+int jump_to_child(void);
+
+#ifdef __cplusplus
+}
+#endif
+
 #endif
diff --git a/arch/x86/kernel/entry.asm b/arch/x86/kernel/entry.asm
index e24e201a..f22b44ad 100644
--- a/arch/x86/kernel/entry.asm
+++ b/arch/x86/kernel/entry.asm
@@ -54,11 +54,13 @@ mboot:
 ;    dd end
 ;    dd start
 
+extern default_stack_pointer
+
 SECTION .text
 ALIGN 4
 stublet:
 ; initialize stack pointer.
-	mov esp, _sys_stack-4
+	mov esp, default_stack_pointer
 ; enable cache and turn on FPU exceptions
 	mov eax, cr0
 ; enable cache
@@ -820,12 +822,4 @@ irq_common_stub:
 	add esp, 8
 	iret
 
-; Here is the definition of our BSS section. Right now, we'll use
-; it just to store the stack. Remember that a stack actually grows
-; downwards, so we declare the size of the data before declaring
-; the identifier '_sys_stack'
-SECTION .bss
-	resb 8192	; This reserves 8KBytes of memory here
-_sys_stack:
-
 SECTION .note.GNU-stack noalloc noexec nowrite progbits
diff --git a/arch/x86/kernel/gdt.c b/arch/x86/kernel/gdt.c
index 7b22f715..1a131494 100644
--- a/arch/x86/kernel/gdt.c
+++ b/arch/x86/kernel/gdt.c
@@ -21,6 +21,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -30,6 +31,7 @@ static tss_t task_state_segments[MAX_TASKS] __attribute__ ((aligned (4096)));
 // currently, our kernel has full access to the ioports
 static gdt_entry_t gdt[GDT_ENTRIES] = {[0 ... GDT_ENTRIES-1] = {0, 0, 0, 0, 0, 0}};
 static unsigned char kstacks[MAX_TASKS][KERNEL_STACK_SIZE];
+unsigned char* default_stack_pointer = kstacks[0] + KERNEL_STACK_SIZE - sizeof(size_t);
 
 /*
  * This is in start.asm. We use this to properly reload
@@ -54,6 +56,40 @@ int register_task(task_t* task) {
 	return 0;
 }
 
+int arch_fork(task_t* task)
+{
+	uint32_t id;
+	task_t* curr_task = per_core(current_task);
+
+	if (BUILTIN_EXPECT(!task, 0))
+		return -EINVAL;
+	id = task->id;
+
+	memcpy(task_state_segments+id, task_state_segments+curr_task->id, sizeof(tss_t));
+	task_state_segments[id].cr3 = (uint32_t) (virt_to_phys((size_t)task->pgd));
+	task_state_segments[id].eflags = read_eflags();
+	task_state_segments[id].esp0 = (uint32_t) kstacks[id] + KERNEL_STACK_SIZE - sizeof(size_t);
+
+	asm volatile ("pusha" ::: "%esp");
+
+	memcpy(kstacks[id], kstacks[curr_task->id], KERNEL_STACK_SIZE);
+
+	asm volatile("mov %%esp, %0" : "=r"(task_state_segments[id].esp));
+	if (id > curr_task->id)
+		task_state_segments[id].esp += (id - curr_task->id) * KERNEL_STACK_SIZE;
+	else
+		task_state_segments[id].esp -= (curr_task->id - id) * KERNEL_STACK_SIZE;
+
+	// This will be the entry point for the new task.
+	task_state_segments[id].eip = read_eip();
+
+	kputs("A\n");
+	asm volatile ("popa" ::: "%esp");
+	kputs("B\n");
+
+	return 0;
+}
+
 int create_default_frame(task_t* task, entry_point_t ep, void* arg)
 {
 	uint16_t cs = 0x08;
@@ -61,7 +97,7 @@ int create_default_frame(task_t* task, entry_point_t ep, void* arg)
 	uint32_t id;
 
 	if (BUILTIN_EXPECT(!task, 0))
-		return -1;
+		return -EINVAL;
 	id = task->id;
 
 	/* reset buffers */
@@ -90,7 +126,7 @@
 
 	/* setup for the kernel stack frame */
 	task_state_segments[id].ss0 = 0x10;
-	task_state_segments[id].esp0 = task_state_segments[id].esp;
+	task_state_segments[id].esp0 = (uint32_t) kstacks[id] + KERNEL_STACK_SIZE - sizeof(size_t);
 
 	return 0;
 }
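arch_fork() above copies the parent's TSS and kernel stack, shifts esp by the distance between the two kernel stacks, and records read_eip() as the child's eip, so the child later resumes exactly where the parent called read_eip(); sys_fork() (further down, in kernel/tasks.c) then tells the two activations apart by checking whether per_core(current_task) is still the parent. As a rough user-space analogy only, not MetalSVM code, setjmp/longjmp shows the same "returns twice from one recorded point" pattern:

    /* User-space analogy (not MetalSVM code): setjmp() records a resume point much
     * like arch_fork() records read_eip(); longjmp() resumes there a second time,
     * and the return value tells the two activations apart. */
    #include <setjmp.h>
    #include <stdio.h>

    static jmp_buf resume_point;

    int main(void)
    {
        if (setjmp(resume_point) == 0) {
            /* first return: comparable to the parent leaving sys_fork() */
            printf("parent path\n");
            longjmp(resume_point, 1);	/* "switch" to the recorded context */
        } else {
            /* second return: comparable to the child starting at the saved eip */
            printf("child path\n");
        }
        return 0;
    }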
diff --git a/arch/x86/mm/page.c b/arch/x86/mm/page.c
index 6f20ac50..a8c6a8ad 100644
--- a/arch/x86/mm/page.c
+++ b/arch/x86/mm/page.c
@@ -63,14 +63,70 @@ int get_boot_pgd(task_t* task)
 	return 0;
 }
 
-int create_pgd(task_t* task)
+/*
+ * TODO: We create a full copy of the current task. Copy-on-access would be the better solution.
+ */
+inline static size_t copy_page_table(uint32_t pgd_index, page_table_t* pgt, int* counter)
+{
+	uint32_t i;
+	page_table_t* new_pgt;
+	size_t viraddr, phyaddr;
+	task_t* curr_task = per_core(current_task);
+
+	if (BUILTIN_EXPECT(!pgt, 0))
+		return 0;
+
+	new_pgt = kmalloc(sizeof(page_table_t));
+	if (!new_pgt)
+		return 0;
+	memset(new_pgt, 0, sizeof(page_table_t));
+	if (counter)
+		(*counter)++;
+
+	for(i=0; i<1024; i++) {
+		if (pgt->entries[i] & 0xFFFFF000) {
+			phyaddr = get_page();
+			if (!phyaddr)
+				continue;
+			if (counter)
+				(*counter)++;
+
+			viraddr = map_region(0, phyaddr, 1, MAP_KERNEL_SPACE);
+			if (!viraddr) {
+				put_page(phyaddr);
+				continue;
+			}
+
+			memcpy((void*) viraddr, (void*) ((pgd_index << 22) | (i << 12)), PAGE_SIZE);
+
+			new_pgt->entries[i] = phyaddr | (pgt->entries[i] & 0xFFF);
+
+			// only the child uses the copy => unmap copy
+			if (!vm_free(viraddr, 1))
+				atomic_int32_sub(&curr_task->mem_usage, 1);
+
+		}
+	}
+
+	phyaddr = virt_to_phys((size_t)new_pgt);
+
+	// only the child uses the copy => unmap copy
+	if (!vm_free((size_t)new_pgt, 1))
+		atomic_int32_sub(&curr_task->mem_usage, 1);
+
+	return phyaddr;
+}
+
+int create_pgd(task_t* task, int copy)
 {
 	page_dir_t* pgd;
 	page_table_t* pgt;
 	page_table_t* pgt_container;
 	uint32_t i;
 	uint32_t index1, index2;
-	size_t viraddr;
+	size_t viraddr, phyaddr;
+	int counter = 0;
+	task_t* curr_task = per_core(current_task);
 
 	if (BUILTIN_EXPECT(!paging_enabled, 0))
 		return -EINVAL;
@@ -84,6 +140,7 @@
 	if (!pgd)
 		return -ENOMEM;
 	memset(pgd, 0, sizeof(page_dir_t));
+	counter++;
 
 	// create a new "page table container" for the new task
 	pgt = kmalloc(sizeof(page_table_t));
@@ -92,6 +149,7 @@
 		return -ENOMEM;
 	}
 	memset(pgt, 0, sizeof(page_table_t));
+	counter++;
 
 	for(i=0; i<KERNEL_SPACE/(1024*PAGE_SIZE); i++)
 		pgd->entries[i] = boot_pgd.entries[i];
@@ -110,7 +168,25 @@
 
 	task->pgd = pgd;
 
-	return 0;
+	if (copy) {
+		for (i=KERNEL_SPACE/(1024*PAGE_SIZE); i<1024; i++) {
+			if (!(curr_task->pgd->entries[i] & 0xFFFFF000))
+				continue;
+
+			kprintf("i %d\n", i);
+			phyaddr = copy_page_table(i, (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + i*PAGE_SIZE) & 0xFFFFF000), &counter);
+			if (phyaddr)
+				pgd->entries[i] = phyaddr | (pgt_container->entries[i] & 0x00000FFF);
+		}
+	}
+
+	// frees the virtual regions, because only the new child task needs access to the new pgd and pgt
+	//if (!vm_free((size_t)pgt, 1))
+	//	atomic_int32_sub(&curr_task->mem_usage, 1);
+	//if (!vm_free((size_t)pgd, 1))
+	//	atomic_int32_sub(&curr_task->mem_usage, 1);
+
+	return counter;
 }
 
 int drop_pgd(void)
@@ -444,7 +520,7 @@ int print_paging_tree(size_t viraddr)
 
 static void pagefault_handler(struct state *s)
 {
-	kprintf("PAGE FAULT: Task %u got page fault at irq %u\n", per_core(current_task)->id, s->int_no);
+	kprintf("PAGE FAULT: Task %u got page fault at %p (irq 0x%x)\n", per_core(current_task)->id, read_cr2(), s->int_no);
 	kprintf("Register state: eax = 0x%x, ebx = 0x%x, ecx = 0x%x, edx = 0x%x, edi = 0x%x, esi = 0x%x, ebp = 0x%x, esp = 0x%x\n",
 		s->eax, s->ebx, s->ecx, s->edx, s->edi, s->esi, s->ebp, s->esp);
diff --git a/include/metalsvm/syscall.h b/include/metalsvm/syscall.h
index c04d0c78..71863ade 100644
--- a/include/metalsvm/syscall.h
+++ b/include/metalsvm/syscall.h
@@ -39,6 +39,7 @@ extern "C" {
 #define __NR_fstat	10
 #define __NR_sbrk	11
 #define __NR_fork	12
+#define __NR_wait	13
 
 #ifdef __cplusplus
 }
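copy_page_table() above rebuilds the virtual address of every mapped page from its page-directory slot and page-table slot via (pgd_index << 22) | (i << 12) before memcpy'ing it into a freshly allocated frame. The standalone snippet below only illustrates that 4 KiB x86 address split; the example address and variable names are mine, not taken from the patch.

    /* Standalone illustration (not part of the patch): how a 32-bit virtual address
     * splits into page-directory index, page-table index and offset with 4 KiB pages. */
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t viraddr = 0x4807B123;                /* arbitrary example address */
        uint32_t pgd_index = viraddr >> 22;           /* bits 31..22: entry in the page directory */
        uint32_t pgt_index = (viraddr >> 12) & 0x3FF; /* bits 21..12: entry in the page table */
        uint32_t offset    = viraddr & 0xFFF;         /* bits 11..0: offset inside the 4 KiB page */

        /* copy_page_table() goes the other way round: */
        uint32_t rebuilt = (pgd_index << 22) | (pgt_index << 12);

        printf("pgd %u, pgt %u, offset 0x%x, page base 0x%x\n",
               pgd_index, pgt_index, offset, rebuilt);

        return 0;
    }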
diff --git a/include/metalsvm/tasks.h b/include/metalsvm/tasks.h
index 19e13e6b..b84fc6fd 100644
--- a/include/metalsvm/tasks.h
+++ b/include/metalsvm/tasks.h
@@ -40,8 +40,8 @@ int create_kernel_task(tid_t*, entry_point_t, void*);
 /* create a user level task. if sz is zero, the task with the default stack size will be created */
 int create_user_task(tid_t* id, size_t sz, const char* filename, int argc, char** argv);
 
-/* until the task id is runnint, the current task is block */
-int join_task(tid_t id, int* result);
+/* until a child task has terminated, the current task is blocked */
+tid_t wait(int32_t* result);
 
 /* timer interrupt use this function for task switching */
 void scheduler(void);
diff --git a/include/metalsvm/tasks_types.h b/include/metalsvm/tasks_types.h
index 985d724f..6e2c3d05 100644
--- a/include/metalsvm/tasks_types.h
+++ b/include/metalsvm/tasks_types.h
@@ -23,6 +23,7 @@
 #include
 #include
 #include
+#include
 #include
 
 #ifdef __cplusplus
@@ -37,18 +38,18 @@ extern "C" {
 #define TASK_IDLE	5
 
 typedef int (STDCALL *entry_point_t)(void*);
-struct mailbox_int32;
 struct page_dir;
 
 typedef struct task {
 	tid_t id;				/* task id = position in the task table */
 	uint32_t status;
 	atomic_int32_t mem_usage;		/* in number of pages */
-	struct spinlock pgd_lock;		/* avoids concurrent access to the page directoriy */
+	spinlock_t pgd_lock;			/* avoids concurrent access to the page directory */
 	struct page_dir* pgd;			/* pointer to the page directory */
 	spinlock_t vma_lock;
 	vma_t* vma_list;
-	struct mailbox_int32* mbox[MAX_TASKS];
+	mailbox_wait_msg_t inbox;
+	mailbox_wait_msg_t* outbox[MAX_TASKS];
 } __attribute__((packed)) task_t;
 
 #ifdef __cplusplus
diff --git a/kernel/syscall.c b/kernel/syscall.c
index d79eae6f..64ed5f6f 100644
--- a/kernel/syscall.c
+++ b/kernel/syscall.c
@@ -70,6 +70,12 @@ int syscall_handler(uint32_t sys_nr, ...)
 	case __NR_fork:
 		ret = sys_fork();
 		break;
+	case __NR_wait: {
+		int32_t* status = va_arg(vl, int32_t*);
+
+		ret = wait(status);
+		break;
+	}
 	case __NR_fstat:
 	default:
 		kputs("invalid system call\n");
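The new inbox/outbox fields and the wait() implementation in kernel/tasks.c below rely on a wait_msg_t type and mailbox_wait_msg_* helpers that are defined elsewhere in the tree and are not shown in this patch. Judging only from their use here (two-field initializers such as { -1, -1 }, plus tmp.id and tmp.result), the parent/child handshake presumably works as in the simplified, self-contained model below; the toy types and helpers are stand-ins, not MetalSVM's mailbox implementation.

    /* Simplified model of the handshake behind wait(): the exiting child posts a
     * message into its parent's inbox, the parent fetches it. Illustrative only. */
    #include <stdint.h>
    #include <stdio.h>

    typedef uint32_t tid_t;

    typedef struct {
        tid_t   id;      /* id of the task that terminated */
        int32_t result;  /* its exit code */
    } wait_msg_t;

    /* one-slot "mailbox": enough to show the data flow, no blocking involved */
    typedef struct {
        int        full;
        wait_msg_t msg;
    } toy_mailbox_t;

    static void toy_post(toy_mailbox_t* mbox, wait_msg_t msg)   { mbox->msg = msg; mbox->full = 1; }
    static int  toy_fetch(toy_mailbox_t* mbox, wait_msg_t* out) { if (!mbox->full) return -1; *out = mbox->msg; mbox->full = 0; return 0; }

    int main(void)
    {
        toy_mailbox_t parent_inbox = { 0 };

        /* child side: like the terminating task posting into its parent's inbox */
        wait_msg_t exit_msg = { 42, 0 };
        toy_post(&parent_inbox, exit_msg);

        /* parent side: like wait() fetching from per_core(current_task)->inbox */
        wait_msg_t tmp = { (tid_t)-1, -1 };
        if (toy_fetch(&parent_inbox, &tmp) == 0)
            printf("child %u finished with result %d\n", tmp.id, tmp.result);

        return 0;
    }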
diff --git a/kernel/tasks.c b/kernel/tasks.c
index bd8440ed..ef64020c 100644
--- a/kernel/tasks.c
+++ b/kernel/tasks.c
@@ -44,18 +44,15 @@ task_t* get_current_task(void) {
 }
 
 int multitasking_init(void) {
-	unsigned int i;
-
-	for(i=0; i<MAX_TASKS; i++) {
+	wait_msg_t tmp = { per_core(current_task)->id, result };
 	unsigned int i;
 
 	spinlock_lock_irqsave(&table_lock);
 
 	/* wake up blocked tasks */
 	for(i=0; i<MAX_TASKS; i++) {
-		if (per_core(current_task)->mbox[i]) {
-			mailbox_int32_post(per_core(current_task)->mbox[i], result);
-			per_core(current_task)->mbox[i] = NULL;
+		if (per_core(current_task)->outbox[i]) {
+			mailbox_wait_msg_post(per_core(current_task)->outbox[i], tmp);
+			per_core(current_task)->outbox[i] = NULL;
 		}
 	}
 
@@ -137,24 +135,94 @@
 	for(i=0; i<MAX_TASKS; i++) {
 		if (task_table[i].status == TASK_INVALID) {
+			task_table[i].outbox[per_core(current_task)->id] = &per_core(current_task)->inbox;
 
 			if (id)
 				*id = i;
 			ret = create_default_frame(task_table+i, ep, arg);
+
+			task_table[i].status = TASK_READY;
+			break;
+		}
+	}
+
+create_task_out:
+	spinlock_unlock_irqsave(&table_lock);
+
+	return ret;
+}
+
+int sys_fork(void)
+{
+	int ret = -ENOMEM;
+	unsigned int i;
+	task_t* parent = per_core(current_task);
+
+	spinlock_lock_irqsave(&table_lock);
+
+	for(i=0; i<MAX_TASKS; i++) {
+		if (task_table[i].status == TASK_INVALID) {
+			/*
+			vma_t** child = &task_table[i].vma_list;
+			vma_t* parent = per_core(current_task)->vma_list;
+			vma_t* tmp = NULL;
+
+			while(parent) {
+				*child = (vma_t*) kmalloc(sizeof(vma_t));
+				if (BUILTIN_EXPECT(!child, 0))
+					break;
+				atomic_int32_inc(&task_table[i].mem_usage);
+
+				(*child)->start = parent->start;
+				(*child)->end = parent->end;
+				(*child)->type = parent->type;
+				(*child)->prev = tmp;
+				(*child)->next = NULL;
+
+				parent = parent->next;
+				tmp = *child;
+				child = &((*child)->next);
+			}
+			}*/
+
+			mailbox_wait_msg_init(&task_table[i].inbox);
+			memset(task_table[i].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
+			task_table[i].outbox[per_core(current_task)->id] = &per_core(current_task)->inbox;
+
+			ret = arch_fork(task_table+i);
+
+			if (parent != per_core(current_task))
+				return 0; // Oh, the new child! => leave function
+
+			if (ret >= 0) {
+				task_table[i].status = TASK_READY;
+				ret = i;
+			}
 			break;
 		}
 	}
@@ -312,60 +380,23 @@ int create_user_task(tid_t* id, size_t sz, const char* fname, int argc, char** a
 
 	return create_task(id, user_entry, node);
 }
 
-int sys_fork(void)
+tid_t wait(int32_t* result)
 {
-	return -EINVAL;
-}
-
-int join_task(tid_t id, int* result)
-{
-	int32_t tmp;
-	mailbox_int32_t mbox;
-
-	mailbox_int32_init(&mbox);
-
-	spinlock_lock_irqsave(&table_lock);
+	wait_msg_t tmp = { -1, -1};
 
 	/*
 	 * idle tasks are not allowed to wait for another task
 	 * they should always run...
 	 */
 	if (BUILTIN_EXPECT(per_core(current_task)->status == TASK_IDLE, 0))
-		goto join_out;
+		return -EINVAL;
 
-	/* a task is not able to wait for itself */
-	if (BUILTIN_EXPECT(per_core(current_task)->id == id, 0))
-		goto join_out;
-
-	/* invalid id */
-	if (BUILTIN_EXPECT(id >= MAX_TASKS, 0))
-		goto join_out;
-
-	/* task already finished */
-	if (BUILTIN_EXPECT(task_table[id].status == TASK_INVALID, 0))
-		goto join_out;
-
-	/* task already finished */
-	if (BUILTIN_EXPECT(task_table[id].status == TASK_FINISHED, 0))
-		goto join_out;
-
-	task_table[id].mbox[per_core(current_task)->id] = &mbox;
-
-	spinlock_unlock_irqsave(&table_lock);
-
-	mailbox_int32_fetch(&mbox, &tmp);
+	mailbox_wait_msg_fetch(&per_core(current_task)->inbox, &tmp);
 
 	if (result)
-		*result = tmp;
+		*result = tmp.result;
 
-	mailbox_int32_destroy(&mbox);
-
-	return 0;
-
-join_out:
-	spinlock_unlock_irqsave(&table_lock);
-	mailbox_int32_destroy(&mbox);
-	return -EINVAL;
+	return tmp.id;
 }
 
 int wakeup_task(tid_t id)
diff --git a/kernel/tests.c b/kernel/tests.c
index 26140de1..d76bbe6b 100644
--- a/kernel/tests.c
+++ b/kernel/tests.c
@@ -85,13 +85,17 @@ static int STDCALL foo(void* arg)
 
 static int STDCALL join_test(void* arg)
 {
-	tid_t id;
-	int ret, result = -1234;
+	tid_t id, ret;
+	int result = -1234;
 
-	ret = create_kernel_task(&id, foo, "Hello from foo2\n");
-	kprintf("Wait for task %u: ret = %d\n", id, ret);
-	ret = join_task(id, &result);
-	kprintf("Task %u finished: ret = %d, result = %d\n", id, ret, result);
+	create_kernel_task(&id, foo, "Hello from foo2\n");
+
+	kprintf("Wait for child %u\n", id);
+	do {
+		ret = wait(&result);
+	} while(ret != id);
+
+	kprintf("Child %u finished: result = %d\n", id, result);
 
 	return 0;
 }
@@ -105,11 +109,11 @@ int test_init(void)
 	mailbox_int32_init(&mbox);
 
 	create_kernel_task(NULL, foo, "Hello from foo1\n");
-	//create_kernel_task(NULL, join_test, NULL);
+	create_kernel_task(NULL, join_test, NULL);
 	//create_kernel_task(NULL, producer, NULL);
 	//create_kernel_task(NULL, consumer, NULL);
 	create_user_task(NULL, 8192, "/bin/hello", 1, argv);
-	create_user_task(NULL, 8192, "/bin/test_fork", 1, argv);
+	//create_user_task(NULL, 8192, "/bin/tests", 1, argv);
 
 	return 0;
 }
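The patch wires __NR_fork and __NR_wait into syscall_handler(), but the matching user-space bindings (e.g. for /bin/test_fork) are not part of it. Going by the kernel side alone - sys_fork() returns 0 in the child and the new task id in the parent, wait() returns the id of a finished child and stores its result - the intended calling pattern is the classic POSIX one, demonstrated here with a hosted fork()/wait() program. This is an illustration of that pattern, not code from the patch.

    /* Hosted POSIX demonstration of the calling pattern the new syscalls aim for.
     * MetalSVM's own user-space bindings are not part of this patch. */
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/wait.h>
    #include <unistd.h>

    int main(void)
    {
        int status = 0;
        pid_t id = fork();

        if (id == 0) {
            /* child: do some work and terminate */
            printf("child running\n");
            exit(42);
        }

        /* parent: block until a child terminates, like tid_t wait(int32_t*) above */
        pid_t child = wait(&status);
        printf("child %d finished, raw status 0x%x\n", (int)child, status);

        return 0;
    }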