First steps to support the system calls "fork" and "wait"

- Currently, the system call "fork" doesn't work yet and leaks memory
- However, it is a good starting point for further development.
Stefan Lankes 2011-03-02 13:49:36 +01:00
parent e5031e872e
commit 5ce3ed9338
13 changed files with 268 additions and 121 deletions
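
For context before the diff itself: the new test task /bin/test_fork (registered in the test file at the bottom of this commit) is meant to exercise both new system calls. A minimal sketch of what such a program could look like — hypothetical, assuming user-space fork() and wait() wrappers that trap into __NR_fork and __NR_wait:

    #include <stdio.h>

    /* hypothetical wrappers around the new syscalls __NR_fork / __NR_wait */
    extern int fork(void);
    extern int wait(int* status);

    int main(void)
    {
        int status;
        int pid = fork();          /* returns twice: 0 in the child */

        if (pid == 0) {
            printf("hello from the child\n");
            return 42;             /* exit code collected by the parent */
        }

        while (wait(&status) != pid)
            ;                      /* wait() reports the id of *a* child */
        printf("child %d exited with %d\n", pid, status);

        return 0;
    }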

View file

@@ -103,7 +103,7 @@ int get_boot_pgd(task_t* task);
/*
* Setup a new page directory for a new user-level task
*/
int create_pgd(task_t* task);
int create_pgd(task_t* task, int copy);
/*
* Delete page directory and its page tables

View file

@@ -96,6 +96,12 @@ static inline void write_cr0(uint32_t val) {
asm volatile("mov %0, %%cr0" : : "r"(val));
}
static inline uint32_t read_cr2(void) {
uint32_t val;
asm volatile("mov %%cr2, %0" : "=r"(val));
return val;
}
static inline uint32_t read_cr3(void) {
uint32_t val;
asm volatile("mov %%cr3, %0" : "=r"(val));
@@ -119,7 +125,14 @@ static inline void tlb_flush(void)
write_cr3(val);
}
void read_eip(void);
static inline uint32_t read_eflags(void)
{
uint32_t result;
asm volatile ("pushf; popl %%eax" : "=a"(result) :: "memory");
return result;
}
uint32_t read_eip(void);
/*
* invalidate (not flush!) lines in L1 that map to MPB lines

View file

@@ -19,7 +19,6 @@ extern "C" {
#endif
#ifdef HAVE_ARCH_MEMCPY
#if 0
inline static void *memcpy(void *dest, const void *src, size_t count)
{
int32_t i, j, k;
@@ -37,31 +36,6 @@ inline static void *memcpy(void *dest, const void *src, size_t count)
return dest;
}
#else
inline static void *memcpy(void *dest, const void *src, size_t count)
{
int32_t h, i, j, k, l, m;
if (BUILTIN_EXPECT(!dest || !src, 0))
return dest;
asm volatile (
"cld;\n\t"
"1: cmpl $0, %%eax ; je 2f\n\t"
"movl (%%edi), %%edx\n\t"
"movl $8, %%ecx\n\t"
"rep ; movsl\n\t"
"dec %%eax ; jmp 1b\n\t"
"2: movl (%%edi), %%edx\n\t"
"movl %%ebx, %%ecx\n\t"
"andl $31, %%ecx\n\t"
"rep ; movsb\n\t"
: "=&a"(h), "=&D"(i), "=&S"(j), "=&b"(k), "=&c"(l), "=&d"(m)
: "0"(count/32), "1"(dest), "2"(src), "3"(count) : "memory");
return dest;
}
#endif
#endif
#ifdef HAVE_ARCH_MEMSET

View file

@@ -24,6 +24,11 @@
#include <metalsvm/stddef.h>
#include <metalsvm/tasks_types.h>
#ifdef __cplusplus
extern "C" {
#endif
int arch_fork(task_t* task);
int create_default_frame(task_t* task, entry_point_t ep, void* arg);
int register_task(task_t* task);
void reschedule(void);
@@ -37,4 +42,10 @@ static inline int jump_to_user_code(uint32_t ep, uint32_t stack)
return 0;
}
int jump_to_child(void);
#ifdef __cplusplus
}
#endif
#endif

View file

@@ -54,11 +54,13 @@ mboot:
; dd end
; dd start
extern default_stack_pointer
SECTION .text
ALIGN 4
stublet:
; initialize stack pointer.
mov esp, _sys_stack-4
mov esp, [default_stack_pointer]
; enable cache and turn on FPU exceptions
mov eax, cr0
; enable cache
@@ -820,12 +822,4 @@ irq_common_stub:
add esp, 8
iret
; Here is the definition of our BSS section. Right now, we'll use
; it just to store the stack. Remember that a stack actually grows
; downwards, so we declare the size of the data before declaring
; the identifier '_sys_stack'
SECTION .bss
resb 8192 ; This reserves 8KBytes of memory here
_sys_stack:
SECTION .note.GNU-stack noalloc noexec nowrite progbits
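
The deleted BSS stack was what made the boot context special: the kernel booted on an 8 KB stack that no task structure knew about. After this change the entry code loads esp through default_stack_pointer, which is defined next to the kstacks array in the GDT/TSS file below and points at the top of task 0's kernel stack — so the boot task's stack lives in the same per-task array as everyone else's and can be cloned by arch_fork() like any other kernel stack:

    /* from the GDT/TSS file below: the boot stack aliases task 0's kernel stack */
    static unsigned char kstacks[MAX_TASKS][KERNEL_STACK_SIZE];
    unsigned char* default_stack_pointer = kstacks[0] + KERNEL_STACK_SIZE - sizeof(size_t);

Note the brackets in the NASM load above: default_stack_pointer is a pointer variable and must be dereferenced; a bare symbol operand would load the variable's address instead of its value.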

View file

@@ -21,6 +21,7 @@
#include <metalsvm/stdlib.h>
#include <metalsvm/tasks.h>
#include <metalsvm/errno.h>
#include <metalsvm/processor.h>
#include <asm/gdt.h>
#include <asm/tss.h>
#include <asm/page.h>
@@ -30,6 +31,7 @@ static tss_t task_state_segments[MAX_TASKS] __attribute__ ((aligned (4096)));
// currently, our kernel has full access to the ioports
static gdt_entry_t gdt[GDT_ENTRIES] = {[0 ... GDT_ENTRIES-1] = {0, 0, 0, 0, 0, 0}};
static unsigned char kstacks[MAX_TASKS][KERNEL_STACK_SIZE];
unsigned char* default_stack_pointer = kstacks[0] + KERNEL_STACK_SIZE - sizeof(size_t);
/*
* This is in start.asm. We use this to properly reload
@@ -54,6 +56,40 @@ int register_task(task_t* task) {
return 0;
}
int arch_fork(task_t* task)
{
uint32_t id;
task_t* curr_task = per_core(current_task);
if (BUILTIN_EXPECT(!task, 0))
return -EINVAL;
id = task->id;
memcpy(task_state_segments+id, task_state_segments+curr_task->id, sizeof(tss_t));
task_state_segments[id].cr3 = (uint32_t) (virt_to_phys((size_t)task->pgd));
task_state_segments[id].eflags = read_eflags();
task_state_segments[id].esp0 = (uint32_t) kstacks[id] + KERNEL_STACK_SIZE - sizeof(size_t);
asm volatile ("pusha" ::: "%esp");
memcpy(kstacks[id], kstacks[curr_task->id], KERNEL_STACK_SIZE);
asm volatile("mov %%esp, %0" : "=r"(task_state_segments[id].esp));
if (id > curr_task->id)
task_state_segments[id].esp += (id - curr_task->id) * KERNEL_STACK_SIZE;
else
task_state_segments[id].esp -= (curr_task->id - id) * KERNEL_STACK_SIZE;
// This will be the entry point for the new task.
task_state_segments[id].eip = read_eip();
kputs("A\n");
asm volatile ("popa" ::: "%esp");
kputs("B\n");
return 0;
}
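
Two assumptions make this arch_fork() work. First, the esp adjustment relies on kstacks being one contiguous array, so the child's copy of the parent's stack sits at a fixed offset of (child_id - parent_id) * KERNEL_STACK_SIZE. Second, read_eip() marks the child's wake-up point: whatever address it returns is stored as the child's EIP, so the child resumes immediately after that call, on the copied stack. The routine itself is not part of this diff; a plausible C equivalent, shown only as a sketch, is:

    /* sketch: return the address this call will return to, i.e. the
     * instruction right after "call read_eip" — exactly where the child
     * task should resume (GCC-specific builtin; must not be inlined) */
    __attribute__((noinline)) uint32_t read_eip(void)
    {
        return (uint32_t) __builtin_return_address(0);
    }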
int create_default_frame(task_t* task, entry_point_t ep, void* arg)
{
uint16_t cs = 0x08;
@@ -61,7 +97,7 @@ int create_default_frame(task_t* task, entry_point_t ep, void* arg)
uint32_t id;
if (BUILTIN_EXPECT(!task, 0))
return -1;
return -EINVAL;
id = task->id;
/* reset buffers */
@@ -90,7 +126,7 @@ int create_default_frame(task_t* task, entry_point_t ep, void* arg)
/* setup for the kernel stack frame */
task_state_segments[id].ss0 = 0x10;
task_state_segments[id].esp0 = task_state_segments[id].esp;
task_state_segments[id].esp0 = (uint32_t) kstacks[id] + KERNEL_STACK_SIZE - sizeof(size_t);
return 0;
}

View file

@@ -63,14 +63,70 @@ int get_boot_pgd(task_t* task)
return 0;
}
int create_pgd(task_t* task)
/*
* TODO: We create a full copy of the current task's address space. Copy-on-access would be the better solution.
*/
inline static size_t copy_page_table(uint32_t pgd_index, page_table_t* pgt, int* counter)
{
uint32_t i;
page_table_t* new_pgt;
size_t viraddr, phyaddr;
task_t* curr_task = per_core(current_task);
if (BUILTIN_EXPECT(!pgt, 0))
return 0;
new_pgt = kmalloc(sizeof(page_table_t));
if (!new_pgt)
return 0;
memset(new_pgt, 0, sizeof(page_table_t));
if (counter)
(*counter)++;
for(i=0; i<1024; i++) {
if (pgt->entries[i] & 0xFFFFF000) {
phyaddr = get_page();
if (!phyaddr)
continue;
if (counter)
(*counter)++;
viraddr = map_region(0, phyaddr, 1, MAP_KERNEL_SPACE);
if (!viraddr) {
put_page(phyaddr);
continue;
}
memcpy((void*) viraddr, (void*) ((pgd_index << 22) | (i << 12)), PAGE_SIZE);
new_pgt->entries[i] = phyaddr | (pgt->entries[i] & 0xFFF);
// only the child uses the copy => unmap the temporary mapping
if (!vm_free(viraddr, 1))
atomic_int32_sub(&curr_task->mem_usage, 1);
}
}
phyaddr = virt_to_phys((size_t)new_pgt);
// only the child uses the copy => unmap the temporary mapping
if (!vm_free((size_t)new_pgt, 1))
atomic_int32_sub(&curr_task->mem_usage, 1);
return phyaddr;
}
int create_pgd(task_t* task, int copy)
{
page_dir_t* pgd;
page_table_t* pgt;
page_table_t* pgt_container;
uint32_t i;
uint32_t index1, index2;
size_t viraddr;
size_t viraddr, phyaddr;
int counter = 0;
task_t* curr_task = per_core(current_task);
if (BUILTIN_EXPECT(!paging_enabled, 0))
return -EINVAL;
@@ -84,6 +140,7 @@ int create_pgd(task_t* task)
if (!pgd)
return -ENOMEM;
memset(pgd, 0, sizeof(page_dir_t));
counter++;
// create a new "page table container" for the new task
pgt = kmalloc(sizeof(page_table_t));
@@ -92,6 +149,7 @@ int create_pgd(task_t* task)
return -ENOMEM;
}
memset(pgt, 0, sizeof(page_table_t));
counter++;
for(i=0; i<KERNEL_SPACE/(1024*PAGE_SIZE); i++) {
pgd->entries[i] = boot_pgd.entries[i];
@@ -110,7 +168,25 @@ int create_pgd(task_t* task)
task->pgd = pgd;
return 0;
if (copy) {
for (i=KERNEL_SPACE/(1024*PAGE_SIZE); i<1024; i++) {
if (!(curr_task->pgd->entries[i] & 0xFFFFF000))
continue;
kprintf("i %d\n", i);
phyaddr = copy_page_table(i, (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + i*PAGE_SIZE) & 0xFFFFF000), &counter);
if (phyaddr)
pgd->entries[i] = phyaddr | (pgt_container->entries[i] & 0x00000FFF);
}
}
// free the virtual regions, because only the new child task needs access to the new pgd and pgt
//if (!vm_free((size_t)pgt, 1))
// atomic_int32_sub(&curr_task->mem_usage, 1);
//if (!vm_free((size_t)pgd, 1))
// atomic_int32_sub(&curr_task->mem_usage, 1);
return counter;
}
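
Note the changed contract: on success create_pgd() no longer returns 0 but the number of pages it allocated (counter), so the caller can seed the new task's memory accounting in one step. Both call sites later in this commit follow the same pattern:

    int pages = create_pgd(task_table+i, copy); /* copy=1 also duplicates the user mappings */
    if (pages < 0)
        return -ENOMEM;
    /* at least one page is already created for the pgd */
    atomic_int32_set(&task_table[i].mem_usage, pages);

The commented-out vm_free() calls just above are presumably part of the memory leak the commit message admits to: the parent keeps its kernel mappings of the child's pgd and pgt and never releases them.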
int drop_pgd(void)
@@ -444,7 +520,7 @@ int print_paging_tree(size_t viraddr)
static void pagefault_handler(struct state *s)
{
kprintf("PAGE FAULT: Task %u got page fault at irq %u\n", per_core(current_task)->id, s->int_no);
kprintf("PAGE FAULT: Task %u got page fault at %p (irq 0x%x)\n", per_core(current_task)->id, read_cr2(), s->int_no);
kprintf("Register state: eax = 0x%x, ebx = 0x%x, ecx = 0x%x, edx = 0x%x, edi = 0x%x, esi = 0x%x, ebp = 0x%x, esp = 0x%x\n",
s->eax, s->ebx, s->ecx, s->edx, s->edi, s->esi, s->ebp, s->esp);

View file

@@ -39,6 +39,7 @@ extern "C" {
#define __NR_fstat 10
#define __NR_sbrk 11
#define __NR_fork 12
#define __NR_wait 13
#ifdef __cplusplus
}

View file

@@ -40,8 +40,8 @@ int create_kernel_task(tid_t*, entry_point_t, void*);
/* create a user-level task. if sz is zero, the task will be created with the default stack size */
int create_user_task(tid_t* id, size_t sz, const char* filename, int argc, char** argv);
/* until the task id is running, the current task is blocked */
int join_task(tid_t id, int* result);
/* until a child task has terminated, the current task is blocked */
tid_t wait(int32_t* result);
/* timer interrupt use this function for task switching */
void scheduler(void);

View file

@@ -23,6 +23,7 @@
#include <metalsvm/stddef.h>
#include <metalsvm/vma.h>
#include <metalsvm/spinlock_types.h>
#include <metalsvm/mailbox_types.h>
#include <asm/atomic.h>
#ifdef __cplusplus
@@ -37,18 +38,18 @@ extern "C" {
#define TASK_IDLE 5
typedef int (STDCALL *entry_point_t)(void*);
struct mailbox_int32;
struct page_dir;
typedef struct task {
tid_t id; /* task id = position in the task table */
uint32_t status;
atomic_int32_t mem_usage; /* in number of pages */
struct spinlock pgd_lock; /* avoids concurrent access to the page directory */
spinlock_t pgd_lock; /* avoids concurrent access to the page directory */
struct page_dir* pgd; /* pointer to the page directory */
spinlock_t vma_lock;
vma_t* vma_list;
struct mailbox_int32* mbox[MAX_TASKS];
mailbox_wait_msg_t inbox;
mailbox_wait_msg_t* outbox[MAX_TASKS];
} __attribute__((packed)) task_t;
#ifdef __cplusplus

View file

@@ -70,6 +70,12 @@ int syscall_handler(uint32_t sys_nr, ...)
case __NR_fork:
ret = sys_fork();
break;
case __NR_wait: {
int32_t* status = va_arg(vl, int32_t*);
ret = wait(status);
break;
}
case __NR_fstat:
default:
kputs("invalid system call\n");

View file

@@ -44,18 +44,15 @@ task_t* get_current_task(void) {
}
int multitasking_init(void) {
unsigned int i;
for(i=0; i<MAX_TASKS; i++) {
if (task_table[i].status == TASK_INVALID) {
task_table[i].id = i;
task_table[i].status = TASK_RUNNING;
atomic_int32_set(&task_table[i].mem_usage, 0);
memset(task_table[i].mbox, 0x00, sizeof(mailbox_int32_t*)*MAX_TASKS);
per_core(current_task) = task_table+i;
get_boot_pgd(task_table+i);
return 0;
}
if (task_table[0].status == TASK_INVALID) {
task_table[0].id = 0;
task_table[0].status = TASK_RUNNING;
atomic_int32_set(&task_table[0].mem_usage, 0);
mailbox_wait_msg_init(&task_table[0].inbox);
memset(task_table[0].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
per_core(current_task) = task_table+0;
get_boot_pgd(task_table+0);
return 0;
}
return -ENOMEM;
@@ -63,15 +60,16 @@ int multitasking_init(void) {
static void wakeup_blocked_tasks(int result)
{
wait_msg_t tmp = { per_core(current_task)->id, result };
unsigned int i;
spinlock_lock_irqsave(&table_lock);
/* wake up blocked tasks */
for(i=0; i<MAX_TASKS; i++) {
if (per_core(current_task)->mbox[i]) {
mailbox_int32_post(per_core(current_task)->mbox[i], result);
per_core(current_task)->mbox[i] = NULL;
if (per_core(current_task)->outbox[i]) {
mailbox_wait_msg_post(per_core(current_task)->outbox[i], tmp);
per_core(current_task)->outbox[i] = NULL;
}
}
@@ -137,24 +135,94 @@ static int create_task(tid_t* id, entry_point_t ep, void* arg)
for(i=0; i<MAX_TASKS; i++) {
if (task_table[i].status == TASK_INVALID) {
ret = create_pgd(task_table+i);
if (ret != 0) {
ret = create_pgd(task_table+i, 0);
if (ret < 0) {
ret = -ENOMEM;
goto create_task_out;
}
task_table[i].id = i;
task_table[i].status = TASK_READY;
// the user-level pgd already occupies one page
atomic_int32_set(&task_table[i].mem_usage, 1);
// at least one page is already created for the pgd
atomic_int32_set(&task_table[i].mem_usage, ret);
spinlock_init(&task_table[i].vma_lock);
task_table[i].vma_list = NULL;
memset(task_table[i].mbox, 0x00, sizeof(mailbox_int32_t*)*MAX_TASKS);
task_table[i].vma_list = NULL;
mailbox_wait_msg_init(&task_table[i].inbox);
memset(task_table[i].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
task_table[i].outbox[per_core(current_task)->id] = &per_core(current_task)->inbox;
if (id)
*id = i;
ret = create_default_frame(task_table+i, ep, arg);
task_table[i].status = TASK_READY;
break;
}
}
create_task_out:
spinlock_unlock_irqsave(&table_lock);
return ret;
}
int sys_fork(void)
{
int ret = -ENOMEM;
unsigned int i;
task_t* parent = per_core(current_task);
spinlock_lock_irqsave(&table_lock);
for(i=0; i<MAX_TASKS; i++) {
if (task_table[i].status == TASK_INVALID) {
ret = create_pgd(task_table+i, 1);
if (ret < 0) {
ret = -ENOMEM;
goto create_task_out;
}
task_table[i].id = i;
// at least one page is already created for the pgd
atomic_int32_set(&task_table[i].mem_usage, ret);
spinlock_init(&task_table[i].vma_lock);
task_table[i].vma_list = NULL;
/*if (copy) {
vma_t** child = &task_table[i].vma_list;
vma_t* parent = per_core(current_task)->vma_list;
vma_t* tmp = NULL;
while(parent) {
*child = (vma_t*) kmalloc(sizeof(vma_t));
if (BUILTIN_EXPECT(!child, 0))
break;
atomic_int32_inc(&task_table[i].mem_usage);
(*child)->start = parent->start;
(*child)->end = parent->end;
(*child)->type = parent->type;
(*child)->prev = tmp;
(*child)->next = NULL;
parent = parent->next;
tmp = *child;
child = &((*child)->next);
}
}*/
mailbox_wait_msg_init(&task_table[i].inbox);
memset(task_table[i].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
task_table[i].outbox[per_core(current_task)->id] = &per_core(current_task)->inbox;
ret = arch_fork(task_table+i);
if (parent != per_core(current_task))
return 0; // Oh, the new child! => leave function
if (ret >= 0) {
task_table[i].status = TASK_READY;
ret = i;
}
break;
}
}
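
The line "if (parent != per_core(current_task)) return 0;" is where fork's return-twice semantics come from: parent was captured before arch_fork(), so the parent falls through, marks the child TASK_READY and returns its slot index i, while the child — dispatched later at the EIP recorded by read_eip(), on a copied stack in which the local variable parent still holds the old pointer — sees that current_task has changed and leaves with 0. For a caller this is the classic pattern:

    int pid = sys_fork();
    if (pid == 0) {
        /* child: current_task changed underneath us */
    } else if (pid > 0) {
        /* parent: pid is the child's index in task_table */
    } else {
        /* error, e.g. -ENOMEM: no free slot or create_pgd() failed */
    }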
@@ -312,60 +380,23 @@ int create_user_task(tid_t* id, size_t sz, const char* fname, int argc, char** a
return create_task(id, user_entry, node);
}
int sys_fork(void)
tid_t wait(int32_t* result)
{
return -EINVAL;
}
int join_task(tid_t id, int* result)
{
int32_t tmp;
mailbox_int32_t mbox;
mailbox_int32_init(&mbox);
spinlock_lock_irqsave(&table_lock);
wait_msg_t tmp = { -1, -1};
/*
* idle tasks are not allowed to wait for another task
* they should always run...
*/
if (BUILTIN_EXPECT(per_core(current_task)->status == TASK_IDLE, 0))
goto join_out;
return -EINVAL;
/* a task is not able to wait for itself */
if (BUILTIN_EXPECT(per_core(current_task)->id == id, 0))
goto join_out;
/* invalid id */
if (BUILTIN_EXPECT(id >= MAX_TASKS, 0))
goto join_out;
/* task already finished */
if (BUILTIN_EXPECT(task_table[id].status == TASK_INVALID, 0))
goto join_out;
/* task already finished */
if (BUILTIN_EXPECT(task_table[id].status == TASK_FINISHED, 0))
goto join_out;
task_table[id].mbox[per_core(current_task)->id] = &mbox;
spinlock_unlock_irqsave(&table_lock);
mailbox_int32_fetch(&mbox, &tmp);
mailbox_wait_msg_fetch(&per_core(current_task)->inbox, &tmp);
if (result)
*result = tmp;
*result = tmp.result;
mailbox_int32_destroy(&mbox);
return 0;
join_out:
spinlock_unlock_irqsave(&table_lock);
mailbox_int32_destroy(&mbox);
return -EINVAL;
return tmp.id;
}
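
The wait/exit handshake runs entirely over the new mailboxes: at creation time every task wires outbox[parent_id] to its parent's inbox, and wakeup_blocked_tasks() (above) posts a message to every registered outbox when a task terminates, so wait() only has to block on its own inbox. Judging from its use here, wait_msg_t is a simple pair — a sketch, assuming it mirrors the definition in the new mailbox_types header:

    typedef struct wait_msg {
        tid_t   id;     /* id of the task that terminated */
        int32_t result; /* its exit code */
    } wait_msg_t;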
int wakeup_task(tid_t id)

View file

@@ -85,13 +85,17 @@ static int STDCALL foo(void* arg)
static int STDCALL join_test(void* arg)
{
tid_t id;
int ret, result = -1234;
tid_t id, ret;
int result = -1234;
ret = create_kernel_task(&id, foo, "Hello from foo2\n");
kprintf("Wait for task %u: ret = %d\n", id, ret);
ret = join_task(id, &result);
kprintf("Task %u finished: ret = %d, result = %d\n", id, ret, result);
create_kernel_task(&id, foo, "Hello from foo2\n");
kprintf("Wait for child %u\n", id);
do {
ret = wait(&result);
} while(ret != id);
kprintf("Child %u finished: result = %d\n", id, result);
return 0;
}
@@ -105,11 +109,11 @@ int test_init(void)
mailbox_int32_init(&mbox);
create_kernel_task(NULL, foo, "Hello from foo1\n");
//create_kernel_task(NULL, join_test, NULL);
create_kernel_task(NULL, join_test, NULL);
//create_kernel_task(NULL, producer, NULL);
//create_kernel_task(NULL, consumer, NULL);
create_user_task(NULL, 8192, "/bin/hello", 1, argv);
create_user_task(NULL, 8192, "/bin/test_fork", 1, argv);
//create_user_task(NULL, 8192, "/bin/tests", 1, argv);
return 0;
}