From 06877ff1087a57dc3c51dacfbe4d9a5cdaecfedb Mon Sep 17 00:00:00 2001
From: Steffen Vogel
Date: Mon, 18 Nov 2013 15:47:26 +0100
Subject: [PATCH 01/21] bitmap cleanup

---
 include/metalsvm/mmu.h |  24 +++---
 mm/memory.c            | 180 ++++++++++++++++++++---------------------
 2 files changed, 105 insertions(+), 99 deletions(-)

diff --git a/include/metalsvm/mmu.h b/include/metalsvm/mmu.h
index 4db5a367..71cfab64 100644
--- a/include/metalsvm/mmu.h
+++ b/include/metalsvm/mmu.h
@@ -49,33 +49,39 @@ extern atomic_int32_t total_available_pages;
  */
 int mmu_init(void);
 
-/** @brief get continuous pages
+/** @brief Get continuous pages
  *
- * This function finds a continuous page region (first fit algorithm)
- *
- * @param no_pages Desired number of pages
+ * Uses the first-fit algorithm to find a suitable, continuous physical memory region
  *
+ * @param npages Desired number of pages
+ * @return
  * - physical address on success
  * - 0 on failure
  */
-size_t get_pages(uint32_t no_pages);
+size_t get_pages(uint32_t npages);
 
-/** @brief get a single page
+/** @brief Get a single page
  *
  * Convenience function: uses get_pages(1);
  */
 static inline size_t get_page(void) { return get_pages(1); }
 
-/** @brief Put back a page after use
+/** @brief Put back a sequence of continuous pages
  *
- * @param phyaddr Physical address to put back
+ * @param phyaddr Physical address of the first page
+ * @param npages Number of pages
  *
  * @return
  * - 0 on success
  * - -EINVAL (-22) on failure
  */
-int put_page(size_t phyaddr);
+int put_pages(size_t phyaddr, size_t npages);
+
+/** @brief Put a single page
+ *
+ * Convenience function: uses put_pages(phyaddr, 1);
+ */
+static inline int put_page(size_t phyaddr) { return put_pages(phyaddr, 1); }
 
 #ifdef __cplusplus
 }
diff --git a/mm/memory.c b/mm/memory.c
index 2b6a4c51..8c7a363c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -37,17 +37,15 @@
 #endif
 
 /*
- * 0 => free
- * 1 => occupied
- *
- * Set whole address space as occupied
+ * Set whole address space as occupied:
+ * 0 => free, 1 => occupied
  */
-static uint8_t bitmap[BITMAP_SIZE]; // = {[0 ... BITMAP_SIZE-1] = 0xFF};
-static spinlock_t bitmap_lock = SPINLOCK_INIT;
-static size_t alloc_start;
-atomic_int32_t total_pages = ATOMIC_INIT(0);
-atomic_int32_t total_allocated_pages = ATOMIC_INIT(0);
-atomic_int32_t total_available_pages = ATOMIC_INIT(0);
+static uint8_t bitmap[BITMAP_SIZE] = {[0 ... 
BITMAP_SIZE-1] = 0xFF}; +static spinlock_t bitmap_lock = SPINLOCK_INIT; + +atomic_int32_t total_pages = ATOMIC_INIT(0); +atomic_int32_t total_allocated_pages = ATOMIC_INIT(0); +atomic_int32_t total_available_pages = ATOMIC_INIT(0); /* * Note that linker symbols are not variables, they have no memory allocated for @@ -74,8 +72,8 @@ inline static void page_set_mark(size_t i) size_t index = i >> 3; size_t mod = i & 0x7; - //if (page_marked(i)) - // kprintf("page %u is already marked\n", i); + if (page_marked(i)) + kprintf("page_set_mark(%u): already marked\n", i); bitmap[index] = bitmap[index] | (1 << mod); } @@ -86,11 +84,90 @@ inline static void page_clear_mark(size_t i) size_t mod = i % 8; if (page_unmarked(i)) - kprintf("page %u is already unmarked\n", i); + kprintf("page_clear_mark(%u): already unmarked\n", i); bitmap[index] = bitmap[index] & ~(1 << mod); } +size_t get_pages(uint32_t npages) +{ + // skip first page + static size_t start = 1; + + uint32_t i, j, l; + uint32_t k = 0; + size_t ret = 0; + + if (BUILTIN_EXPECT(!npages, 0)) + return ret; + + if (BUILTIN_EXPECT(npages > atomic_int32_read(&total_available_pages), 0)) + return ret; + + spinlock_lock(&bitmap_lock); + i = start; +next_try: + while((k < BITMAP_SIZE) && page_marked(i)) { + k++; + i = (i+1) & (BITMAP_SIZE-1); + } + + if (k >= BITMAP_SIZE) + goto oom; + + for(j=1; (j= BITMAP_SIZE) { + i = 1; + goto next_try; + } + + if (k >= BITMAP_SIZE) + goto oom; + + ret = i*PAGE_SIZE; + kprintf("get_pages: ret 0x%x, i = %d, j = %d, npages = %d\n", ret, i, j, npages); // TODO: remove + for(l=i; l> PAGE_SHIFT; + + spinlock_lock(&bitmap_lock); + for (index=0; index atomic_int32_read(&total_available_pages), 0)) - return ret; - - spinlock_lock(&bitmap_lock); - i = alloc_start; -next_try: - while((k < BITMAP_SIZE) && page_marked(i)) { - k++; - i = (i+1) & (BITMAP_SIZE-1); - } - - if (k >= BITMAP_SIZE) - goto oom; - - for(j=1; (j= BITMAP_SIZE) { - i = 0; - goto next_try; - } - - if (k >= BITMAP_SIZE) - goto oom; - - ret = i*PAGE_SIZE; - //kprintf("alloc: ret 0x%x, i = %d, j = %d, npages = %d\n", ret, i, j, npages); - for(l=i; l> PAGE_SHIFT; - - if (BUILTIN_EXPECT(!phyaddr, 0)) - return -EINVAL; - - spinlock_lock(&bitmap_lock); - page_clear_mark(index); - spinlock_unlock(&bitmap_lock); - - atomic_int32_sub(&total_allocated_pages, 1); - atomic_int32_add(&total_available_pages, 1); - - return 0; -} - void* mem_allocation(size_t sz, uint32_t flags) { size_t phyaddr, viraddr; From 0d7aa3d0ca6ee3d18bf5ccc818ce294c9d4264ec Mon Sep 17 00:00:00 2001 From: Steffen Vogel Date: Wed, 20 Nov 2013 11:15:10 +0100 Subject: [PATCH 02/21] allow memory initialization without multiboot mmap --- mm/memory.c | 53 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 30 insertions(+), 23 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 8c7a363c..411f9161 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -175,34 +175,41 @@ int mmu_init(void) size_t addr; int ret = 0; - // at first, set default value of the bitmap - memset(bitmap, 0xFF, sizeof(uint8_t)*BITMAP_SIZE); - #ifdef CONFIG_MULTIBOOT - if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MEM_MAP)) { - size_t end_addr; - multiboot_memory_map_t* mmap = (multiboot_memory_map_t*) ((size_t) mb_info->mmap_addr); - multiboot_memory_map_t* mmap_end = (void*) ((size_t) mb_info->mmap_addr + mb_info->mmap_length); + if (mb_info) { + if (mb_info->flags & MULTIBOOT_INFO_MEM_MAP) { + multiboot_memory_map_t* mmap = (multiboot_memory_map_t*) ((size_t) mb_info->mmap_addr); + multiboot_memory_map_t* mmap_end 
= (void*) ((size_t) mb_info->mmap_addr + mb_info->mmap_length); - while (mmap < mmap_end) { - if (mmap->type == MULTIBOOT_MEMORY_AVAILABLE) { - // set the available memory as "unused" - addr = mmap->addr; - end_addr = addr + mmap->len; - - while (addr < end_addr) { - page_clear_mark(addr >> PAGE_SHIFT); - addr += PAGE_SIZE; - atomic_int32_inc(&total_pages); - atomic_int32_inc(&total_available_pages); + // mark available memory as free + while (mmap < mmap_end) { + if (mmap->type == MULTIBOOT_MEMORY_AVAILABLE) { + for (addr=mmap->addr; addr < mmap->addr + mmap->len; addr += PAGE_SIZE) { + page_clear_mark(addr >> PAGE_SHIFT); + atomic_int32_inc(&total_pages); + atomic_int32_inc(&total_available_pages); + } } + mmap++; } - mmap++; } - } else { - kputs("Unable to initialize the memory management subsystem\n"); - while(1) { - HALT; + else if (mb_info->flags & MULTIBOOT_INFO_MEM) { + size_t page; + size_t pages_lower = mb_info->mem_lower >> 2; + size_t pages_upper = mb_info->mem_upper >> 2; + + for (page=0; page Date: Wed, 20 Nov 2013 11:18:10 +0100 Subject: [PATCH 03/21] some rewrite of mmu_init concerning the initialization of the memory bitmap --- mm/memory.c | 71 ++++++++++++++++++++++++++--------------------------- 1 file changed, 35 insertions(+), 36 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 411f9161..56fb7ae2 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -170,7 +170,6 @@ int put_pages(size_t phyaddr, size_t npages) int mmu_init(void) { - size_t kernel_size; unsigned int i; size_t addr; int ret = 0; @@ -211,6 +210,20 @@ int mmu_init(void) kputs("Unable to initialize the memory management subsystem\n"); while (1) HALT; } + + // mark mb_info as used + page_set_mark((size_t) mb_info >> PAGE_SHIFT); + atomic_int32_inc(&total_allocated_pages); + atomic_int32_dec(&total_available_pages); + + // mark modules list as used + if (mb_info->flags & MULTIBOOT_INFO_MODS) { + for(addr=mb_info->mods_addr; addrmods_addr+mb_info->mods_count*sizeof(multiboot_module_t); addr+=PAGE_SIZE) { + page_set_mark(addr >> PAGE_SHIFT); + atomic_int32_inc(&total_allocated_pages); + atomic_int32_dec(&total_available_pages); + } + } } #elif defined(CONFIG_ROCKCREEK) // of course, the first slots belong to the private memory @@ -219,7 +232,7 @@ int mmu_init(void) if (addr > addr + PAGE_SIZE) break; atomic_int32_inc(&total_pages); - atomic_int32_inc(&total_available_pages); + atomic_int32_inc(&total_available_pages); } // Note: The last slot belongs always to the private memory. @@ -240,28 +253,20 @@ int mmu_init(void) #error Currently, MetalSVM supports only the Multiboot specification or the RockCreek processor! 
#endif - kernel_size = (size_t) &kernel_end - (size_t) &kernel_start; - if (kernel_size & (PAGE_SIZE-1)) - kernel_size += PAGE_SIZE - (kernel_size & (PAGE_SIZE-1)); - atomic_int32_add(&total_allocated_pages, kernel_size >> PAGE_SHIFT); - atomic_int32_sub(&total_available_pages, kernel_size >> PAGE_SHIFT); - - // set kernel space as used - for(i=(size_t) &kernel_start >> PAGE_SHIFT; i < (size_t) &kernel_end >> PAGE_SHIFT; i++) - page_set_mark(i); - if ((size_t) &kernel_end & (PAGE_SIZE-1)) - page_set_mark(i); - - alloc_start = (size_t) &kernel_end >> PAGE_SHIFT; - if ((size_t) &kernel_end & (PAGE_SIZE-1)) - alloc_start++; + // mark kernel as used + for(addr=(size_t) &kernel_start; addr<(size_t) &kernel_end; addr+=PAGE_SIZE) { + page_set_mark(addr >> PAGE_SHIFT); + atomic_int32_inc(&total_allocated_pages); + atomic_int32_dec(&total_available_pages); + } #if MAX_CORES > 1 - // reserve physical page for SMP boot code page_set_mark(SMP_SETUP_ADDR >> PAGE_SHIFT); - atomic_int32_add(&total_allocated_pages, 1); - atomic_int32_sub(&total_available_pages, 1); + atomic_int32_inc(&total_allocated_pages); + atomic_int32_dec(&total_available_pages); #endif + + // enable paging and map SMP, VGA, Multiboot modules etc. ret = paging_init(); if (ret) { kprintf("Failed to initialize paging: %d\n", ret); @@ -273,25 +278,19 @@ int mmu_init(void) * Modules like the init ram disk are already loaded. * Therefore, we set these pages as used. */ - if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MODS)) { - multiboot_module_t* mmodule = (multiboot_module_t*) ((size_t) mb_info->mods_addr); + if (mb_info) { - // mark the mb_info as used. - page_set_mark((size_t)mb_info >> PAGE_SHIFT); - atomic_int32_inc(&total_allocated_pages); - atomic_int32_dec(&total_available_pages); + if (mb_info->flags & MULTIBOOT_INFO_MODS) { + multiboot_module_t* mmodule = (multiboot_module_t*) ((size_t) mb_info->mods_addr); - for(addr = mb_info->mods_addr; addr < mb_info->mods_addr + mb_info->mods_count * sizeof(multiboot_module_t); addr += PAGE_SIZE) { - page_set_mark(addr >> PAGE_SHIFT); - atomic_int32_inc(&total_allocated_pages); - atomic_int32_dec(&total_available_pages); - } - for(i=0; imods_count; i++, mmodule++) { - for(addr=mmodule->mod_start; addrmod_end; addr+=PAGE_SIZE) { - page_set_mark(addr >> PAGE_SHIFT); - atomic_int32_inc(&total_allocated_pages); - atomic_int32_dec(&total_available_pages); + for(i=0; imods_count; i++) { + + for(addr=mmodule[i].mod_start; addr> PAGE_SHIFT); + atomic_int32_inc(&total_allocated_pages); + atomic_int32_dec(&total_available_pages); + } } } } From 421e7ec66ea22165edfc89308bf4e600475f5542 Mon Sep 17 00:00:00 2001 From: Steffen Vogel Date: Wed, 20 Nov 2013 11:20:52 +0100 Subject: [PATCH 04/21] added missing multiboot #defines --- arch/x86/include/asm/multiboot.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/multiboot.h b/arch/x86/include/asm/multiboot.h index a4adae39..96d123cd 100644 --- a/arch/x86/include/asm/multiboot.h +++ b/arch/x86/include/asm/multiboot.h @@ -35,9 +35,11 @@ #ifdef CONFIG_MULTIBOOT -/* are there modules to do something with? */ +/// Does the bootloader provide mem_* fields? +#define MULTIBOOT_INFO_MEM 0x00000001 +/// Does the bootloader provide a list of modules? #define MULTIBOOT_INFO_MODS 0x00000008 -/* is there a full memory map? */ +/// Does the bootloader provide a full memory map? 
#define MULTIBOOT_INFO_MEM_MAP 0x00000040 typedef uint16_t multiboot_uint16_t; @@ -114,7 +116,6 @@ struct multiboot_info multiboot_uint16_t vbe_interface_off; multiboot_uint16_t vbe_interface_len; }; - typedef struct multiboot_info multiboot_info_t; struct multiboot_mmap_entry From 3cd5a5853b54327c5740788975edd04690f8b274 Mon Sep 17 00:00:00 2001 From: Steffen Vogel Date: Wed, 20 Nov 2013 11:26:55 +0100 Subject: [PATCH 05/21] added VMA list implementation --- include/metalsvm/vma.h | 72 +++++++-- mm/vma.c | 355 ++++++++++++++++++++++++++++++++++------- 2 files changed, 354 insertions(+), 73 deletions(-) diff --git a/include/metalsvm/vma.h b/include/metalsvm/vma.h index 74c63233..e6bf8b77 100644 --- a/include/metalsvm/vma.h +++ b/include/metalsvm/vma.h @@ -27,56 +27,102 @@ #define __VMA_H__ #include +#include #ifdef __cplusplus extern "C" { #endif +/// Read access to this VMA is allowed #define VMA_READ (1 << 0) +/// Write access to this VMA is allowed #define VMA_WRITE (1 << 1) +/// Instructions fetches in this VMA are allowed #define VMA_EXECUTE (1 << 2) +/// This VMA is cacheable #define VMA_CACHEABLE (1 << 3) -#define VMA_NOACCESS (1 << 4) +/// This VMA is not accessable +#define VMA_NO_ACCESS (1 << 4) +/// This VMA should be part of the userspace +#define VMA_USER (1 << 5) +/// A collection of flags used for the kernel heap (kmalloc) +#define VMA_HEAP (VMA_READ|VMA_WRITE|VMA_CACHEABLE) + +// boundaries for VAS allocation +extern const void kernel_end; +//#define VMA_KERN_MIN (((size_t) &kernel_end + PAGE_SIZE) & PAGE_MASK) +#define VMA_KERN_MAX KERNEL_SPACE +#define VMA_USER_MAX (1UL << 47) // TODO struct vma; -/** @brief VMA structure definition */ +/** @brief VMA structure definition + * + * Each item in this linked list marks a used part of the virtual address space. + * Its used by vm_alloc() to find holes between them. + */ typedef struct vma { /// Start address of the memory area size_t start; /// End address of the memory area size_t end; /// Type flags field - uint32_t type; + uint32_t flags; /// Pointer of next VMA element in the list struct vma* next; /// Pointer to previous VMA element in the list struct vma* prev; } vma_t; -/** @brief Add a new virtual memory region to the list of VMAs +/** @brief Add a new virtual memory area to the list of VMAs * - * @param task Pointer to the task_t structure of the task - * @param start Start address of the new region - * @param end End address of the new region - * @param type Type flags the new region shall have + * @param start Start address of the new area + * @param end End address of the new area + * @param flags Type flags the new area shall have * * @return * - 0 on success * - -EINVAL (-22) or -EINVAL (-12) on failure */ -int vma_add(struct task* task, size_t start, size_t end, uint32_t type); +int vma_add(size_t start, size_t end, uint32_t flags); -/** @brief Dump information about this task's VMAs into the terminal. 
+/** @brief Search for a free memory area * - * This will print out Start, end and flags for each VMA in the task's list + * @param size Size of requestes VMA in bytes + * @param flags + * @return Type flags the new area shall have + * - 0 on failure + * - the start address of a free area + */ +size_t vma_alloc(size_t size, uint32_t flags); + +/** @brief Free an allocated memory area * - * @param task The task's task_t structure + * @param start Start address of the area to be freed + * @param end End address of the to be freed * @return * - 0 on success * - -EINVAL (-22) on failure */ -int vma_dump(struct task* task); +int vma_free(size_t start, size_t end); + +/** @brief Free all virtual memory areas + * + * @return + * - 0 on success + */ +int drop_vma_list(); + +/** @brief Copy the VMA list of the current task to task + * + * @param task The task where the list should be copied to + * @return + * - 0 on success + */ +int copy_vma_list(struct task* task); + +/** @brief Dump information about this task's VMAs into the terminal. */ +void vma_dump(); #ifdef __cplusplus } diff --git a/mm/vma.c b/mm/vma.c index f1e64580..a4e7622b 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -1,5 +1,5 @@ /* - * Copyright 2011 Stefan Lankes, Chair for Operating Systems, + * Copyright 2011 Steffen Vogel, Chair for Operating Systems, * RWTH Aachen University * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -17,87 +17,322 @@ * This file is part of MetalSVM. */ +#include #include #include #include #include -#include #include /* - * add a new virtual memory region to the list of VMAs + * Kernel space VMA list and lock + * + * For bootstrapping we initialize the VMA list with one empty VMA + * (start == end) and expand this VMA by calls to vma_alloc() */ -int vma_add(task_t* task, size_t start, size_t end, uint32_t type) +static vma_t vma_boot = { VMA_KERN_MAX, VMA_KERN_MAX, VMA_HEAP }; +static vma_t* vma_list = &vma_boot; +static spinlock_t vma_lock = SPINLOCK_INIT; + +size_t vma_alloc(size_t size, uint32_t flags) { - vma_t* new_vma; - - if (BUILTIN_EXPECT(!task || start > end, 0)) + task_t* task = per_core(current_task); + spinlock_t* lock; + vma_t** list; + size_t ret = 0; + + kprintf("vma_alloc(0x%lx, 0x%x)\n", size, flags); + + size_t base, limit; // boundaries for search + size_t start, end; + + if (BUILTIN_EXPECT(!size, 0)) + return 0; + + if (flags & VMA_USER) { + base = VMA_KERN_MAX; + limit = VMA_USER_MAX; + list = &task->vma_list; + lock = &task->vma_lock; + } + else { + base = 0; + limit = VMA_KERN_MAX; + list = &vma_list; + lock = &vma_lock; + } + + spinlock_lock(lock); + + // "last" fit search for free memory area + vma_t* pred = *list; // vma before current gap + vma_t* succ = NULL; // vma after current gap + do { + start = (pred) ? pred->end : base; + end = (succ) ? succ->start : limit; + + if (end > start && end - start > size) + break; // we found a gap + + succ = pred; + pred = (pred) ? 
pred->prev : NULL; + } while (pred || succ); + + if (BUILTIN_EXPECT(end > limit || end < start || end - start < size, 0)) { + spinlock_unlock(lock); + return 0; + } + + // resize existing vma + if (succ && succ->flags == flags) { + succ->start -= size; + ret = succ->start; + } + // insert new vma + else { + vma_t* new = kmalloc(sizeof(vma_t)); + if (BUILTIN_EXPECT(!new, 0)) + return 0; + + new->start = end-size; + new->end = end; + new->flags = flags; + new->next = succ; + new->prev = pred; + + if (pred) + pred->next = new; + if (succ) + succ->prev = new; + else + *list = new; + + ret = new->start; + } + + spinlock_unlock(lock); + return ret; +} + +int vma_free(size_t start, size_t end) +{ + task_t* task = per_core(current_task); + spinlock_t* lock; + vma_t* vma; + vma_t** list; + + if (BUILTIN_EXPECT(start >= end, 0)) return -EINVAL; - new_vma = kmalloc(sizeof(new_vma)); - if (!new_vma) - return -ENOMEM; + if (end <= VMA_KERN_MAX) { + lock = &vma_lock; + list = &vma_list; + } + else if (start >= VMA_KERN_MAX) { + lock = &task->vma_lock; + list = &task->vma_list; + } + else + return -EINVAL; + + if (BUILTIN_EXPECT(!*list, 0)) + return -EINVAL; + + spinlock_lock(lock); + + // search vma + vma = *list; + while (vma) { + if (start >= vma->start && end <= vma->end) break; + vma = vma->prev; + } + + if (BUILTIN_EXPECT(!vma, 0)) { + spinlock_unlock(lock); + return -EINVAL; + } + + // free/resize vma + if (start == vma->start && end == vma->end) { + if (vma == *list) + *list = vma->next; // update list head + if (vma->prev) + vma->prev->next = vma->next; + if (vma->next) + vma->next->prev = vma->prev; + kfree(vma); + } + else if (start == vma->start) + vma->start = end; + else if (end == vma->end) + vma->end = start; + else { + vma_t* new = kmalloc(sizeof(vma_t)); + if (BUILTIN_EXPECT(!new, 0)) { + spinlock_unlock(lock); + return -ENOMEM; + } + + new->start = end; + vma->end = start; + + new->end = vma->end; + new->next = vma->next; + new->prev = vma; + vma->next = new; + } + + spinlock_unlock(lock); + + return 0; +} + +int vma_add(size_t start, size_t end, uint32_t flags) +{ + task_t* task = per_core(current_task); + spinlock_t* lock; + vma_t** list; + + kprintf("vma_add(0x%lx, 0x%lx, 0x%x)\n", start, end, flags); + + if (BUILTIN_EXPECT(start >= end, 0)) + return -EINVAL; + + if (flags & VMA_USER) { + list = &task->vma_list; + lock = &task->vma_lock; + + // check if address is in userspace + if (BUILTIN_EXPECT(start < VMA_KERN_MAX, 0)) + return -EINVAL; + } + else { + list = &vma_list; + lock = &vma_lock; + + // check if address is in kernelspace + if (BUILTIN_EXPECT(end > VMA_KERN_MAX, 0)) + return -EINVAL; + } + + spinlock_lock(lock); + + // search gap + vma_t* pred = *list; + vma_t* succ = NULL; + while (pred) { + if ((!pred || pred->end <= start) && + (!succ || succ->start >= end)) + break; + + succ = pred; + pred = pred->prev; + } + + // resize existing vma + if (pred && pred->end == start && pred->flags == flags) + pred->end = end; + else if (succ && succ->start == end && succ->flags == flags) + succ->start = start; + // insert new vma + else { + vma_t* new = kmalloc(sizeof(vma_t)); + if (BUILTIN_EXPECT(!new, 0)) + return 0; + + new->start = start; + new->end = end; + new->flags = flags; + new->next = succ; + new->prev = pred; + + if (pred) + pred->next = new; + if (succ) + succ->prev = new; + else + *list = new; + } + + spinlock_unlock(lock); + + return 0; +} + +int copy_vma_list(task_t* task) +{ + task_t* parent_task = per_core(current_task); + + spinlock_init(&task->vma_lock); + 
spinlock_lock(&parent_task->vma_lock); + spinlock_lock(&task->vma_lock); + + int ret = 0; + vma_t* last = NULL; + vma_t* parent = parent_task->vma_list; + + while (parent) { + vma_t *new = kmalloc(sizeof(vma_t)); + if (BUILTIN_EXPECT(!new, 0)) { + ret = -ENOMEM; + goto out; + } + + new->start = parent->start; + new->end = parent->end; + new->flags = parent->flags; + new->prev = last; + + if (last) + last->next = new; + else + task->vma_list = new; + + last = new; + parent = parent->next; + } + +out: + spinlock_unlock(&task->vma_lock); + spinlock_unlock(&parent_task->vma_lock); + return ret; +} + +int drop_vma_list() +{ + task_t* task = per_core(current_task); spinlock_lock(&task->vma_lock); - new_vma->start = start; - new_vma->end = end; - new_vma->type = type; - - if (!(task->vma_list)) { - new_vma->next = new_vma->prev = NULL; - task->vma_list = new_vma; - } else { - vma_t* tmp = task->vma_list; - - while (tmp->next && tmp->start < start) - tmp = tmp->next; - - new_vma->next = tmp->next; - new_vma->prev = tmp; - tmp->next = new_vma; - } + while(task->vma_list) + pfree((void*) task->vma_list->start, task->vma_list->end - task->vma_list->start); spinlock_unlock(&task->vma_lock); return 0; } -int vma_dump(task_t* task) +void vma_dump() { - vma_t* tmp; - - if (BUILTIN_EXPECT(!task, 0)) - return -EINVAL; - - spinlock_lock(&task->vma_lock); - - int cnt = 0; - tmp = task->vma_list; - while (tmp) { - kprintf("#%d\t%8x - %8x: size=%6x, flags=", cnt, tmp->start, tmp->end, tmp->end - tmp->start); - - if (tmp->type & VMA_READ) - kputs("r"); - else - kputs("-"); - - if (tmp->type & VMA_WRITE) - kputs("w"); - else - kputs("-"); - - if (tmp->type & VMA_EXECUTE) - kputs("x"); - else - kputs("-"); - kputs("\n"); - - tmp = tmp->next; - cnt++; + void print_vma(vma_t *vma) { + while (vma) { + kprintf("0x%lx - 0x%lx: size=%x, flags=%c%c%c\n", vma->start, vma->end, vma->end - vma->start, + (vma->flags & VMA_READ) ? 'r' : '-', + (vma->flags & VMA_WRITE) ? 'w' : '-', + (vma->flags & VMA_EXECUTE) ? 'x' : '-'); + vma = vma->prev; + } } - spinlock_unlock(&task->vma_lock); + task_t* task = per_core(current_task); - return 0; + kputs("Kernelspace VMAs:\n"); + spinlock_lock(&vma_lock); + print_vma(vma_list); + spinlock_unlock(&vma_lock); + + kputs("Userspace VMAs:\n"); + spinlock_lock(&task->vma_lock); + print_vma(task->vma_list); + spinlock_unlock(&task->vma_lock); } From 79c4f2703e42f6f80990cfadd5f9db4a25a63c99 Mon Sep 17 00:00:00 2001 From: Steffen Vogel Date: Wed, 20 Nov 2013 11:27:49 +0100 Subject: [PATCH 06/21] setup kernelspace VMA list --- mm/memory.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/mm/memory.c b/mm/memory.c index 56fb7ae2..75efa8c8 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -273,18 +273,44 @@ int mmu_init(void) return ret; } + // add kernel to VMA list + vma_add((size_t) &kernel_start & PAGE_MASK, + PAGE_ALIGN((size_t) &kernel_end), + VMA_READ|VMA_WRITE|VMA_EXECUTE|VMA_CACHEABLE); + + // add LAPIC tp VMA list + vma_add((size_t) &kernel_start - PAGE_SIZE, + (size_t) &kernel_start, + VMA_READ|VMA_WRITE); + +#if MAX_CORES > 1 + // reserve page for SMP boot code + vma_add(SMP_SETUP_ADDR & PAGE_MASK, + PAGE_ALIGN(SMP_SETUP_ADDR + PAGE_SIZE), + VMA_READ|VMA_WRITE|VMA_EXECUTE|VMA_CACHEABLE); +#endif + #ifdef CONFIG_MULTIBOOT /* * Modules like the init ram disk are already loaded. * Therefore, we set these pages as used. 
*/ if (mb_info) { + vma_add((size_t) mb_info & PAGE_MASK, + PAGE_ALIGN((size_t) mb_info + sizeof(multiboot_info_t)), + VMA_READ|VMA_CACHEABLE); if (mb_info->flags & MULTIBOOT_INFO_MODS) { multiboot_module_t* mmodule = (multiboot_module_t*) ((size_t) mb_info->mods_addr); + vma_add((size_t) mb_info->mods_addr & PAGE_MASK, + PAGE_ALIGN((size_t) mb_info->mods_addr + mb_info->mods_count*sizeof(multiboot_module_t)), + VMA_READ|VMA_CACHEABLE); for(i=0; imods_count; i++) { + vma_add(PAGE_ALIGN(mmodule[i].mod_start), + PAGE_ALIGN(mmodule[i].mod_end), + VMA_READ|VMA_WRITE|VMA_CACHEABLE); for(addr=mmodule[i].mod_start; addr> PAGE_SHIFT); From 707d7132c8932ea3401c7b467191668d7ddb6ea3 Mon Sep 17 00:00:00 2001 From: Steffen Vogel Date: Wed, 20 Nov 2013 11:30:04 +0100 Subject: [PATCH 07/21] added test for the new VMA implementation --- apps/paging.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/apps/paging.c b/apps/paging.c index 6ddd2adc..03c49a2b 100644 --- a/apps/paging.c +++ b/apps/paging.c @@ -23,11 +23,13 @@ #include #include #include +#include #include #include #define PAGE_COUNT 10 +#define SIZE (PAGE_COUNT*PAGE_SIZE) #define VIRT_FROM_ADDR 0x100000000000 #define VIRT_TO_ADDR 0x200000000000 @@ -168,11 +170,55 @@ static void paging(void) //sleep(3); } +/** @brief Test of the VMA allocator */ +static void vma(void) +{ + int ret; + + // vma_alloc + size_t a1 = vma_alloc(SIZE, VMA_HEAP); + test(a1, "vma_alloc(0x%x, 0x%x) = 0x%lx", SIZE, VMA_HEAP, a1); + vma_dump(); + + size_t a2 = vma_alloc(SIZE, VMA_HEAP|VMA_USER); + test(a2 != 0, "vma_alloc(0x%x, 0x%x) = 0x%lx", SIZE, VMA_HEAP|VMA_USER, a2); + vma_dump(); + + // vma_add + ret = vma_add(VIRT_FROM_ADDR, VIRT_FROM_ADDR+SIZE, VMA_HEAP|VMA_USER); + test(ret >= 0, "vma_add(0x%lx, 0x%lx, 0x%x) = %u", VIRT_FROM_ADDR, VIRT_FROM_ADDR+SIZE, VMA_HEAP|VMA_USER, ret); + vma_dump(); + + ret = vma_add(VIRT_FROM_ADDR+SIZE, VIRT_FROM_ADDR+2*SIZE, VMA_HEAP|VMA_USER); + test(ret >= 0, "vma_add(0x%lx, 0x%lx, 0x%x) = %u", VIRT_FROM_ADDR+SIZE, VIRT_FROM_ADDR+2*SIZE, VMA_HEAP|VMA_USER, ret); + vma_dump(); + + ret = vma_add(VIRT_FROM_ADDR-SIZE, VIRT_FROM_ADDR, VMA_HEAP|VMA_USER); + test(ret >= 0, "vma_add(0x%lx, 0x%lx, 0x%x) = %u", VIRT_FROM_ADDR-SIZE, VIRT_FROM_ADDR, VMA_HEAP|VMA_USER, ret); + vma_dump(); + + // vma_free + ret = vma_free(VIRT_FROM_ADDR-SIZE, VIRT_FROM_ADDR); + test(ret >= 0, "vma_free(0x%lx, 0x%lx) = %u", VIRT_FROM_ADDR-SIZE, VIRT_FROM_ADDR, ret); + vma_dump(); + + ret = vma_free(VIRT_FROM_ADDR+SIZE, VIRT_FROM_ADDR+2*SIZE); + test(ret >= 0, "vma_free(0x%lx, 0x%lx) = %u", VIRT_FROM_ADDR+SIZE, VIRT_FROM_ADDR+2*SIZE, ret); + vma_dump(); + + ret = vma_free(VIRT_FROM_ADDR, VIRT_FROM_ADDR+SIZE); + test(ret >= 0, "vma_free(0x%lx, 0x%lx) = %u", VIRT_FROM_ADDR, VIRT_FROM_ADDR+SIZE, ret); + vma_dump(); +} + /** @brief This is a simple procedure to test memory management subsystem */ int memory(void* arg) { kprintf("======== PAGING: test started...\n"); paging(); + kprintf("======== VMA: test started...\n"); + vma(); + kprintf("======== All tests finished successfull...\n"); From aa1730919ee3a82cf01cf51b817ac5c0e4da1a32 Mon Sep 17 00:00:00 2001 From: Steffen Vogel Date: Wed, 20 Nov 2013 12:03:24 +0100 Subject: [PATCH 08/21] standardized comment format and code cleanup --- arch/x86/mm/page32.c | 22 +++++++++------------- arch/x86/mm/page64.c | 21 +++++++++------------ fs/initrd.c | 8 ++++---- include/metalsvm/page.h | 5 +---- include/metalsvm/stddef.h | 6 +++--- include/metalsvm/stdlib.h | 2 +- 
include/metalsvm/tasks.h | 8 ++------ kernel/init.c | 2 +- kernel/main.c | 3 ++- kernel/syscall.c | 2 +- kernel/tasks.c | 20 ++++++++++---------- mm/memory.c | 5 ++--- 12 files changed, 45 insertions(+), 59 deletions(-) diff --git a/arch/x86/mm/page32.c b/arch/x86/mm/page32.c index 57a42ae8..391437ad 100644 --- a/arch/x86/mm/page32.c +++ b/arch/x86/mm/page32.c @@ -150,6 +150,7 @@ int create_page_map(task_t* task, int copy) } memset(pgt, 0x00, sizeof(page_map_t)); + // copy kernel tables spinlock_lock(&kslock); for(i=0; i> PAGE_SHIFT; if ((size_t)&kernel_end & (PAGE_SIZE-1)) @@ -723,7 +724,7 @@ int arch_paging_init(void) map_region((size_t)&kernel_start, (size_t)&kernel_start, npages, MAP_KERNEL_SPACE); #if MAX_CORES > 1 - // Reserve page for smp boot code + // reserve page for smp boot code if (!map_region(SMP_SETUP_ADDR, SMP_SETUP_ADDR, 1, MAP_KERNEL_SPACE|MAP_NO_CACHE)) { kputs("could not reserve page for smp boot code\n"); return -ENOMEM; @@ -741,9 +742,7 @@ int arch_paging_init(void) map_region((size_t) mb_info & PAGE_MASK, (size_t) mb_info & PAGE_MASK, 1, MAP_KERNEL_SPACE); #if 0 - /* - * Map reserved memory regions into the kernel space - */ + // map reserved memory regions into the kernel space if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MEM_MAP)) { multiboot_memory_map_t* mmap = (multiboot_memory_map_t*) mb_info->mmap_addr; multiboot_memory_map_t* mmap_end = (void*) ((size_t) mb_info->mmap_addr + mb_info->mmap_length); @@ -818,10 +817,7 @@ int arch_paging_init(void) bootinfo->addr = viraddr; #endif - /* - * we turned on paging - * => now, we are able to register our task - */ + // we turned on paging => now, we are able to register our task register_task(); // APIC registers into the kernel address space diff --git a/arch/x86/mm/page64.c b/arch/x86/mm/page64.c index 5da16793..0ac422cf 100644 --- a/arch/x86/mm/page64.c +++ b/arch/x86/mm/page64.c @@ -515,7 +515,7 @@ int unmap_region(size_t viraddr, uint32_t npages) index_pgd = (viraddr >> 21) & 0x1FF; index_pgt = (viraddr >> 12) & 0x1FF; - // Currently, we allocate pages only in kernel space. + // currently, we allocate pages only in kernel space. 
// => physical address of the page table is identical of the virtual address pdpt = (page_map_t*) (task->page_map->entries[index_pml4] & PAGE_MASK); if (!pdpt) { @@ -644,8 +644,10 @@ static void pagefault_handler(struct state *s) kprintf("Could not map 0x%x at 0x%x\n", phyaddr, viraddr); put_page(phyaddr); } - // handle missing paging structures for userspace - // all kernel space paging structures have been initialized in entry64.asm + /* + * handle missing paging structures for userspace + * all kernel space paging structures have been initialized in entry64.asm + */ else if (viraddr >= PAGE_PGT) { kprintf("map_region: missing paging structure at: 0x%lx (%s)\n", viraddr, map_to_lvlname(viraddr)); @@ -683,7 +685,7 @@ int arch_paging_init(void) { uint32_t i, npages; - // uninstall default handler and install our own + // replace default pagefault handler irq_uninstall_handler(14); irq_install_handler(14, pagefault_handler); @@ -693,7 +695,7 @@ int arch_paging_init(void) */ #if MAX_CORES > 1 - // Reserve page for smp boot code + // reserve page for smp boot code if (!map_region(SMP_SETUP_ADDR, SMP_SETUP_ADDR, 1, MAP_KERNEL_SPACE|MAP_NO_CACHE)) { kputs("could not reserve page for smp boot code\n"); return -ENOMEM; @@ -702,9 +704,7 @@ int arch_paging_init(void) #ifdef CONFIG_MULTIBOOT #if 0 - /* - * Map reserved memory regions into the kernel space - */ + // map reserved memory regions into the kernel space if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MEM_MAP)) { multiboot_memory_map_t* mmap = (multiboot_memory_map_t*) mb_info->mmap_addr; multiboot_memory_map_t* mmap_end = (void*) ((size_t) mb_info->mmap_addr + mb_info->mmap_length); @@ -744,10 +744,7 @@ int arch_paging_init(void) } #endif - /* - * we turned on paging - * => now, we are able to register our task - */ + // we turned on paging => now, we are able to register our task register_task(); // APIC registers into the kernel address space diff --git a/fs/initrd.c b/fs/initrd.c index bf92ff66..fe1cadca 100644 --- a/fs/initrd.c +++ b/fs/initrd.c @@ -253,7 +253,7 @@ static int initrd_open(fildes_t* file, const char* name) /* opendir was called: */ if (name[0] == '\0') return 0; - + /* open file was called: */ if (!(file->flags & O_CREAT)) return -ENOENT; @@ -264,11 +264,11 @@ static int initrd_open(fildes_t* file, const char* name) vfs_node_t* new_node = kmalloc(sizeof(vfs_node_t)); if (BUILTIN_EXPECT(!new_node, 0)) return -EINVAL; - + blist = &file->node->block_list; dir_block_t* dir_block; dirent_t* dirent; - + memset(new_node, 0x00, sizeof(vfs_node_t)); new_node->type = FS_FILE; new_node->read = &initrd_read; @@ -286,7 +286,7 @@ static int initrd_open(fildes_t* file, const char* name) if (!dirent->vfs_node) { dirent->vfs_node = new_node; strncpy(dirent->name, (char*) name, MAX_FNAME); - goto exit_create_file; // there might be a better Solution *************** + goto exit_create_file; // TODO: there might be a better Solution } } } diff --git a/include/metalsvm/page.h b/include/metalsvm/page.h index 04ef88ce..69ff33a1 100644 --- a/include/metalsvm/page.h +++ b/include/metalsvm/page.h @@ -29,10 +29,7 @@ #include #include -/** - * Sets up the environment, page directories etc and - * enables paging. - */ +/** @brief Sets up the environment, page directories etc and enables paging. 
*/ static inline int paging_init(void) { return arch_paging_init(); } #endif diff --git a/include/metalsvm/stddef.h b/include/metalsvm/stddef.h index 725365e2..6dc522a0 100644 --- a/include/metalsvm/stddef.h +++ b/include/metalsvm/stddef.h @@ -28,7 +28,7 @@ extern "C" { #endif -#define NULL ((void*) 0) +#define NULL ((void*) 0) typedef unsigned int tid_t; @@ -62,10 +62,10 @@ typedef unsigned int tid_t; irq_nested_enable(flags);\ return ret; \ } -#define CORE_ID smp_id() + #define CORE_ID smp_id() #endif -/* needed to find the task, which is currently running on this core */ +// needed to find the task, which is currently running on this core struct task; DECLARE_PER_CORE(struct task*, current_task); diff --git a/include/metalsvm/stdlib.h b/include/metalsvm/stdlib.h index c8a2e8d1..1796fee0 100644 --- a/include/metalsvm/stdlib.h +++ b/include/metalsvm/stdlib.h @@ -113,7 +113,7 @@ unsigned long strtoul(const char* nptr, char** endptr, int base); */ static inline int atoi(const char *str) { - return (int)strtol(str, (char **)NULL, 10); + return (int)strtol(str, (char **) NULL, 10); } #ifdef __cplusplus diff --git a/include/metalsvm/tasks.h b/include/metalsvm/tasks.h index d1e2f973..21a38b5f 100644 --- a/include/metalsvm/tasks.h +++ b/include/metalsvm/tasks.h @@ -147,9 +147,7 @@ tid_t wait(int32_t* result); */ void update_load(void); -/** @brief Print the current cpu load - * - */ +/** @brief Print the current cpu load */ void dump_load(void); #if MAX_CORES > 1 @@ -201,9 +199,7 @@ int block_current_task(void); */ int set_timer(uint64_t deadline); -/** @brief check is a timer is expired - * - */ +/** @brief check is a timer is expired */ void check_timers(void); /** @brief Abort current task */ diff --git a/kernel/init.c b/kernel/init.c index c3f6d0e9..67b84784 100644 --- a/kernel/init.c +++ b/kernel/init.c @@ -63,7 +63,7 @@ extern const void bss_end; int lowlevel_init(void) { // initialize .bss section - memset((void*)&bss_start, 0x00, ((size_t) &bss_end - (size_t) &bss_start)); + memset(&bss_start, 0x00, (char*) &bss_end - (char*) &bss_start); koutput_init(); diff --git a/kernel/main.c b/kernel/main.c index 59355390..c0104837 100644 --- a/kernel/main.c +++ b/kernel/main.c @@ -74,6 +74,7 @@ int main(void) kprintf("This is MetalSVM %s Build %u, %u\n", METALSVM_VERSION, &__BUILD_DATE, &__BUILD_TIME); popbg(); + system_init(); irq_init(); timer_init(); @@ -86,7 +87,7 @@ int main(void) icc_init(); svm_init(); #endif - initrd_init(); + initrd_init(); irq_enable(); diff --git a/kernel/syscall.c b/kernel/syscall.c index 1d7eb0d2..bd086047 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -109,7 +109,7 @@ static int sys_open(const char* name, int flags, int mode) curr_task->fildes_table[fd] = NULL; return check; } - + return fd; } diff --git a/kernel/tasks.c b/kernel/tasks.c index f3c1cbf0..40594421 100644 --- a/kernel/tasks.c +++ b/kernel/tasks.c @@ -78,6 +78,7 @@ DEFINE_PER_CORE(task_t*, current_task, task_table+0); extern const void boot_stack; /** @brief helper function for the assembly code to determine the current task + * * @return Pointer to the task_t structure of current task */ task_t* get_current_task(void) { @@ -193,8 +194,7 @@ static void wakeup_blocked_tasks(int result) spinlock_irqsave_unlock(&table_lock); } -/** @brief A procedure to be called by - * procedures which are called by exiting tasks. */ +/** @brief A procedure to be called by procedures which are called by exiting tasks. 
*/ static void NORETURN do_exit(int arg) { vma_t* tmp; task_t* curr_task = per_core(current_task); @@ -204,14 +204,14 @@ static void NORETURN do_exit(int arg) { for (fd = 0; fd < NR_OPEN; fd++) { if(curr_task->fildes_table[fd] != NULL) { /* - * delete a descriptor from the per-process object - * reference table. If this is not the last reference to the underlying - * object, the object will be ignored. - */ + * Delete a descriptor from the per-process object + * reference table. If this is not the last reference to the underlying + * object, the object will be ignored. + */ if (curr_task->fildes_table[fd]->count == 1) { - /* try to close the file */ + // try to close the file status = close_fs(curr_task->fildes_table[fd]); - /* close command failed -> return check = errno */ + // close command failed -> return check = errno if (BUILTIN_EXPECT(status < 0, 0)) kprintf("Task %u was not able to close file descriptor %i. close_fs returned %d", curr_task->id, fd, -status); kfree(curr_task->fildes_table[fd], sizeof(fildes_t)); @@ -247,7 +247,7 @@ static void NORETURN do_exit(int arg) { #if 0 if (atomic_int32_read(&curr_task->user_usage)) kprintf("Memory leak! Task %d did not release %d pages\n", - curr_task->id, atomic_int32_read(&curr_task->user_usage)); + curr_task->id, atomic_int32_read(&curr_task->user_usage)); #endif curr_task->status = TASK_FINISHED; @@ -412,6 +412,7 @@ int sys_fork(void) spinlock_init(&task_table[i].vma_lock); + // init fildes_table // copy VMA list child = &task_table[i].vma_list; parent = parent_task->vma_list; @@ -434,7 +435,6 @@ int sys_fork(void) } - /* init fildes_table */ task_table[i].fildes_table = kmalloc(sizeof(filp_t)*NR_OPEN); memcpy(task_table[i].fildes_table, parent_task->fildes_table, sizeof(filp_t)*NR_OPEN); for (fd_i = 0; fd_i < NR_OPEN; fd_i++) diff --git a/mm/memory.c b/mm/memory.c index 56fb7ae2..d0008e6b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -248,7 +248,6 @@ int mmu_init(void) page_set_mark((size_t)bootinfo >> PAGE_SHIFT); atomic_int32_inc(&total_allocated_pages); atomic_int32_dec(&total_available_pages); - #else #error Currently, MetalSVM supports only the Multiboot specification or the RockCreek processor! #endif @@ -318,8 +317,8 @@ int mmu_init(void) * The init ram disk are already loaded. * Therefore, we set these pages as used. */ - for(addr=bootinfo->addr; addr < bootinfo->addr+bootinfo->size; addr+=PAGE_SIZE) { - // This area is already mapped, so we need to virt_to_phys() these addresses. + for(addr=bootinfo->addr; addraddr+bootinfo->size; addr+=PAGE_SIZE) { + // this area is already mapped, so we need to virt_to_phys() these addresses. 
page_set_mark(virt_to_phys(addr) >> PAGE_SHIFT); atomic_int32_inc(&total_allocated_pages); atomic_int32_dec(&total_available_pages); From af5fa15d8dda2999397696c24dab5eb902d4c114 Mon Sep 17 00:00:00 2001 From: Steffen Vogel Date: Wed, 20 Nov 2013 12:04:55 +0100 Subject: [PATCH 09/21] fixed possible wrap-around in tlb_flush functions --- arch/x86/include/asm/processor.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 8e97d62a..398875ce 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -273,7 +273,7 @@ int ipi_tlb_flush(void); /** @brief Flush a specific page entry in TLB * @param addr The (virtual) address of the page to flush */ -static inline void tlb_flush_one_page(uint32_t addr) +static inline void tlb_flush_one_page(size_t addr) { asm volatile("invlpg (%0)" : : "r"(addr) : "memory"); #if MAX_CORES > 1 @@ -293,7 +293,7 @@ static inline void tlb_flush_one_page(uint32_t addr) */ static inline void tlb_flush(void) { - uint32_t val = read_cr3(); + size_t val = read_cr3(); if (val) write_cr3(val); From 76e52aa473c055ffb355f68dee5e42fdbd93707a Mon Sep 17 00:00:00 2001 From: Steffen Vogel Date: Wed, 20 Nov 2013 12:06:53 +0100 Subject: [PATCH 10/21] time is precious... --- kernel/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/main.c b/kernel/main.c index c0104837..2a511343 100644 --- a/kernel/main.c +++ b/kernel/main.c @@ -103,7 +103,7 @@ int main(void) disable_timer_irq(); #endif - sleep(5); + sleep(2); create_kernel_task(&id, initd, NULL, NORMAL_PRIO); kprintf("Create initd with id %u\n", id); reschedule(); From 71f55f0a8947e33b98b029753258f943cf129e4a Mon Sep 17 00:00:00 2001 From: Steffen Vogel Date: Wed, 20 Nov 2013 13:19:58 +0100 Subject: [PATCH 11/21] ported userspace tasks to new VMA implementation (untested!) --- kernel/tasks.c | 72 ++++++++++---------------------------------------- 1 file changed, 14 insertions(+), 58 deletions(-) diff --git a/kernel/tasks.c b/kernel/tasks.c index f3c1cbf0..6c04bea2 100644 --- a/kernel/tasks.c +++ b/kernel/tasks.c @@ -196,7 +196,6 @@ static void wakeup_blocked_tasks(int result) /** @brief A procedure to be called by * procedures which are called by exiting tasks. 
*/ static void NORETURN do_exit(int arg) { - vma_t* tmp; task_t* curr_task = per_core(current_task); uint32_t flags, core_id, fd, status; @@ -230,18 +229,7 @@ static void NORETURN do_exit(int arg) { wakeup_blocked_tasks(arg); - //vma_dump(curr_task); - spinlock_lock(&curr_task->vma_lock); - - // remove memory regions - while((tmp = curr_task->vma_list) != NULL) { - kfree((void*) tmp->start, tmp->end - tmp->start + 1); - curr_task->vma_list = tmp->next; - kfree((void*) tmp, sizeof(vma_t)); - } - - spinlock_unlock(&curr_task->vma_lock); - + drop_vma_list(); // kfree virtual memory areas and the vma_list drop_page_map(); // delete page directory and its page tables #if 0 @@ -262,9 +250,7 @@ static void NORETURN do_exit(int arg) { reschedule(); kprintf("Kernel panic: scheduler on core %d found no valid task\n", CORE_ID); - while(1) { - HALT; - } + while(1) HALT; } /** @brief A procedure to be called by kernel tasks */ @@ -330,7 +316,7 @@ static int create_task(tid_t* id, entry_point_t ep, void* arg, uint8_t prio, uin ret = create_page_map(task_table+i, 0); if (ret < 0) { ret = -ENOMEM; - goto create_task_out; + goto out; } task_table[i].id = i; @@ -376,7 +362,7 @@ static int create_task(tid_t* id, entry_point_t ep, void* arg, uint8_t prio, uin } } -create_task_out: +out: spinlock_irqsave_unlock(&table_lock); return ret; @@ -387,11 +373,7 @@ int sys_fork(void) int ret = -ENOMEM; unsigned int i, core_id, fd_i; task_t* parent_task = per_core(current_task); - vma_t** child; - vma_t* parent; - vma_t* tmp; - spinlock_lock(&parent_task->vma_lock); spinlock_irqsave_lock(&table_lock); core_id = CORE_ID; @@ -403,34 +385,19 @@ int sys_fork(void) ret = create_page_map(task_table+i, 1); if (ret < 0) { ret = -ENOMEM; - goto create_task_out; + goto out; } + ret = copy_vma_list(child_task); + if (BUILTIN_EXPECT(!ret, 0)) { + ret = -ENOMEM; + goto out; + } task_table[i].id = i; task_table[i].last_stack_pointer = NULL; task_table[i].stack = create_stack(); - spinlock_init(&task_table[i].vma_lock); - // copy VMA list - child = &task_table[i].vma_list; - parent = parent_task->vma_list; - tmp = NULL; - - while(parent) { - *child = (vma_t*) kmalloc(sizeof(vma_t)); - if (BUILTIN_EXPECT(!child, 0)) - break; - - (*child)->start = parent->start; - (*child)->end = parent->end; - (*child)->type = parent->type; - (*child)->prev = tmp; - (*child)->next = NULL; - - parent = parent->next; - tmp = *child; - child = &((*child)->next); } @@ -487,9 +454,8 @@ int sys_fork(void) } } -create_task_out: +out: spinlock_irqsave_unlock(&table_lock); - spinlock_unlock(&parent_task->vma_lock); return ret; } @@ -679,7 +645,7 @@ static int load_task(load_args_t* largs) flags |= VMA_WRITE; if (prog_header.flags & PF_X) flags |= VMA_EXECUTE; - vma_add(curr_task, prog_header.virt_addr, prog_header.virt_addr+npages*PAGE_SIZE-1, flags); + vma_add(prog_header.virt_addr, prog_header.virt_addr+npages*PAGE_SIZE-1, flags); if (!(prog_header.flags & PF_W)) change_page_permissions(prog_header.virt_addr, prog_header.virt_addr+npages*PAGE_SIZE-1, flags); @@ -708,7 +674,7 @@ static int load_task(load_args_t* largs) flags |= VMA_WRITE; if (prog_header.flags & PF_X) flags |= VMA_EXECUTE; - vma_add(curr_task, stack, stack+npages*PAGE_SIZE-1, flags); + vma_add(stack, stack+npages*PAGE_SIZE-1, flags); break; } } @@ -871,13 +837,11 @@ int create_user_task_on_core(tid_t* id, const char* fname, char** argv, uint32_t int sys_execve(const char* fname, char** argv, char** env) { vfs_node_t* node; - vma_t* tmp; size_t i, buffer_size = 0; load_args_t* load_args = 
NULL; char *dest, *src; int ret, argc = 0; int envc = 0; - task_t* curr_task = per_core(current_task); node = findnode_fs((char*) fname); if (!node || !(node->type == FS_FILE)) @@ -920,16 +884,8 @@ int sys_execve(const char* fname, char** argv, char** env) while ((*dest++ = *src++) != 0); } - spinlock_lock(&curr_task->vma_lock); - // remove old program - while((tmp = curr_task->vma_list) != NULL) { - kfree((void*) tmp->start, tmp->end - tmp->start + 1); - curr_task->vma_list = tmp->next; - kfree((void*) tmp, sizeof(vma_t)); - } - - spinlock_unlock(&curr_task->vma_lock); + drop_vma_list(); /* * we use a trap gate to enter the kernel From de33962e9d3c0026514fb90ef6e7ad9efbcf4b3b Mon Sep 17 00:00:00 2001 From: Steffen Vogel Date: Wed, 20 Nov 2013 13:22:09 +0100 Subject: [PATCH 12/21] removed old vm_alloc()/vm_free() which have been replaced by vma_alloc()/vma_free() --- arch/x86/mm/page64.c | 149 ------------------------------------------- 1 file changed, 149 deletions(-) diff --git a/arch/x86/mm/page64.c b/arch/x86/mm/page64.c index 5da16793..dc22b5fe 100644 --- a/arch/x86/mm/page64.c +++ b/arch/x86/mm/page64.c @@ -405,91 +405,6 @@ int change_page_permissions(size_t start, size_t end, uint32_t flags) return -EINVAL; } -/* - * Use the first fit algorithm to find a valid address range - * - * TODO: O(n) => bad performance, we need a better approach - */ -size_t vm_alloc(uint32_t npages, uint32_t flags) -{ - task_t* task = per_core(current_task); - size_t viraddr, i, j, ret = 0; - size_t start, end; - page_map_t* pdpt, * pgd, * pgt; - uint16_t index_pml4, index_pdpt; - uint16_t index_pgd, index_pgt; - - if (BUILTIN_EXPECT(!task || !task->page_map || !paging_enabled, 0)) - return 0; - - if (flags & MAP_KERNEL_SPACE) { - start = (((size_t) &kernel_end) + 10*PAGE_SIZE) & PAGE_MASK; - end = (KERNEL_SPACE - PAGE_SIZE) & PAGE_MASK; - } else { - start = KERNEL_SPACE & PAGE_MASK; - end = PAGE_MASK; - } - - if (BUILTIN_EXPECT(!npages, 0)) - return 0; - - if (flags & MAP_KERNEL_SPACE) - spinlock_lock(&kslock); - else - spinlock_irqsave_lock(&task->page_lock); - - viraddr = i = start; - j = 0; - do { - index_pml4 = (viraddr >> 39) & 0x1FF; - index_pdpt = (viraddr >> 30) & 0x1FF; - index_pgd = (viraddr >> 21) & 0x1FF; - index_pgt = (viraddr >> 12) & 0x1FF; - - // Currently, we allocate pages only in kernel space. 
- // => physical address of the page table is identical of the virtual address - pdpt = (page_map_t*) (task->page_map->entries[index_pml4] & PAGE_MASK); - if (!pdpt) { - i += (size_t)PAGE_MAP_ENTRIES*PAGE_MAP_ENTRIES*PAGE_MAP_ENTRIES*PAGE_SIZE; - j += PAGE_MAP_ENTRIES*PAGE_MAP_ENTRIES*PAGE_MAP_ENTRIES; - continue; - } - - pgd = (page_map_t*) (pdpt->entries[index_pdpt] & PAGE_MASK); - if (!pgd) { - i += PAGE_MAP_ENTRIES*PAGE_MAP_ENTRIES*PAGE_SIZE; - j += PAGE_MAP_ENTRIES*PAGE_MAP_ENTRIES; - continue; - } - - pgt = (page_map_t*) (pgd->entries[index_pgd] & PAGE_MASK); - if (!pgt) { - i += PAGE_MAP_ENTRIES*PAGE_SIZE; - j += PAGE_MAP_ENTRIES; - continue; - } - - if (!(pgt->entries[index_pgt])) { - i += PAGE_SIZE; - j++; - } else { - // restart search - j = 0; - viraddr = i + PAGE_SIZE; - i = i + PAGE_SIZE; - } - } while((j < npages) && (i<=end)); - - if ((j >= npages) && (viraddr < end)) - ret = viraddr; - - if (flags & MAP_KERNEL_SPACE) - spinlock_unlock(&kslock); - else - spinlock_irqsave_unlock(&task->page_lock); - - return ret; -} int unmap_region(size_t viraddr, uint32_t npages) { @@ -558,70 +473,6 @@ int unmap_region(size_t viraddr, uint32_t npages) return 0; } -int vm_free(size_t viraddr, uint32_t npages) -{ - task_t* task = per_core(current_task); - page_map_t* pdpt, * pgd, * pgt; - size_t i; - uint16_t index_pml4, index_pdpt; - uint16_t index_pgd, index_pgt; - - if (BUILTIN_EXPECT(!task || !task->page_map || !paging_enabled, 0)) - return -EINVAL; - - if (viraddr <= KERNEL_SPACE) - spinlock_lock(&kslock); - else - spinlock_irqsave_lock(&task->page_lock); - - i = 0; - while(i> 39) & 0x1FF; - index_pdpt = (viraddr >> 30) & 0x1FF; - index_pgd = (viraddr >> 21) & 0x1FF; - index_pgt = (viraddr >> 12) & 0x1FF; - - // Currently, we allocate pages only in kernel space. 
- // => physical address of the page table is identical of the virtual address - pdpt = (page_map_t*) (task->page_map->entries[index_pml4] & PAGE_MASK); - if (!pdpt) { - viraddr += (size_t) PAGE_MAP_ENTRIES*PAGE_MAP_ENTRIES*PAGE_MAP_ENTRIES*PAGE_SIZE; - i += PAGE_MAP_ENTRIES*PAGE_MAP_ENTRIES*PAGE_MAP_ENTRIES; - continue; - } - - pgd = (page_map_t*) (pdpt->entries[index_pdpt] & PAGE_MASK); - if (!pgd) { - viraddr += PAGE_MAP_ENTRIES*PAGE_MAP_ENTRIES*PAGE_SIZE; - i += PAGE_MAP_ENTRIES*PAGE_MAP_ENTRIES; - continue; - } - - pgt = (page_map_t*) (pgd->entries[index_pgd] & PAGE_MASK); - if (!pgt) { - viraddr += PAGE_MAP_ENTRIES*PAGE_SIZE; - i += PAGE_MAP_ENTRIES; - continue; - } - - if (pgt->entries[index_pgt]) - pgt->entries[index_pgt] = 0; - - viraddr +=PAGE_SIZE; - i++; - - tlb_flush_one_page(viraddr); - } - - if (viraddr <= KERNEL_SPACE) - spinlock_unlock(&kslock); - else - spinlock_irqsave_unlock(&task->page_lock); - - return 0; -} - static void pagefault_handler(struct state *s) { task_t* task = per_core(current_task); From acc6e2124e1c2a2d1d1eb212ed0118789403cab8 Mon Sep 17 00:00:00 2001 From: Steffen Vogel Date: Wed, 20 Nov 2013 13:51:03 +0100 Subject: [PATCH 13/21] disable VGA output and kill remaining processes --- Makefile.example | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Makefile.example b/Makefile.example index 148293dd..1b89d152 100644 --- a/Makefile.example +++ b/Makefile.example @@ -128,17 +128,20 @@ qemu: newlib tools $(NAME).elf $(QEMU) -monitor stdio -serial tcp::12346,server,nowait -smp $(SMP) -net nic,model=rtl8139 -net user,hostfwd=tcp::12345-:4711 -kernel metalsvm.elf -initrd tools/initrd.img qemudbg: newlib tools $(NAME).elf - $(QEMU) -s -S -monitor stdio -serial tcp::12346,server -smp $(SMP) -net nic,model=rtl8139 -net user,hostfwd=tcp::12345-:4711 -kernel metalsvm.elf -initrd tools/initrd.img + $(QEMU) -s -S -nographic -monitor stdio -serial tcp::12346,server -smp $(SMP) -net nic,model=rtl8139 -net user,hostfwd=tcp::12345-:4711 -kernel metalsvm.elf -initrd tools/initrd.img gdb: $(NAME).elf $(GDB) -q -x script.gdb debug: newlib tools $(NAME).elf + killall $(QEMU) || true + killall $(GDB) || true + sleep 1 gnome-terminal --working-directory=$(TOPDIR) \ - --tab --title=Debug --command="bash -c 'sleep 1 && telnet localhost 12346'" \ --tab --title=Shell --command="bash -c 'sleep 1 && telnet localhost 12345'" \ --tab --title=QEmu --command="make qemudbg" \ - --tab --title=GDB --command="make gdb" + --tab --title=GDB --command="make gdb" \ + --tab --title=Debug --command="bash -c 'sleep 1 && telnet localhost 12346'" clean: $Q$(RM) $(NAME).elf $(NAME).sym *~ From 1e98d0e4104a0aa24c6f2eb3d5e484879e285379 Mon Sep 17 00:00:00 2001 From: Steffen Vogel Date: Wed, 20 Nov 2013 11:38:24 +0100 Subject: [PATCH 14/21] added first implementation of a buddy system kmalloc() allocator --- include/metalsvm/malloc.h | 72 ++++++++++++++ mm/malloc.c | 203 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 275 insertions(+) create mode 100644 include/metalsvm/malloc.h create mode 100644 mm/malloc.c diff --git a/include/metalsvm/malloc.h b/include/metalsvm/malloc.h new file mode 100644 index 00000000..572f3411 --- /dev/null +++ b/include/metalsvm/malloc.h @@ -0,0 +1,72 @@ +/* + * Copyright 2010 Steffen Vogel, Chair for Operating Systems, + * RWTH Aachen University + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * This file is part of MetalSVM. + */ + +#ifndef __MALLOC_H__ +#define __MALLOC_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/// Binary exponent of maximal size for kmalloc() +#define BUDDY_MAX 32 // 4 GB +/// Binary exponent of minimal buddy size +#define BUDDY_MIN 4 // 16 Byte >= sizeof(buddy_prefix_t) +/// Binary exponent of the size which we allocate at least in one call to buddy_fill(); +#define BUDDY_ALLOC 17 // 128 KByte >= PAGE_SHIFT, TODO: add Huge Page support? + +#define BUDDY_LISTS (BUDDY_MAX-BUDDY_MIN+1) +#define BUDDY_MAGIC 0xBABE + +union buddy; + +/** @brief Buddy + * + * Every free memory block is stored in a linked list according to its size. + * We can use this free memory to store store this buddy_t union which represents + * this block (the buddy_t union is alligned to the front). + * Therefore the address of the buddy_t union is equal with the address + * of the underlying free memory block. + * + * Every allocated memory block is prefixed with its binary size exponent and + * a known magic number. This prefix is hidden by the user because its located + * before the actual memory address returned by kmalloc() + */ +typedef union buddy { + /// Pointer to the next buddy in the linked list. + union buddy* next; + struct { + /// The binary exponent of the block size + uint8_t exponent; + /// Must be equal to BUDDY_MAGIC for a valid memory block + uint16_t magic; + } prefix; +} buddy_t; + +/** @brief Dump free buddies */ +void buddy_dump(void); + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/mm/malloc.c b/mm/malloc.c new file mode 100644 index 00000000..110f38ba --- /dev/null +++ b/mm/malloc.c @@ -0,0 +1,203 @@ +/* + * Copyright 2010 Steffen Vogel, Chair for Operating Systems, + * RWTH Aachen University + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * This file is part of MetalSVM. 
+ */ + +#include +#include +#include +#include + +/// A linked list for each binary size exponent +static buddy_t* buddy_lists[BUDDY_LISTS] = { NULL }; +/// Lock for the buddy lists +static spinlock_t buddy_lock = SPINLOCK_INIT; + +/** @brief Check if larger free buddies are available */ +static inline int buddy_large_avail(uint8_t exp) +{ + while (exp BUDDY_MAX) + return 0; + else if (exp < BUDDY_MIN) + return BUDDY_MIN; + else + return exp; +} + +/** @brief Get a free buddy by potentially splitting a larger one */ +static buddy_t* buddy_get(int exp) +{ + spinlock_lock(&buddy_lock); + buddy_t** list = &buddy_lists[exp-BUDDY_MIN]; + buddy_t* buddy = *list; + buddy_t* split; + + if (buddy) + // there is already a free buddy => + // we remove it from the list + *list = buddy->next; + else if (exp >= BUDDY_ALLOC && !buddy_large_avail(exp)) + // theres no free buddy larger than exp => + // we can allocate new memory + buddy = (buddy_t*) palloc(1<next = *list; + *list = split; + } + +out: + spinlock_unlock(&buddy_lock); + + return buddy; +} + +/** @brief Put a buddy back to its free list + * + * TODO: merge adjacent buddies (memory compaction) + */ +static void buddy_put(buddy_t* buddy) +{ + spinlock_lock(&buddy_lock); + buddy_t** list = &buddy_lists[buddy->prefix.exponent-BUDDY_MIN]; + buddy->next = *list; + *list = buddy; + spinlock_unlock(&buddy_lock); +} + +void buddy_dump() +{ + size_t free = 0; + int i; + for (i=0; inext) { + kprintf(" %p -> %p \n", buddy, buddy->next); + free += 1<> PAGE_SHIFT; + + kprintf("palloc(%lu) (%lu pages)\n", sz, npages); // TODO: remove + + // get free virtual address space + viraddr = vma_alloc(npages*PAGE_SIZE, VMA_HEAP); + if (BUILTIN_EXPECT(!viraddr, 0)) + return NULL; + + // get continous physical pages + phyaddr = get_pages(npages); + if (BUILTIN_EXPECT(!phyaddr, 0)) { + vma_free(viraddr, viraddr+npages*PAGE_SIZE); + return NULL; + } + + // map physical pages to VMA + viraddr = map_region(viraddr, phyaddr, npages, flags); + if (BUILTIN_EXPECT(!viraddr, 0)) { + vma_free(viraddr, viraddr+npages*PAGE_SIZE); + put_pages(phyaddr, npages); + return NULL; + } + + return (void*) viraddr; +} + +void pfree(void* addr, size_t sz) +{ + if (BUILTIN_EXPECT(!addr || !sz, 0)) + return; + + size_t i; + size_t phyaddr; + size_t viraddr = (size_t) addr & PAGE_MASK; + uint32_t npages = PAGE_ALIGN(sz) >> PAGE_SHIFT; + + // memory is propably not continously mapped! 
+void* palloc(size_t sz, uint32_t flags)
+{
+	size_t phyaddr, viraddr;
+	uint32_t npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;
+
+	kprintf("palloc(%lu) (%lu pages)\n", sz, npages); // TODO: remove
+
+	// get free virtual address space
+	viraddr = vma_alloc(npages*PAGE_SIZE, VMA_HEAP);
+	if (BUILTIN_EXPECT(!viraddr, 0))
+		return NULL;
+
+	// get continuous physical pages
+	phyaddr = get_pages(npages);
+	if (BUILTIN_EXPECT(!phyaddr, 0)) {
+		vma_free(viraddr, viraddr+npages*PAGE_SIZE);
+		return NULL;
+	}
+
+	// map physical pages to VMA
+	viraddr = map_region(viraddr, phyaddr, npages, flags);
+	if (BUILTIN_EXPECT(!viraddr, 0)) {
+		vma_free(viraddr, viraddr+npages*PAGE_SIZE);
+		put_pages(phyaddr, npages);
+		return NULL;
+	}
+
+	return (void*) viraddr;
+}
+
+void pfree(void* addr, size_t sz)
+{
+	if (BUILTIN_EXPECT(!addr || !sz, 0))
+		return;
+
+	size_t i;
+	size_t phyaddr;
+	size_t viraddr = (size_t) addr & PAGE_MASK;
+	uint32_t npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;
+
+	// memory is probably not continuously mapped!
+	for (i=0; i<npages; i++) {
+		phyaddr = virt_to_phys(viraddr + i*PAGE_SIZE);
+		put_page(phyaddr);
+	}
+
+	unmap_region(viraddr, npages);
+	vma_free(viraddr, viraddr+npages*PAGE_SIZE);
+}
+
+void* kmalloc(size_t sz)
+{
+	if (BUILTIN_EXPECT(!sz, 0))
+		return NULL;
+
+	// add space for the hidden prefix
+	sz += sizeof(buddy_t);
+
+	int exp = buddy_exp(sz);
+	if (BUILTIN_EXPECT(!exp, 0))
+		return NULL;
+
+	buddy_t* buddy = buddy_get(exp);
+	if (BUILTIN_EXPECT(!buddy, 0))
+		return NULL;
+
+	// setup the prefix of the allocated block
+	buddy->prefix.magic = BUDDY_MAGIC;
+	buddy->prefix.exponent = exp;
+
+	// pointer arithmetic: we hide the prefix
+	return buddy+1;
+}
+
+void kfree(void *addr)
+{
+	if (BUILTIN_EXPECT(!addr, 0))
+		return;
+
+	buddy_t* buddy = (buddy_t*) addr - 1; // get prefix
+
+	// check magic
+	if (BUILTIN_EXPECT(buddy->prefix.magic != BUDDY_MAGIC, 0))
+		return;
+
+	buddy_put(buddy);
+}

From 954ccf13790d066f84599b7725760142cc9814d6 Mon Sep 17 00:00:00 2001
From: Steffen Vogel
Date: Wed, 20 Nov 2013 11:38:59 +0100
Subject: [PATCH 15/21] added malloc.c to Makefile

---
 mm/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/Makefile b/mm/Makefile
index ded1bc75..f3a051b8 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -1,4 +1,4 @@
-C_source := memory.c vma.c
+C_source := memory.c vma.c malloc.c
 MODULE := mm
 
 include $(TOPDIR)/Makefile.inc

From 0153fb538d2678ab10e27f7838569ed0b514c4da Mon Sep 17 00:00:00 2001
From: Steffen Vogel
Date: Wed, 20 Nov 2013 11:40:04 +0100
Subject: [PATCH 16/21] removed old kmalloc()

---
 kernel/tasks.c | 26 ++++++++++++++++++++
 mm/memory.c    | 67 --------------------------------------------------
 2 files changed, 26 insertions(+), 67 deletions(-)

diff --git a/kernel/tasks.c b/kernel/tasks.c
index 3f503842..da0e2a7c 100644
--- a/kernel/tasks.c
+++ b/kernel/tasks.c
@@ -97,6 +97,32 @@ uint32_t get_highest_priority(void)
 	return msb(runqueues[CORE_ID].prio_bitmap);
 }
 
+/** @brief Create a new stack for a new task
+ *
+ * @return start address of the new stack
+ */
+static void* create_stack(void)
+{
+	return palloc(KERNEL_STACK_SIZE, MAP_KERNEL_SPACE);
+}
+
+/** @brief Delete the stack of a finished task
+ *
+ * @param task Pointer to the task
+ * @return
+ * - 0 on success
+ * - -EINVAL on failure
+ */
+static int destroy_stack(task_t* task)
+{
+	if (BUILTIN_EXPECT(!task || !task->stack, 0))
+		return -EINVAL;
+
+	pfree(task->stack, KERNEL_STACK_SIZE);
+
+	return 0;
+}
+
 int multitasking_init(void) {
 	if (BUILTIN_EXPECT(task_table[0].status != TASK_IDLE, 0)) {
 		kputs("Task 0 is not an idle task\n");
diff --git a/mm/memory.c b/mm/memory.c
index 72001314..97be8c32 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -354,70 +354,3 @@ int mmu_init(void)
 
 	return ret;
 }
-
-void* mem_allocation(size_t sz, uint32_t flags)
-{
-	size_t phyaddr, viraddr;
-	uint32_t npages = sz >> PAGE_SHIFT;
-
-	if (sz & (PAGE_SIZE-1))
-		npages++;
-
-	phyaddr = get_pages(npages);
-	if (BUILTIN_EXPECT(!phyaddr, 0))
-		return 0;
-
-	viraddr = map_region(0, phyaddr, npages, flags);
-
-	return (void*) viraddr;
-}
-
-void* kmalloc(size_t sz)
-{
-	return mem_allocation(sz, MAP_KERNEL_SPACE);
-}
-
-void kfree(void* addr, size_t sz)
-{
-	uint32_t index, npages, i;
-	size_t phyaddr;
-
-	if (BUILTIN_EXPECT(!addr && !sz, 0))
-		return;
-
-	npages = sz >> PAGE_SHIFT;
-	if (sz & (PAGE_SIZE-1))
-		npages++;
-
-	spinlock_lock(&bitmap_lock);
-	for(i=0; i<npages; i++) {
-		phyaddr = virt_to_phys((size_t) addr + i*PAGE_SIZE);
-		index = phyaddr >> PAGE_SHIFT;
-		page_clear_mark(index);
-	}
-	spinlock_unlock(&bitmap_lock);
-
-	vm_free((size_t) addr, npages);
-
-	atomic_int32_sub(&total_allocated_pages, npages);
-	atomic_int32_add(&total_available_pages, npages);
-}
-
-void* create_stack(void)
-{
-	return kmalloc(KERNEL_STACK_SIZE);
-}
-
-int destroy_stack(task_t* task)
-{
-	if (BUILTIN_EXPECT(!task || !task->stack, 0))
-		return -EINVAL;
-
-	kfree(task->stack, KERNEL_STACK_SIZE);
-
-	return 0;
-}

From 9018781eee286e451b0d4b409898eb0f340fd7e6 Mon Sep 17 00:00:00 2001
From: Steffen Vogel
Date: Wed, 20 Nov 2013 11:45:04 +0100
Subject: [PATCH 17/21] replaced old kfree() calls with new signature

---
 arch/x86/kernel/kb.c
| 2 +- drivers/char/socket.c | 4 ++-- drivers/char/stdio.c | 8 ++++---- fs/initrd.c | 13 ++++++------- kernel/syscall.c | 10 +++++----- kernel/tasks.c | 12 ++++++------ libkern/stdio.c | 2 +- 7 files changed, 25 insertions(+), 26 deletions(-) diff --git a/arch/x86/kernel/kb.c b/arch/x86/kernel/kb.c index 47ffea41..ed2fbf6b 100644 --- a/arch/x86/kernel/kb.c +++ b/arch/x86/kernel/kb.c @@ -37,7 +37,7 @@ void kb_init(size_t size, tid_t tid) { } void kb_finish(void) { - kfree(kb_buffer.buffer, (kb_buffer.maxsize * sizeof(char))); + kfree(kb_buffer.buffer); kb_buffer.buffer = NULL; kb_buffer.size = 0; kb_buffer.maxsize = 0; diff --git a/drivers/char/socket.c b/drivers/char/socket.c index 8210c249..a8ca241b 100755 --- a/drivers/char/socket.c +++ b/drivers/char/socket.c @@ -70,7 +70,7 @@ static ssize_t socket_write(fildes_t* file, uint8_t* buffer, size_t size) return -ENOMEM; memcpy(tmp, buffer, size); ret = lwip_write(file->offset, tmp, size); - kfree(tmp, size); + kfree(tmp); #endif if (ret < 0) ret = -errno; @@ -147,7 +147,7 @@ int socket_init(vfs_node_t* node, const char* name) } while(blist); - kfree(new_node, sizeof(vfs_node_t)); + kfree(new_node); return -ENOMEM; } diff --git a/drivers/char/stdio.c b/drivers/char/stdio.c index 54c309da..bf403ccb 100644 --- a/drivers/char/stdio.c +++ b/drivers/char/stdio.c @@ -153,7 +153,7 @@ int null_init(vfs_node_t* node, const char* name) } while(blist); - kfree(new_node, sizeof(vfs_node_t)); + kfree(new_node); return -ENOMEM; } @@ -212,7 +212,7 @@ int stdin_init(vfs_node_t* node, const char* name) } while(blist); - kfree(new_node, sizeof(vfs_node_t)); + kfree(new_node); return -ENOMEM; } @@ -271,7 +271,7 @@ int stdout_init(vfs_node_t* node, const char* name) } while(blist); - kfree(new_node, sizeof(vfs_node_t)); + kfree(new_node); return -ENOMEM; } @@ -330,7 +330,7 @@ int stderr_init(vfs_node_t* node, const char* name) } while(blist); - kfree(new_node, sizeof(vfs_node_t)); + kfree(new_node); return -ENOMEM; } diff --git a/fs/initrd.c b/fs/initrd.c index fe1cadca..a9ba0cdb 100644 --- a/fs/initrd.c +++ b/fs/initrd.c @@ -210,7 +210,7 @@ static int initrd_open(fildes_t* file, const char* name) if (file->node->type == FS_FILE) { if ((file->flags & O_CREAT) && (file->flags & O_EXCL)) return -EEXIST; - + /* in the case of O_TRUNC kfree all the nodes */ if (file->flags & O_TRUNC) { uint32_t i; @@ -221,8 +221,7 @@ static int initrd_open(fildes_t* file, const char* name) /* the first blist pointer have do remain valid. */ for(i=0; idata[i]) { - kfree(blist->data[i], - sizeof(data_block_t)); + kfree(blist->data[i]); } } if (blist->next) { @@ -234,12 +233,12 @@ static int initrd_open(fildes_t* file, const char* name) do { for(i=0; idata[i]) { - kfree(blist->data[i], sizeof(data_block_t)); + kfree(blist->data[i]); } } lastblist = blist; blist = blist->next; - kfree(lastblist, sizeof(block_list_t)); + kfree(lastblist); } while(blist); } @@ -425,9 +424,9 @@ static vfs_node_t* initrd_mkdir(vfs_node_t* node, const char* name) blist = blist->next; } while(blist); - kfree(dir_block, sizeof(dir_block_t)); + kfree(dir_block); out: - kfree(new_node, sizeof(vfs_node_t)); + kfree(new_node); return NULL; } diff --git a/kernel/syscall.c b/kernel/syscall.c index bd086047..07ecfebf 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -105,7 +105,7 @@ static int sys_open(const char* name, int flags, int mode) /* file doesn't exist! 
*/ if (check < 0) { /* tidy up the fildescriptor */ - kfree(curr_task->fildes_table[fd], sizeof(fildes_t)); + kfree(curr_task->fildes_table[fd]); curr_task->fildes_table[fd] = NULL; return check; } @@ -196,7 +196,7 @@ static int sys_socket(int domain, int type, int protocol) /* file doesn't exist! */ if (curr_task->fildes_table[fd]->node == NULL) { /* tidy up the fildescriptor */ - kfree(curr_task->fildes_table[fd], sizeof(fildes_t)); + kfree(curr_task->fildes_table[fd]); curr_task->fildes_table[fd] = NULL; return -ENOENT; } @@ -240,7 +240,7 @@ static int sys_accept(int s, struct sockaddr* addr, socklen_t* addrlen) /* file doesn't exist! */ if (curr_task->fildes_table[fd]->node == NULL) { /* tidy up the fildescriptor */ - kfree(curr_task->fildes_table[fd], sizeof(fildes_t)); + kfree(curr_task->fildes_table[fd]); curr_task->fildes_table[fd] = NULL; return -ENOENT; } @@ -273,7 +273,7 @@ static int sys_close(int fd) /* close command failed -> return check = errno */ if (BUILTIN_EXPECT(check < 0, 0)) return check; - kfree(curr_task->fildes_table[fd], sizeof(fildes_t)); + kfree(curr_task->fildes_table[fd]); curr_task->fildes_table[fd] = NULL; } else { curr_task->fildes_table[fd]->count--; @@ -356,7 +356,7 @@ static int sys_dup(int fd) * free the memory which was allocated in get_fildes() * cause will link it to another existing memory space */ - kfree(curr_task->fildes_table[new_fd], sizeof(fildes_t)); + kfree(curr_task->fildes_table[new_fd]); /* and link it to another existing memory space */ curr_task->fildes_table[new_fd] = curr_task->fildes_table[fd]; diff --git a/kernel/tasks.c b/kernel/tasks.c index da0e2a7c..3857d516 100644 --- a/kernel/tasks.c +++ b/kernel/tasks.c @@ -239,7 +239,7 @@ static void NORETURN do_exit(int arg) { // close command failed -> return check = errno if (BUILTIN_EXPECT(status < 0, 0)) kprintf("Task %u was not able to close file descriptor %i. close_fs returned %d", curr_task->id, fd, -status); - kfree(curr_task->fildes_table[fd], sizeof(fildes_t)); + kfree(curr_task->fildes_table[fd]); curr_task->fildes_table[fd] = NULL; } else { curr_task->fildes_table[fd]->count--; @@ -248,7 +248,7 @@ static void NORETURN do_exit(int arg) { } } //finally the table has to be cleared. 
- kfree(curr_task->fildes_table, sizeof(filp_t)*NR_OPEN); + kfree(curr_task->fildes_table); } kprintf("Terminate task: %u, return value %d\n", curr_task->id, arg); @@ -505,7 +505,7 @@ static int kernel_entry(void* args) ret = kernel_args->func(kernel_args->args); - kfree(kernel_args, sizeof(kernel_args_t)); + kfree(kernel_args); return ret; } @@ -764,7 +764,7 @@ static int load_task(load_args_t* largs) offset -= sizeof(int); *((int*) (stack+offset)) = largs->argc; - kfree(largs, sizeof(load_args_t)); + kfree(largs); // clear fpu state curr_task->flags &= ~(TASK_FPU_USED|TASK_FPU_INIT); @@ -796,7 +796,7 @@ static int user_entry(void* arg) ret = load_task((load_args_t*) arg); - kfree(arg, sizeof(load_args_t)); + kfree(arg); return ret; } @@ -920,7 +920,7 @@ int sys_execve(const char* fname, char** argv, char** env) ret = load_task(load_args); - kfree(load_args, sizeof(load_args_t)); + kfree(load_args); return ret; } diff --git a/libkern/stdio.c b/libkern/stdio.c index 2d8ca43f..d0a77076 100644 --- a/libkern/stdio.c +++ b/libkern/stdio.c @@ -130,7 +130,7 @@ int kmsg_init(vfs_node_t * node, const char *name) } } while (blist); - kfree(new_node, sizeof(vfs_node_t)); + kfree(new_node); return -ENOMEM; } From 7a3e77c82dba79269b051d61a231a2cd88538b10 Mon Sep 17 00:00:00 2001 From: Steffen Vogel Date: Wed, 20 Nov 2013 11:47:53 +0100 Subject: [PATCH 18/21] palloc()/pfree() replace our old kmalloc()/kfree() with PAGE_SIZE granularity --- include/metalsvm/stdlib.h | 52 +++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 29 deletions(-) diff --git a/include/metalsvm/stdlib.h b/include/metalsvm/stdlib.h index 1796fee0..9903c60f 100644 --- a/include/metalsvm/stdlib.h +++ b/include/metalsvm/stdlib.h @@ -29,9 +29,7 @@ #ifndef __STDLIB_H__ #define __STDLIB_H__ -#include -#include -#include +#include #ifdef __cplusplus extern "C" { @@ -55,46 +53,42 @@ extern "C" { void NORETURN abort(void); -/** @brief Kernel's memory allocator function. +/** @brief General page allocator function * - * This will just call mem_allocation with - * the flags MAP_KERNEL_SPACE and MAP_HEAP. - * - * @return Pointer to the new memory range - */ -void* kmalloc(size_t); - -/** @brief Kernel's more general memory allocator function. - * - * This function lets you choose flags for the newly allocated memory. + * This function allocates and maps whole pages. + * To avoid fragmentation you should use kmalloc() and kfree()! * * @param sz Desired size of the new memory - * @param flags Flags to specify + * @param flags Flags to for map_region(), vma_add() * * @return Pointer to the new memory range */ -void* mem_allocation(size_t sz, uint32_t flags); +void* palloc(size_t sz, uint32_t flags); -/** @brief Free memory +/** @brief Free general kernel memory * - * The kernel malloc doesn't track how - * much memory was allocated for which pointer, + * The pmalloc() doesn't track how much memory was allocated for which pointer, * so you have to specify how much memory shall be freed. - */ -void kfree(void*, size_t); - -/** @brief Create a new stack for a new task * - * @return start address of the new stack + * @param sz The size which should freed */ -void* create_stack(void); +void pfree(void* addr, size_t sz); -/** @brief Delete stack of a finished task +/** @brief The memory allocator function * - * @param addr Pointer to the stack - * @return 0 on success + * This allocator uses a buddy system to manage free memory. 
+ * + * @return Pointer to the new memory range */ -int destroy_stack(task_t* addr); +void* kmalloc(size_t sz); + +/** @brief The memory free function + * + * Releases memory allocated by malloc() + * + * @param addr The address to the memory block allocated by malloc() + */ +void kfree(void* addr); /** @brief String to long * From 60f8f53169fd5717a19fdcb4e8e4b8f0fd8aa5a9 Mon Sep 17 00:00:00 2001 From: Steffen Vogel Date: Wed, 20 Nov 2013 12:14:13 +0100 Subject: [PATCH 19/21] added test for new buddy malloc implementation --- apps/paging.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/apps/paging.c b/apps/paging.c index 03c49a2b..2e2277f5 100644 --- a/apps/paging.c +++ b/apps/paging.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -53,6 +54,16 @@ static void test(size_t expr, char *fmt, ...) abort(); } +/** @brief Linear feedback shift register PRNG */ +static uint16_t rand() +{ + static uint16_t lfsr = 0xACE1u; + static uint16_t bit; + + bit = ((lfsr >> 0) ^ (lfsr >> 2) ^ (lfsr >> 3) ^ (lfsr >> 5) ) & 1; + return lfsr = (lfsr >> 1) | (bit << 15); +} + /** @brief BSD sum algorithm ('sum' Unix command) and used by QEmu */ uint16_t checksum(size_t start, size_t end) { size_t addr; @@ -211,14 +222,60 @@ static void vma(void) vma_dump(); } +/** @brief Test of the kernel malloc allocator */ +static void malloc(void) +{ + int i; + int* p[20]; + int* a; + + // kmalloc() test + buddy_dump(); + a = kmalloc(SIZE); + test(a != NULL, "kmalloc(%lu) = %p", SIZE, a); + buddy_dump(); + + // simple write/read test + for (i=0; i Date: Tue, 26 Nov 2013 17:18:47 +0100 Subject: [PATCH 20/21] fixed some compiler warnings --- apps/paging.c | 2 +- kernel/init.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/paging.c b/apps/paging.c index 2e2277f5..ebae70a3 100644 --- a/apps/paging.c +++ b/apps/paging.c @@ -254,7 +254,7 @@ static void malloc(void) for (i=0; i<20; i++) { uint16_t sz = rand(); p[i] = kmalloc(sz); - test(p[i], "kmalloc(%u) = %p", sz, p[i]); + test(p[i] != NULL, "kmalloc(%u) = %p", sz, p[i]); } buddy_dump(); diff --git a/kernel/init.c b/kernel/init.c index 67b84784..10dd96c0 100644 --- a/kernel/init.c +++ b/kernel/init.c @@ -63,7 +63,7 @@ extern const void bss_end; int lowlevel_init(void) { // initialize .bss section - memset(&bss_start, 0x00, (char*) &bss_end - (char*) &bss_start); + memset((char*) &bss_start, 0x00, (char*) &bss_end - (char*) &bss_start); koutput_init(); From 949500fe6c244e8c0abbd08ef511e20a0ca5aba9 Mon Sep 17 00:00:00 2001 From: Steffen Vogel Date: Tue, 26 Nov 2013 17:20:25 +0100 Subject: [PATCH 21/21] moved mm subsystem tests to apps/memory.c --- apps/Makefile | 2 +- apps/{paging.c => memory.c} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename apps/{paging.c => memory.c} (100%) diff --git a/apps/Makefile b/apps/Makefile index 97536517..21a26efc 100644 --- a/apps/Makefile +++ b/apps/Makefile @@ -1,4 +1,4 @@ -C_source := tests.c echo.c netio.c jacobi.c laplace.c gfx_client.c gfx_generic.c paging.c +C_source := tests.c echo.c netio.c jacobi.c laplace.c gfx_client.c gfx_generic.c memory.c MODULE := apps include $(TOPDIR)/Makefile.inc diff --git a/apps/paging.c b/apps/memory.c similarity index 100% rename from apps/paging.c rename to apps/memory.c
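The rand() helper added to the test app (apps/paging.c, renamed to apps/memory.c above) is the textbook 16-bit Fibonacci LFSR with taps 16, 14, 13 and 11, seeded with 0xACE1. A quick host-side check, sketched below and not part of the patch series, confirms that the generator cycles through all 65535 non-zero states before repeating, which is why it delivers plenty of distinct allocation sizes for the kmalloc() test without a libc PRNG.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* same update rule as the test's rand(): XOR of bits 0, 2, 3 and 5,
 * i.e. the feedback polynomial x^16 + x^14 + x^13 + x^11 + 1 */
static uint16_t lfsr_step(uint16_t lfsr)
{
	uint16_t bit = ((lfsr >> 0) ^ (lfsr >> 2) ^ (lfsr >> 3) ^ (lfsr >> 5)) & 1;
	return (uint16_t) ((lfsr >> 1) | (bit << 15));
}

int main(void)
{
	uint16_t seed = 0xACE1u, lfsr = seed;
	uint32_t period = 0;

	do {
		lfsr = lfsr_step(lfsr);
		period++;
	} while (lfsr != seed);

	printf("period = %u\n", period);
	assert(period == 65535); /* maximal length: every non-zero state occurs once */
	return 0;
}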
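How those random sizes land on the buddy lists can be worked out with a few lines of arithmetic. The sketch below is not part of the patch series; it merely mirrors the allocator's rounding rule under the assumption that the hidden prefix costs sizeof(buddy_t), i.e. 4 bytes on the 32-bit build: add the prefix, round up to the next power of two, and clamp to BUDDY_MIN.

#include <stdio.h>

#define BUDDY_MIN 4
#define BUDDY_MAX 32

/* round payload + prefix up to the next power of two, clamp to BUDDY_MIN */
static int size_class(unsigned long long sz)
{
	int exp = 0;
	while ((1ULL << exp) < sz)
		exp++;
	if (exp > BUDDY_MAX)
		return 0;                 /* too large for kmalloc() */
	return (exp < BUDDY_MIN) ? BUDDY_MIN : exp;
}

int main(void)
{
	unsigned long long requests[] = { 13, 100, 4096, 65535 };
	unsigned long long prefix = 4;    /* assumed sizeof(buddy_t) on 32 bit */
	int i;

	for (i = 0; i < 4; i++) {
		int exp = size_class(requests[i] + prefix);
		printf("kmalloc(%llu) -> 2^%d = %llu byte buddy\n",
		       requests[i], exp, 1ULL << exp);
	}
	return 0;
}

A worst-case rand() value of 65535 therefore occupies a whole 2^17 = 128 KByte buddy, exactly one BUDDY_ALLOC-sized region.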