diff --git a/Makefile.example b/Makefile.example
index 53f4088..59672a9 100644
--- a/Makefile.example
+++ b/Makefile.example
@@ -102,6 +102,8 @@ include/eduos/config.inc: include/eduos/config.h
 	@echo "; This file is generated automatically from the config.h file." > include/eduos/config.inc
 	@echo "; Before editing this, you should consider editing config.h." >> include/eduos/config.inc
 	@awk '/^#define KERNEL_STACK_SIZE/{ print "%define KERNEL_STACK_SIZE", $$3 }' include/eduos/config.h >> include/eduos/config.inc
+	@awk '/^#define VIDEO_MEM_ADDR/{ print "%define VIDEO_MEM_ADDR", $$3 }' include/eduos/config.h >> include/eduos/config.inc
+	@awk '/^#define CONFIG_VGA/{ print "%define CONFIG_VGA" }' include/eduos/config.h >> include/eduos/config.inc
 
 %.o : %.asm include/eduos/config.inc
 	@echo [ASM] $@
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index 8030bb9..9db82f3 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -34,7 +34,8 @@
  * This file contains the several functions to manage the page tables
  */
 
-#include 
+#include 
+#include 
 
 #ifndef __PAGE_H__
 #define __PAGE_H__
@@ -53,7 +54,7 @@
 /// Physical address width (we dont support PAE)
 #define PHYS_BITS          BITS
 /// Page map bits
-#define PAGE_MAP_BITS  10
+#define PAGE_MAP_BITS      10
 /// Number of page map indirections
 #define PAGE_LEVELS        2
@@ -90,11 +91,6 @@
 #define PG_GLOBAL          (1 << 8)
 /// This table is a self-reference and should skipped by page_map_copy()
 #define PG_SELF            (1 << 9)
-/// This page is used for bootstrapping the paging code.
-#define PG_BOOT            PG_SELF
-
-/// This page is reserved for copying
-#define PAGE_TMP           (PAGE_FLOOR((size_t) &kernel_start) - PAGE_SIZE)
 
 /** @brief Converts a virtual address to a physical
  *
@@ -103,7 +99,7 @@
  * @param addr Virtual address to convert
  * @return physical address
  */
-size_t page_virt_to_phys(size_t vir);
+size_t virt_to_phys(size_t vir);
 
 /** @brief Initialize paging subsystem
  *
@@ -114,29 +110,20 @@ size_t page_virt_to_phys(size_t vir);
  */
 int page_init(void);
 
-/** @brief Map one page into the boot page table
- *
- * @param viraddr
- * @param phyaddr
- * @param bits
- * @return
- */
-int page_map_bootmap(size_t viraddr, size_t phyaddr, size_t bits);
-
 /** @brief Map a continuous region of pages
  *
- * @param viraddr
- * @param phyaddr
- * @param npages
- * @param bits
+ * @param viraddr Desired virtual address
+ * @param phyaddr Physical address to map from
+ * @param npages The region's size in number of pages
+ * @param bits Further page flags
  * @return
  */
 int page_map(size_t viraddr, size_t phyaddr, size_t npages, size_t bits);
 
 /** @brief Unmap a continuous region of pages
  *
- * @param viraddr
- * @param npages
+ * @param viraddr The virtual start address
+ * @param npages The range's size in pages
  * @return
  */
 int page_unmap(size_t viraddr, size_t npages);
@@ -147,7 +134,7 @@ int page_unmap(size_t viraddr, size_t npages);
  * @retval 0 Success. Everything went fine.
  * @retval <0 Error. Something went wrong.
  */
-int page_map_copy(task_t *dest);
+int page_map_copy(struct task *dest);
 
 /** @brief Free a whole page map tree */
 int page_map_drop(void);
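The two new awk rules keep the NASM side of the build in sync with the C configuration, exactly as the existing KERNEL_STACK_SIZE rule does. Assuming config.h carries the usual eduOS values (a KERNEL_STACK_SIZE of 8192, VIDEO_MEM_ADDR of 0xB8000 and an enabled CONFIG_VGA; these values are an assumption, not part of the patch), the generated include/eduos/config.inc would read:

    ; This file is generated automatically from the config.h file.
    ; Before editing this, you should consider editing config.h.
    %define KERNEL_STACK_SIZE 8192
    %define VIDEO_MEM_ADDR 0xB8000
    %define CONFIG_VGA

Note that CONFIG_VGA is emitted without a value, so entry.asm can only test it with %ifdef, which is exactly how it is used below.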
diff --git a/arch/x86/kernel/entry.asm b/arch/x86/kernel/entry.asm
index 8f80afa..a8c3fe6 100644
--- a/arch/x86/kernel/entry.asm
+++ b/arch/x86/kernel/entry.asm
@@ -33,6 +33,9 @@
 
 [BITS 32]
 
+extern kernel_start    ; defined in linker script
+extern kernel_end
+
 ; We use a special name to map this section at the begin of our kernel
 ; => Multiboot expects its magic number at the beginning of the kernel.
 SECTION .mboot
@@ -78,6 +81,40 @@ stublet:
 ; extensions (huge pages) enabled.
 global cpu_init
 cpu_init:
+; initialize page tables
+%ifdef CONFIG_VGA
+    push edi
+    mov eax, VIDEO_MEM_ADDR
+    and eax, 0xFFFFF000    ; page align lower half
+    mov edi, eax
+    shr edi, 10            ; (edi >> 12) * 4 (index for boot_pgt)
+    add edi, boot_pgt
+    or eax, 0x113          ; set present, global, writable and cache disable bits
+    mov DWORD [edi], eax
+    pop edi
+%endif
+    push edi
+    push ebx
+    push ecx
+    mov ecx, kernel_start
+    mov ebx, kernel_end
+    add ebx, 0x1000
+L0: cmp ecx, ebx
+    jae L1
+    mov eax, ecx
+    and eax, 0xFFFFF000    ; page align lower half
+    mov edi, eax
+    shr edi, 10            ; (edi >> 12) * 4 (index for boot_pgt)
+    add edi, boot_pgt
+    or eax, 0x103          ; set present, global and writable bits
+    mov DWORD [edi], eax
+    add ecx, 0x1000
+    jmp L0
+L1:
+    pop ecx
+    pop ebx
+    pop edi
+
 ; Set CR3
     mov eax, boot_pgd
     mov cr3, eax
@@ -326,18 +363,12 @@ boot_stack:
 ; These tables do a simple identity paging and will
 ; be replaced in page_init() by more fine-granular mappings.
 ALIGN 4096
-global boot_map
-boot_map:
 boot_pgd:
-    DD boot_pgt + 0x107 ; PG_PRESENT | PG_GLOBAL | PG_RW | PG_USER
-    times 1022 DD 0     ; PAGE_MAP_ENTRIES - 2
+    DD boot_pgt + 0x107    ; PG_PRESENT | PG_GLOBAL | PG_RW | PG_USER
+    times 1022 DD 0        ; PAGE_MAP_ENTRIES - 2
     DD boot_pgd + 0x303    ; PG_PRESENT | PG_GLOBAL | PG_RW | PG_SELF (self-reference)
 boot_pgt:
-    %assign i 0
-    %rep 1024              ; PAGE_MAP_ENTRIES
-    DD i + 0x203           ; PG_PRESENT | PG_BOOT | PG_RW
-    %assign i i + 4096     ; PAGE_SIZE
-    %endrep
+    times 1024 DD 0
 
 ; add some hints to the ELF file
 SECTION .note.GNU-stack noalloc noexec nowrite progbits
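The page-table fill loop in cpu_init deserves a second look: shr edi, 10 folds two steps into one. With 4 KiB pages, the page frame number of an address is addr >> 12, and each 32-bit entry in boot_pgt occupies 4 bytes, so the byte offset of the matching entry is (addr >> 12) * 4 == addr >> 10. A C sketch of what the assembly computes (the helper name and declaration are illustrative, not part of the patch):

    extern uint32_t boot_pgt[1024];  /* the table defined in entry.asm */

    /* address of the boot_pgt entry that identity-maps 'addr' */
    static inline uint32_t* boot_pgt_entry(uint32_t addr)
    {
        uint32_t aligned = addr & 0xFFFFF000;  /* page align lower half */
        return (uint32_t*) ((uint8_t*) boot_pgt + (aligned >> 10));
    }

The flag values are the PG_* bits from page.h: 0x103 = PG_PRESENT | PG_RW | PG_GLOBAL for the kernel image, and 0x113 additionally sets the cache-disable bit (bit 4) for the VGA window.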
diff --git a/arch/x86/mm/page.c b/arch/x86/mm/page.c
index 20c0afd..4ef3983 100644
--- a/arch/x86/mm/page.c
+++ b/arch/x86/mm/page.c
@@ -28,7 +28,7 @@
 
 /**
  * This is a 32/64 bit portable paging implementation for the x86 architecture
- * using self-referenced page tablesi.
+ * using self-referenced page tables.
  * See http://www.noteblok.net/2014/06/14/bachelor/ for a detailed description.
  *
  * @author Steffen Vogel
@@ -47,14 +47,14 @@
 /* Note that linker symbols are not variables, they have no memory
  * allocated for maintaining a value, rather their address is their value.
  */
 extern const void kernel_start;
-extern const void kernel_end;
+//extern const void kernel_end;
+
+/// This page is reserved for copying
+#define PAGE_TMP    (PAGE_FLOOR((size_t) &kernel_start) - PAGE_SIZE)
 
 /** Lock for kernel space page tables */
 static spinlock_t kslock = SPINLOCK_INIT;
 
-/** This PGD table is initialized in entry.asm */
-extern size_t* boot_map;
-
 /** A self-reference enables direct access to all page tables */
 static size_t* self[PAGE_LEVELS] = {
     (size_t *) 0xFFC00000,
@@ -67,7 +67,7 @@ static size_t * other[PAGE_LEVELS] = {
     (size_t *) 0xFFFFE000
 };
 
-size_t page_virt_to_phys(size_t addr)
+size_t virt_to_phys(size_t addr)
 {
     size_t vpn   = addr >> PAGE_BITS;    // virtual page number
     size_t entry = self[0][vpn];         // page table entry
@@ -77,16 +77,6 @@ size_t page_virt_to_phys(size_t addr)
     return phy | off;
 }
 
-int page_map_bootmap(size_t viraddr, size_t phyaddr, size_t bits)
-{
-    if (BUILTIN_EXPECT(viraddr >= PAGE_MAP_ENTRIES*PAGE_SIZE, 0))
-        return -EINVAL;
-
-    boot_map[PAGE_MAP_ENTRIES + (viraddr >> PAGE_BITS)] = phyaddr | bits | PG_PRESENT;
-
-    return 0;
-}
-
 int page_map(size_t viraddr, size_t phyaddr, size_t npages, size_t bits)
 {
     int lvl, ret = -ENOMEM;
@@ -297,13 +287,8 @@ int page_init(void)
         }
     }
 
-    /* Unmap bootstrap identity paging (see entry.asm, PG_BOOT) */
-    for (i=0; i<PAGE_MAP_ENTRIES; i++)
-        if (self[0][i] & PG_BOOT)
-            self[0][i] = 0;
-    tlb_flush();
 
     return 0;
 }
diff --git a/include/eduos/malloc.h b/include/eduos/malloc.h
new file mode 100644
--- /dev/null
+++ b/include/eduos/malloc.h
+/*
+ * Copyright (c) 2014, Steffen Vogel, RWTH Aachen University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of the University nor the names of its contributors
+ *      may be used to endorse or promote products derived from this
+ *      software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @author Steffen Vogel
+ */
+
+#ifndef __MALLOC_H__
+#define __MALLOC_H__
+
+#include 
+#include 
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/// Binary exponent of maximal size for kmalloc()
+#define BUDDY_MAX    32    // 4 GB
+/// Binary exponent of minimal buddy size
+#define BUDDY_MIN    3     // 8 Byte >= sizeof(buddy_t)
+/// Binary exponent of the size which we allocate with buddy_fill()
+#define BUDDY_ALLOC  16    // 64 KByte = 16 * PAGE_SIZE
+
+#define BUDDY_LISTS  (BUDDY_MAX-BUDDY_MIN+1)
+#define BUDDY_MAGIC  0xBABE
+
+union buddy;
+
+/** @brief Buddy
+ *
+ * Every free memory block is stored in a linked list according to its size.
+ * We can use this free memory to store this buddy_t union which represents
+ * this block (the buddy_t union is aligned to the front).
+ * Therefore the address of the buddy_t union is equal to the address
+ * of the underlying free memory block.
+ *
+ * Every allocated memory block is prefixed with its binary size exponent and
+ * a known magic number. This prefix is hidden from the user because it's located
+ * before the actual memory address returned by kmalloc()
+ */
+typedef union buddy {
+    /// Pointer to the next buddy in the linked list.
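The self[] trick that replaces boot_map: because the last entry of the page directory points to the directory itself (the + 0x303 entry in entry.asm), the top 4 MiB of the virtual address space, starting at 0xFFC00000, exposes every page-table entry as ordinary memory, and self[0][vpn] is the PTE of virtual page vpn. A worked example for virt_to_phys(), with illustrative numbers:

    size_t addr  = 0xC0123456;
    size_t vpn   = addr >> PAGE_BITS;   /* 0xC0123: virtual page number      */
    size_t entry = self[0][vpn];        /* PTE, read via 0xFFC00000 + 4*vpn  */
    size_t phy   = entry & PAGE_MASK;   /* frame address without flag bits   */
    size_t off   = addr & ~PAGE_MASK;   /* 0x456: offset inside the page     */
    /* physical address = phy | off */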
+    union buddy* next;
+    struct {
+        /// The binary exponent of the block size
+        uint8_t exponent;
+        /// Must be equal to BUDDY_MAGIC for a valid memory block
+        uint16_t magic;
+    } prefix;
+} buddy_t;
+
+/** @brief Dump free buddies */
+void buddy_dump(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/include/eduos/memory.h b/include/eduos/memory.h
index 9a5766a..739e414 100644
--- a/include/eduos/memory.h
+++ b/include/eduos/memory.h
@@ -37,12 +37,34 @@
 #define __MEMORY_H__
 
 /** @brief Initialize the memory subsystem */
-int memory_init();
+int memory_init(void);
 
 /** @brief Request physical page frames */
 size_t get_pages(size_t npages);
 
+/** @brief Get a single page
+ *
+ * Convenience function: uses get_pages(1);
+ */
+static inline size_t get_page(void) { return get_pages(1); }
+
 /** @brief release physical page frames */
 int put_pages(size_t phyaddr, size_t npages);
 
+/** @brief Put a single page
+ *
+ * Convenience function: uses put_pages(phyaddr, 1);
+ */
+static inline int put_page(size_t phyaddr) { return put_pages(phyaddr, 1); }
+
+/** @brief Copy a physical page frame
+ *
+ * @param pdest physical address of destination page frame
+ * @param psrc physical address of source page frame
+ * @return
+ * - 0 on success
+ * - -ENOMEM on failure
+ */
+int copy_page(size_t pdest, size_t psrc);
+
 #endif
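Two details of the buddy_t union are worth spelling out. First, the prefix steals sizeof(buddy_t) bytes from every allocation, which is why kmalloc() must add it to the requested size before choosing an exponent. Second, BUDDY_MIN = 3 works only as long as the union fits into the smallest block of 1 << 3 = 8 bytes, as the comment in malloc.h claims. A compile-time check would make the invariant explicit (C11 _Static_assert; this check is a suggestion, not in the patch):

    _Static_assert(sizeof(buddy_t) <= (1 << BUDDY_MIN),
                   "smallest buddy must hold the bookkeeping union");

    /* layout of an allocated block of size 1<<exp:
     *
     *   +-----------------+------------------------------------+
     *   | buddy_t prefix  | usable memory returned to caller   |
     *   +-----------------+------------------------------------+
     *   ^ buddy             ^ buddy+1
     */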
diff --git a/include/eduos/stdlib.h b/include/eduos/stdlib.h
index 6d87e91..2ff46c8 100644
--- a/include/eduos/stdlib.h
+++ b/include/eduos/stdlib.h
@@ -44,6 +44,43 @@
 extern "C" {
 #endif
 
+/** @brief General page allocator function
+ *
+ * This function allocates and maps whole pages.
+ * To avoid fragmentation you should use kmalloc() and kfree()!
+ *
+ * @param sz Desired size of the new memory
+ * @param flags Flags for page_map() and vma_add()
+ *
+ * @return Pointer to the new memory range
+ */
+void* palloc(size_t sz, uint32_t flags);
+
+/** @brief Free general kernel memory
+ *
+ * palloc() doesn't track how much memory was allocated for which pointer,
+ * so you have to specify how much memory shall be freed.
+ *
+ * @param addr The address of the memory range
+ * @param sz The size which should be freed
+ */
+void pfree(void* addr, size_t sz);
+
+/** @brief The memory allocator function
+ *
+ * This allocator uses a buddy system to manage free memory.
+ *
+ * @param sz Desired size of the new memory
+ * @return Pointer to the new memory range
+ */
+void* kmalloc(size_t sz);
+
+/** @brief The memory free function
+ *
+ * Releases memory allocated by kmalloc()
+ *
+ * @param addr The address to the memory block allocated by kmalloc()
+ */
+void kfree(void* addr);
+
 /** @brief Create a new stack for a new task
  *
  * @return start address of the new stack
diff --git a/include/eduos/tasks_types.h b/include/eduos/tasks_types.h
index 6efd018..012e20c 100644
--- a/include/eduos/tasks_types.h
+++ b/include/eduos/tasks_types.h
@@ -39,7 +39,7 @@
 #include 
 #include 
-
+#include 
 #include 
 #include 
 
@@ -79,12 +79,16 @@ typedef struct task {
     size_t page_map;
     /// Lock for page tables
     spinlock_irqsave_t page_lock;
+    /// Lock for the VMA list
+    spinlock_t vma_lock;
+    /// List of VMAs
+    vma_t* vma_list;
     /// usage in number of pages (including page map tables)
-    atomic_int32_t user_usage;
+    atomic_int32_t     user_usage;
     /// next task in the queue
-    struct task* next;
+    struct task*       next;
     /// previous task in the queue
-    struct task* prev;
+    struct task*       prev;
 } task_t;
 
 typedef struct {
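With the declarations above, the intended division of labour is: palloc()/pfree() for page-granular, freshly mapped memory, and kmalloc()/kfree() for small objects carved out of buddies. A hedged usage sketch (sizes and the rounding result are illustrative):

    vma_t* v = kmalloc(sizeof(vma_t));   /* ~20 B + prefix -> one 32 B buddy */
    /* ... use v ... */
    kfree(v);                            /* size is recovered from the prefix */

    void* buf = palloc(3*PAGE_SIZE, 0);  /* three mapped, contiguous pages    */
    /* ... use buf ... */
    pfree(buf, 3*PAGE_SIZE);             /* caller must remember the size     */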
diff --git a/include/eduos/vma.h b/include/eduos/vma.h
new file mode 100644
index 0000000..db97327
--- /dev/null
+++ b/include/eduos/vma.h
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2011, Stefan Lankes, RWTH Aachen University
+ *               2014, Steffen Vogel, RWTH Aachen University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of the University nor the names of its contributors
+ *      may be used to endorse or promote products derived from this
+ *      software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @author Stefan Lankes
+ * @author Steffen Vogel
+ * @file include/eduos/vma.h
+ * @brief VMA related structure and functions
+ */
+
+#ifndef __VMA_H__
+#define __VMA_H__
+
+#include 
+#include 
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/// Read access to this VMA is allowed
+#define VMA_READ        (1 << 0)
+/// Write access to this VMA is allowed
+#define VMA_WRITE       (1 << 1)
+/// Instruction fetches in this VMA are allowed
+#define VMA_EXECUTE     (1 << 2)
+/// This VMA is cacheable
+#define VMA_CACHEABLE   (1 << 3)
+/// This VMA is not accessible
+#define VMA_NO_ACCESS   (1 << 4)
+/// This VMA should be part of the userspace
+#define VMA_USER        (1 << 5)
+/// A collection of flags used for the kernel heap (kmalloc)
+#define VMA_HEAP        (VMA_READ|VMA_WRITE|VMA_CACHEABLE)
+
+// boundaries for VAS allocation
+#define VMA_KERN_MIN    0xC0000
+#define VMA_KERN_MAX    KERNEL_SPACE
+#define VMA_USER_MIN    KERNEL_SPACE
+
+// last three top level entries are reserved
+#ifdef CONFIG_X86_32
+ #define VMA_USER_MAX   0xFF400000
+#elif defined (CONFIG_X86_64)
+ #define VMA_USER_MAX   0xFFFFFE8000000000
+#endif
+
+struct vma;
+
+/** @brief VMA structure definition
+ *
+ * Each item in this linked list marks a used part of the virtual address space.
+ * It's used by vma_alloc() to find holes between them.
+ */
+typedef struct vma {
+    /// Start address of the memory area
+    size_t start;
+    /// End address of the memory area
+    size_t end;
+    /// Type flags field
+    uint32_t flags;
+    /// Pointer to next VMA element in the list
+    struct vma* next;
+    /// Pointer to previous VMA element in the list
+    struct vma* prev;
+} vma_t;
+
+/** @brief Initialize the kernelspace VMA list
+ *
+ * Reserves several system-relevant virtual memory regions:
+ *  - SMP boot page (SMP_SETUP_ADDR)
+ *  - VGA video memory (VIDEO_MEM_ADDR)
+ *  - The kernel (kernel_start - kernel_end)
+ *  - Multiboot structure (mb_info)
+ *  - Multiboot mmap (mb_info->mmap_*)
+ *  - Multiboot modules (mb_info->mods_*)
+ *  - Init Ramdisk
+ *
+ * @return
+ *  - 0 on success
+ *  - <0 on failure
+ */
+int vma_init(void);
+
+/** @brief Add a new virtual memory area to the list of VMAs
+ *
+ * @param start Start address of the new area
+ * @param end End address of the new area
+ * @param flags Type flags the new area shall have
+ *
+ * @return
+ *  - 0 on success
+ *  - -EINVAL (-22) or -ENOMEM (-12) on failure
+ */
+int vma_add(size_t start, size_t end, uint32_t flags);
+
+/** @brief Search for a free memory area
+ *
+ * @param size Size of the requested VMA in bytes
+ * @param flags Type flags the new area shall have
+ * @return
+ *  - 0 on failure
+ *  - the start address of a free area otherwise
+ */
+size_t vma_alloc(size_t size, uint32_t flags);
+
+/** @brief Free an allocated memory area
+ *
+ * @param start Start address of the area to be freed
+ * @param end End address of the area to be freed
+ * @return
+ *  - 0 on success
+ *  - -EINVAL (-22) on failure
+ */
+int vma_free(size_t start, size_t end);
+
+/** @brief Free all virtual memory areas of a task
+ *
+ * @param task The task whose VMA list shall be dropped
+ * @return
+ *  - 0 on success
+ */
+int drop_vma_list(struct task* task);
+
+/** @brief Copy the VMA list of one task to another
+ *
+ * @param src The task whose list should be copied
+ * @param dest The task where the list should be copied to
+ * @return
+ *  - 0 on success
+ */
+int copy_vma_list(struct task* src, struct task* dest);
+
+/** @brief Dump information about this task's VMAs into the terminal. */
+void vma_dump(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
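The VMA list thus tracks reservations of the virtual address space, independently of whether the pages behind a region are already mapped. A usage sketch against this header (the fixed address mirrors what vma_init() does for VGA; the surrounding code is illustrative):

    /* dynamic reservation, e.g. for the kernel heap */
    size_t addr = vma_alloc(4*PAGE_SIZE, VMA_HEAP);
    if (addr) {
        /* ... map pages there, use them ... */
        vma_free(addr, addr + 4*PAGE_SIZE);
    }

    /* fixed reservation of a known region */
    vma_add(VIDEO_MEM_ADDR, VIDEO_MEM_ADDR + PAGE_SIZE, VMA_READ|VMA_WRITE);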
diff --git a/kernel/main.c b/kernel/main.c
index d69205c..dd8013b 100644
--- a/kernel/main.c
+++ b/kernel/main.c
@@ -34,6 +34,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -81,17 +82,21 @@ static int wrapper(void* arg)
     return jump_to_user_code((uint32_t) userfoo, (uint32_t) stack);
 #else
     // dirty hack, map userfoo to the user space
-    size_t phys = page_virt_to_phys(((size_t) userfoo) & PAGE_MASK);
+    size_t phys = virt_to_phys(((size_t) userfoo) & PAGE_MASK);
     size_t vuserfoo = 0x40000000;
     page_map(vuserfoo, phys, 2, PG_PRESENT | PG_USER);
     vuserfoo += (size_t)userfoo & 0xFFF;
+    vma_add(vuserfoo, vuserfoo + 2*PAGE_SIZE, VMA_USER|VMA_CACHEABLE|VMA_READ|VMA_EXECUTE);
 
     // dirty hack, map ustack to the user space
-    phys = page_virt_to_phys((size_t) ustack);
+    phys = virt_to_phys((size_t) ustack);
     size_t vstack = 0x80000000;
     page_map(vstack, phys, KERNEL_STACK_SIZE >> PAGE_BITS, PG_PRESENT | PG_RW | PG_USER);
+    vma_add(vstack, vstack+KERNEL_STACK_SIZE, VMA_USER|VMA_CACHEABLE|VMA_READ|VMA_WRITE);
     vstack = (vstack + KERNEL_STACK_SIZE - 16 - sizeof(size_t));
 
+    vma_dump();
+
     return jump_to_user_code(vuserfoo, vstack);
 #endif
 }
diff --git a/kernel/tasks.c b/kernel/tasks.c
index 30d4019..d006a12 100644
--- a/kernel/tasks.c
+++ b/kernel/tasks.c
@@ -43,8 +43,8 @@
  * A task's id will be its position in this array.
  */
 static task_t task_table[MAX_TASKS] = { \
-    [0] = {0, TASK_IDLE, NULL, NULL, 0, 0, SPINLOCK_IRQSAVE_INIT, ATOMIC_INIT(0), NULL, NULL}, \
-    [1 ... MAX_TASKS-1] = {0, TASK_INVALID, NULL, NULL, 0, 0, SPINLOCK_IRQSAVE_INIT, ATOMIC_INIT(0), NULL, NULL}};
+    [0] = {0, TASK_IDLE, NULL, NULL, 0, 0, SPINLOCK_IRQSAVE_INIT, SPINLOCK_INIT, NULL, ATOMIC_INIT(0), NULL, NULL}, \
+    [1 ... MAX_TASKS-1] = {0, TASK_INVALID, NULL, NULL, 0, 0, SPINLOCK_IRQSAVE_INIT, SPINLOCK_INIT, NULL, ATOMIC_INIT(0), NULL, NULL}};
 
 static spinlock_irqsave_t table_lock = SPINLOCK_IRQSAVE_INIT;
 
@@ -200,6 +200,8 @@ static int create_task(tid_t* id, entry_point_t ep, void* arg, uint8_t prio)
     task_table[i].last_stack_pointer = NULL;
     task_table[i].stack = create_stack(i);
     task_table[i].prio = prio;
+    spinlock_init(&task_table[i].vma_lock);
+    task_table[i].vma_list = NULL;
     spinlock_irqsave_init(&task_table[i].page_lock);
     atomic_int32_set(&task_table[i].user_usage, 0);
diff --git a/mm/Makefile b/mm/Makefile
index 0bf2672..6c2a4d8 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -1,4 +1,4 @@
-C_source := memory.c
+C_source := memory.c malloc.c vma.c
 MODULE := mm
 
 include $(TOPDIR)/Makefile.inc
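A note on the task_table change in tasks.c: the initializers are positional, so the added SPINLOCK_INIT and NULL must appear exactly where vma_lock and vma_list were inserted into task_t, between page_lock and user_usage. Designated initializers would decouple the two files; a sketch (field names other than vma_lock and vma_list are assumptions based on tasks_types.h above):

    [0] = { .status     = TASK_IDLE,
            .page_lock  = SPINLOCK_IRQSAVE_INIT,
            .vma_lock   = SPINLOCK_INIT,
            .vma_list   = NULL,
            .user_usage = ATOMIC_INIT(0) },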
diff --git a/mm/malloc.c b/mm/malloc.c
new file mode 100644
index 0000000..4714533
--- /dev/null
+++ b/mm/malloc.c
@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2014, Steffen Vogel, RWTH Aachen University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of the University nor the names of its contributors
+ *      may be used to endorse or promote products derived from this
+ *      software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @author Steffen Vogel
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/// A linked list for each binary size exponent
+static buddy_t* buddy_lists[BUDDY_LISTS] = { [0 ... BUDDY_LISTS-1] = NULL };
+/// Lock for the buddy lists
+static spinlock_t buddy_lock = SPINLOCK_INIT;
+
+/** @brief Check if larger free buddies are available */
+static inline int buddy_large_avail(uint8_t exp)
+{
+    while (exp<BUDDY_MAX && !buddy_lists[exp-BUDDY_MIN])
+        exp++;
+
+    return exp != BUDDY_MAX;
+}
+
+/** @brief Calculate the required buddy size exponent */
+static inline int buddy_exp(size_t sz)
+{
+    int exp;
+    for (exp=0; sz>(1<<exp); exp++);
+    if (exp > BUDDY_MAX)
+        exp = 0;
+    if (exp < BUDDY_MIN)
+        exp = BUDDY_MIN;
+
+    return exp;
+}
+
+/** @brief Get a free buddy by potentially splitting a larger one */
+static buddy_t* buddy_get(int exp)
+{
+    spinlock_lock(&buddy_lock);
+    buddy_t** list = &buddy_lists[exp-BUDDY_MIN];
+    buddy_t* buddy = *list;
+    buddy_t* split;
+
+    if (buddy)
+        // there is already a free buddy =>
+        //   we remove it from the list
+        *list = buddy->next;
+    else if (exp >= BUDDY_ALLOC && !buddy_large_avail(exp))
+        // there's no free buddy larger than exp =>
+        //   we can allocate new memory
+        buddy = (buddy_t*) palloc(1<<exp, VMA_HEAP);
+    else {
+        // we recursively request a larger buddy...
+        buddy = buddy_get(exp+1);
+        if (BUILTIN_EXPECT(!buddy, 0))
+            goto out;
+
+        // ... and split it, by putting the second half back to the list
+        split = (buddy_t*) ((size_t) buddy + (1<<exp));
+        split->next = *list;
+        *list = split;
+    }
+
+out:
+    spinlock_unlock(&buddy_lock);
+
+    return buddy;
+}
+
+/** @brief Put a buddy back to its free list
+ *
+ * TODO: merge adjacent buddies (memory compaction)
+ */
+static void buddy_put(buddy_t* buddy)
+{
+    spinlock_lock(&buddy_lock);
+    buddy_t** list = &buddy_lists[buddy->prefix.exponent-BUDDY_MIN];
+    buddy->next = *list;
+    *list = buddy;
+    spinlock_unlock(&buddy_lock);
+}
+
+void buddy_dump(void)
+{
+    size_t free = 0;
+    int i;
+    for (i=0; i<BUDDY_LISTS; i++) {
+        buddy_t* buddy;
+        for (buddy=buddy_lists[i]; buddy; buddy=buddy->next) {
+            kprintf("  %p -> %p \n", buddy, buddy->next);
+            free += 1<<(i+BUDDY_MIN);
+        }
+    }
+
+    kprintf("free buddies: %lu bytes\n", free);
+}
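The splitting arithmetic in buddy_get() relies on the power-of-two sizes: a buddy of 2^(exp+1) bytes decomposes into two aligned halves of 2^exp bytes, the second one starting exactly (1 << exp) bytes into the block. A worked example with illustrative numbers:

    /* request exp = 6 (64 B) while only a 128 B buddy at 0x1000 is free:
     *   buddy = buddy_get(7)                            -> 0x1000
     *   split = (buddy_t*) ((size_t) buddy + (1 << 6))   = 0x1040
     *   0x1040..0x107F is pushed onto buddy_lists[6 - BUDDY_MIN]
     *   0x1000..0x103F is returned to the caller
     */

The TODO above buddy_put() is the mirror image of this: freeing would have to detect that 0x1000 and 0x1040 are buddies again (their addresses differ only in bit 6) and merge them back into one 128 B block.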
+void* palloc(size_t sz, uint32_t flags)
+{
+    size_t phyaddr, viraddr;
+    uint32_t npages = PAGE_FLOOR(sz) >> PAGE_BITS;
+    int err;
+
+    //kprintf("palloc(%lu) (%lu pages)\n", sz, npages);
+
+    // get free virtual address space
+    viraddr = vma_alloc(npages*PAGE_SIZE, VMA_HEAP);
+    if (BUILTIN_EXPECT(!viraddr, 0))
+        return NULL;
+
+    // get continuous physical pages
+    phyaddr = get_pages(npages);
+    if (BUILTIN_EXPECT(!phyaddr, 0)) {
+        vma_free(viraddr, viraddr+npages*PAGE_SIZE);
+        return NULL;
+    }
+
+    // map physical pages to VMA
+    err = page_map(viraddr, phyaddr, npages, PG_RW|PG_GLOBAL);
+    if (BUILTIN_EXPECT(err, 0)) {
+        vma_free(viraddr, viraddr+npages*PAGE_SIZE);
+        put_pages(phyaddr, npages);
+        return NULL;
+    }
+
+    return (void*) viraddr;
+}
+
+void pfree(void* addr, size_t sz)
+{
+    if (BUILTIN_EXPECT(!addr || !sz, 0))
+        return;
+
+    size_t i;
+    size_t phyaddr;
+    size_t viraddr = (size_t) addr & PAGE_MASK;
+    uint32_t npages = PAGE_FLOOR(sz) >> PAGE_BITS;
+
+    // memory is probably not continuously mapped! (userspace heap)
+    for (i=0; i<npages; i++) {
+        phyaddr = virt_to_phys(viraddr + i*PAGE_SIZE);
+        put_page(phyaddr);
+    }
+
+    // unmap and unreserve the virtual address range
+    page_unmap(viraddr, npages);
+    vma_free(viraddr, viraddr+npages*PAGE_SIZE);
+}
+
+void* kmalloc(size_t sz)
+{
+    if (BUILTIN_EXPECT(!sz, 0))
+        return NULL;
+
+    // add space for the prefix
+    sz += sizeof(buddy_t);
+
+    int exp = buddy_exp(sz);
+    if (BUILTIN_EXPECT(!exp, 0))
+        return NULL;
+
+    buddy_t* buddy = buddy_get(exp);
+    if (BUILTIN_EXPECT(!buddy, 0))
+        return NULL;
+
+    // setup buddy prefix
+    buddy->prefix.magic = BUDDY_MAGIC;
+    buddy->prefix.exponent = exp;
+
+    //kprintf("kmalloc(%lu) = %p\n", sz, buddy+1);
+
+    // pointer arithmetic: we hide the prefix
+    return buddy+1;
+}
+
+void kfree(void *addr)
+{
+    if (BUILTIN_EXPECT(!addr, 0))
+        return;
+
+    //kprintf("kfree(%lu)\n", addr);
+
+    buddy_t* buddy = (buddy_t*) addr - 1; // get prefix
+
+    // check magic
+    if (BUILTIN_EXPECT(buddy->prefix.magic != BUDDY_MAGIC, 0))
+        return;
+
+    buddy_put(buddy);
+}
diff --git a/mm/memory.c b/mm/memory.c
index df9c5c6..4f83291 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -157,6 +157,43 @@ int put_pages(size_t phyaddr, size_t npages)
     return ret;
 }
 
+int copy_page(size_t pdest, size_t psrc)
+{
+    int err;
+
+    static size_t viraddr;
+    if (!viraddr) { // statically allocate virtual memory area
+        viraddr = vma_alloc(2 * PAGE_SIZE, VMA_HEAP);
+        if (BUILTIN_EXPECT(!viraddr, 0))
+            return -ENOMEM;
+    }
+
+    // map pages
+    size_t vsrc = viraddr;
+    err = page_map(vsrc, psrc, 1, PG_GLOBAL|PG_RW);
+    if (BUILTIN_EXPECT(err, 0)) {
+        page_unmap(viraddr, 1);
+        return -ENOMEM;
+    }
+
+    size_t vdest = viraddr + PAGE_SIZE;
+    err = page_map(vdest, pdest, 1, PG_GLOBAL|PG_RW);
+    if (BUILTIN_EXPECT(err, 0)) {
+        page_unmap(viraddr + PAGE_SIZE, 1);
+        return -ENOMEM;
+    }
+
+    kprintf("copy_page: copy page frame from: %#lx (%#lx) to %#lx (%#lx)\n", vsrc, psrc, vdest, pdest); // TODO remove
+
+    // copy the whole page
+    memcpy((void*) vdest, (void*) vsrc, PAGE_SIZE);
+
+    // housekeeping
+    page_unmap(viraddr, 2);
+
+    return 0;
+}
+
 int memory_init(void)
 {
     unsigned int i;
@@ -166,6 +203,13 @@ int memory_init(void)
     // mark all memory as used
     memset(bitmap, 0xff, BITMAP_SIZE);
 
+    // enable paging and map Multiboot modules etc.
+    ret = page_init();
+    if (BUILTIN_EXPECT(ret, 0)) {
+        kputs("Failed to initialize paging!\n");
+        return ret;
+    }
+
     // parse multiboot information for available memory
     if (mb_info) {
         if (mb_info->flags & MULTIBOOT_INFO_MEM_MAP) {
@@ -246,12 +290,26 @@ int memory_init(void)
         atomic_int32_dec(&total_available_pages);
     }
 
-    // enable paging and map SMP, VGA, Multiboot modules etc.
-    ret = page_init();
+    ret = vma_init();
     if (BUILTIN_EXPECT(ret, 0)) {
-        kputs("Failed to initialize paging!\n");
+        kprintf("Failed to initialize VMA regions: %d\n", ret);
         return ret;
     }
 
+    /*
+     * Modules like the init ram disk are already loaded.
+     * Therefore, we set these pages as used.
+     */
+    if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MODS)) {
+        multiboot_module_t* mmodule = (multiboot_module_t*) ((size_t) mb_info->mods_addr);
+
+        for(i=0; i<mb_info->mods_count; i++) {
+            for(addr=mmodule[i].mod_start; addr<mmodule[i].mod_end; addr+=PAGE_SIZE) {
+                page_set_mark(addr >> PAGE_BITS);
+                atomic_int32_inc(&total_allocated_pages);
+                atomic_int32_dec(&total_available_pages);
+            }
+        }
+    }
+
     return ret;
 }
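copy_page() works around the fact that arbitrary physical frames are not addressable from C: it keeps a static two-page virtual window (reserved once via vma_alloc()), temporarily maps the source and destination frames into it, copies, and unmaps again. A hedged usage sketch (the frame addresses come from the page allocator above; error handling is minimal):

    size_t src = get_page();
    size_t dst = get_page();
    /* ... fill the source frame through some mapping ... */
    if (copy_page(dst, src) < 0)    /* 0 on success, -ENOMEM on failure */
        kputs("copy_page failed\n");

Since the window is shared static state, two tasks calling copy_page() concurrently would race on it; a lock around the window (not part of this patch) would make it safe.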
diff --git a/mm/vma.c b/mm/vma.c
new file mode 100644
index 0000000..cb5c242
--- /dev/null
+++ b/mm/vma.c
@@ -0,0 +1,400 @@
+/*
+ * Copyright (c) 2014, Steffen Vogel, RWTH Aachen University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of the University nor the names of its contributors
+ *      may be used to endorse or promote products derived from this
+ *      software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/*
+ * Note that linker symbols are not variables, they have no memory allocated for
+ * maintaining a value, rather their address is their value.
+ */
+extern const void kernel_start;
+extern const void kernel_end;
+
+/*
+ * Kernel space VMA list and lock
+ *
+ * For bootstrapping we initialize the VMA list with one empty VMA
+ * (start == end) and expand this VMA by calls to vma_alloc()
+ */
+static vma_t vma_boot = { VMA_KERN_MIN, VMA_KERN_MIN, VMA_HEAP };
+static vma_t* vma_list = &vma_boot;
+static spinlock_t vma_lock = SPINLOCK_INIT;
+
+// TODO: we might move the architecture specific VMA regions to a
+// separate function arch_vma_init()
+int vma_init(void)
+{
+    int ret;
+
+    // add Kernel
+    ret = vma_add(PAGE_CEIL((size_t) &kernel_start),
+                  PAGE_FLOOR((size_t) &kernel_end),
+                  VMA_READ|VMA_WRITE|VMA_EXECUTE|VMA_CACHEABLE);
+    if (BUILTIN_EXPECT(ret, 0))
+        goto out;
+
+#ifdef CONFIG_VGA
+    // add VGA video memory
+    ret = vma_add(VIDEO_MEM_ADDR, VIDEO_MEM_ADDR + PAGE_SIZE, VMA_READ|VMA_WRITE);
+    if (BUILTIN_EXPECT(ret, 0))
+        goto out;
+#endif
+
+    // add Multiboot structures and modules
+    if (mb_info) {
+        ret = vma_add(PAGE_CEIL((size_t) mb_info),
+                      PAGE_FLOOR((size_t) mb_info + sizeof(multiboot_info_t)),
+                      VMA_READ|VMA_CACHEABLE);
+        if (BUILTIN_EXPECT(ret, 0))
+            goto out;
+
+        if (mb_info->flags & MULTIBOOT_INFO_MODS) {
+            multiboot_module_t* mmodule = (multiboot_module_t*) ((size_t) mb_info->mods_addr);
+
+            ret = vma_add(PAGE_CEIL((size_t) mb_info->mods_addr),
+                          PAGE_FLOOR((size_t) mb_info->mods_addr + mb_info->mods_count*sizeof(multiboot_module_t)),
+                          VMA_READ|VMA_CACHEABLE);
+
+            // TODO: Why do we get error code -22 (-EINVAL)?
+            ret = 0; // TODO: Remove workaround
+
+            int i;
+            for(i=0; i<mb_info->mods_count; i++) {
+                ret = vma_add(PAGE_CEIL(mmodule[i].mod_start),
+                              PAGE_FLOOR(mmodule[i].mod_end),
+                              VMA_READ|VMA_WRITE|VMA_CACHEABLE);
+                if (BUILTIN_EXPECT(ret, 0))
+                    goto out;
+            }
+        }
+    }
+
+out:
+    return ret;
+}
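vma_init() rounds the kernel's start with PAGE_CEIL() and its end with PAGE_FLOOR(). That covers the whole image only if PAGE_CEIL() aligns an address downwards and PAGE_FLOOR() upwards, i.e. the names are used the opposite way round to their colloquial meaning. The definitions below are an assumption given for reference (they are consistent with PAGE_FLOOR() being applied to sizes in palloc()), not part of this patch:

    #define PAGE_CEIL(addr)   ( (addr)                  & PAGE_MASK)  /* align down */
    #define PAGE_FLOOR(addr)  (((addr) + PAGE_SIZE - 1) & PAGE_MASK)  /* align up   */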
+size_t vma_alloc(size_t size, uint32_t flags)
+{
+    task_t* task = current_task;
+    spinlock_t* lock;
+    vma_t** list;
+
+    //kprintf("vma_alloc: size = %#lx, flags = %#x\n", size, flags);
+
+    size_t base, limit; // boundaries for search
+    size_t start, end;  // boundaries of free gaps
+
+    if (flags & VMA_USER) {
+        base = VMA_USER_MIN;
+        limit = VMA_USER_MAX;
+        list = &task->vma_list;
+        lock = &task->vma_lock;
+    }
+    else {
+        base = VMA_KERN_MIN;
+        limit = VMA_KERN_MAX;
+        list = &vma_list;
+        lock = &vma_lock;
+    }
+
+    spinlock_lock(lock);
+
+    // first fit search for a free memory area
+    vma_t* pred = NULL;  // VMA before the current gap
+    vma_t* succ = *list; // VMA after the current gap
+    do {
+        start = (pred) ? pred->end : base;
+        end = (succ) ? succ->start : limit;
+
+        if (start + size < end && start >= base && start + size < limit)
+            goto found; // we found a gap which is large enough and within the bounds
+
+        pred = succ;
+        succ = (pred) ? pred->next : NULL;
+    } while (pred || succ);
+
+fail:
+    spinlock_unlock(lock); // no free gap was found
+
+    return 0;
+
+found:
+    if (pred && pred->flags == flags)
+        pred->end = start + size; // resize VMA
+    else {
+        // insert new VMA
+        vma_t* new = kmalloc(sizeof(vma_t));
+        if (BUILTIN_EXPECT(!new, 0))
+            goto fail;
+
+        new->start = start;
+        new->end = start + size;
+        new->flags = flags;
+        new->next = succ;
+        new->prev = pred;
+
+        if (succ)
+            succ->prev = new;
+        if (pred)
+            pred->next = new;
+        else
+            *list = new;
+    }
+
+    spinlock_unlock(lock);
+
+    return start;
+}
+
+int vma_free(size_t start, size_t end)
+{
+    task_t* task = current_task;
+    spinlock_t* lock;
+    vma_t* vma;
+    vma_t** list = NULL;
+
+    //kprintf("vma_free: start = %#lx, end = %#lx\n", start, end);
+
+    if (BUILTIN_EXPECT(start >= end, 0))
+        return -EINVAL;
+
+    if (end < VMA_KERN_MAX) {
+        lock = &vma_lock;
+        list = &vma_list;
+    }
+    else if (start >= VMA_KERN_MAX) {
+        lock = &task->vma_lock;
+        list = &task->vma_list;
+    }
+
+    if (BUILTIN_EXPECT(!list || !*list, 0))
+        return -EINVAL;
+
+    spinlock_lock(lock);
+
+    // search vma
+    vma = *list;
+    while (vma) {
+        if (start >= vma->start && end <= vma->end)
+            break;
+        vma = vma->next;
+    }
+
+    if (BUILTIN_EXPECT(!vma, 0)) {
+        spinlock_unlock(lock);
+        return -EINVAL;
+    }
+
+    // free/resize vma
+    if (start == vma->start && end == vma->end) {
+        if (vma == *list)
+            *list = vma->next; // update list head
+        if (vma->prev)
+            vma->prev->next = vma->next;
+        if (vma->next)
+            vma->next->prev = vma->prev;
+        kfree(vma);
+    }
+    else if (start == vma->start)
+        vma->start = end;
+    else if (end == vma->end)
+        vma->end = start;
+    else {
+        vma_t* new = kmalloc(sizeof(vma_t));
+        if (BUILTIN_EXPECT(!new, 0)) {
+            spinlock_unlock(lock);
+            return -ENOMEM;
+        }
+
+        new->end = vma->end;
+        vma->end = start;
+        new->start = end;
+        new->flags = vma->flags; // the split keeps the old flags
+
+        new->next = vma->next;
+        vma->next = new;
+        new->prev = vma;
+        if (new->next)
+            new->next->prev = new; // keep the back links consistent
+    }
+
+    spinlock_unlock(lock);
+
+    return 0;
+}
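vma_free() has to handle four shapes of request: the freed range matches the VMA exactly (unlink and kfree it), it is a prefix or a suffix (shrink the VMA from one side), or it lies strictly inside (split one VMA into two, the only case that can fail because it must allocate a new node). As a picture:

    /* before:  [ vma->start ........................... vma->end ]
     * free:                 [ start ......... end ]
     * after:   [ vma->start .. start ]       [ end .. vma->end ]   (new node)
     */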
+int vma_add(size_t start, size_t end, uint32_t flags)
+{
+    task_t* task = current_task;
+    spinlock_t* lock;
+    vma_t** list;
+
+    if (BUILTIN_EXPECT(start >= end, 0))
+        return -EINVAL;
+
+    if (flags & VMA_USER) {
+        list = &task->vma_list;
+        lock = &task->vma_lock;
+
+        // check if address is in userspace
+        if (BUILTIN_EXPECT(start < VMA_KERN_MAX, 0))
+            return -EINVAL;
+    }
+    else {
+        list = &vma_list;
+        lock = &vma_lock;
+
+        // check if address is in kernelspace
+        if (BUILTIN_EXPECT(end >= VMA_KERN_MAX, 0))
+            return -EINVAL;
+    }
+
+    //kprintf("vma_add: start = %#lx, end = %#lx, flags = %#x\n", start, end, flags);
+
+    spinlock_lock(lock);
+
+    // search gap
+    vma_t* pred = NULL;
+    vma_t* succ = *list;
+
+    while (pred || succ) {
+        if ((!pred || pred->end <= start) &&
+            (!succ || succ->start >= end))
+            break;
+
+        pred = succ;
+        succ = (succ) ? succ->next : NULL;
+    }
+
+    if (BUILTIN_EXPECT(*list && !pred && !succ, 0)) {
+        spinlock_unlock(lock);
+        return -EINVAL;
+    }
+
+    // insert new VMA
+    vma_t* new = kmalloc(sizeof(vma_t));
+    if (BUILTIN_EXPECT(!new, 0)) {
+        spinlock_unlock(lock);
+        return -ENOMEM;
+    }
+
+    new->start = start;
+    new->end = end;
+    new->flags = flags;
+    new->next = succ;
+    new->prev = pred;
+
+    if (succ)
+        succ->prev = new;
+    if (pred)
+        pred->next = new;
+    else
+        *list = new;
+
+    spinlock_unlock(lock);
+
+    return 0;
+}
+
+int copy_vma_list(task_t* src, task_t* dest)
+{
+    spinlock_init(&dest->vma_lock);
+
+    spinlock_lock(&src->vma_lock);
+    spinlock_lock(&dest->vma_lock);
+
+    vma_t* last = NULL;
+    vma_t* old;
+    for (old=src->vma_list; old; old=old->next) {
+        vma_t* new = kmalloc(sizeof(vma_t));
+        if (BUILTIN_EXPECT(!new, 0)) {
+            spinlock_unlock(&dest->vma_lock);
+            spinlock_unlock(&src->vma_lock);
+            return -ENOMEM;
+        }
+
+        new->start = old->start;
+        new->end = old->end;
+        new->flags = old->flags;
+        new->prev = last;
+        new->next = NULL; // stays NULL for the tail; relinked below otherwise
+
+        if (last)
+            last->next = new;
+        else
+            dest->vma_list = new;
+
+        last = new;
+    }
+
+    spinlock_unlock(&dest->vma_lock);
+    spinlock_unlock(&src->vma_lock);
+
+    return 0;
+}
+
+int drop_vma_list(task_t *task)
+{
+    vma_t* vma;
+
+    spinlock_lock(&task->vma_lock);
+
+    while ((vma = task->vma_list)) {
+        task->vma_list = vma->next;
+        kfree(vma);
+    }
+
+    spinlock_unlock(&task->vma_lock);
+
+    return 0;
+}
+
+void vma_dump(void)
+{
+    void print_vma(vma_t *vma) {
+        while (vma) {
+            kprintf("0x%lx - 0x%lx: size=%x, flags=%c%c%c\n", vma->start, vma->end, vma->end - vma->start,
+                (vma->flags & VMA_READ) ? 'r' : '-',
+                (vma->flags & VMA_WRITE) ? 'w' : '-',
+                (vma->flags & VMA_EXECUTE) ? 'x' : '-');
+            vma = vma->next;
+        }
+    }
+
+    task_t* task = current_task;
+
+    kputs("Kernelspace VMAs:\n");
+    spinlock_lock(&vma_lock);
+    print_vma(vma_list);
+    spinlock_unlock(&vma_lock);
+
+    kputs("Userspace VMAs:\n");
+    spinlock_lock(&task->vma_lock);
+    print_vma(task->vma_list);
+    spinlock_unlock(&task->vma_lock);
+}
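One last portability remark: print_vma() inside vma_dump() is a nested function, a GNU C extension that eduOS can rely on since it is built with gcc. A portable variant would simply hoist it to a file-local helper with the same body:

    static void print_vma(vma_t* vma)
    {
        while (vma) {
            kprintf("0x%lx - 0x%lx: size=%x, flags=%c%c%c\n",
                    vma->start, vma->end, vma->end - vma->start,
                    (vma->flags & VMA_READ)    ? 'r' : '-',
                    (vma->flags & VMA_WRITE)   ? 'w' : '-',
                    (vma->flags & VMA_EXECUTE) ? 'x' : '-');
            vma = vma->next;
        }
    }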