/* * Copyright 2012 Stefan Lankes, Chair for Operating Systems, * RWTH Aachen University * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * This file is part of MetalSVM. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef CONFIG_ROCKCREEK #include #include #include #include #endif /* * Virtual Memory Layout of the standard configuration * (1 GB kernel space) * * 0x00000000 - 0x000FFFFF: reserved for IO devices (1MB) * 0x00100000 - 0x0DEADFFF: Kernel (size depends on the configuration) (221MB) * 0x0DEAE000 - 0x3FFFFFFF: Kernel heap * */ /* * Note that linker symbols are not variables, they have no memory allocated for * maintaining a value, rather their address is their value. 
*/ extern const void kernel_start; extern const void kernel_end; // boot task's page directory and page directory lock extern page_dir_t boot_pgd; static spinlock_t kslock = SPINLOCK_INIT; static int paging_enabled = 0; page_dir_t* get_boot_pgd(void) { return &boot_pgd; } int create_pgd(task_t* task, int copy) { // Currently, we support only kernel tasks // => all tasks are able to use the same pgd if (BUILTIN_EXPECT(!paging_enabled, 0)) return -EINVAL; task->pgd = get_boot_pgd(); return 0; } /* * drops all page frames and the PGD of a user task */ int drop_pgd(void) { #if 0 page_dir_t* pgd = per_core(current_task)->pgd; size_t phy_pgd = virt_to_phys((size_t) pgd); task_t* task = per_core(current_task); uint32_t i; if (BUILTIN_EXPECT(pgd == &boot_pgd, 0)) return -EINVAL; spinlock_lock(&task->pgd_lock); for(i=0; i<1024; i++) { if (pgd->entries[i] & PG_USER) { put_page(pgd->entries[i] & PAGE_MASK); pgd->entries[i] = 0; } } // freeing the page directory put_page(phy_pgd); task->pgd = NULL; spinlock_unlock(&task->pgd_lock); #endif return 0; } size_t virt_to_phys(size_t viraddr) { task_t* task = per_core(current_task); uint16_t idx_pd4 = (viraddr >> 39) & 0x1FF; uint16_t idx_dirp = (viraddr >> 30) & 0x1FF; uint16_t idx_dir = (viraddr >> 21) & 0x1FF; uint16_t idx_table = (viraddr >> 12) & 0x1FF; page_table_t* pgt; size_t ret = 0; if (!paging_enabled) return viraddr; if (BUILTIN_EXPECT(!task || !task->pgd, 0)) return 0; spinlock_irqsave_lock(&task->pgd_lock); // Currently, we allocate pages only in kernel space. 
// => physical address of the page table is identical of the virtual address pgt = (page_table_t*) (task->pgd->entries[idx_pd4] & PAGE_MASK); if (!pgt) goto out; pgt = (page_table_t*) (pgt->entries[idx_dirp] & PAGE_MASK); if (!pgt) goto out; pgt = (page_table_t*) (pgt->entries[idx_dir] & PAGE_MASK); if (!pgt) goto out; ret = (size_t) (pgt->entries[idx_table] & PAGE_MASK); if (!ret) goto out; ret = ret | (viraddr & 0xFFF); // add page offset out: //kprintf("vir %p to phy %p\n", viraddr, ret); spinlock_irqsave_unlock(&task->pgd_lock); return ret; } size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flags) { task_t* task = per_core(current_task); page_table_t* pgt; size_t i, ret; if (BUILTIN_EXPECT(!task || !task->pgd, 0)) return 0; if (BUILTIN_EXPECT(!paging_enabled && (viraddr != phyaddr), 0)) return 0; if (flags & MAP_KERNEL_SPACE) spinlock_lock(&kslock); else spinlock_irqsave_lock(&task->pgd_lock); if (!viraddr) { viraddr = vm_alloc(npages, flags); if (BUILTIN_EXPECT(!viraddr, 0)) { kputs("map_region: found no valid virtual address\n"); ret = 0; goto out; } } ret = viraddr; for(i=0; i> 39) & 0x1FF; uint16_t idx_dirp = (viraddr >> 30) & 0x1FF; uint16_t idx_dir = (viraddr >> 21) & 0x1FF; uint16_t idx_table = (viraddr >> 12) & 0x1FF; pgt = (page_table_t*) (task->pgd->entries[idx_pd4] & PAGE_MASK); if (!pgt) { kputs("map_region: out of memory\n"); ret = 0; goto out; } pgt = (page_table_t*) (pgt->entries[idx_dirp] & PAGE_MASK); if (!pgt) { kputs("map_region: out of memory\n"); ret = 0; goto out; } pgt = (page_table_t*) (pgt->entries[idx_dir] & PAGE_MASK); if (!pgt) { kputs("map_region: out of memory\n"); ret = 0; goto out; } /* convert physical address to virtual */ // Currently, we allocate pages only in kernel space. 
// => physical address of the page table is identical of the virtual address //if (paging_enabled) // pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index*PAGE_SIZE) & PAGE_MASK); if (pgt->entries[idx_table] && !(flags & MAP_REMAP)) { kprintf("0x%x is already mapped\n", viraddr); ret = 0; goto out; } if (flags & MAP_USER_SPACE) pgt->entries[idx_table] = USER_PAGE|(phyaddr & PAGE_MASK); else pgt->entries[idx_table] = KERN_PAGE|(phyaddr & PAGE_MASK); if (flags & MAP_NO_CACHE) pgt->entries[idx_table] |= PG_PCD; if (flags & MAP_NO_ACCESS) pgt->entries[idx_table] &= ~PG_PRESENT; if (flags & MAP_WT) pgt->entries[idx_table] |= PG_PWT; if (flags & MAP_USER_SPACE) atomic_int32_inc(&task->user_usage); tlb_flush_one_page(viraddr); } out: if (flags & MAP_KERNEL_SPACE) spinlock_unlock(&kslock); else spinlock_irqsave_unlock(&task->pgd_lock); return ret; } int change_page_permissions(size_t start, size_t end, uint32_t flags) { #if 0 uint32_t index1, index2, newflags; size_t viraddr = start & PAGE_MASK; size_t phyaddr; page_table_t* pgt; page_dir_t* pgd; task_t* task = per_core(current_task); if (BUILTIN_EXPECT(!paging_enabled, 0)) return -EINVAL; pgd = per_core(current_task)->pgd; if (BUILTIN_EXPECT(!pgd, 0)) return -EINVAL; spinlock_lock(&task->pgd_lock); while (viraddr < end) { index1 = viraddr >> 22; index2 = (viraddr >> 12) & 0x3FF; while ((viraddr < end) && (index2 < 1024)) { pgt = (page_table_t*) (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK); if (pgt && pgt->entries[index2]) { phyaddr = pgt->entries[index2] & PAGE_MASK; newflags = pgt->entries[index2] & 0xFFF; // get old flags if (!(newflags & PG_SVM_INIT)) { if ((newflags & PG_SVM_STRONG) && !(newflags & PG_PRESENT) && (flags & (VMA_READ|VMA_WRITE) && !(flags & VMA_NOACCESS))) newflags |= PG_PRESENT; else if ((newflags & PG_SVM_STRONG) && (newflags & PG_PRESENT) && (flags & VMA_NOACCESS)) newflags &= ~PG_PRESENT; } // update flags if (!(flags & VMA_WRITE)) { newflags &= ~PG_RW; 
#ifdef CONFIG_ROCKCREEK if (newflags & (PG_SVM_STRONG|PG_SVM_LAZYRELEASE)) newflags &= ~PG_MPE; #endif } else { newflags |= PG_RW; #ifdef CONFIG_ROCKCREEK if (newflags & (PG_SVM_STRONG|PG_SVM_LAZYRELEASE)) newflags |= PG_MPE; #endif } pgt->entries[index2] = (newflags & 0xFFF) | (phyaddr & PAGE_MASK); tlb_flush_one_page(viraddr); } index2++; viraddr += PAGE_SIZE; } } spinlock_unlock(&task->pgd_lock); #endif return -EINVAL; } /* * Use the first fit algorithm to find a valid address range * * TODO: O(n) => bad performance, we need a better approach */ size_t vm_alloc(uint32_t npages, uint32_t flags) { task_t* task = per_core(current_task); size_t viraddr, i, j, ret = 0; size_t start, end; page_table_t* pgt; if (BUILTIN_EXPECT(!task || !task->pgd || !paging_enabled, 0)) return 0; if (flags & MAP_KERNEL_SPACE) { start = (((size_t) &kernel_end) + 10*PAGE_SIZE) & PAGE_MASK; end = (KERNEL_SPACE - PAGE_SIZE) & PAGE_MASK; } else { start = KERNEL_SPACE & PAGE_MASK; end = PAGE_MASK; } if (BUILTIN_EXPECT(!npages, 0)) return 0; if (flags & MAP_KERNEL_SPACE) spinlock_lock(&kslock); else spinlock_irqsave_lock(&task->pgd_lock); viraddr = i = start; j = 0; do { uint16_t idx_pd4 = (viraddr >> 39) & 0x1FF; uint16_t idx_dirp = (viraddr >> 30) & 0x1FF; uint16_t idx_dir = (viraddr >> 21) & 0x1FF; uint16_t idx_table = (viraddr >> 12) & 0x1FF; // Currently, we allocate pages only in kernel space. 
// => physical address of the page table is identical of the virtual address pgt = (page_table_t*) (task->pgd->entries[idx_pd4] & PAGE_MASK); if (!pgt) { i += (size_t)PGT_ENTRIES*PGT_ENTRIES*PGT_ENTRIES*PAGE_SIZE; j += PGT_ENTRIES*PGT_ENTRIES*PGT_ENTRIES; continue; } pgt = (page_table_t*) (pgt->entries[idx_dirp] & PAGE_MASK); if (!pgt) { i += PGT_ENTRIES*PGT_ENTRIES*PAGE_SIZE; j += PGT_ENTRIES*PGT_ENTRIES; continue; } pgt = (page_table_t*) (pgt->entries[idx_dir] & PAGE_MASK); if (!pgt) { i += PGT_ENTRIES*PAGE_SIZE; j += PGT_ENTRIES; continue; } if (!(pgt->entries[idx_table])) { i += PAGE_SIZE; j++; } else { // restart search j = 0; viraddr = i + PAGE_SIZE; i = i + PAGE_SIZE; } } while((j < npages) && (i<=end)); if ((j >= npages) && (viraddr < end)) ret = viraddr; if (flags & MAP_KERNEL_SPACE) spinlock_unlock(&kslock); else spinlock_irqsave_unlock(&task->pgd_lock); return ret; } int unmap_region(size_t viraddr, uint32_t npages) { task_t* task = per_core(current_task); page_table_t* pgt; size_t i; uint16_t idx_pd4, idx_dirp; uint16_t idx_dir, idx_table; if (BUILTIN_EXPECT(!task || !task->pgd || !paging_enabled, 0)) return -EINVAL; if (viraddr <= KERNEL_SPACE) spinlock_lock(&kslock); else spinlock_irqsave_lock(&task->pgd_lock); i = 0; while(i> 39) & 0x1FF; idx_dirp = (viraddr >> 30) & 0x1FF; idx_dir = (viraddr >> 21) & 0x1FF; idx_table = (viraddr >> 12) & 0x1FF; // Currently, we allocate pages only in kernel space. 
// => physical address of the page table is identical of the virtual address pgt = (page_table_t*) (task->pgd->entries[idx_pd4] & PAGE_MASK); if (!pgt) { viraddr += (size_t) PGT_ENTRIES*PGT_ENTRIES*PGT_ENTRIES*PAGE_SIZE; i += PGT_ENTRIES*PGT_ENTRIES*PGT_ENTRIES; continue; } pgt = (page_table_t*) (pgt->entries[idx_dirp] & PAGE_MASK); if (!pgt) { viraddr += PGT_ENTRIES*PGT_ENTRIES*PAGE_SIZE; i += PGT_ENTRIES*PGT_ENTRIES; continue; } pgt = (page_table_t*) (pgt->entries[idx_dir] & PAGE_MASK); if (!pgt) { viraddr += PGT_ENTRIES*PAGE_SIZE; i += PGT_ENTRIES; continue; } if (pgt->entries[idx_table]) pgt->entries[idx_table] &= ~PG_PRESENT; viraddr +=PAGE_SIZE; i++; if (viraddr > KERNEL_SPACE) atomic_int32_dec(&task->user_usage); tlb_flush_one_page(viraddr); } if (viraddr <= KERNEL_SPACE) spinlock_unlock(&kslock); else spinlock_irqsave_unlock(&task->pgd_lock); return 0; } int vm_free(size_t viraddr, uint32_t npages) { task_t* task = per_core(current_task); page_table_t* pgt; size_t i; uint16_t idx_pd4, idx_dirp; uint16_t idx_dir, idx_table; if (BUILTIN_EXPECT(!task || !task->pgd || !paging_enabled, 0)) return -EINVAL; if (viraddr <= KERNEL_SPACE) spinlock_lock(&kslock); else spinlock_irqsave_lock(&task->pgd_lock); i = 0; while(i> 39) & 0x1FF; idx_dirp = (viraddr >> 30) & 0x1FF; idx_dir = (viraddr >> 21) & 0x1FF; idx_table = (viraddr >> 12) & 0x1FF; // Currently, we allocate pages only in kernel space. 
// => physical address of the page table is identical of the virtual address pgt = (page_table_t*) (task->pgd->entries[idx_pd4] & PAGE_MASK); if (!pgt) { viraddr += (size_t) PGT_ENTRIES*PGT_ENTRIES*PGT_ENTRIES*PAGE_SIZE; i += PGT_ENTRIES*PGT_ENTRIES*PGT_ENTRIES; continue; } pgt = (page_table_t*) (pgt->entries[idx_dirp] & PAGE_MASK); if (!pgt) { viraddr += PGT_ENTRIES*PGT_ENTRIES*PAGE_SIZE; i += PGT_ENTRIES*PGT_ENTRIES; continue; } pgt = (page_table_t*) (pgt->entries[idx_dir] & PAGE_MASK); if (!pgt) { viraddr += PGT_ENTRIES*PAGE_SIZE; i += PGT_ENTRIES; continue; } if (pgt->entries[idx_table]) pgt->entries[idx_table] = 0; viraddr +=PAGE_SIZE; i++; tlb_flush_one_page(viraddr); } if (viraddr <= KERNEL_SPACE) spinlock_unlock(&kslock); else spinlock_irqsave_unlock(&task->pgd_lock); return 0; } static void pagefault_handler(struct state *s) { task_t* task = per_core(current_task); //page_dir_t* pgd = task->pgd; //page_table_t* pgt = NULL; size_t viraddr = read_cr2(); //size_t phyaddr; #if 0 if ((viraddr >= task->start_heap) && (viraddr <= task->end_heap) && (viraddr > KERNEL_SPACE)) { viraddr = viraddr & PAGE_MASK; phyaddr = get_page(); if (BUILTIN_EXPECT(!phyaddr, 0)) goto default_handler; if (map_region(viraddr, phyaddr, 1, MAP_USER_SPACE) == viraddr) { memset((void*) viraddr, 0x00, PAGE_SIZE); return; } kprintf("Could not map 0x%x at 0x%x\n", phyaddr, viraddr); put_page(phyaddr); } #endif //default_handler: kprintf("PAGE FAULT: Task %u got page fault at %p (irq %llu, cs:rip 0x%llx:0x%llx)\n", task->id, viraddr, s->int_no, s->cs, s->rip); kprintf("Register state: rax = 0x%llx, rbx = 0x%llx, rcx = 0x%llx, rdx = 0x%llx, rdi = 0x%llx, rsi = 0x%llx, rbp = 0x%llx, rsp = 0x%llx\n", s->rax, s->rbx, s->rcx, s->rdx, s->rdi, s->rsi, s->rbp, s->rsp); while(1); irq_enable(); abort(); } int arch_paging_init(void) { uint32_t i, npages; // uninstall default handler and install our own irq_uninstall_handler(14); irq_install_handler(14, pagefault_handler); // kernel is already maped 
into the kernel space (see entry64.asm) // this includes .data, .bss, .text, video memory and the multiboot structure #if MAX_CORES > 1 // Reserve page for smp boot code if (!map_region(SMP_SETUP_ADDR, SMP_SETUP_ADDR, 1, MAP_KERNEL_SPACE|MAP_NO_CACHE)) { kputs("could not reserve page for smp boot code\n"); return -ENOMEM; } #endif #ifdef CONFIG_MULTIBOOT #if 0 /* * Map reserved memory regions into the kernel space */ if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MEM_MAP)) { multiboot_memory_map_t* mmap = (multiboot_memory_map_t*) mb_info->mmap_addr; multiboot_memory_map_t* mmap_end = (void*) ((size_t) mb_info->mmap_addr + mb_info->mmap_length); while (mmap < mmap_end) { if (mmap->type != MULTIBOOT_MEMORY_AVAILABLE) { npages = mmap->len / PAGE_SIZE; if ((mmap->addr+mmap->len) % PAGE_SIZE) npages++; map_region(mmap->addr, mmap->addr, npages, MAP_KERNEL_SPACE|MAP_NO_CACHE); } mmap++; } } #endif /* * Modules like the init ram disk are already loaded. * Therefore, we map these moduels into the kernel space. 
*/ if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MODS)) { multiboot_module_t* mmodule = (multiboot_module_t*) ((size_t) mb_info->mods_addr); npages = mb_info->mods_count * sizeof(multiboot_module_t) >> PAGE_SHIFT; if (mb_info->mods_count * sizeof(multiboot_module_t) & (PAGE_SIZE-1)) npages++; map_region((size_t) (mb_info->mods_addr), (size_t) (mb_info->mods_addr), npages, MAP_REMAP|MAP_KERNEL_SPACE); for(i=0; imods_count; i++, mmodule++) { // map physical address to the same virtual address npages = (mmodule->mod_end - mmodule->mod_start) >> PAGE_SHIFT; if (mmodule->mod_end & (PAGE_SIZE-1)) npages++; kprintf("Map module %s at 0x%x (%u pages)\n", (char*) mmodule->cmdline, mmodule->mod_start, npages); map_region((size_t) (mmodule->mod_start), (size_t) (mmodule->mod_start), npages, MAP_REMAP|MAP_KERNEL_SPACE); } } #endif /* signalize that we are able to use paging */ paging_enabled = 1; /* * we turned on paging * => now, we are able to register our task */ register_task(); // APIC registers into the kernel address space map_apic(); return 0; }