metalsvm/arch/x86/mm/page64.c

646 lines
16 KiB
C
Raw Permalink Normal View History

/*
* Copyright 2012 Stefan Lankes, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
#include <metalsvm/stddef.h>
#include <metalsvm/stdio.h>
#include <metalsvm/stdlib.h>
#include <metalsvm/mmu.h>
#include <metalsvm/vma.h>
#include <metalsvm/string.h>
#include <metalsvm/page.h>
#include <metalsvm/spinlock.h>
#include <metalsvm/processor.h>
#include <metalsvm/tasks.h>
#include <metalsvm/errno.h>
#include <asm/irq.h>
#include <asm/multiboot.h>
#include <asm/apic.h>
/*
* Virtual Memory Layout of the standard configuration
* (1 GB kernel space)
*
* 0x000000000000 - 0x0000000FFFFF: reserved for IO devices (1MB)
* 0x000000100000 - 0x00000DEADFFF: Kernel (size depends on the configuration) (221MB)
* 0x00000DEAE000 - 0x00003FFFFFFF: Kernel heap
* 0xFF8000000000 - 0xFFFFFFFFFFFF: Paging structures are mapped in this region (512GB)
*/
/*
* Note that linker symbols are not variables, they have no memory allocated for
* maintaining a value, rather their address is their value.
*/
// Linker-provided markers; only their addresses are meaningful.
// vm_alloc() derives the start of the kernel-space search range from &kernel_end.
extern const void kernel_start;
extern const void kernel_end;
// Top-level page map (PML4) of the boot task, defined outside this file.
// get_boot_page_map() returns its address and create_page_map() assigns it
// to every task.
extern page_map_t boot_pml4;
// Lock taken by map_region()/vm_alloc() for MAP_KERNEL_SPACE requests and by
// unmap_region()/vm_free() for addresses up to KERNEL_SPACE.
static spinlock_t kslock = SPINLOCK_INIT;
// Set to 1 at the end of arch_paging_init(); most functions in this file
// refuse to work (or fall back to identity behaviour) while it is 0.
static int paging_enabled = 0;
/*static page_map_t boot_pml4 = {{[0 ... MAP_ENTRIES-1] = 0}};
static page_map_t boot_pdpt = {{[0 ... MAP_ENTRIES-1] = 0}};
static page_map_t boot_pgd = {{[0 ... MAP_ENTRIES-1] = 0}};
static page_map_t boot_pgt = {{[0 ... MAP_ENTRIES-1] = 0}};*/
page_map_t* get_boot_page_map(void)
{
return &boot_pml4;
}
/**
 * Attach a page map to a task.
 *
 * TODO: Currently, we support only kernel tasks
 * => all tasks are able to use the same page map (the boot PML4)
 *
 * @param task task that receives the page map
 * @param copy currently ignored
 * @return 0 on success, -EINVAL if paging has not been enabled yet
 */
int create_page_map(task_t* task, int copy)
{
	(void) copy; // not used as long as every task shares the boot page map

	if (BUILTIN_EXPECT(!paging_enabled, 0))
		return -EINVAL;

	task->page_map = get_boot_page_map();

	return 0;
}
/**
 * Release the page map of the current task.
 *
 * The implementation is disabled (#if 0): the function currently does
 * nothing and always returns 0. The disabled code still refers to boot_pgd
 * and iterates over 1024 entries, i.e. it has not been adapted to the
 * four-level layout used by the rest of this file.
 *
 * @return always 0
 */
int drop_page_map(void)
{
#if 0
	page_map_t* pgd = per_core(current_task)->page_map;
	size_t phy_pgd = virt_to_phys((size_t) pgd);
	task_t* task = per_core(current_task);
	uint32_t i;

	if (BUILTIN_EXPECT(pgd == &boot_pgd, 0))
		return -EINVAL;

	spinlock_lock(&task->page_lock);

	for(i=0; i<1024; i++) {
		if (pgd->entries[i] & PG_USER) {
			put_page(pgd->entries[i] & PAGE_MASK);
			pgd->entries[i] = 0;
		}
	}

	// freeing the page directory
	put_page(phy_pgd);

	task->page_map = NULL;

	spinlock_unlock(&task->page_lock);
#endif

	return 0;
}
2011-02-24 10:15:58 +01:00
size_t virt_to_phys(size_t viraddr)
{
2011-02-24 10:15:58 +01:00
task_t* task = per_core(current_task);
page_map_t* pdpt, * pgd , * pgt;
uint16_t index_pml4 = (viraddr >> 39) & 0x1FF;
uint16_t index_pdpt = (viraddr >> 30) & 0x1FF;
uint16_t index_pgd = (viraddr >> 21) & 0x1FF;
uint16_t index_pgt = (viraddr >> 12) & 0x1FF;
size_t ret = 0;
2011-02-18 21:20:15 +01:00
if (!paging_enabled)
return viraddr;
if (BUILTIN_EXPECT(!task || !task->page_map, 0))
return 0;
spinlock_irqsave_lock(&task->page_lock);
// TODO: Currently, we allocate pages only in kernel space.
// => physical address of the page table is identical of the virtual address
pdpt = (page_map_t*) (task->page_map->entries[index_pml4] & PAGE_MASK);
if (!pdpt)
goto out;
pgd = (page_map_t*) (pdpt->entries[index_pdpt] & PAGE_MASK);
if (!pgd)
goto out;
pgt = (page_map_t*) (pgd->entries[index_pgd] & PAGE_MASK);
if (!pgt)
goto out;
ret = (size_t) (pgt->entries[index_pgt] & PAGE_MASK);
if (!ret)
goto out;
ret = ret | (viraddr & 0xFFF); // add page offset
out:
//kprintf("vir %p to phy %p\n", viraddr, ret);
spinlock_irqsave_unlock(&task->page_lock);
return ret;
}
2011-02-24 10:15:58 +01:00
size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flags)
{
2011-02-24 10:15:58 +01:00
task_t* task = per_core(current_task);
page_map_t* pdpt, * pgd, * pgt;
uint16_t index_pml4, index_pdpt;
uint16_t index_pgd, index_pgt;
size_t i, ret;
if (BUILTIN_EXPECT(!task || !task->page_map, 0))
return 0;
if (BUILTIN_EXPECT(!paging_enabled && (viraddr != phyaddr), 0))
return 0;
if (flags & MAP_KERNEL_SPACE)
2012-09-10 15:37:45 +02:00
spinlock_lock(&kslock);
else
spinlock_irqsave_lock(&task->page_lock);
if (!viraddr) {
2011-02-24 10:15:58 +01:00
viraddr = vm_alloc(npages, flags);
if (BUILTIN_EXPECT(!viraddr, 0)) {
kputs("map_region: found no valid virtual address\n");
2012-09-10 15:37:45 +02:00
ret = 0;
goto out;
}
}
ret = viraddr;
for(i=0; i<npages; i++, viraddr+=PAGE_SIZE, phyaddr+=PAGE_SIZE) {
index_pml4 = (viraddr >> 39) & 0x1FF;
index_pdpt = (viraddr >> 30) & 0x1FF;
index_pgd = (viraddr >> 21) & 0x1FF;
index_pgt = (viraddr >> 12) & 0x1FF;
pdpt = (page_map_t*) (task->page_map->entries[index_pml4] & PAGE_MASK);
if (!pgt) {
kputs("map_region: out of memory\n");
2012-09-10 15:37:45 +02:00
ret = 0;
goto out;
}
pgd = (page_map_t*) (pdpt->entries[index_pdpt] & PAGE_MASK);
if (!pgd) {
kputs("map_region: out of memory\n");
2012-09-10 15:37:45 +02:00
ret = 0;
goto out;
}
pgt = (page_map_t*) (pgd->entries[index_pgd] & PAGE_MASK);
if (!pgt) {
kputs("map_region: out of memory\n");
2012-09-10 15:37:45 +02:00
ret = 0;
goto out;
}
/* convert physical address to virtual */
// Currently, we allocate pages only in kernel space.
// => physical address of the page table is identical of the virtual address
//if (paging_enabled)
// pgt = (page_map_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index*PAGE_SIZE) & PAGE_MASK);
if (pgt->entries[index_pgt] && !(flags & MAP_REMAP)) {
2012-04-02 14:55:29 +02:00
kprintf("0x%x is already mapped\n", viraddr);
2012-09-10 15:37:45 +02:00
ret = 0;
goto out;
}
if (flags & MAP_USER_SPACE)
pgt->entries[index_pgt] = USER_PAGE|(phyaddr & PAGE_MASK);
else
pgt->entries[index_pgt] = KERN_PAGE|(phyaddr & PAGE_MASK);
if (flags & MAP_NO_CACHE)
pgt->entries[index_pgt] |= PG_PCD;
if (flags & MAP_NO_ACCESS)
pgt->entries[index_pgt] &= ~PG_PRESENT;
if (flags & MAP_WT)
pgt->entries[index_pgt] |= PG_PWT;
if (flags & MAP_USER_SPACE)
atomic_int32_inc(&task->user_usage);
2013-10-21 08:52:11 +02:00
if (flags & MAP_NO_EXECUTION)
pgt->entries[index_pgt] |= PG_XD;
tlb_flush_one_page(viraddr);
}
2012-09-10 15:37:45 +02:00
out:
if (flags & MAP_KERNEL_SPACE)
spinlock_unlock(&kslock);
else
spinlock_irqsave_unlock(&task->page_lock);
return ret;
}
/*
 * Change the access permissions of the pages in [start, end).
 *
 * Not implemented for this paging layout: the whole body is disabled with
 * #if 0, so the parameters are ignored and the function always returns
 * -EINVAL.
 *
 * The disabled code is kept for reference only. It indexes the tables with
 * (viraddr >> 22) and a 10-bit second index over 1024 entries, i.e. it is
 * written for a two-level layout rather than the four-level walk used by
 * the other functions in this file.
 */
int change_page_permissions(size_t start, size_t end, uint32_t flags)
{
#if 0
uint32_t index1, index2, newflags;
size_t viraddr = start & PAGE_MASK;
size_t phyaddr;
page_map_t* pgt;
page_map_t* pgd;
task_t* task = per_core(current_task);
if (BUILTIN_EXPECT(!paging_enabled, 0))
return -EINVAL;
pgd = per_core(current_task)->page_map;
if (BUILTIN_EXPECT(!pgd, 0))
return -EINVAL;
spinlock_lock(&task->page_lock);
while (viraddr < end)
{
index1 = viraddr >> 22;
index2 = (viraddr >> 12) & 0x3FF;
while ((viraddr < end) && (index2 < 1024)) {
pgt = (page_map_t*) (page_map_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
if (pgt && pgt->entries[index2]) {
phyaddr = pgt->entries[index2] & PAGE_MASK;
newflags = pgt->entries[index2] & 0xFFF; // get old flags
if (!(newflags & PG_SVM_INIT)) {
if ((newflags & PG_SVM_STRONG) && !(newflags & PG_PRESENT) && (flags & (VMA_READ|VMA_WRITE) && !(flags & VMA_NOACCESS)))
newflags |= PG_PRESENT;
else if ((newflags & PG_SVM_STRONG) && (newflags & PG_PRESENT) && (flags & VMA_NOACCESS))
newflags &= ~PG_PRESENT;
}
// update flags
if (!(flags & VMA_WRITE)) {
newflags &= ~PG_RW;
} else {
newflags |= PG_RW;
}
pgt->entries[index2] = (newflags & 0xFFF) | (phyaddr & PAGE_MASK);
tlb_flush_one_page(viraddr);
}
index2++;
viraddr += PAGE_SIZE;
}
}
spinlock_unlock(&task->page_lock);
#endif
// always taken: the implementation above is compiled out
return -EINVAL;
}
/*
* Use the first fit algorithm to find a valid address range
*
* TODO: O(n) => bad performance, we need a better approach
*/
2011-02-24 10:15:58 +01:00
size_t vm_alloc(uint32_t npages, uint32_t flags)
{
2011-02-24 10:15:58 +01:00
task_t* task = per_core(current_task);
size_t viraddr, i, j, ret = 0;
size_t start, end;
page_map_t* pdpt, * pgd, * pgt;
uint16_t index_pml4, index_pdpt;
uint16_t index_pgd, index_pgt;
if (BUILTIN_EXPECT(!task || !task->page_map || !paging_enabled, 0))
return 0;
if (flags & MAP_KERNEL_SPACE) {
2012-06-12 09:24:38 +02:00
start = (((size_t) &kernel_end) + 10*PAGE_SIZE) & PAGE_MASK;
end = (KERNEL_SPACE - PAGE_SIZE) & PAGE_MASK;
} else {
start = KERNEL_SPACE & PAGE_MASK;
end = PAGE_MASK;
}
if (BUILTIN_EXPECT(!npages, 0))
return 0;
2012-09-10 15:37:45 +02:00
if (flags & MAP_KERNEL_SPACE)
spinlock_lock(&kslock);
else
spinlock_irqsave_lock(&task->page_lock);
viraddr = i = start;
j = 0;
do {
index_pml4 = (viraddr >> 39) & 0x1FF;
index_pdpt = (viraddr >> 30) & 0x1FF;
index_pgd = (viraddr >> 21) & 0x1FF;
index_pgt = (viraddr >> 12) & 0x1FF;
// Currently, we allocate pages only in kernel space.
// => physical address of the page table is identical of the virtual address
pdpt = (page_map_t*) (task->page_map->entries[index_pml4] & PAGE_MASK);
if (!pdpt) {
i += (size_t)MAP_ENTRIES*MAP_ENTRIES*MAP_ENTRIES*PAGE_SIZE;
j += MAP_ENTRIES*MAP_ENTRIES*MAP_ENTRIES;
continue;
}
pgd = (page_map_t*) (pdpt->entries[index_pdpt] & PAGE_MASK);
if (!pgd) {
i += MAP_ENTRIES*MAP_ENTRIES*PAGE_SIZE;
j += MAP_ENTRIES*MAP_ENTRIES;
continue;
}
pgt = (page_map_t*) (pgd->entries[index_pgd] & PAGE_MASK);
if (!pgt) {
i += MAP_ENTRIES*PAGE_SIZE;
j += MAP_ENTRIES;
continue;
}
if (!(pgt->entries[index_pgt])) {
2012-06-12 09:24:38 +02:00
i += PAGE_SIZE;
j++;
} else {
// restart search
j = 0;
viraddr = i + PAGE_SIZE;
i = i + PAGE_SIZE;
}
} while((j < npages) && (i<=end));
if ((j >= npages) && (viraddr < end))
ret = viraddr;
2012-09-10 15:37:45 +02:00
if (flags & MAP_KERNEL_SPACE)
spinlock_unlock(&kslock);
else
spinlock_irqsave_unlock(&task->page_lock);
return ret;
}
int unmap_region(size_t viraddr, uint32_t npages)
{
task_t* task = per_core(current_task);
page_map_t* pdpt, * pgd, * pgt;
size_t i;
uint16_t index_pml4, index_pdpt;
uint16_t index_pgd, index_pgt;
if (BUILTIN_EXPECT(!task || !task->page_map || !paging_enabled, 0))
return -EINVAL;
if (viraddr <= KERNEL_SPACE)
2012-09-10 15:37:45 +02:00
spinlock_lock(&kslock);
else
spinlock_irqsave_lock(&task->page_lock);
i = 0;
while(i<npages)
{
index_pml4 = (viraddr >> 39) & 0x1FF;
index_pdpt = (viraddr >> 30) & 0x1FF;
index_pgd = (viraddr >> 21) & 0x1FF;
index_pgt = (viraddr >> 12) & 0x1FF;
// Currently, we allocate pages only in kernel space.
// => physical address of the page table is identical of the virtual address
pdpt = (page_map_t*) (task->page_map->entries[index_pml4] & PAGE_MASK);
if (!pdpt) {
viraddr += (size_t) MAP_ENTRIES*MAP_ENTRIES*MAP_ENTRIES*PAGE_SIZE;
i += MAP_ENTRIES*MAP_ENTRIES*MAP_ENTRIES;
continue;
}
pgd = (page_map_t*) (pdpt->entries[index_pdpt] & PAGE_MASK);
if (!pgd) {
viraddr += MAP_ENTRIES*MAP_ENTRIES*PAGE_SIZE;
i += MAP_ENTRIES*MAP_ENTRIES;
continue;
}
pgt = (page_map_t*) (pgd->entries[index_pgd] & PAGE_MASK);
if (!pgt) {
viraddr += MAP_ENTRIES*PAGE_SIZE;
i += MAP_ENTRIES;
continue;
}
if (pgt->entries[index_pgt])
pgt->entries[index_pgt] &= ~PG_PRESENT;
viraddr +=PAGE_SIZE;
i++;
if (viraddr > KERNEL_SPACE)
atomic_int32_dec(&task->user_usage);
tlb_flush_one_page(viraddr);
}
2012-09-10 15:37:45 +02:00
if (viraddr <= KERNEL_SPACE)
spinlock_unlock(&kslock);
else
spinlock_irqsave_unlock(&task->page_lock);
return 0;
}
2011-02-24 10:15:58 +01:00
int vm_free(size_t viraddr, uint32_t npages)
{
2011-02-24 10:15:58 +01:00
task_t* task = per_core(current_task);
page_map_t* pdpt, * pgd, * pgt;
size_t i;
uint16_t index_pml4, index_pdpt;
uint16_t index_pgd, index_pgt;
if (BUILTIN_EXPECT(!task || !task->page_map || !paging_enabled, 0))
return -EINVAL;
if (viraddr <= KERNEL_SPACE)
2012-09-10 15:37:45 +02:00
spinlock_lock(&kslock);
else
spinlock_irqsave_lock(&task->page_lock);
i = 0;
while(i<npages)
{
index_pml4 = (viraddr >> 39) & 0x1FF;
index_pdpt = (viraddr >> 30) & 0x1FF;
index_pgd = (viraddr >> 21) & 0x1FF;
index_pgt = (viraddr >> 12) & 0x1FF;
// Currently, we allocate pages only in kernel space.
// => physical address of the page table is identical of the virtual address
pdpt = (page_map_t*) (task->page_map->entries[index_pml4] & PAGE_MASK);
if (!pdpt) {
viraddr += (size_t) MAP_ENTRIES*MAP_ENTRIES*MAP_ENTRIES*PAGE_SIZE;
i += MAP_ENTRIES*MAP_ENTRIES*MAP_ENTRIES;
continue;
}
pgd = (page_map_t*) (pdpt->entries[index_pdpt] & PAGE_MASK);
if (!pgd) {
viraddr += MAP_ENTRIES*MAP_ENTRIES*PAGE_SIZE;
i += MAP_ENTRIES*MAP_ENTRIES;
continue;
}
pgt = (page_map_t*) (pgd->entries[index_pgd] & PAGE_MASK);
if (!pgt) {
viraddr += MAP_ENTRIES*PAGE_SIZE;
i += MAP_ENTRIES;
continue;
}
if (pgt->entries[index_pgt])
pgt->entries[index_pgt] = 0;
viraddr +=PAGE_SIZE;
i++;
tlb_flush_one_page(viraddr);
}
2012-09-10 15:37:45 +02:00
if (viraddr <= KERNEL_SPACE)
spinlock_unlock(&kslock);
else
spinlock_irqsave_unlock(&task->page_lock);
return 0;
}
static void pagefault_handler(struct state *s)
{
task_t* task = per_core(current_task);
//page_map_t* pgd = task->page_map;
//page_map_t* pgt = NULL;
size_t viraddr = read_cr2();
//size_t phyaddr;
#if 0
if ((viraddr >= task->start_heap) && (viraddr <= task->end_heap) && (viraddr > KERNEL_SPACE)) {
viraddr = viraddr & PAGE_MASK;
phyaddr = get_page();
if (BUILTIN_EXPECT(!phyaddr, 0))
goto default_handler;
2011-04-22 09:31:33 +02:00
if (map_region(viraddr, phyaddr, 1, MAP_USER_SPACE) == viraddr) {
memset((void*) viraddr, 0x00, PAGE_SIZE);
return;
}
kprintf("Could not map 0x%x at 0x%x\n", phyaddr, viraddr);
put_page(phyaddr);
}
2011-08-24 09:39:17 +02:00
#endif
2012-07-22 20:10:16 +02:00
//default_handler:
kprintf("PAGE FAULT: Task %u got page fault at %p (irq %llu, cs:rip 0x%llx:0x%llx)\n", task->id, viraddr, s->int_no, s->cs, s->rip);
kprintf("Register state: rax = 0x%llx, rbx = 0x%llx, rcx = 0x%llx, rdx = 0x%llx, rdi = 0x%llx, rsi = 0x%llx, rbp = 0x%llx, rsp = 0x%llx\n",
s->rax, s->rbx, s->rcx, s->rdx, s->rdi, s->rsi, s->rbp, s->rsp);
while(1);
2011-08-02 06:19:26 -07:00
irq_enable();
abort();
}
/**
 * Architecture specific initialisation of the paging subsystem.
 *
 * Installs the page fault handler, reserves the page for the SMP boot code
 * (MAX_CORES > 1), identity-maps the multiboot module table and the loaded
 * modules (CONFIG_MULTIBOOT), marks paging as enabled, registers the
 * current task and finally maps the APIC registers.
 *
 * @return 0 on success, -ENOMEM if the page for the SMP boot code could not
 *         be reserved
 */
int arch_paging_init(void)
{
	uint32_t i, npages;

	// uninstall default handler and install our own
	irq_uninstall_handler(14);
	irq_install_handler(14, pagefault_handler);

	// kernel is already mapped into the kernel space (see entry64.asm)
	// this includes .data, .bss, .text, video memory and the multiboot structure

#if MAX_CORES > 1
	// Reserve page for smp boot code
	if (!map_region(SMP_SETUP_ADDR, SMP_SETUP_ADDR, 1, MAP_KERNEL_SPACE|MAP_NO_CACHE)) {
		kputs("could not reserve page for smp boot code\n");
		return -ENOMEM;
	}
#endif

#ifdef CONFIG_MULTIBOOT
#if 0
	/*
	 * Map reserved memory regions into the kernel space
	 */
	if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MEM_MAP)) {
		multiboot_memory_map_t* mmap = (multiboot_memory_map_t*) mb_info->mmap_addr;
		multiboot_memory_map_t* mmap_end = (void*) ((size_t) mb_info->mmap_addr + mb_info->mmap_length);

		while (mmap < mmap_end) {
			if (mmap->type != MULTIBOOT_MEMORY_AVAILABLE) {
				npages = mmap->len / PAGE_SIZE;
				if ((mmap->addr+mmap->len) % PAGE_SIZE)
					npages++;
				map_region(mmap->addr, mmap->addr, npages, MAP_KERNEL_SPACE|MAP_NO_CACHE);
			}
			mmap++;
		}
	}
#endif

	/*
	 * Modules like the init ram disk are already loaded.
	 * Therefore, we map these modules into the kernel space.
	 */
	if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MODS) && mb_info->mods_count) {
		multiboot_module_t* mmodule = (multiboot_module_t*) ((size_t) mb_info->mods_addr);
		size_t first, last;

		/*
		 * Map the table of module descriptors. The page count is derived
		 * from the first and the last page touched, so that a table which
		 * does not start on a page boundary is covered completely.
		 */
		first = (size_t) mb_info->mods_addr;
		last = first + mb_info->mods_count * sizeof(multiboot_module_t);
		npages = (uint32_t) (((last + PAGE_SIZE - 1) >> PAGE_SHIFT) - (first >> PAGE_SHIFT));

		map_region(first, first, npages, MAP_REMAP|MAP_KERNEL_SPACE);

		for(i=0; i<mb_info->mods_count; i++, mmodule++) {
			// map physical address to the same virtual address
			first = (size_t) mmodule->mod_start;
			last = (size_t) mmodule->mod_end;
			npages = (uint32_t) (((last + PAGE_SIZE - 1) >> PAGE_SHIFT) - (first >> PAGE_SHIFT));

			kprintf("Map module %s at 0x%x (%u pages)\n", (char*) (size_t) mmodule->cmdline, mmodule->mod_start, npages);
			map_region(first, first, npages, MAP_REMAP|MAP_KERNEL_SPACE);
		}
	}
#endif

	/* signalize that we are able to use paging */
	paging_enabled = 1;

	/*
	 * we turned on paging
	 * => now, we are able to register our task
	 */
	register_task();

	// APIC registers into the kernel address space
	map_apic();

	return 0;
}