metalsvm/arch/x86/mm/page64.c
2012-09-10 15:37:45 +02:00

650 lines
16 KiB
C

/*
* Copyright 2012 Stefan Lankes, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
#include <metalsvm/stddef.h>
#include <metalsvm/stdio.h>
#include <metalsvm/stdlib.h>
#include <metalsvm/mmu.h>
#include <metalsvm/vma.h>
#include <metalsvm/string.h>
#include <metalsvm/page.h>
#include <metalsvm/spinlock.h>
#include <metalsvm/processor.h>
#include <metalsvm/tasks.h>
#include <metalsvm/errno.h>
#include <asm/irq.h>
#include <asm/multiboot.h>
#include <asm/apic.h>
#ifdef CONFIG_ROCKCREEK
#include <asm/RCCE_lib.h>
#include <asm/SCC_API.h>
#include <asm/svm.h>
#include <asm/icc.h>
#endif
/*
* Virtual Memory Layout of the standard configuration
* (1 GB kernel space)
*
* 0x00000000 - 0x000FFFFF: reserved for IO devices (1MB)
* 0x00100000 - 0x0DEADFFF: Kernel (size depends on the configuration) (221MB)
* 0x0DEAE000 - 0x3FFFFFFF: Kernel heap
*
*/
/*
* Note that linker symbols are not variables, they have no memory allocated for
* maintaining a value, rather their address is their value.
*/
// Symbols defined outside this file. Within this file only the address of
// kernel_end is taken (to place the start of the kernel heap search).
extern const void kernel_start;
extern const void kernel_end;
// boot task's page directory (defined outside this file)
extern page_dir_t boot_pgd;
// Lock taken by the mapping routines below for kernel-space requests
// (MAP_KERNEL_SPACE flag, or addresses <= KERNEL_SPACE).
static spinlock_t kslock = SPINLOCK_INIT;
// 0 until arch_paging_init() sets it to 1; most routines below refuse to
// work (or fall back to identity behaviour) while it is still 0.
static int paging_enabled = 0;
page_dir_t* get_boot_pgd(void)
{
return &boot_pgd;
}
/*
 * Assigns a page directory to the given task.
 *
 * Only kernel tasks are supported at the moment, so every task simply
 * shares the page directory of the boot task. The parameter copy is
 * currently not evaluated.
 *
 * Returns 0 on success and -EINVAL as long as paging is not enabled.
 */
int create_pgd(task_t* task, int copy)
{
	if (BUILTIN_EXPECT(paging_enabled == 0, 0))
		return -EINVAL;

	// same object that get_boot_pgd() hands out
	task->pgd = &boot_pgd;

	return 0;
}
/*
 * Intended to drop all page frames and the PGD of a user task.
 *
 * NOTE: the implementation below is compiled out (#if 0), so this
 * function currently does nothing and always returns 0. The disabled
 * code iterates over 1024 directory entries, i.e. it still has the shape
 * of a two-level page directory and does not match the four-level walk
 * used by the other routines in this file.
 */
int drop_pgd(void)
{
#if 0
page_dir_t* pgd = per_core(current_task)->pgd;
size_t phy_pgd = virt_to_phys((size_t) pgd);
task_t* task = per_core(current_task);
uint32_t i;
// the shared boot page directory must never be released
if (BUILTIN_EXPECT(pgd == &boot_pgd, 0))
return -EINVAL;
spinlock_lock(&task->pgd_lock);
// release every page table that is marked as user page
for(i=0; i<1024; i++) {
if (pgd->entries[i] & PG_USER) {
put_page(pgd->entries[i] & PAGE_MASK);
pgd->entries[i] = 0;
}
}
// freeing the page directory
put_page(phy_pgd);
task->pgd = NULL;
spinlock_unlock(&task->pgd_lock);
#endif
return 0;
}
/*
 * Translates a virtual address into the corresponding physical address
 * by walking the four table levels of the current task.
 *
 * As long as paging is not enabled, the address is handed back unchanged.
 * Returns 0 if there is no current task / page directory or if one of
 * the table levels has no entry for the address.
 */
size_t virt_to_phys(size_t viraddr)
{
	task_t* curr = per_core(current_task);
	page_table_t* table;
	size_t frame;
	size_t phyaddr = 0;

	if (!paging_enabled)
		return viraddr;

	if (BUILTIN_EXPECT(!curr || !curr->pgd, 0))
		return 0;

	spinlock_irqsave_lock(&curr->pgd_lock);

	// Pages are currently allocated only in kernel space, hence the
	// address stored in a table entry is used directly as a pointer
	// to the next level.
	table = (page_table_t*) (curr->pgd->entries[(viraddr >> 39) & 0x1FF] & PAGE_MASK);
	if (table)
		table = (page_table_t*) (table->entries[(viraddr >> 30) & 0x1FF] & PAGE_MASK);
	if (table)
		table = (page_table_t*) (table->entries[(viraddr >> 21) & 0x1FF] & PAGE_MASK);

	if (table) {
		frame = (size_t) (table->entries[(viraddr >> 12) & 0x1FF] & PAGE_MASK);
		if (frame)
			phyaddr = frame | (viraddr & 0xFFF);	// add page offset
	}

	spinlock_irqsave_unlock(&curr->pgd_lock);

	return phyaddr;
}
/*
 * Maps npages pages of physical memory, starting at phyaddr, to the
 * virtual range starting at viraddr in the tables of the current task.
 *
 * viraddr - first virtual address; 0 requests a free range from vm_alloc()
 * phyaddr - first physical address
 * npages  - number of pages to map
 * flags   - MAP_KERNEL_SPACE / MAP_USER_SPACE select lock and page type,
 *           MAP_REMAP allows overwriting an existing entry,
 *           MAP_NO_CACHE, MAP_WT and MAP_NO_ACCESS adjust the entry bits
 *
 * Returns the first virtual address of the mapping or 0 on failure.
 * Page tables are not allocated here: a missing table level is reported
 * as an error. Pages mapped before a failure stay mapped.
 */
size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flags)
{
	task_t* curr = per_core(current_task);
	page_table_t* table;
	size_t done, first;
	const int kernel = (flags & MAP_KERNEL_SPACE) != 0;

	if (BUILTIN_EXPECT(!curr || !curr->pgd, 0))
		return 0;

	// before paging is switched on only identity mappings are accepted
	if (BUILTIN_EXPECT(!paging_enabled && (viraddr != phyaddr), 0))
		return 0;

	if (kernel)
		spinlock_lock(&kslock);
	else
		spinlock_irqsave_lock(&curr->pgd_lock);

	if (!viraddr) {
		viraddr = vm_alloc(npages, flags);
		if (BUILTIN_EXPECT(!viraddr, 0)) {
			kputs("map_region: found no valid virtual address\n");
			first = 0;
			goto unlock;
		}
	}

	first = viraddr;
	done = 0;
	while (done < npages) {
		uint16_t slot = (viraddr >> 12) & 0x1FF;

		// Pages are currently allocated only in kernel space, hence the
		// address stored in a table entry doubles as pointer to the
		// next level.
		table = (page_table_t*) (curr->pgd->entries[(viraddr >> 39) & 0x1FF] & PAGE_MASK);
		if (table)
			table = (page_table_t*) (table->entries[(viraddr >> 30) & 0x1FF] & PAGE_MASK);
		if (table)
			table = (page_table_t*) (table->entries[(viraddr >> 21) & 0x1FF] & PAGE_MASK);
		if (!table) {
			kputs("map_region: out of memory\n");
			first = 0;
			goto unlock;
		}

		if (table->entries[slot] && !(flags & MAP_REMAP)) {
			kprintf("0x%x is already mapped\n", viraddr);
			first = 0;
			goto unlock;
		}

		if (flags & MAP_USER_SPACE) {
			table->entries[slot] = USER_PAGE|(phyaddr & PAGE_MASK);
			atomic_int32_inc(&curr->user_usage);
		} else {
			table->entries[slot] = KERN_PAGE|(phyaddr & PAGE_MASK);
		}

		if (flags & MAP_NO_CACHE)
			table->entries[slot] |= PG_PCD;
		if (flags & MAP_WT)
			table->entries[slot] |= PG_PWT;
		if (flags & MAP_NO_ACCESS)
			table->entries[slot] &= ~PG_PRESENT;

		tlb_flush_one_page(viraddr);

		done++;
		viraddr += PAGE_SIZE;
		phyaddr += PAGE_SIZE;
	}

unlock:
	if (kernel)
		spinlock_unlock(&kslock);
	else
		spinlock_irqsave_unlock(&curr->pgd_lock);

	return first;
}
/*
 * Intended to change the access permissions of the pages in [start, end).
 *
 * NOTE: the implementation below is compiled out (#if 0), so this
 * function currently performs no work and always returns -EINVAL.
 * The disabled code uses two table indices (viraddr >> 22 and a 10 bit
 * table index), i.e. it does not match the four-level walk used by the
 * other routines in this file.
 */
int change_page_permissions(size_t start, size_t end, uint32_t flags)
{
#if 0
uint32_t index1, index2, newflags;
size_t viraddr = start & PAGE_MASK;
size_t phyaddr;
page_table_t* pgt;
page_dir_t* pgd;
task_t* task = per_core(current_task);
if (BUILTIN_EXPECT(!paging_enabled, 0))
return -EINVAL;
pgd = per_core(current_task)->pgd;
if (BUILTIN_EXPECT(!pgd, 0))
return -EINVAL;
spinlock_lock(&task->pgd_lock);
while (viraddr < end)
{
index1 = viraddr >> 22;
index2 = (viraddr >> 12) & 0x3FF;
while ((viraddr < end) && (index2 < 1024)) {
pgt = (page_table_t*) (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
if (pgt && pgt->entries[index2]) {
phyaddr = pgt->entries[index2] & PAGE_MASK;
newflags = pgt->entries[index2] & 0xFFF; // get old flags
// toggle the present bit of PG_SVM_STRONG pages unless PG_SVM_INIT is set
if (!(newflags & PG_SVM_INIT)) {
if ((newflags & PG_SVM_STRONG) && !(newflags & PG_PRESENT) && (flags & (VMA_READ|VMA_WRITE) && !(flags & VMA_NOACCESS)))
newflags |= PG_PRESENT;
else if ((newflags & PG_SVM_STRONG) && (newflags & PG_PRESENT) && (flags & VMA_NOACCESS))
newflags &= ~PG_PRESENT;
}
// update flags
if (!(flags & VMA_WRITE)) {
newflags &= ~PG_RW;
#ifdef CONFIG_ROCKCREEK
if (newflags & (PG_SVM_STRONG|PG_SVM_LAZYRELEASE))
newflags &= ~PG_MPE;
#endif
} else {
newflags |= PG_RW;
#ifdef CONFIG_ROCKCREEK
if (newflags & (PG_SVM_STRONG|PG_SVM_LAZYRELEASE))
newflags |= PG_MPE;
#endif
}
// write back the new flags together with the unchanged frame address
pgt->entries[index2] = (newflags & 0xFFF) | (phyaddr & PAGE_MASK);
tlb_flush_one_page(viraddr);
}
index2++;
viraddr += PAGE_SIZE;
}
}
spinlock_unlock(&task->pgd_lock);
#endif
return -EINVAL;
}
/*
 * Use the first fit algorithm to find a valid address range
 *
 * Scans the page tables of the current task for npages consecutive
 * pages without a page table entry.
 *
 * npages - number of pages requested
 * flags  - MAP_KERNEL_SPACE searches between the end of the kernel image
 *          (plus 10 pages) and KERNEL_SPACE, otherwise the search starts
 *          at KERNEL_SPACE
 *
 * Returns the first address of the free range, or 0 if npages is 0,
 * paging is not enabled, there is no current task / page directory, or
 * no suitable range was found. The range is only located, not reserved.
 *
 * TODO: O(n) => bad performance, we need a better approach
 */
size_t vm_alloc(uint32_t npages, uint32_t flags)
{
	task_t* task = per_core(current_task);
	size_t viraddr, i, j, ret = 0;
	size_t start, end;
	page_table_t* pgt;

	if (BUILTIN_EXPECT(!task || !task->pgd || !paging_enabled, 0))
		return 0;

	if (flags & MAP_KERNEL_SPACE) {
		start = (((size_t) &kernel_end) + 10*PAGE_SIZE) & PAGE_MASK;
		end = (KERNEL_SPACE - PAGE_SIZE) & PAGE_MASK;
	} else {
		start = KERNEL_SPACE & PAGE_MASK;
		end = PAGE_MASK;
	}

	if (BUILTIN_EXPECT(!npages, 0))
		return 0;

	if (flags & MAP_KERNEL_SPACE)
		spinlock_lock(&kslock);
	else
		spinlock_irqsave_lock(&task->pgd_lock);

	// viraddr: start of the current candidate range
	// i:       scan cursor (next page to inspect)
	// j:       number of free pages found in [viraddr, i)
	viraddr = i = start;
	j = 0;
	do {
		// The indices have to follow the scan cursor. Deriving them from
		// viraddr would inspect the first page of the candidate over and
		// over again and accept ranges whose later pages are in use.
		uint16_t idx_pd4 = (i >> 39) & 0x1FF;
		uint16_t idx_dirp = (i >> 30) & 0x1FF;
		uint16_t idx_dir = (i >> 21) & 0x1FF;
		uint16_t idx_table = (i >> 12) & 0x1FF;
		size_t hole = 0;	// size of the unmapped span around i, 0 = leaf table exists

		// Currently, we allocate pages only in kernel space.
		// => physical address of the page table is identical of the virtual address
		pgt = (page_table_t*) (task->pgd->entries[idx_pd4] & PAGE_MASK);
		if (!pgt) {
			hole = (size_t) PGT_ENTRIES*PGT_ENTRIES*PGT_ENTRIES*PAGE_SIZE;
		} else {
			pgt = (page_table_t*) (pgt->entries[idx_dirp] & PAGE_MASK);
			if (!pgt) {
				hole = (size_t) PGT_ENTRIES*PGT_ENTRIES*PAGE_SIZE;
			} else {
				pgt = (page_table_t*) (pgt->entries[idx_dir] & PAGE_MASK);
				if (!pgt)
					hole = (size_t) PGT_ENTRIES*PAGE_SIZE;
			}
		}

		if (hole) {
			// A whole table is missing: everything up to the next
			// boundary of that span is free. Stop exactly at the
			// boundary, otherwise an unaligned cursor would jump over
			// pages of the following span without checking them.
			size_t next = i - (i % hole) + hole;

			if (BUILTIN_EXPECT(next <= i, 0))
				break;	// wrapped around the end of the address space

			j += (next - i) / PAGE_SIZE;
			i = next;
			continue;
		}

		if (!(pgt->entries[idx_table])) {
			i += PAGE_SIZE;
			j++;
		} else {
			// restart search behind the page in use
			j = 0;
			viraddr = i + PAGE_SIZE;
			i = i + PAGE_SIZE;
		}
	} while((j < npages) && (i<=end));

	// accept the candidate only if its last page does not lie beyond end
	if ((j >= npages) && (viraddr < end) &&
	    ((size_t) (npages - 1) * PAGE_SIZE <= end - viraddr))
		ret = viraddr;

	if (flags & MAP_KERNEL_SPACE)
		spinlock_unlock(&kslock);
	else
		spinlock_irqsave_unlock(&task->pgd_lock);

	return ret;
}
/*
 * Marks npages pages starting at viraddr as not present in the page
 * tables of the current task. The entries themselves are kept, only
 * PG_PRESENT is cleared (vm_free() clears the entries completely).
 *
 * viraddr - first virtual address of the region
 * npages  - number of pages
 *
 * Returns 0 on success and -EINVAL if paging is not enabled or there is
 * no current task / page directory.
 */
int unmap_region(size_t viraddr, uint32_t npages)
{
	task_t* task = per_core(current_task);
	page_table_t* pgt;
	size_t i, page, hole, next;
	uint16_t idx_pd4, idx_dirp;
	uint16_t idx_dir, idx_table;
	int use_kslock;

	if (BUILTIN_EXPECT(!task || !task->pgd || !paging_enabled, 0))
		return -EINVAL;

	// Decide once which lock protects this region. viraddr is advanced
	// in the loop, so re-evaluating the condition for the unlock could
	// release a different lock than the one taken here.
	use_kslock = (viraddr <= KERNEL_SPACE);
	if (use_kslock)
		spinlock_lock(&kslock);
	else
		spinlock_irqsave_lock(&task->pgd_lock);

	i = 0;
	while(i<npages)
	{
		idx_pd4 = (viraddr >> 39) & 0x1FF;
		idx_dirp = (viraddr >> 30) & 0x1FF;
		idx_dir = (viraddr >> 21) & 0x1FF;
		idx_table = (viraddr >> 12) & 0x1FF;
		hole = 0;	// size of the unmapped span around viraddr, 0 = leaf table exists

		// Currently, we allocate pages only in kernel space.
		// => physical address of the page table is identical of the virtual address
		pgt = (page_table_t*) (task->pgd->entries[idx_pd4] & PAGE_MASK);
		if (!pgt) {
			hole = (size_t) PGT_ENTRIES*PGT_ENTRIES*PGT_ENTRIES*PAGE_SIZE;
		} else {
			pgt = (page_table_t*) (pgt->entries[idx_dirp] & PAGE_MASK);
			if (!pgt) {
				hole = (size_t) PGT_ENTRIES*PGT_ENTRIES*PAGE_SIZE;
			} else {
				pgt = (page_table_t*) (pgt->entries[idx_dir] & PAGE_MASK);
				if (!pgt)
					hole = (size_t) PGT_ENTRIES*PAGE_SIZE;
			}
		}

		if (hole) {
			// Nothing is mapped up to the next boundary of the missing
			// table. Stop exactly at that boundary so that pages of the
			// following span are not skipped for an unaligned viraddr.
			next = viraddr - (viraddr % hole) + hole;
			if (BUILTIN_EXPECT(next <= viraddr, 0))
				break;	// wrapped around the end of the address space

			i += (next - viraddr) / PAGE_SIZE;
			viraddr = next;
			continue;
		}

		if (pgt->entries[idx_table])
			pgt->entries[idx_table] &= ~PG_PRESENT;

		page = viraddr;		// remember the page that was just changed
		viraddr +=PAGE_SIZE;
		i++;

		// NOTE(review): as before, this check sees the already advanced
		// address; confirm whether KERNEL_SPACE itself counts as user space.
		if (viraddr > KERNEL_SPACE)
			atomic_int32_dec(&task->user_usage);

		// flush the page whose entry was modified, not its successor
		tlb_flush_one_page(page);
	}

	if (use_kslock)
		spinlock_unlock(&kslock);
	else
		spinlock_irqsave_unlock(&task->pgd_lock);

	return 0;
}
/*
 * Releases the virtual address range of npages pages starting at viraddr
 * by clearing the corresponding page table entries of the current task.
 *
 * viraddr - first virtual address of the region
 * npages  - number of pages
 *
 * Returns 0 on success and -EINVAL if paging is not enabled or there is
 * no current task / page directory.
 */
int vm_free(size_t viraddr, uint32_t npages)
{
	task_t* task = per_core(current_task);
	page_table_t* pgt;
	size_t i, page, hole, next;
	uint16_t idx_pd4, idx_dirp;
	uint16_t idx_dir, idx_table;
	int use_kslock;

	if (BUILTIN_EXPECT(!task || !task->pgd || !paging_enabled, 0))
		return -EINVAL;

	// Decide once which lock protects this region. viraddr is advanced
	// in the loop, so re-evaluating the condition for the unlock could
	// release a different lock than the one taken here.
	use_kslock = (viraddr <= KERNEL_SPACE);
	if (use_kslock)
		spinlock_lock(&kslock);
	else
		spinlock_irqsave_lock(&task->pgd_lock);

	i = 0;
	while(i<npages)
	{
		idx_pd4 = (viraddr >> 39) & 0x1FF;
		idx_dirp = (viraddr >> 30) & 0x1FF;
		idx_dir = (viraddr >> 21) & 0x1FF;
		idx_table = (viraddr >> 12) & 0x1FF;
		hole = 0;	// size of the unmapped span around viraddr, 0 = leaf table exists

		// Currently, we allocate pages only in kernel space.
		// => physical address of the page table is identical of the virtual address
		pgt = (page_table_t*) (task->pgd->entries[idx_pd4] & PAGE_MASK);
		if (!pgt) {
			hole = (size_t) PGT_ENTRIES*PGT_ENTRIES*PGT_ENTRIES*PAGE_SIZE;
		} else {
			pgt = (page_table_t*) (pgt->entries[idx_dirp] & PAGE_MASK);
			if (!pgt) {
				hole = (size_t) PGT_ENTRIES*PGT_ENTRIES*PAGE_SIZE;
			} else {
				pgt = (page_table_t*) (pgt->entries[idx_dir] & PAGE_MASK);
				if (!pgt)
					hole = (size_t) PGT_ENTRIES*PAGE_SIZE;
			}
		}

		if (hole) {
			// Nothing is mapped up to the next boundary of the missing
			// table. Stop exactly at that boundary so that pages of the
			// following span are not skipped for an unaligned viraddr.
			next = viraddr - (viraddr % hole) + hole;
			if (BUILTIN_EXPECT(next <= viraddr, 0))
				break;	// wrapped around the end of the address space

			i += (next - viraddr) / PAGE_SIZE;
			viraddr = next;
			continue;
		}

		if (pgt->entries[idx_table])
			pgt->entries[idx_table] = 0;

		page = viraddr;		// remember the page that was just changed
		viraddr +=PAGE_SIZE;
		i++;

		// flush the page whose entry was cleared, not its successor
		tlb_flush_one_page(page);
	}

	if (use_kslock)
		spinlock_unlock(&kslock);
	else
		spinlock_irqsave_unlock(&task->pgd_lock);

	return 0;
}
/*
 * Handler installed for interrupt 14 by arch_paging_init().
 *
 * Prints the id of the current task, the address read from CR2 and the
 * register state passed in s, then halts in an endless loop.
 * The on-demand heap mapping is compiled out (#if 0).
 */
static void pagefault_handler(struct state *s)
{
task_t* task = per_core(current_task);
//page_dir_t* pgd = task->pgd;
//page_table_t* pgt = NULL;
// address reported by the processor for this fault
size_t viraddr = read_cr2();
//size_t phyaddr;
#if 0
// disabled: back a fault inside the task's heap with a fresh, zeroed page
if ((viraddr >= task->start_heap) && (viraddr <= task->end_heap) && (viraddr > KERNEL_SPACE)) {
viraddr = viraddr & PAGE_MASK;
phyaddr = get_page();
if (BUILTIN_EXPECT(!phyaddr, 0))
goto default_handler;
if (map_region(viraddr, phyaddr, 1, MAP_USER_SPACE) == viraddr) {
memset((void*) viraddr, 0x00, PAGE_SIZE);
return;
}
kprintf("Could not map 0x%x at 0x%x\n", phyaddr, viraddr);
put_page(phyaddr);
}
#endif
//default_handler:
kprintf("PAGE FAULT: Task %u got page fault at %p (irq %llu, cs:rip 0x%llx:0x%llx)\n", task->id, viraddr, s->int_no, s->cs, s->rip);
kprintf("Register state: rax = 0x%llx, rbx = 0x%llx, rcx = 0x%llx, rdx = 0x%llx, rdi = 0x%llx, rsi = 0x%llx, rbp = 0x%llx, rsp = 0x%llx\n",
s->rax, s->rbx, s->rcx, s->rdx, s->rdi, s->rsi, s->rbp, s->rsp);
// NOTE(review): the endless loop below never terminates, so the
// irq_enable()/abort() calls after it are unreachable. Presumably a
// debugging halt - confirm whether the faulting task should be aborted instead.
while(1);
irq_enable();
abort();
}
/*
 * Architecture specific part of the paging initialization.
 *
 * Installs the page fault handler, reserves the page for the SMP boot
 * code (MAX_CORES > 1), maps the multiboot modules (CONFIG_MULTIBOOT),
 * marks paging as enabled, registers the current task and maps the APIC.
 *
 * Returns 0 on success and -ENOMEM if the page for the SMP boot code
 * could not be mapped. Failures while mapping the multiboot modules are
 * not reported to the caller.
 */
int arch_paging_init(void)
{
	uint32_t i, npages;

	// uninstall default handler and install our own
	irq_uninstall_handler(14);
	irq_install_handler(14, pagefault_handler);

	// kernel is already mapped into the kernel space (see entry64.asm)
	// this includes .data, .bss, .text, video memory and the multiboot structure

#if MAX_CORES > 1
	// Reserve page for smp boot code
	if (!map_region(SMP_SETUP_ADDR, SMP_SETUP_ADDR, 1, MAP_KERNEL_SPACE|MAP_NO_CACHE)) {
		kputs("could not reserve page for smp boot code\n");
		return -ENOMEM;
	}
#endif

#ifdef CONFIG_MULTIBOOT
#if 0
	/*
	 * Map reserved memory regions into the kernel space
	 */
	if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MEM_MAP)) {
		multiboot_memory_map_t* mmap = (multiboot_memory_map_t*) mb_info->mmap_addr;
		multiboot_memory_map_t* mmap_end = (void*) ((size_t) mb_info->mmap_addr + mb_info->mmap_length);

		while (mmap < mmap_end) {
			if (mmap->type != MULTIBOOT_MEMORY_AVAILABLE) {
				npages = mmap->len / PAGE_SIZE;
				if ((mmap->addr+mmap->len) % PAGE_SIZE)
					npages++;
				map_region(mmap->addr, mmap->addr, npages, MAP_KERNEL_SPACE|MAP_NO_CACHE);
			}
			mmap++;
		}
	}
#endif

	/*
	 * Modules like the init ram disk are already loaded.
	 * Therefore, we map these modules into the kernel space.
	 */
	if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MODS)) {
		multiboot_module_t* mmodule = (multiboot_module_t*) ((size_t) mb_info->mods_addr);
		size_t len = mb_info->mods_count * sizeof(multiboot_module_t);
		size_t first, last;

		// The module table does not have to start on a page boundary.
		// Count every page touched by [mods_addr, mods_addr+len), not
		// just len rounded up, otherwise the tail of the table can be
		// left unmapped.
		first = ((size_t) mb_info->mods_addr) & PAGE_MASK;
		last = ((size_t) mb_info->mods_addr + len + PAGE_SIZE - 1) & PAGE_MASK;
		npages = len ? (uint32_t) ((last - first) >> PAGE_SHIFT) : 0;
		map_region((size_t) (mb_info->mods_addr), (size_t) (mb_info->mods_addr), npages, MAP_REMAP|MAP_KERNEL_SPACE);

		for(i=0; i<mb_info->mods_count; i++, mmodule++) {
			// map physical address to the same virtual address;
			// the page count covers the span from the page holding
			// mod_start up to the page holding the last module byte
			first = ((size_t) mmodule->mod_start) & PAGE_MASK;
			last = ((size_t) mmodule->mod_end + PAGE_SIZE - 1) & PAGE_MASK;
			npages = (uint32_t) ((last - first) >> PAGE_SHIFT);

			kprintf("Map module %s at 0x%x (%u pages)\n", (char*) mmodule->cmdline, mmodule->mod_start, npages);
			map_region((size_t) (mmodule->mod_start), (size_t) (mmodule->mod_start), npages, MAP_REMAP|MAP_KERNEL_SPACE);
		}
	}
#endif

	/* signalize that we are able to use paging */
	paging_enabled = 1;

	/*
	 * we turned on paging
	 * => now, we are able to register our task
	 */
	register_task();

	// APIC registers into the kernel address space
	map_apic();

	return 0;
}