2010-12-10 06:16:58 +00:00
|
|
|
/*
|
2012-05-29 20:47:45 +02:00
|
|
|
* Copyright 2012 Stefan Lankes, Chair for Operating Systems,
|
2010-12-10 06:16:58 +00:00
|
|
|
* RWTH Aachen University
|
|
|
|
*
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at
|
|
|
|
*
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*
|
|
|
|
* This file is part of MetalSVM.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <metalsvm/stddef.h>
|
|
|
|
#include <metalsvm/stdio.h>
|
|
|
|
#include <metalsvm/stdlib.h>
|
2014-01-09 16:20:18 +01:00
|
|
|
#include <metalsvm/memory.h>
|
2011-02-24 09:36:05 +01:00
|
|
|
#include <metalsvm/vma.h>
|
2010-12-10 06:16:58 +00:00
|
|
|
#include <metalsvm/string.h>
|
|
|
|
#include <metalsvm/page.h>
|
|
|
|
#include <metalsvm/spinlock.h>
|
|
|
|
#include <metalsvm/processor.h>
|
|
|
|
#include <metalsvm/tasks.h>
|
|
|
|
#include <metalsvm/errno.h>
|
|
|
|
#include <asm/irq.h>
|
|
|
|
#include <asm/multiboot.h>
|
2011-07-18 15:51:26 +02:00
|
|
|
#include <asm/apic.h>
|
2010-12-10 06:16:58 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Virtual Memory Layout of the standard configuration
|
|
|
|
* (1 GB kernel space)
|
|
|
|
*
|
2013-10-11 16:21:53 +02:00
|
|
|
 * 0x000000000000 - 0x0000000FFFFF: reserved for IO devices (1MB)
|
|
|
|
* 0x000000100000 - 0x00000DEADFFF: Kernel (size depends on the configuration) (221MB)
|
|
|
|
* 0x00000DEAE000 - 0x00003FFFFFFF: Kernel heap
|
2014-01-09 16:52:03 +01:00
|
|
|
* 0xFF0000000000 - 0xFF7FFFFFFFFF: Paging structures for copying a page map (max 512GB)
|
2013-11-14 12:22:52 +01:00
|
|
|
* 0xFF8000000000 - 0xFFFFFFFFFFFF: Paging structures are mapped in this region (max 512GB)
|
2010-12-10 06:16:58 +00:00
|
|
|
*/
|
|
|
|
|
2013-12-03 15:26:21 +01:00
|
|
|
/// Boot task's page map
|
2013-10-11 16:21:53 +02:00
|
|
|
extern page_map_t boot_pml4;
|
2013-12-03 15:26:21 +01:00
|
|
|
/// Kernel space page map lock
|
2011-03-04 13:47:06 +01:00
|
|
|
static spinlock_t kslock = SPINLOCK_INIT;
|
2013-10-11 16:21:53 +02:00
|
|
|
|
2014-01-09 16:52:03 +01:00
|
|
|
/** @brief Get the corresponding page map entry to a given virtual address */
|
|
|
|
static inline page_entry_t* virt_to_entry(size_t addr, int level)
|
2010-12-10 06:16:58 +00:00
|
|
|
{
|
2014-01-09 16:52:03 +01:00
|
|
|
return (page_entry_t*) ((((ssize_t) addr | (-1L << VIRT_BITS)) >> ((level+1) * PAGE_MAP_BITS)) & ~0x7);
|
2010-12-10 06:16:58 +00:00
|
|
|
}
|
|
|
|
|
2014-01-09 16:52:03 +01:00
|
|
|
/** @brief Get the corresponding virtual address to a page map entry */
|
|
|
|
static inline size_t entry_to_virt(page_entry_t* entry, int level)
|
|
|
|
{
|
|
|
|
return VIRT_SEXT((size_t) entry << ((level+1) * PAGE_MAP_BITS));
|
|
|
|
}
|
2013-12-03 15:26:21 +01:00
|
|
|
|
2014-01-09 16:52:03 +01:00
|
|
|
/** @brief Converts a virtual address to a physical
|
|
|
|
*
|
|
|
|
* A non mapped virtual address causes a pagefault!
|
|
|
|
*
|
|
|
|
* @param viraddr Virtual address to convert
|
|
|
|
* @return physical address
|
|
|
|
*/
|
|
|
|
inline size_t virt_to_phys(size_t viraddr)
|
|
|
|
{
|
|
|
|
page_entry_t* entry = (page_entry_t*) (PAGE_MAP_PGT | (viraddr >> PAGE_MAP_BITS));
|
|
|
|
return (*entry & ~PAGE_FLAGS_MASK) | (viraddr & ~PAGE_MASK);
|
2013-12-03 15:26:21 +01:00
|
|
|
}
|
|
|
|
|
2014-01-09 13:32:00 +01:00
|
|
|
/** @brief Update page table bits (PG_*) by using arch independent flags (MAP_*) */
|
|
|
|
static inline size_t page_bits(int flags)
|
|
|
|
{
|
|
|
|
size_t bits = PG_PRESENT|PG_RW|PG_GLOBAL|PG_XD;
|
|
|
|
|
|
|
|
if (flags & MAP_NO_ACCESS) bits &= ~PG_PRESENT;
|
|
|
|
if (flags & MAP_READ_ONLY) bits &= ~PG_RW;
|
|
|
|
if (flags & MAP_CODE) bits &= ~PG_XD;
|
|
|
|
if (flags & MAP_USER_SPACE) bits &= ~PG_GLOBAL;
|
|
|
|
if (flags & MAP_USER_SPACE) bits |= PG_USER;
|
|
|
|
if (flags & MAP_WT) bits |= PG_PWT;
|
|
|
|
if (flags & MAP_NO_CACHE) bits |= PG_PCD;
|
|
|
|
if (flags & MAP_MPE) bits |= PG_MPE;
|
|
|
|
if (flags & MAP_SVM_INIT) bits |= PG_SVM_INIT;
|
|
|
|
if (flags & MAP_SVM_LAZYRELEASE) bits |= PG_SVM_LAZYRELEASE;
|
|
|
|
if (flags & MAP_SVM_STRONG) bits |= PG_SVM_STRONG;
|
|
|
|
|
|
|
|
return bits;
|
2013-12-03 15:26:21 +01:00
|
|
|
}
|
2013-12-03 16:34:34 +01:00
|
|
|
|
2014-01-09 16:52:03 +01:00
|
|
|
page_map_t* get_boot_page_map(void)
|
|
|
|
{
|
|
|
|
return &boot_pml4;
|
|
|
|
}
|
|
|
|
|
2013-12-03 16:34:34 +01:00
|
|
|
/** @brief Recursive traversal through the page map tree
 *
 * Walks every page map entry that covers [start, end) via the recursive
 * self-mapping, invoking the callbacks on each visited entry. A negative
 * callback return value aborts the traversal immediately.
 *
 * @param start The first address whose page map entry we will call on
 * @param end The exclusive end address whose page map entry we will call on
 * @param pre Callback which is called for every page map entry (pre-order traversal)
 * @param post Callback which is called for every page map entry (post-order traversal)
 * @return 0 on success, -EINVAL for an empty/inverted range, or the first
 *         negative callback return value
 */
int page_iterate(size_t start, size_t end, page_cb_t pre, page_cb_t post)
{
	// per-level cursor and inclusive upper bound of the traversal
	page_entry_t* entry[PAGE_MAP_LEVELS];
	page_entry_t* last[PAGE_MAP_LEVELS];

	if (BUILTIN_EXPECT(start >= end, 0))
		return -EINVAL;

	// setup subtree boundaries
	int i;
	for (i=0; i<PAGE_MAP_LEVELS; i++) {
		entry[i] = virt_to_entry(start, i);
		last[i] = virt_to_entry(end - 1, i);
	}

	// nested iterator function (sees the scope of parent)
	// NOTE(review): GCC nested-function extension; it mutates the shared
	// entry[]/i state across recursion levels — order sensitive
	int iterate(int level) {
		int ret;
		while (entry[level] <= last[level]) {
			if (pre) { // call pre-order callback if available
				ret = pre(entry[level], level);
				if (BUILTIN_EXPECT(ret < 0, 0))
					return ret;
			}

			// recurse if
			// - we are not in the PGT
			// - and the inferior page table is present
			// - and the current entry represents no huge page
			if (level && (*entry[level] & PG_PRESENT) && !(*entry[level] & PG_PSE)) {
				ret = iterate(level-1);
				if (BUILTIN_EXPECT(ret < 0, 0))
					return ret;
			}
			// or skip the entries we've omit...
			// (advance all lower-level cursors to the start of the
			//  subtree belonging to the next entry on this level)
			else {
				size_t next = (size_t) (entry[level]+1);
				for (i=0; i<level; i++)
					entry[i] = (page_entry_t*) (next << (PAGE_MAP_BITS*(level-i)));
			}

			if (post) { // call post-order callback if available
				ret = post(entry[level], level);
				if (BUILTIN_EXPECT(ret < 0, 0))
					return ret;
			}

			// return if we've reached the end of table
			entry[level]++;
			if (((size_t) entry[level] & ~PAGE_MASK) == 0x000) // TODO
				return 0;
		}

		return 0;
	}

	// we start at the highest order table (PML4 or PGD)
	return iterate(PAGE_MAP_LEVELS-1);
}
|
2013-12-03 16:37:53 +01:00
|
|
|
|
|
|
|
/** @brief Dump the page mappings of [from, to) to the kernel console
 *
 * Coalesces consecutive present pages with identical flag bits into one
 * printed range. Output columns: start, end, size and the flag characters
 * x/g/d/a/u/w (exec, global, dirty, accessed, user, writable).
 */
void page_dump(size_t from, size_t to)
{
	task_t* task = per_core(current_task);

	// state of the range currently being coalesced (0 flags = no open range)
	size_t flags = 0;
	size_t start = 0;

	// print one coalesced mapping line
	void print(size_t start, size_t end, size_t flags) {
		size_t size = end - start;

		kprintf("%#018lx-%#018lx %#14x %c%c%c%c%c%c\n", start, end, size,
			(flags & PG_XD) ? '-' : 'x',
			(flags & PG_GLOBAL) ? 'g' : '-',
			(flags & PG_DIRTY) ? 'd' : '-',
			(flags & PG_ACCESSED) ? 'a' : '-',
			(flags & PG_USER) ? 'u' : '-',
			(flags & PG_RW) ? 'w' : '-'
		);
	}

	// traversal callback: opens/extends/closes coalesced ranges
	int cb(page_entry_t* entry, int level) {
		size_t end;

		if (*entry & PG_PRESENT) {
			// only leaf entries (PGT level or huge pages) are mappings
			if (!level || (*entry & PG_PSE)) {
				if (!flags) {
					// open a new range
					flags = *entry & PAGE_FLAGS_MASK;
					start = entry_to_virt(entry, level);
				}
				else if (flags != (*entry & PAGE_FLAGS_MASK)) {
					// flags changed: close the old range, open a new one
					end = entry_to_virt(entry, level);
					print(start, end, flags);
					start = end;
					flags = *entry & PAGE_FLAGS_MASK;
				}
			}
		}
		else if (flags) {
			// hole in the mapping: close the open range
			end = entry_to_virt(entry, level);
			print(start, end, flags);
			flags = 0;
		}

		return 0;
	}

	// lock tables
	// NOTE(review): kslock is acquired before task->page_lock but also
	// released before it — verify this matches the project's lock ordering
	spinlock_lock(&kslock);
	spinlock_irqsave_lock(&task->page_lock);

	kprintf("%-18s-%18s %14s %-6s\n", "start", "end", "size", "flags"); // header
	page_iterate(from, to, cb, NULL);

	// unlock tables
	spinlock_unlock(&kslock);
	spinlock_irqsave_unlock(&task->page_lock);

	// workaround to print last mapping
	if (flags)
		print(start, PAGE_FLOOR(to), flags);
}
|
|
|
|
|
|
|
|
/** @brief Print usage statistics of the page flag bits for [from, to)
 *
 * Counts, over all present leaf mappings, how often each PG_* bit is set
 * and prints one labelled counter per bit.
 *
 * @param from Virtual start address of the scanned region
 * @param to Exclusive virtual end address of the scanned region
 * @param reset If nonzero, clear the accessed and dirty bits while scanning
 */
void page_stats(size_t from, size_t to, int reset)
{
	task_t* task = per_core(current_task);

	int i, stats[13] = { 0 };
	const char* labels[] = { [0] = "present", "writable", "user accessable", "write through", "cache disabled", // IA-32 "legacy" bits
				"accessed", "dirty", "huge pages", "global", "svm", "svm lazy", "svm init",
				[12] = "exec disabled" // IA-32e / PAE bits
	};

	int cb(page_entry_t* entry, int level) {
		if (*entry & PG_PRESENT) {
			// only leaf entries (PGT level or huge pages) are mappings
			if (!level || (*entry & PG_PSE)) {
				// increment stat counters
				int i;
				for (i=0; i<12; i++) { // IA-32 "legacy" bits
					if (*entry & (1UL << i))
						stats[i]++;
				}
				for (i=0; i<1; i++) { // IA-32e / PAE bits (bit 63 = XD)
					// 1UL: shifting a 32 bit int by 63 is undefined behavior
					if (*entry & (1UL << (63-i)))
						stats[i+PAGE_BITS]++;
				}
			}

			// reset accessed and dirty bits
			if (reset) {
				*entry &= ~(PG_ACCESSED|PG_DIRTY);
				tlb_flush_one_page(entry_to_virt(entry, level)); // see IA32 Vol3 4.8
			}
		}

		return 0;
	}

	// lock tables
	spinlock_lock(&kslock);
	spinlock_irqsave_lock(&task->page_lock);

	page_iterate(from, to, cb, NULL);

	// unlock tables
	spinlock_unlock(&kslock);
	spinlock_irqsave_unlock(&task->page_lock);

	kprintf("total pages:\n");
	for (i=0; i<13; i++)
		// %*d: stats[i] is an int (the old %*lu mismatched the vararg);
		// the field width argument must be an int as well
		kprintf(" - %s:%*d\n", labels[i], (int) (25-strlen(labels[i])), stats[i]);
}
|
|
|
|
|
2014-01-09 13:44:20 +01:00
|
|
|
/** @brief Create a copy of the current task's page map for a new task
 *
 * Temporarily maps a freshly allocated top-level table next to the current
 * one via the self-mapping, then walks the current tables: userspace page
 * frames are deep-copied (if requested), kernel entries are shared.
 *
 * @param new_task Task that receives the new page map
 * @param copy If nonzero, deep-copy userspace page frames
 * @return page_iterate() result (0 on success), -ENOMEM on allocation failure
 */
int copy_page_map(task_t* new_task, int copy)
{
	task_t* cur_task = per_core(current_task);

	size_t phyaddr;
	// int, not size_t: page_iterate() may return a negative error code,
	// and the value is printed with %i below
	int ret;

	int cb(page_entry_t* src, int level) {
		// corresponding entry in the temporarily mapped new table
		page_entry_t* dest = src - (1L<<36); // TODO

		if (*src & PG_PRESENT) {
			if (*src & PG_USER) {
				if (copy) { // deep copy page frame
					size_t phyaddr = get_page();
					if (BUILTIN_EXPECT(!phyaddr, 0))
						return -ENOMEM;

					atomic_int32_inc(&cur_task->user_usage);

					copy_page(phyaddr, *src & ~PAGE_FLAGS_MASK);
					*dest = phyaddr | (*src & PAGE_FLAGS_MASK);
				}
			}
			else // shallow copy kernel table
				*dest = *src;
		}

		return 0;
	}

	// fixed mapping for paging structures
	page_map_t *current = (page_map_t*) PAGE_MAP_PML4;
	page_map_t *new = palloc(PAGE_SIZE, 0);
	if (BUILTIN_EXPECT(!new, 0))
		return -ENOMEM;

	phyaddr = virt_to_phys((size_t) new);

	// lock tables
	spinlock_lock(&kslock);
	spinlock_irqsave_lock(&cur_task->page_lock);

	// map new table
	current->entries[PAGE_MAP_ENTRIES-2] = phyaddr | PG_TABLE;
	tlb_flush(); // ouch :(

	// setup self reference for new table
	new->entries[PAGE_MAP_ENTRIES-1] = phyaddr | PG_TABLE;

	ret = page_iterate(0, PAGE_MAP_PGT - (1L<<39), cb, NULL); // TODO: check boundaries

	// unlock tables
	spinlock_irqsave_unlock(&cur_task->page_lock);
	spinlock_unlock(&kslock);

	// unmap new tables
	current->entries[PAGE_MAP_ENTRIES-2] = 0;
	tlb_flush(); // ouch :(

	new_task->page_map = new;

	kprintf("copy_page_map: allocated %i page tables\n", ret); // TODO: remove

	return ret;
}
|
|
|
|
|
2013-10-11 16:21:53 +02:00
|
|
|
int drop_page_map(void)
|
2011-02-24 18:32:58 +01:00
|
|
|
{
|
2011-03-04 11:38:40 +01:00
|
|
|
task_t* task = per_core(current_task);
|
2011-02-24 18:32:58 +01:00
|
|
|
|
2014-01-09 13:44:20 +01:00
|
|
|
int cb(page_entry_t* entry, int level) {
|
|
|
|
if (*entry & PG_USER) {
|
|
|
|
kprintf("drop_page_map:cb: entry = %p, level = %u\n", entry, level); // TODO: remove
|
2011-02-24 18:32:58 +01:00
|
|
|
|
2014-01-09 13:44:20 +01:00
|
|
|
if (put_page(*entry & ~PAGE_FLAGS_MASK))
|
|
|
|
atomic_int32_dec(&task->user_usage);
|
2011-03-04 11:38:40 +01:00
|
|
|
}
|
2011-02-24 18:32:58 +01:00
|
|
|
|
2014-01-09 13:44:20 +01:00
|
|
|
return 0;
|
|
|
|
}
|
2011-02-24 18:32:58 +01:00
|
|
|
|
2014-01-09 13:44:20 +01:00
|
|
|
kprintf("drop_page_map: task = %u\n", task->id); // TODO: remove
|
2011-02-24 18:32:58 +01:00
|
|
|
|
2014-01-09 13:44:20 +01:00
|
|
|
// check assertions
|
|
|
|
if (BUILTIN_EXPECT(task->page_map == get_boot_page_map(), 0))
|
|
|
|
return -EINVAL;
|
|
|
|
if (BUILTIN_EXPECT(!task || !task->page_map, 0))
|
|
|
|
return -EINVAL;
|
2010-12-15 12:08:37 +00:00
|
|
|
|
2014-01-09 13:44:20 +01:00
|
|
|
// lock tables
|
|
|
|
spinlock_irqsave_lock(&task->page_lock);
|
2010-12-10 06:16:58 +00:00
|
|
|
|
2014-01-09 16:52:03 +01:00
|
|
|
page_iterate(0, PAGE_MAP_PGT, NULL, cb);
|
2010-12-10 06:16:58 +00:00
|
|
|
|
2014-01-09 13:44:20 +01:00
|
|
|
pfree(task->page_map, PAGE_SIZE);
|
2010-12-10 06:16:58 +00:00
|
|
|
|
2014-01-09 13:44:20 +01:00
|
|
|
// unlock tables
|
|
|
|
spinlock_irqsave_unlock(&task->page_lock);
|
2010-12-10 06:16:58 +00:00
|
|
|
|
2014-01-09 13:44:20 +01:00
|
|
|
return 0;
|
|
|
|
}
|
2010-12-10 06:16:58 +00:00
|
|
|
|
2014-01-09 13:44:20 +01:00
|
|
|
/** @brief Update the access permissions of an already mapped region
 *
 * Rewrites the PGT entries of [viraddr, viraddr + npages*PAGE_SIZE) with
 * the PG_* bits derived from the given MAP_* flags and flushes the
 * affected TLB entries.
 *
 * @param viraddr Virtual start address of the region
 * @param npages Number of pages in the region
 * @param flags Architecture independent MAP_* flags to apply
 * @return page_iterate() result (0 on success); 0 if the assertions fail
 */
static int set_page_flags(size_t viraddr, uint32_t npages, int flags)
{
	task_t* task = per_core(current_task);

	size_t bits = page_bits(flags);
	size_t start = viraddr;
	size_t end = start + npages * PAGE_SIZE;

	int cb(page_entry_t* entry, int level) {
		if (level) {
			// upper levels: only widen access; the PGT entry decides
			if (flags & MAP_USER_SPACE)
				*entry |= PG_USER;
		}
		else
			// PGT level: replace the flag bits completely
			*entry = (*entry & ~PAGE_FLAGS_MASK) | bits;

		tlb_flush_one_page(entry_to_virt(entry, level));

		return 0;
	}

	// check assertions
	if (BUILTIN_EXPECT(start < KERNEL_SPACE && end >= KERNEL_SPACE, 0))
		return 0;
	if (BUILTIN_EXPECT(!task || !task->page_map, 0))
		return 0;

	// lock tables
	if (viraddr < KERNEL_SPACE)
		spinlock_lock(&kslock);
	else
		spinlock_irqsave_lock(&task->page_lock);

	int ret = page_iterate(start, end, cb, NULL);

	// unlock tables
	// (bug fix: the old code called spinlock_lock()/spinlock_irqsave_lock()
	//  again here instead of unlocking, deadlocking on the next use)
	if (viraddr < KERNEL_SPACE)
		spinlock_unlock(&kslock);
	else
		spinlock_irqsave_unlock(&task->page_lock);

	return ret;
}
|
|
|
|
|
2014-01-09 13:44:20 +01:00
|
|
|
/** @brief Map a contiguous physical region into the virtual address space
 *
 * Allocates a virtual region from the VMA allocator if viraddr is 0,
 * creates missing intermediate tables on demand, and installs the PGT
 * entries with the bits derived from the MAP_* flags.
 *
 * @param viraddr Desired virtual start address, or 0 to let vma_alloc() pick one
 * @param phyaddr Physical start address of the region to map
 * @param npages Number of pages to map
 * @param flags MAP_* flags (MAP_REMAP permits overwriting existing entries)
 * @return Virtual start address of the mapping, or 0 on failure
 */
size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flags)
{
	task_t* task = per_core(current_task);

	// pick a virtual region if the caller did not fix one
	if (!viraddr) {
		int vma_flags = VMA_HEAP;
		if (flags & MAP_USER_SPACE)
			vma_flags |= VMA_USER;

		viraddr = vma_alloc(npages * PAGE_SIZE, vma_flags);
	}

	size_t bits = page_bits(flags);
	size_t start = viraddr;
	size_t end = start + npages * PAGE_SIZE;

	// traversal callback; note that it advances the captured phyaddr
	// by PAGE_SIZE for every installed PGT entry
	int cb(page_entry_t* entry, int level) {
		if (level) { // PGD, PDPT, PML4..
			if (*entry & PG_PRESENT) {
				if (flags & MAP_USER_SPACE) {
					/*
					 * We are changing page map entries which cover
					 * the kernel. So before altering them we need to
					 * make a private copy for the task
					 */
					if (!(*entry & PG_USER)) {
						size_t phyaddr = get_page();
						if (BUILTIN_EXPECT(!phyaddr, 0))
							return -ENOMEM;

						atomic_int32_inc(&task->user_usage);

						copy_page(phyaddr, *entry & ~PAGE_FLAGS_MASK);
						*entry = phyaddr | (*entry & PAGE_FLAGS_MASK) | PG_USER;

						/*
						 * We just need to flush the table itself.
						 * TLB entries for the kernel remain valid
						 * because we've not changed them.
						 */
						tlb_flush_one_page(entry_to_virt(entry, 0));
					}
				}
			}
			else {
				// intermediate table missing: allocate a fresh one
				size_t phyaddr = get_page();
				if (BUILTIN_EXPECT(!phyaddr, 0))
					return -ENOMEM;

				atomic_int32_inc(&task->user_usage);

				*entry = phyaddr | bits;
			}
		}
		else { // PGT
			// refuse to silently overwrite an existing mapping
			if ((*entry & PG_PRESENT) && !(flags & MAP_REMAP))
				return -EINVAL;

			*entry = phyaddr | bits;

			if (flags & MAP_USER_SPACE)
				atomic_int32_inc(&task->user_usage);

			if (flags & MAP_REMAP)
				tlb_flush_one_page(entry_to_virt(entry, level));

			phyaddr += PAGE_SIZE;
		}

		return 0;
	}

	kprintf("map_region: map %u pages from %#lx to %#lx with flags: %#x\n", npages, viraddr, phyaddr, flags); // TODO: remove

	// check assertions
	if (BUILTIN_EXPECT(start < KERNEL_SPACE && end >= KERNEL_SPACE, 0))
		return 0;
	if (BUILTIN_EXPECT(!task || !task->page_map, 0))
		return 0;
	if (BUILTIN_EXPECT(!viraddr, 0))
		return 0;

	// lock tables (kernel mappings use the global kslock,
	// task-private mappings the per-task lock)
	if (viraddr < KERNEL_SPACE)
		spinlock_lock(&kslock);
	else
		spinlock_irqsave_lock(&task->page_lock);

	int ret = page_iterate(start, end, cb, NULL);

	// unlock tables
	if (viraddr < KERNEL_SPACE)
		spinlock_unlock(&kslock);
	else
		spinlock_irqsave_unlock(&task->page_lock);

	return (ret == 0) ? viraddr : 0;
}
|
2010-12-10 06:16:58 +00:00
|
|
|
|
2011-03-04 22:44:53 +01:00
|
|
|
/** @brief Remove the mapping of a virtual region
 *
 * Clears the PGT entries of [viraddr, viraddr + npages*PAGE_SIZE)
 * (post-order) and releases intermediate tables that became empty.
 *
 * @param viraddr Virtual start address of the region
 * @param npages Number of pages to unmap
 * @return page_iterate() result (0 on success); 0 if the assertions fail
 */
int unmap_region(size_t viraddr, uint32_t npages)
{
	task_t* task = per_core(current_task);

	size_t start = viraddr;
	size_t end = start + npages * PAGE_SIZE;

	kprintf("unmap_region: unmap %u pages from %#lx\n", npages, viraddr); // TODO: remove

	// post-order callback: leaves are cleared first, then empty tables freed
	int cb(page_entry_t* entry, int level) {
		if (level) { // PGD, PDPT, PML4
			// count remaining present entries in the inferior table
			page_map_t* map = (page_map_t*) entry_to_virt(entry, 0);
			int used = 0;

			int i;
			for (i=0; i<PAGE_MAP_ENTRIES; i++) {
				if (map->entries[i] & PG_PRESENT)
					used++;
			}

			// inferior table is empty: unlink and release it
			if (!used) {
				*entry &= ~PG_PRESENT;
				tlb_flush_one_page(entry_to_virt(entry, 0));

				if (put_page(*entry & ~PAGE_FLAGS_MASK))
					atomic_int32_dec(&task->user_usage);
			}
		}
		else { // PGT
			*entry = 0;

			tlb_flush_one_page(entry_to_virt(entry, level));

			// NOTE(review): tests the region's start address, not the
			// address of this particular entry — verify intent for
			// regions spanning the KERNEL_SPACE boundary
			if (viraddr >= KERNEL_SPACE)
				atomic_int32_dec(&task->user_usage);
		}

		return 0;
	}

	// check assertions
	if (BUILTIN_EXPECT(start < KERNEL_SPACE && end >= KERNEL_SPACE, 0))
		return 0;
	if (BUILTIN_EXPECT(!task || !task->page_map, 0))
		return 0;

	// lock tables
	if (viraddr < KERNEL_SPACE)
		spinlock_lock(&kslock);
	else
		spinlock_irqsave_lock(&task->page_lock);

	int ret = page_iterate(start, end, NULL, cb);

	// unlock tables
	if (viraddr < KERNEL_SPACE)
		spinlock_unlock(&kslock);
	else
		spinlock_irqsave_unlock(&task->page_lock);

	return ret;
}
|
|
|
|
|
2010-12-10 06:16:58 +00:00
|
|
|
/** @brief Page fault (#PF) exception handler
 *
 * Implements on-demand paging for the current task's userspace heap:
 * a fault inside [heap->start, heap->end) is resolved by mapping and
 * zero-filling a fresh page frame. Any other fault is fatal: the register
 * state is dumped and the task is aborted.
 */
static void pagefault_handler(struct state *s)
{
	task_t* task = per_core(current_task);
	size_t viraddr = read_cr2();	// faulting linear address

	// on demand userspace heap mapping
	if ((task->heap) && (viraddr >= task->heap->start) && (viraddr < task->heap->end)) {
		viraddr &= PAGE_MASK;	// align down to the page boundary

		size_t phyaddr = get_page();
		if (BUILTIN_EXPECT(!phyaddr, 0)) {
			kprintf("out of memory: task = %u\n", task->id);
			goto default_handler;
		}

		viraddr = map_region(viraddr, phyaddr, 1, MAP_USER_SPACE);
		if (BUILTIN_EXPECT(!viraddr, 0)) {
			// NOTE(review): viraddr was just overwritten with 0 by the
			// failed map_region(), so this message prints 0 rather than
			// the faulting address — verify
			kprintf("map_region: could not map %#lx to %#lx, task = %u\n", viraddr, phyaddr, task->id);
			put_page(phyaddr);

			goto default_handler;
		}

		memset((void*) viraddr, 0x00, PAGE_SIZE); // fill with zeros

		return;
	}

default_handler:
	// fatal fault: decode the error code bits (IA-32 SDM Vol. 3, #PF)
	kprintf("Page Fault Exception (%d) at cs:rip = %#x:%#lx, core = %u, task = %u, addr = %#lx, error = %#x [ %s %s %s %s %s ]\n"
		"Register state: rflags = %#lx, rax = %#lx, rbx = %#lx, rcx = %#lx, rdx = %#lx, rdi = %#lx, rsi = %#lx, rbp = %#llx, rsp = %#lx\n",
		s->int_no, s->cs, s->rip, CORE_ID, task->id, viraddr, s->error,
		(s->error & 0x4) ? "user" : "supervisor",
		(s->error & 0x10) ? "instruction" : "data",
		(s->error & 0x2) ? "write" : ((s->error & 0x10) ? "fetch" : "read"),
		(s->error & 0x1) ? "protection" : "not present",
		(s->error & 0x8) ? "reserved bit" : "\b",
		s->rflags, s->rax, s->rbx, s->rcx, s->rdx, s->rdi, s->rsi, s->rbp, s->rsp);

	irq_enable();
	abort();
}
|
|
|
|
|
|
|
|
/** @brief Architecture specific paging initialization
 *
 * Installs the page fault handler, enables the recursive self-mapping of
 * the boot PML4, maps the multiboot modules (e.g. the init ramdisk) and
 * the APIC registers, and registers the boot task.
 *
 * @return 0 on success, -ENOMEM if a required mapping fails
 */
int arch_paging_init(void)
{
	uint32_t i, npages;

	// replace default pagefault handler
	irq_uninstall_handler(14);
	irq_install_handler(14, pagefault_handler);

	// setup recursive paging (last PML4 entry points to the PML4 itself)
	boot_pml4.entries[PAGE_MAP_ENTRIES-1] = (size_t) &boot_pml4 | PG_TABLE;

	/*
	 * In longmode the kernel is already maped into the kernel space (see entry64.asm)
	 * this includes .data, .bss, .text, VGA, the multiboot & multiprocessing (APIC) structures
	 */

#if MAX_CORES > 1
	// reserve page for smp boot code (identity-mapped, uncached)
	if (!map_region(SMP_SETUP_ADDR, SMP_SETUP_ADDR, 1, MAP_NO_CACHE | MAP_REMAP)) {
		kputs("could not reserve page for smp boot code\n");
		return -ENOMEM;
	}
#endif

#ifdef CONFIG_MULTIBOOT
#if 0
	// map reserved memory regions into the kernel space
	if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MEM_MAP)) {
		multiboot_memory_map_t* mmap = (multiboot_memory_map_t*) mb_info->mmap_addr;
		multiboot_memory_map_t* mmap_end = (void*) ((size_t) mb_info->mmap_addr + mb_info->mmap_length);

		while (mmap < mmap_end) {
			if (mmap->type != MULTIBOOT_MEMORY_AVAILABLE) {
				npages = mmap->len / PAGE_SIZE;
				if ((mmap->addr+mmap->len) % PAGE_SIZE)
					npages++;
				map_region(mmap->addr, mmap->addr, npages, MAP_NO_CACHE | MAP_REMAP);
			}
			mmap++;
		}
	}
#endif

	/*
	 * Modules like the init ram disk are already loaded.
	 * Therefore, we map these modules into the kernel space.
	 */
	if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MODS)) {
		multiboot_module_t* mmodule = (multiboot_module_t*) ((size_t) mb_info->mods_addr);
		npages = PAGE_FLOOR(mb_info->mods_count*sizeof(multiboot_module_t)) >> PAGE_BITS;

		// map the module descriptor array itself
		map_region((size_t) mmodule, (size_t) mmodule, npages, MAP_REMAP);

		for(i=0; i<mb_info->mods_count; i++, mmodule++) {
			// map physical address to the same virtual address
			npages = PAGE_FLOOR(mmodule->mod_end - mmodule->mod_start) >> PAGE_BITS;
			kprintf("Map module %s at %#x (%u pages)\n", (char*)(size_t) mmodule->cmdline, mmodule->mod_start, npages);
			map_region((size_t) (mmodule->mod_start), (size_t) (mmodule->mod_start), npages, MAP_REMAP);
		}
	}
#endif

	// we turned on paging => now, we are able to register our task
	register_task();

	// APIC registers into the kernel address space
	map_apic();

	return 0;
}
|