another rewrite of the paging code without nested functions

Steffen Vogel 2014-02-18 13:08:22 +01:00
parent 6e8ecad91f
commit 3203d53a83
4 changed files with 337 additions and 342 deletions


@@ -34,59 +34,44 @@
/// Page offset bits
#define PAGE_BITS 12
/// The size of a single page in bytes
#define PAGE_SIZE ( 1L << PAGE_BITS)
#ifdef CONFIG_X86_32
/// Number of page map indirections
#define PAGE_MAP_LEVELS 2
/// Page map bits
#define PAGE_MAP_BITS 10
/// Total operand width in bits
#define BITS 32
/// Linear/virtual address width
#define VIRT_BITS BITS
/// Physical address width (we don't support PAE)
#define PHYS_BITS BITS
#elif defined(CONFIG_X86_64)
/// Number of page map indirections
#define PAGE_MAP_LEVELS 4
/// Page map bits
#define PAGE_MAP_BITS 9
#define PAGE_MAP_BITS 10
/// Number of page map indirections
#define PAGE_MAP_LEVELS 2
/// Mask the page address without page map flags
#define PAGE_MASK 0xFFFFF000
#elif defined(CONFIG_X86_64)
/// Total operand width in bits
#define BITS 64
/// Linear/virtual address width
#define VIRT_BITS 48
/// Physical address width (maximum value)
#define PHYS_BITS 52
/// Page map bits
#define PAGE_MAP_BITS 9
/// Number of page map indirections
#define PAGE_MAP_LEVELS 4
/// Mask the page address without page map flags
#define PAGE_MASK 0x000FFFFFFFFFF000
#endif
/// The size of a single page in bytes
#define PAGE_SIZE ( 1L << PAGE_BITS)
/// The number of entries in a page map table
#define PAGE_MAP_ENTRIES ( 1L << PAGE_MAP_BITS)
/// Mask the page address
#define PAGE_MASK (-1L << PAGE_BITS)
/// Mask the entry in a page table
#define PAGE_ENTRY_MASK (-1L << (PAGE_BITS-PAGE_MAP_BITS))
/// Mask for all flag bits in a page map entry (including ignored bits)
#define PAGE_FLAGS_MASK (~(-1L << PAGE_BITS) | (-1L << VIRT_BITS))
#define PAGE_MAP_ENTRIES (1L << PAGE_MAP_BITS)
/// Align to next page
#define PAGE_FLOOR(addr) (((addr) + PAGE_SIZE - 1) & PAGE_MASK)
/// Align to page
#define PAGE_CEIL(addr) ( (addr) & PAGE_MASK)
/// Sign extension to get a valid canonical address (hack: by using arithmetic shifts)
#define VIRT_SEXT(addr) ((ssize_t) addr << (BITS-VIRT_BITS) >> (BITS-VIRT_BITS))
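A standalone sketch (not part of this commit) of the sign-extension hack, assuming the x86_64 values above (BITS = 64, VIRT_BITS = 48): shifting the address up so that bit 47 becomes the sign bit and shifting it back down arithmetically replicates bit 47 into bits 48-63, which is exactly the canonical form the CPU requires.

#include <stdio.h>
#include <stddef.h>
#include <sys/types.h>	/* ssize_t */

#define BITS      64
#define VIRT_BITS 48
#define VIRT_SEXT(addr) ((ssize_t) (addr) << (BITS-VIRT_BITS) >> (BITS-VIRT_BITS))

int main(void)
{
	/* bit 47 set: an upper-half address, bits 48..63 must become 1 */
	printf("%#zx\n", (size_t) VIRT_SEXT(0x0000FF8000000000UL)); /* 0xffffff8000000000 */
	/* bit 47 clear: a lower-half address stays as it is */
	printf("%#zx\n", (size_t) VIRT_SEXT(0x00007F8000000000UL)); /* 0x7f8000000000 */
	return 0;
}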
// base addresses of page map tables
#ifdef CONFIG_X86_32
#define PAGE_MAP_PGD 0xFFFFF000
#define PAGE_MAP_PGT 0xFFC00000
#elif defined(CONFIG_X86_64)
#define PAGE_MAP_PML4 0xFFFFFFFFFFFFF000
#define PAGE_MAP_PDPT 0xFFFFFFFFFFE00000
#define PAGE_MAP_PGD 0xFFFFFFFFC0000000
#define PAGE_MAP_PGT 0xFFFFFF8000000000
#endif
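Where the fixed x86_64 base addresses above come from: a sketch (not part of the commit), assuming the recursive-mapping scheme used in this patch (the last PML4 entry references the PML4 itself, see arch_paging_init()). Every additional pass through that slot peels off one level of indirection, and sign-extending the resulting index pattern yields exactly these constants.

#include <stdio.h>
#include <stdint.h>

/* sign-extend bit 47 into bits 48..63 (canonical form) */
static uint64_t sext48(uint64_t x)
{
	return (uint64_t) ((int64_t) (x << 16) >> 16);
}

int main(void)
{
	const uint64_t self = 511;          /* self-reference slot of the PML4 */
	uint64_t pgt  = self << 39;         /* 1 indirection: all page tables  */
	uint64_t pgd  = pgt  | self << 30;  /* 2 indirections: all page dirs   */
	uint64_t pdpt = pgd  | self << 21;  /* 3 indirections: all PDPTs       */
	uint64_t pml4 = pdpt | self << 12;  /* 4 indirections: the PML4 itself */

	printf("PGT  %#lx\n", (unsigned long) sext48(pgt));  /* 0xffffff8000000000 */
	printf("PGD  %#lx\n", (unsigned long) sext48(pgd));  /* 0xffffffffc0000000 */
	printf("PDPT %#lx\n", (unsigned long) sext48(pdpt)); /* 0xffffffffffe00000 */
	printf("PML4 %#lx\n", (unsigned long) sext48(pml4)); /* 0xfffffffffffff000 */
	return 0;
}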
/// Page is present
#define PG_PRESENT (1 << 0)
@@ -125,28 +110,12 @@
/// This is a whole set of flags (PRESENT,RW,GLOBAL) for kernelspace pages
#define PG_PAGE (PG_PRESENT|PG_RW|PG_GLOBAL|PG_XD)
/** @brief A single entry in a page map */
/** @brief A single entry in a page map
*
* Usually used as a pointer to a mapped page map entry.
*/
typedef size_t page_entry_t;
/** @brief General page map structure
*
* This page map structure is a general type for all indirection levels,
* as all page map levels contain the same number of entries.
* All page maps must be page aligned!
*/
typedef struct page_map {
page_entry_t entries[PAGE_MAP_ENTRIES];
} __attribute__ ((aligned (PAGE_SIZE))) page_map_t;
/** @brief A callback type for the page map iterator
*
* @param entry A pointer to the current page map entry
* @return
* - 0 if we want to skip underlying page tables
* - >0 if we want to recurse into underlying page tables
*/
typedef int (*page_cb_t)(page_entry_t* entry, int level);
/** @brief Converts a virtual address to a physical
*
* @param viraddr Virtual address to convert
@@ -227,7 +196,7 @@ int arch_paging_init(void);
*
* @return Returns the address of the boot task's page dir array.
*/
page_map_t* get_boot_page_map(void);
page_entry_t* get_boot_page_map(void);
/** @brief Setup a new page directory for a new user-level task
*
@@ -240,10 +209,10 @@ page_map_t* get_boot_page_map(void);
*/
int copy_page_map(struct task* task, int copy);
/** @brief Delete all page map structures of the current task
/** @brief Deletes all user page map structures of the current task
*
* Puts PML4, PDPT, PGD, PGT tables back to buffer and
* sets the task's page map pointer to NULL
* All allocated physical page frames are released back to the bitmap
* The task->page_map is replaced by the boot page map
*
* @return
* - 0 on success
@@ -264,15 +233,18 @@ int drop_page_map(void);
* - 0 on success
* - -EINVAL (-22) on failure.
*/
int change_page_permissions(size_t start, size_t end, uint32_t flags);
int set_page_flags(size_t viraddr, uint32_t npages, int flags);
/** @brief Dump mapped memory */
void page_dump(size_t start, size_t end);
/** @brief Dump mapped memory
*
* @param mask Only watch for changes in these page flags (PG_PRESENT is set by default)
*/
void page_dump(size_t mask);
/** @brief Print stats about page flags
*
* @param reset Reset accessed and dirty bits in page tables
*/
void page_stats(size_t start, size_t end, int reset);
void page_stats(int reset);
#endif
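Hypothetical call sites (not part of this commit) for the reworked debugging interface declared above, using only the PG_* constants from this header:

/* dump only mappings that are user-accessible and writable;
 * page_dump() ORs PG_PRESENT into the mask itself */
page_dump(PG_USER | PG_RW);

/* print the per-flag statistics and clear the accessed/dirty bits afterwards */
page_stats(1);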


@@ -50,7 +50,7 @@ size_t* get_current_stack(void)
#endif
// use new page table
write_cr3(virt_to_phys((size_t)curr_task->page_map));
write_cr3(virt_to_phys((size_t) curr_task->page_map));
return curr_task->last_stack_pointer;
}


@@ -37,138 +37,83 @@
* Virtual Memory Layout of the standard configuration
* (1 GB kernel space)
*
* 0x000000000000 - 0x0000000FFFFF: reserved for IO devices (1MB)
* 0x000000100000 - 0x00000DEADFFF: Kernel (size depends on the configuration) (221MB)
* 0x00000DEAE000 - 0x00003FFFFFFF: Kernel heap
* 0xFF0000000000 - 0xFF7FFFFFFFFF: Paging structures for copying a page map (max 512GB)
* 0xFF8000000000 - 0xFFFFFFFFFFFF: Paging structures are mapped in this region (max 512GB)
* 0x0000000000000000 - 0x00000000000FFFFF: reserved for IO devices (1MB)
* 0x0000000000100000 - 0x00000000008C2000: Kernel (~8MB)
* 0x00000000008c3000 - 0x0000000000973000: Init Ramdisk (~2MB)
*
* 0x0000800000000000 - 0xFFFF7FFFFFFFFFFF: Memory hole (48 bit VAS limitation)
*
* 0xFFFFFE8000000000 - 0xFFFFFEFFFFFFFFFF: Page map dest for copy_page_map() (512GB)
* 0xFFFFFF0000000000 - 0xFFFFFF7FFFFFFFFF: Page map source for copy_page_map() (512GB)
* 0xFFFFFF8000000000 - 0xFFFFFFFFFFFFFFFF: Self-referenced page maps of the current task (512GB)
*/
/// Boot task's page map
extern page_map_t boot_pml4;
/// Boot task's page map (set up by entryXX.asm)
extern page_entry_t boot_pml4[PAGE_MAP_ENTRIES];
/// Kernel space page map lock
static spinlock_t kslock = SPINLOCK_INIT;
/// Mapping of self referenced page map (at the end of the VAS)
static page_entry_t* const current_map = (page_entry_t*) (-1*PAGE_SIZE);
static page_entry_t* const src_map = (page_entry_t*) (-2*PAGE_SIZE);
static page_entry_t* const dest_map = (page_entry_t*) (-3*PAGE_SIZE);
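The traversal code below relies on three helpers whose definitions lie outside this hunk: get_child_entry(), entry_to_virt() and virt_to_entry(). One plausible shape for them, derived from the self-mapping declared above (x86_64, PAGE_MAP_BITS = 9) and using the macros from the header; the exact bodies are an assumption, only their use is dictated by the surrounding code:

/* child table of a non-PGT entry: because the tables are self-mapped,
 * shifting the entry's own virtual address up by one level lands on the
 * table that this entry points to */
static inline page_entry_t* get_child_entry(page_entry_t* entry)
{
	return (page_entry_t*) ((size_t) entry << PAGE_MAP_BITS);
}

/* first virtual address covered by an entry at the given level (0 = PGT) */
static inline size_t entry_to_virt(page_entry_t* entry, int level)
{
	return VIRT_SEXT((size_t) entry << ((level+1) * PAGE_MAP_BITS));
}

/* page map entry that covers addr at the given level; the OR forces the
 * bits shifted in from the top to 1, i.e. to the self-reference index 511 */
static inline page_entry_t* virt_to_entry(size_t addr, int level)
{
	return (page_entry_t*) ((((ssize_t) addr | (-1L << VIRT_BITS))
		>> ((level+1) * PAGE_MAP_BITS)) & ~0x7L);
}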
page_map_t* get_boot_page_map(void)
page_entry_t* get_boot_page_map(void)
{
return &boot_pml4;
return boot_pml4;
}
/** @brief Recursive traversal through the page map tree
*
* @param start The first address for whose page map entries the callbacks are invoked
* @param end The exclusive end address of the iterated range
* @param pre Callback which is called for every page map entry (pre-order traversal)
* @param post Callback which is called for every page map entry (post-order traversal)
*/
int page_iterate(size_t start, size_t end, page_cb_t pre, page_cb_t post)
{
page_entry_t* entry[PAGE_MAP_LEVELS];
page_entry_t* last[PAGE_MAP_LEVELS];
if (BUILTIN_EXPECT(start >= end, 0))
return -EINVAL;
// setup subtree boundaries
int i;
for (i=0; i<PAGE_MAP_LEVELS; i++) {
entry[i] = virt_to_entry(start, i);
last[i] = virt_to_entry(end - 1, i);
}
// nested iterator function (sees the scope of parent)
int iterate(int level) {
int ret;
while (entry[level] <= last[level]) {
if (pre) { // call pre-order callback if available
ret = pre(entry[level], level);
if (BUILTIN_EXPECT(ret < 0, 0))
return ret;
}
// recurse if
// - we are not in the PGT
// - and the inferior page table is present
// - and the current entry represents no huge page
if (level && (*entry[level] & PG_PRESENT) && !(*entry[level] & PG_PSE)) {
ret = iterate(level-1);
if (BUILTIN_EXPECT(ret < 0, 0))
return ret;
}
// or skip the entries we've omitted...
else {
size_t next = (size_t) (entry[level]+1);
for (i=0; i<level; i++)
entry[i] = (page_entry_t*) (next << (PAGE_MAP_BITS*(level-i)));
}
if (post) { // call post-order callback if available
ret = post(entry[level], level);
if (BUILTIN_EXPECT(ret < 0, 0))
return ret;
}
// return if we've reached the end of table
entry[level]++;
if (((size_t) entry[level] & ~PAGE_MASK) == 0x000) // TODO
return 0;
}
return 0;
}
// we start at the highest order table (PML4 or PGD)
return iterate(PAGE_MAP_LEVELS-1);
}
void page_dump(size_t from, size_t to)
void page_dump(size_t mask)
{
task_t* task = per_core(current_task);
mask |= PG_PRESENT;
size_t flags = 0;
size_t start = 0;
size_t end;
void print(size_t start, size_t end, size_t flags) {
size_t size = end - start;
kprintf("%#018lx-%#018lx %#14x %c%c%c%c%c%c\n", start, end, size,
(flags & PG_XD) ? '-' : 'x',
(flags & PG_GLOBAL) ? 'g' : '-',
(flags & PG_DIRTY) ? 'd' : '-',
(flags & PG_ACCESSED) ? 'a' : '-',
(flags & PG_USER) ? 'u' : '-',
(flags & PG_RW) ? 'w' : '-'
(mask & flags & PG_XD) ? '-' : 'x',
(mask & flags & PG_GLOBAL) ? 'g' : '-',
(mask & flags & PG_DIRTY) ? 'd' : '-',
(mask & flags & PG_ACCESSED) ? 'a' : '-',
(mask & flags & PG_USER) ? 'u' : '-',
(mask & flags & PG_RW) ? 'w' : '-'
);
}
int cb(page_entry_t* entry, int level) {
size_t end;
void traverse(int level, page_entry_t* entry) {
page_entry_t* stop = entry + PAGE_MAP_ENTRIES;
for (; entry != stop; entry++) {
if (*entry & PG_PRESENT) {
if (level && !(*entry & PG_PSE)) // do "pre-order" traversal
// TODO: handle "inheritance" of page table flags (see get_page_flags())
traverse(level-1, get_child_entry(entry));
else {
if (!flags) {
flags = *entry & ~PAGE_MASK & mask;
start = entry_to_virt(entry, level);
}
else if (flags != (*entry & ~PAGE_MASK & mask)) {
end = entry_to_virt(entry, level);
print(start, end, flags);
if (*entry & PG_PRESENT) {
if (!level || (*entry & PG_PSE)) {
if (!flags) {
flags = *entry & PAGE_FLAGS_MASK;
start = entry_to_virt(entry, level);
}
else if (flags != (*entry & PAGE_FLAGS_MASK)) {
end = entry_to_virt(entry, level);
print(start, end, flags);
start = end;
flags = *entry & PAGE_FLAGS_MASK;
flags = *entry & ~PAGE_MASK & mask;
start = end;
}
}
}
else if (flags) {
end = entry_to_virt(entry, level);
print(start, end, flags);
flags = 0;
}
}
else if (flags) {
end = entry_to_virt(entry, level);
print(start, end, flags);
flags = 0;
}
return 0;
}
// lock tables
@@ -176,18 +121,18 @@ void page_dump(size_t from, size_t to)
spinlock_irqsave_lock(&task->page_lock);
kprintf("%-18s-%18s %14s %-6s\n", "start", "end", "size", "flags"); // header
page_iterate(from, to, cb, NULL);
traverse(PAGE_MAP_LEVELS-1, current_map);
if (flags) // workaround to print last mapping
print(start, 0L, flags);
// unlock tables
spinlock_unlock(&kslock);
spinlock_irqsave_unlock(&task->page_lock);
// workaround to print last mapping
if (flags)
print(start, PAGE_FLOOR(to), flags);
spinlock_unlock(&kslock);
}
void page_stats(size_t from, size_t to, int reset)
void page_stats(int reset)
{
task_t* task = per_core(current_task);
@@ -197,40 +142,42 @@ void page_stats(size_t from, size_t to, int reset)
[12] = "exec disabled" // IA-32e / PAE bits
};
int cb(page_entry_t* entry, int level) {
if (*entry & PG_PRESENT) {
if (!level || (*entry & PG_PSE)) {
// increment stat counters
int i;
for (i=0; i<12; i++) { // IA-32 "legacy" bits
if (*entry & (1 << i))
stats[i]++;
}
for (i=0; i<1; i++) { // IA-32e / PAE bits
if (*entry & (1 << (63-i)))
stats[i+PAGE_BITS]++;
}
}
void traverse(int level, page_entry_t* entry) {
page_entry_t* stop = entry + PAGE_MAP_ENTRIES;
for (; entry != stop; entry++) {
if (*entry & PG_PRESENT) {
if (level && !(*entry & PG_PSE))
traverse(level-1, get_child_entry(entry));
else {
// increment stat counters
int i;
for (i=0; i<12; i++) { // IA-32 "legacy" bits
if (*entry & (1 << i))
stats[i]++;
}
for (i=0; i<1; i++) { // IA-32e / PAE bits
if (*entry & (1 << (63-i)))
stats[i+PAGE_BITS]++;
}
// reset accessed and dirty bits
if (reset) {
*entry &= ~(PG_ACCESSED|PG_DIRTY);
tlb_flush_one_page(entry_to_virt(entry, level)); // see IA32 Vol3 4.8
if (reset) { // reset accessed and dirty bits
*entry &= ~(PG_ACCESSED|PG_DIRTY);
tlb_flush_one_page(entry_to_virt(entry, level)); // see IA32 Vol3 4.8
}
}
}
}
return 0;
}
// lock tables
spinlock_lock(&kslock);
spinlock_irqsave_lock(&task->page_lock);
page_iterate(from, to, cb, NULL);
traverse(PAGE_MAP_LEVELS-1, current_map);
// unlock tables
spinlock_unlock(&kslock);
spinlock_irqsave_unlock(&task->page_lock);
spinlock_unlock(&kslock);
kprintf("total pages:\n");
for (i=0; i<13; i++)
@@ -241,64 +188,77 @@ int copy_page_map(task_t* new_task, int copy)
{
task_t* cur_task = per_core(current_task);
size_t phyaddr;
size_t ret;
int traverse(int level, page_entry_t* src, page_entry_t* dest) {
page_entry_t* stop = src + PAGE_MAP_ENTRIES;
for (; src != stop; src++, dest++) {
if (*src & PG_PRESENT) {
if (*src & PG_USER) { // deep copy page frame
kprintf("copy_page_map: deep src = %p, dest = %p, level = %u\n", src, dest, level); // TODO: remove
int cb(page_entry_t* src, int level) {
page_entry_t* dest = src - (1L<<36); // TODO
if (*src & PG_PRESENT) {
if (*src & PG_USER) {
if (copy) { // deep copy page frame
size_t phyaddr = get_page();
if (BUILTIN_EXPECT(!phyaddr, 0))
return -ENOMEM;
atomic_int32_inc(&cur_task->user_usage);
copy_page(phyaddr, *src & ~PAGE_FLAGS_MASK);
*dest = phyaddr | (*src & PAGE_FLAGS_MASK);
copy_page(phyaddr, *src & PAGE_MASK);
*dest = phyaddr | (*src & ~PAGE_MASK);
// do "pre-order" traversal
if (level && !(*src & PG_PSE)) {
int ret = traverse(level-1, get_child_entry(src),
get_child_entry(dest));
if (ret < 0)
return ret;
}
}
else // shallow copy kernel table
*dest = *src;
}
else // shallow copy kernel table
*dest = *src;
else // table does not exist
*dest = 0;
}
return 0;
}
// fixed mapping for paging structures
page_map_t *current = (page_map_t*) PAGE_MAP_PML4;
page_map_t *new = palloc(PAGE_SIZE, 0);
if (BUILTIN_EXPECT(!new, 0))
page_entry_t* src_virt = (copy) ? cur_task->page_map : get_boot_page_map();
page_entry_t* dest_virt = (page_entry_t*) palloc(PAGE_SIZE, MAP_KERNEL_SPACE);
if (BUILTIN_EXPECT(!dest_virt, 0))
return -ENOMEM;
phyaddr = virt_to_phys((size_t) new);
size_t src_phys = virt_to_phys((size_t) src_virt);
size_t dest_phys = virt_to_phys((size_t) dest_virt);
// lock tables
spinlock_lock(&kslock);
spinlock_irqsave_lock(&cur_task->page_lock);
// map new table
current->entries[PAGE_MAP_ENTRIES-2] = phyaddr | PG_TABLE;
kprintf("copy_page_map: copy = %u, src = %p (%p, %p), dest = %p (%p, %p)\n",
copy, src_virt, src_phys, src_map, dest_virt, dest_phys, dest_map); // TODO: remove
// temporary map src and dest tables
current_map[PAGE_MAP_ENTRIES-2] = (src_phys & PAGE_MASK) | (PG_TABLE & ~PG_RW); // source is read-only!
current_map[PAGE_MAP_ENTRIES-3] = (dest_phys & PAGE_MASK) | PG_TABLE;
tlb_flush(); // ouch :(
// setup self reference for new table
new->entries[PAGE_MAP_ENTRIES-1] = phyaddr | PG_TABLE;
int ret = traverse(PAGE_MAP_LEVELS-1, src_map, dest_map);
ret = page_iterate(0, PAGE_MAP_PGT - (1L<<39), cb, NULL); // TODO: check boundaries
// setup self reference for new table
dest_map[PAGE_MAP_ENTRIES-1] = dest_phys | PG_TABLE;
// unmap temporary tables
current_map[PAGE_MAP_ENTRIES-2] = 0;
current_map[PAGE_MAP_ENTRIES-3] = 0;
tlb_flush(); // ouch :(
// unlock tables
spinlock_irqsave_unlock(&cur_task->page_lock);
spinlock_unlock(&kslock);
// unmap new tables
current->entries[PAGE_MAP_ENTRIES-2] = 0;
tlb_flush(); // ouch :(
new_task->page_map = new;
kprintf("copy_page_map: allocated %i page tables\n", ret); // TODO: remove
new_task->page_map = dest_virt;
return ret;
}
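Why the -1/-2/-3 * PAGE_SIZE windows above work (a sketch, not part of the commit): with the self-reference in the last PML4 slot, the virtual address -n*PAGE_SIZE resolves through the index path (511, 511, 511, 512-n), i.e. it addresses whatever frame PML4[512-n] points to. current_map therefore exposes the running PML4 itself, and temporarily installing the source and destination PML4s into slots 510 and 509 makes them readable at src_map and dest_map.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	for (int n = 1; n <= 3; n++) {
		uint64_t addr = (uint64_t) -(int64_t) (n * 0x1000);
		printf("-%d*PAGE_SIZE = %#lx -> PML4/PDPT/PGD index 511, PGT index %lu\n",
		       n, (unsigned long) addr, (unsigned long) ((addr >> 12) & 0x1FF));
	}
	return 0;	/* prints PGT index 511, 510, 509 */
}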
@@ -307,15 +267,21 @@ int drop_page_map(void)
{
task_t* task = per_core(current_task);
int cb(page_entry_t* entry, int level) {
if (*entry & PG_USER) {
kprintf("drop_page_map:cb: entry = %p, level = %u\n", entry, level); // TODO: remove
void traverse(int level, page_entry_t* entry) {
page_entry_t* stop = entry + PAGE_MAP_ENTRIES;
for (; entry != stop; entry++) {
if (*entry & PG_PRESENT) {
// do "post-order" traversal
if (level && !(*entry & PG_PSE))
traverse(level-1, get_child_entry(entry));
if (put_page(*entry & ~PAGE_FLAGS_MASK))
atomic_int32_dec(&task->user_usage);
if (*entry & PG_USER) {
kprintf("drop_page_map: entry = %p. level = %u\n", entry, level);
if (put_page(*entry & PAGE_MASK))
atomic_int32_dec(&task->user_usage);
}
}
}
return 0;
}
kprintf("drop_page_map: task = %u\n", task->id); // TODO: remove
@@ -329,9 +295,15 @@ int drop_page_map(void)
// lock tables
spinlock_irqsave_lock(&task->page_lock);
page_iterate(0, PAGE_MAP_PGT, NULL, cb);
kprintf("user_usage: %u (task = %u)\n", atomic_int32_read(&task->user_usage), task->id);
pfree(task->page_map, PAGE_SIZE);
traverse(PAGE_MAP_LEVELS-1, current_map);
put_page((size_t) task->page_map);
// we replace the page table
task->page_map = get_boot_page_map();
tlb_flush();
// unlock tables
spinlock_irqsave_unlock(&task->page_lock);
@@ -339,54 +311,77 @@ int drop_page_map(void)
return 0;
}
static int set_page_flags(size_t viraddr, uint32_t npages, int flags)
int set_page_flags(size_t viraddr, uint32_t npages, int flags)
{
task_t* task = per_core(current_task);
page_entry_t* first[PAGE_MAP_LEVELS];
page_entry_t* last[PAGE_MAP_LEVELS];
size_t bits = page_bits(flags);
size_t start = viraddr;
size_t end = start + npages * PAGE_SIZE;
int cb(page_entry_t* entry, int level) {
if (level) {
if (flags & MAP_USER_SPACE)
*entry |= PG_USER;
void traverse(int level, page_entry_t* entry) {
page_entry_t* stop = entry + PAGE_MAP_ENTRIES;
for (; entry != stop; entry++) {
if (entry < last[level] && entry >= first[level]) {
if ((*entry & PG_PRESENT) && !(*entry & PG_PSE)) {
if (level) {
if (flags & MAP_USER_SPACE)
*entry |= PG_USER;
#ifdef CONFIG_X86_64
if (flags & MAP_CODE)
*entry &= ~PG_XD;
#endif
// do "pre-order" traversal
traverse(level-1, get_child_entry(entry));
}
else
*entry = (*entry & PAGE_MASK) | bits;
tlb_flush_one_page(entry_to_virt(entry, level));
}
}
}
else
*entry = (*entry & ~PAGE_FLAGS_MASK) | bits;
tlb_flush_one_page(entry_to_virt(entry, level));
return 0;
}
// check assertions
if (BUILTIN_EXPECT(start < KERNEL_SPACE && end >= KERNEL_SPACE, 0))
return 0;
if (BUILTIN_EXPECT(!task || !task->page_map, 0))
return 0;
// calc page tree boundaries
int i;
for (i=0; i<PAGE_MAP_LEVELS; i++) {
first[i] = virt_to_entry(start, i);
last[i] = virt_to_entry(end - 1, i) + 1; // exclusive
}
// lock tables
if (viraddr < KERNEL_SPACE)
if (start < KERNEL_SPACE)
spinlock_lock(&kslock);
else
if (end >= KERNEL_SPACE)
spinlock_irqsave_lock(&task->page_lock);
int ret = page_iterate(start, end, cb, NULL);
traverse(PAGE_MAP_LEVELS-1, current_map);
// unlock tables
if (viraddr < KERNEL_SPACE)
spinlock_lock(&kslock);
else
spinlock_irqsave_lock(&task->page_lock);
if (start < KERNEL_SPACE)
spinlock_unlock(&kslock);
if (end >= KERNEL_SPACE)
spinlock_irqsave_unlock(&task->page_lock);
return ret;
return 0;
}
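A hypothetical call site (not part of this commit) for the now-exported set_page_flags(), reusing the MAP_* flags that appear elsewhere in this file: after loading a user binary, its text pages could be re-marked as executable user pages.

/* hypothetical: viraddr/npages describe the task's text segment */
int err = set_page_flags(viraddr, npages, MAP_USER_SPACE|MAP_CODE);
if (BUILTIN_EXPECT(err, 0))
	kprintf("set_page_flags() failed: %d\n", err);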
size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flags)
{
task_t* task = per_core(current_task);
page_entry_t* first[PAGE_MAP_LEVELS];
page_entry_t* last[PAGE_MAP_LEVELS];
// TODO: this behaviour should be deprecated
if (!viraddr) {
int vma_flags = VMA_HEAP;
if (flags & MAP_USER_SPACE)
@@ -399,57 +394,66 @@ size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flags)
size_t start = viraddr;
size_t end = start + npages * PAGE_SIZE;
int cb(page_entry_t* entry, int level) {
if (level) { // PGD, PDPT, PML4..
if (*entry & PG_PRESENT) {
if (flags & MAP_USER_SPACE) {
/*
* We are changing page map entries which cover
* the kernel. So before altering them we need to
* make a private copy for the task
*/
if (!(*entry & PG_USER)) {
int traverse(int level, page_entry_t* entry) {
page_entry_t* stop = entry + PAGE_MAP_ENTRIES;
for (; entry != stop; entry++) {
if (entry < last[level] && entry >= first[level]) {
if (level) { // PGD, PDPT, PML4..
if (*entry & PG_PRESENT) {
if ((flags & MAP_USER_SPACE) && !(*entry & PG_USER)) {
/* We are changing page map entries which cover
* the kernel. So before altering them we need to
* make a private copy for the task */
size_t phyaddr = get_page();
if (BUILTIN_EXPECT(!phyaddr, 0))
return -ENOMEM;
atomic_int32_inc(&task->user_usage);
copy_page(phyaddr, *entry & PAGE_MASK);
*entry = phyaddr | (*entry & ~PAGE_MASK) | PG_USER;
/* We just need to flush the table itself.
* TLB entries for the kernel remain valid
* because we've not changed them. */
tlb_flush_one_page(entry_to_virt(entry, 0));
}
}
else {
size_t phyaddr = get_page();
if (BUILTIN_EXPECT(!phyaddr, 0))
return -ENOMEM;
atomic_int32_inc(&task->user_usage);
if (flags & MAP_USER_SPACE)
atomic_int32_inc(&task->user_usage);
copy_page(phyaddr, *entry & ~PAGE_FLAGS_MASK);
*entry = phyaddr | (*entry & PAGE_FLAGS_MASK) | PG_USER;
*entry = phyaddr | bits;
/*
* We just need to flush the table itself.
* TLB entries for the kernel remain valid
* because we've not changed them.
*/
tlb_flush_one_page(entry_to_virt(entry, 0));
memset(get_child_entry(entry), 0x00, PAGE_SIZE); // fill with zeros
}
// do "pre-order" traversal if no hugepage
if (!(*entry & PG_PSE)) {
int ret = traverse(level-1, get_child_entry(entry));
if (ret < 0)
return ret;
}
}
else { // PGT
if ((*entry & PG_PRESENT) && !(flags & MAP_REMAP))
return -EINVAL;
*entry = phyaddr | bits;
if (flags & MAP_USER_SPACE)
atomic_int32_inc(&task->user_usage);
if (flags & MAP_REMAP)
tlb_flush_one_page(entry_to_virt(entry, level));
phyaddr += PAGE_SIZE;
}
}
else {
size_t phyaddr = get_page();
if (BUILTIN_EXPECT(!phyaddr, 0))
return -ENOMEM;
atomic_int32_inc(&task->user_usage);
*entry = phyaddr | bits;
}
}
else { // PGT
if ((*entry & PG_PRESENT) && !(flags & MAP_REMAP))
return -EINVAL;
*entry = phyaddr | bits;
if (flags & MAP_USER_SPACE)
atomic_int32_inc(&task->user_usage);
if (flags & MAP_REMAP)
tlb_flush_one_page(entry_to_virt(entry, level));
phyaddr += PAGE_SIZE;
}
return 0;
@@ -457,92 +461,109 @@ size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flags)
kprintf("map_region: map %u pages from %#lx to %#lx with flags: %#x\n", npages, viraddr, phyaddr, flags); // TODO: remove
// check assertions
if (BUILTIN_EXPECT(start < KERNEL_SPACE && end >= KERNEL_SPACE, 0))
return 0;
if (BUILTIN_EXPECT(!task || !task->page_map, 0))
return 0;
if (BUILTIN_EXPECT(!viraddr, 0))
return 0;
// calc page tree boundaries
int i;
for (i=0; i<PAGE_MAP_LEVELS; i++) {
first[i] = virt_to_entry(start, i);
last[i] = virt_to_entry(end - 1, i) + 1; // exclusive
}
// lock tables
if (viraddr < KERNEL_SPACE)
if (start < KERNEL_SPACE)
spinlock_lock(&kslock);
else
if (end >= KERNEL_SPACE)
spinlock_irqsave_lock(&task->page_lock);
int ret = page_iterate(start, end, cb, NULL);
int ret = traverse(PAGE_MAP_LEVELS-1, current_map);
// unlock tables
if (viraddr < KERNEL_SPACE)
if (start < KERNEL_SPACE)
spinlock_unlock(&kslock);
else
if (end >= KERNEL_SPACE)
spinlock_irqsave_unlock(&task->page_lock);
return (ret == 0) ? viraddr : 0;
return (ret) ? 0 : viraddr;
}
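A hypothetical call site (not part of this commit), limited to flags that appear in this file: map one physical frame of a memory-mapped device into kernel space and let the (deprecated) viraddr == 0 path pick a free virtual address.

size_t device_phys = 0xFEE00000;	/* hypothetical MMIO frame */
size_t vaddr = map_region(0, device_phys, 1, MAP_KERNEL_SPACE);
if (BUILTIN_EXPECT(!vaddr, 0))
	return -ENOMEM;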
int unmap_region(size_t viraddr, uint32_t npages)
{
task_t* task = per_core(current_task);
page_entry_t* first[PAGE_MAP_LEVELS];
page_entry_t* last[PAGE_MAP_LEVELS];
size_t start = viraddr;
size_t end = start + npages * PAGE_SIZE;
kprintf("unmap_region: unmap %u pages from %#lx\n", npages, viraddr); // TODO: remove
int cb(page_entry_t* entry, int level) {
if (level) { // PGD, PDPT, PML4
page_map_t* map = (page_map_t*) entry_to_virt(entry, 0);
int used = 0;
/** @return number of page table entries which are present */
int traverse(int level, page_entry_t* entry) {
int used = 0;
page_entry_t* stop = entry + PAGE_MAP_ENTRIES;
for (; entry != stop; entry++) {
if (entry < last[level] && entry >= first[level]) {
if (level) { // PGD, PDPT, PML4
if ((*entry & PG_PRESENT) && !(*entry & PG_PSE)) {
// do "post-order" traversal if table is present and no hugepage
if (traverse(level-1, get_child_entry(entry)))
used++;
else { // child table is empty => delete it
*entry &= ~PG_PRESENT;
tlb_flush_one_page(entry_to_virt(entry, 0));
int i;
for (i=0; i<PAGE_MAP_ENTRIES; i++) {
if (map->entries[i] & PG_PRESENT)
if (*entry & PG_USER) {
if (put_page(*entry & PAGE_MASK))
atomic_int32_dec(&task->user_usage);
}
}
}
}
else { // PGT
*entry &= ~PG_PRESENT;
tlb_flush_one_page(entry_to_virt(entry, level));
if (*entry & PG_USER)
atomic_int32_dec(&task->user_usage);
}
}
else {
if (*entry & PG_PRESENT)
used++;
}
if (!used) {
*entry &= ~PG_PRESENT;
tlb_flush_one_page(entry_to_virt(entry, 0));
if (put_page(*entry & ~PAGE_FLAGS_MASK))
atomic_int32_dec(&task->user_usage);
}
}
else { // PGT
*entry = 0;
tlb_flush_one_page(entry_to_virt(entry, level));
if (viraddr >= KERNEL_SPACE)
atomic_int32_dec(&task->user_usage);
}
return 0;
return used;
}
// check assertions
if (BUILTIN_EXPECT(start < KERNEL_SPACE && end >= KERNEL_SPACE, 0))
return 0;
if (BUILTIN_EXPECT(!task || !task->page_map, 0))
return 0;
// calc page tree boundaries
int i;
for (i=0; i<PAGE_MAP_LEVELS; i++) {
first[i] = virt_to_entry(start, i);
last[i] = virt_to_entry(end - 1, i) + 1; // exclusive
}
// lock tables
if (viraddr < KERNEL_SPACE)
if (start < KERNEL_SPACE)
spinlock_lock(&kslock);
else
if (end >= KERNEL_SPACE)
spinlock_irqsave_lock(&task->page_lock);
int ret = page_iterate(start, end, NULL, cb);
traverse(PAGE_MAP_LEVELS-1, current_map);
// unlock tables
if (viraddr < KERNEL_SPACE)
if (start < KERNEL_SPACE)
spinlock_unlock(&kslock);
else
if (end > KERNEL_SPACE)
spinlock_irqsave_unlock(&task->page_lock);
return ret;
return 0;
}
static void pagefault_handler(struct state *s)
@@ -597,7 +618,8 @@ int arch_paging_init(void)
irq_install_handler(14, pagefault_handler);
// setup recursive paging
boot_pml4.entries[PAGE_MAP_ENTRIES-1] = (size_t) &boot_pml4 | PG_TABLE;
page_entry_t* boot_map = get_boot_page_map();
boot_map[PAGE_MAP_ENTRIES-1] = (size_t) boot_map | PG_TABLE;
/*
* In long mode the kernel is already mapped into kernel space (see entry64.asm)
@@ -658,3 +680,4 @@ int arch_paging_init(void)
return 0;
}


@@ -90,8 +90,8 @@ typedef struct task {
atomic_int32_t user_usage;
/// locks access to all page maps with PG_USER flag set
spinlock_irqsave_t page_lock;
/// pointer to page directory (32bit) or page map level 4 (64bit) table respectively
page_map_t* page_map;
/// virtual address of page map for CR3
page_entry_t* page_map;
/// lock for the VMA_list
spinlock_t vma_lock;
/// list of VMAs