another rewrite of the paging code without nested functions
This commit is contained in:
parent
6e8ecad91f
commit
3203d53a83
4 changed files with 337 additions and 342 deletions
@@ -34,59 +34,44 @@
/// Page offset bits
#define PAGE_BITS 12
/// The size of a single page in bytes
#define PAGE_SIZE ( 1L << PAGE_BITS)

#ifdef CONFIG_X86_32
/// Number of page map indirections
#define PAGE_MAP_LEVELS 2
/// Page map bits
#define PAGE_MAP_BITS 10
/// Total operand width in bits
#define BITS 32
/// Linear/virtual address width
#define VIRT_BITS BITS
/// Physical address width (we don't support PAE)
#define PHYS_BITS BITS
#elif defined(CONFIG_X86_64)
/// Number of page map indirections
#define PAGE_MAP_LEVELS 4
/// Page map bits
#define PAGE_MAP_BITS 9
#define PAGE_MAP_BITS 10
/// Number of page map indirections
#define PAGE_MAP_LEVELS 2
/// Mask the page address without page map flags
#define PAGE_MASK 0xFFFFF000
#elif defined(CONFIG_X86_64)
/// Total operand width in bits
#define BITS 64
/// Linear/virtual address width
#define VIRT_BITS 48
/// Physical address width (maximum value)
#define PHYS_BITS 52
/// Page map bits
#define PAGE_MAP_BITS 9
/// Number of page map indirections
#define PAGE_MAP_LEVELS 4
/// Mask the page address without page map flags
#define PAGE_MASK 0x000FFFFFFFFFF000
#endif

/// The size of a single page in bytes
#define PAGE_SIZE ( 1L << PAGE_BITS)
/// The number of entries in a page map table
#define PAGE_MAP_ENTRIES ( 1L << PAGE_MAP_BITS)
/// Mask the page address
#define PAGE_MASK (-1L << PAGE_BITS)
/// Mask the entry in a page table
#define PAGE_ENTRY_MASK (-1L << (PAGE_BITS-PAGE_MAP_BITS))
/// Mask for all flag bits in a page map entry (including ignored bits)
#define PAGE_FLAGS_MASK (~(-1L << PAGE_BITS) | (-1L << VIRT_BITS))
#define PAGE_MAP_ENTRIES (1L << PAGE_MAP_BITS)

/// Align to next page
#define PAGE_FLOOR(addr) (((addr) + PAGE_SIZE - 1) & PAGE_MASK)
/// Align to page
#define PAGE_CEIL(addr) ( (addr) & PAGE_MASK)
/// Sign extension to get a valid canonical address (hack: by using arithmetic shifts)
#define VIRT_SEXT(addr) ((ssize_t) addr << (BITS-VIRT_BITS) >> (BITS-VIRT_BITS))

// base addresses of page map tables
#ifdef CONFIG_X86_32
#define PAGE_MAP_PGD 0xFFFFF000
#define PAGE_MAP_PGT 0xFFC00000
#elif defined(CONFIG_X86_64)
#define PAGE_MAP_PML4 0xFFFFFFFFFFFFF000
#define PAGE_MAP_PDPT 0xFFFFFFFFFFE00000
#define PAGE_MAP_PGD 0xFFFFFFFFC0000000
#define PAGE_MAP_PGT 0xFFFFFF8000000000
#endif

/// Page is present
#define PG_PRESENT (1 << 0)
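
As a quick illustration of how the macros above compose (an editorial sketch,
not code from this commit; it assumes the CONFIG_X86_64 values BITS = 64,
VIRT_BITS = 48, PAGE_BITS = 12). Note that PAGE_FLOOR rounds up to the next
page boundary while PAGE_CEIL rounds down, despite what the names suggest:

    /* illustrative sketch, not part of the commit */
    #include <stdio.h>

    #define PAGE_BITS 12
    #define BITS      64
    #define VIRT_BITS 48
    #define PAGE_SIZE (1L << PAGE_BITS)
    #define PAGE_MASK (-1L << PAGE_BITS)
    #define PAGE_FLOOR(addr) (((addr) + PAGE_SIZE - 1) & PAGE_MASK)
    #define PAGE_CEIL(addr)  ( (addr) & PAGE_MASK)
    #define VIRT_SEXT(addr)  ((long) (addr) << (BITS-VIRT_BITS) >> (BITS-VIRT_BITS))

    int main(void)
    {
        printf("%lx\n", PAGE_FLOOR(0x1234L));            // 2000: next page boundary
        printf("%lx\n", PAGE_CEIL(0x1234L));             // 1000: page base
        // bit 47 is set, so bits 48..63 get filled in => canonical address
        printf("%lx\n", VIRT_SEXT(0x0000FF8000000000L)); // ffffff8000000000
        return 0;
    }
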
@@ -125,28 +110,12 @@
/// This is a whole set of flags (PRESENT,RW,GLOBAL) for kernelspace pages
#define PG_PAGE (PG_PRESENT|PG_RW|PG_GLOBAL|PG_XD)

/** @brief A single entry in a page map */
/** @brief A single entry in a page map
 *
 * Usually used as a pointer to a mapped page map entry.
 */
typedef size_t page_entry_t;

/** @brief General page map structure
 *
 * This page map structure is a general type for all indirection levels,
 * as all page map levels contain the same number of entries.
 * All page maps must be page aligned!
 */
typedef struct page_map {
    page_entry_t entries[PAGE_MAP_ENTRIES];
} __attribute__ ((aligned (PAGE_SIZE))) page_map_t;

/** @brief A callback type for the page map iterator
 *
 * @param entry A pointer to the current page map entry
 * @return
 * - 0 if we want to skip underlying page tables
 * - >0 if we want to recurse into underlying page tables
 */
typedef int (*page_cb_t)(page_entry_t* entry, int level);

/** @brief Converts a virtual address to a physical
 *
 * @param viraddr Virtual address to convert
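
For context, a minimal callback matching the page_cb_t contract documented
above (a hypothetical sketch, not code from this commit; count_present and
present[] are invented names, while page_entry_t, PG_PRESENT, PAGE_MAP_LEVELS
and PAGE_MAP_PGT come from these headers):

    /* illustrative sketch, not part of the commit */
    static size_t present[PAGE_MAP_LEVELS]; // hypothetical per-level counters

    static int count_present(page_entry_t* entry, int level)
    {
        if (*entry & PG_PRESENT)
            present[level]++;
        return 1; // >0: recurse into the underlying page table
    }

    // pre-order walk over the kernel-visible tree, analogous to the
    // page_iterate() calls in page.c:
    // page_iterate(0, PAGE_MAP_PGT, count_present, NULL);
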
@@ -227,7 +196,7 @@ int arch_paging_init(void);
 *
 * @return Returns the address of the boot task's page dir array.
 */
page_map_t* get_boot_page_map(void);
page_entry_t* get_boot_page_map(void);

/** @brief Setup a new page directory for a new user-level task
 *
@@ -240,10 +209,10 @@ page_map_t* get_boot_page_map(void);
 */
int copy_page_map(struct task* task, int copy);

/** @brief Delete all page map structures of the current task
/** @brief Deletes all user page map structures of the current task
 *
 * Puts PML4, PDPT, PGD, PGT tables back to buffer and
 * sets the task's page map pointer to NULL
 * All allocated physical page frames are released in the bitmap.
 * The task->page_map is replaced by the boot_page_map().
 *
 * @return
 * - 0 on success
@@ -264,15 +233,18 @@ int drop_page_map(void);
 * - 0 on success
 * - -EINVAL (-22) on failure.
 */
int change_page_permissions(size_t start, size_t end, uint32_t flags);
int set_page_flags(size_t viraddr, uint32_t npages, int flags);

/** @brief Dump mapped memory */
void page_dump(size_t start, size_t end);
/** @brief Dump mapped memory
 *
 * @param mask Only watch for changes in these page flags (PG_PRESENT is set by default)
 */
void page_dump(size_t mask);

/** @brief Print stats about page flags
 *
 * @param reset Reset accessed and dirty bits in page tables
 */
void page_stats(size_t start, size_t end, int reset);
void page_stats(int reset);

#endif
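
Taken together, the interface changes in this header replace explicit address
ranges with simpler parameters. A hypothetical call site (a sketch, not code
from this commit) would migrate roughly like this:

    /* illustrative sketch, not part of the commit */
    void example(size_t viraddr, uint32_t npages)
    {
        // before: page_dump(0, KERNEL_SPACE);
        page_dump(PG_USER | PG_RW); // group by user/write bits; PG_PRESENT is implied

        // before: page_stats(0, KERNEL_SPACE, 1);
        page_stats(1);              // 1 = reset accessed/dirty bits after counting

        // before: change_page_permissions(viraddr, viraddr + npages*PAGE_SIZE, flags);
        set_page_flags(viraddr, npages, MAP_USER_SPACE);
    }
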
@@ -50,7 +50,7 @@ size_t* get_current_stack(void)
#endif

    // use new page table
    write_cr3(virt_to_phys((size_t)curr_task->page_map));
    write_cr3(virt_to_phys((size_t) curr_task->page_map));

    return curr_task->last_stack_pointer;
}
@@ -37,138 +37,83 @@
 * Virtual Memory Layout of the standard configuration
 * (1 GB kernel space)
 *
 * 0x000000000000 - 0x0000000FFFFF: reserved for IO devices (16MB)
 * 0x000000100000 - 0x00000DEADFFF: Kernel (size depends on the configuration) (221MB)
 * 0x00000DEAE000 - 0x00003FFFFFFF: Kernel heap
 * 0xFF0000000000 - 0xFF7FFFFFFFFF: Paging structures for copying a page map (max 512GB)
 * 0xFF8000000000 - 0xFFFFFFFFFFFF: Paging structures are mapped in this region (max 512GB)
 * 0x0000000000000000 - 0x00000000000FFFFF: reserved for IO devices (16MB)
 * 0x0000000000100000 - 0x00000000008C2000: Kernel (~8MB)
 * 0x00000000008c3000 - 0x0000000000973000: Init Ramdisk (~2MB)
 *
 * 0x0001000000000000 - 0xffff000000000000: Memory hole (48 bit VAS limitation)
 *
 * 0xFFFFFE8000000000 - 0xFFFFFEFFFFFFFFFF: Page map dest for copy_page_map() (512GB)
 * 0xFFFFFF0000000000 - 0xFFFFFF7FFFFFFFFF: Page map source for copy_page_map() (512GB)
 * 0xFFFFFF8000000000 - 0xFFFFFFFFFFFFFFFF: Self-referenced page maps of the current task (512GB)
 */

/// Boot task's page map
extern page_map_t boot_pml4;
/// Boot task's page map (set up by entryXX.asm)
extern page_entry_t boot_pml4[PAGE_MAP_ENTRIES];

/// Kernel space page map lock
static spinlock_t kslock = SPINLOCK_INIT;

/// Mapping of self referenced page map (at the end of the VAS)
static page_entry_t* const current_map = (page_entry_t*) (-1*PAGE_SIZE);
static page_entry_t* const src_map = (page_entry_t*) (-2*PAGE_SIZE);
static page_entry_t* const dest_map = (page_entry_t*) (-3*PAGE_SIZE);

 *

page_map_t* get_boot_page_map(void)
page_entry_t* get_boot_page_map(void)
{
    return &boot_pml4;
    return boot_pml4;
}

/** @brief Recursive traversal through the page map tree
 *
 * @param start The first address whose page map entry we will call on
 * @param end The exclusive end address whose page map entry we will call on
 * @param pre Callback which is called for every page map entry (pre-order traversal)
 * @param post Callback which is called for every page map entry (post-order traversal)
 */
int page_iterate(size_t start, size_t end, page_cb_t pre, page_cb_t post)
{
    page_entry_t* entry[PAGE_MAP_LEVELS];
    page_entry_t* last[PAGE_MAP_LEVELS];

    if (BUILTIN_EXPECT(start >= end, 0))
        return -EINVAL;

    // setup subtree boundaries
    int i;
    for (i=0; i<PAGE_MAP_LEVELS; i++) {
        entry[i] = virt_to_entry(start, i);
        last[i] = virt_to_entry(end - 1, i);
    }

    // nested iterator function (sees the scope of parent)
    int iterate(int level) {
        int ret;
        while (entry[level] <= last[level]) {
            if (pre) { // call pre-order callback if available
                ret = pre(entry[level], level);
                if (BUILTIN_EXPECT(ret < 0, 0))
                    return ret;
            }

            // recurse if
            // - we are not in the PGT
            // - and the inferior page table is present
            // - and the current entry represents no huge page
            if (level && (*entry[level] & PG_PRESENT) && !(*entry[level] & PG_PSE)) {
                ret = iterate(level-1);
                if (BUILTIN_EXPECT(ret < 0, 0))
                    return ret;
            }
            // or skip the entries we've omitted...
            else {
                size_t next = (size_t) (entry[level]+1);
                for (i=0; i<level; i++)
                    entry[i] = (page_entry_t*) (next << (PAGE_MAP_BITS*(level-i)));
            }

            if (post) { // call post-order callback if available
                ret = post(entry[level], level);
                if (BUILTIN_EXPECT(ret < 0, 0))
                    return ret;
            }

            // return if we've reached the end of table
            entry[level]++;
            if (((size_t) entry[level] & ~PAGE_MASK) == 0x000) // TODO
                return 0;
        }

        return 0;
    }

    // we start at the highest order table (PML4 or PGD)
    return iterate(PAGE_MAP_LEVELS-1);
}

void page_dump(size_t from, size_t to)
void page_dump(size_t mask)
{
    task_t* task = per_core(current_task);

    mask |= PG_PRESENT;

    size_t flags = 0;
    size_t start = 0;
    size_t end;

    void print(size_t start, size_t end, size_t flags) {
        size_t size = end - start;

        kprintf("%#018lx-%#018lx %#14x %c%c%c%c%c%c\n", start, end, size,
            (flags & PG_XD) ? '-' : 'x',
            (flags & PG_GLOBAL) ? 'g' : '-',
            (flags & PG_DIRTY) ? 'd' : '-',
            (flags & PG_ACCESSED) ? 'a' : '-',
            (flags & PG_USER) ? 'u' : '-',
            (flags & PG_RW) ? 'w' : '-'
            (mask & flags & PG_XD) ? '-' : 'x',
            (mask & flags & PG_GLOBAL) ? 'g' : '-',
            (mask & flags & PG_DIRTY) ? 'd' : '-',
            (mask & flags & PG_ACCESSED) ? 'a' : '-',
            (mask & flags & PG_USER) ? 'u' : '-',
            (mask & flags & PG_RW) ? 'w' : '-'
        );
    }

    int cb(page_entry_t* entry, int level) {
        size_t end;
    void traverse(int level, page_entry_t* entry) {
        page_entry_t* stop = entry + PAGE_MAP_ENTRIES;
        for (; entry != stop; entry++) {
            if (*entry & PG_PRESENT) {
                if (level && !(*entry & PG_PSE)) // do "pre-order" traversal
                    // TODO: handle "inheritance" of page table flags (see get_page_flags())
                    traverse(level-1, get_child_entry(entry));
                else {
                    if (!flags) {
                        flags = *entry & ~PAGE_MASK & mask;
                        start = entry_to_virt(entry, level);
                    }
                    else if (flags != (*entry & ~PAGE_MASK & mask)) {
                        end = entry_to_virt(entry, level);
                        print(start, end, flags);

        if (*entry & PG_PRESENT) {
            if (!level || (*entry & PG_PSE)) {
                if (!flags) {
                    flags = *entry & PAGE_FLAGS_MASK;
                    start = entry_to_virt(entry, level);
                }
                else if (flags != (*entry & PAGE_FLAGS_MASK)) {
                    end = entry_to_virt(entry, level);
                    print(start, end, flags);
                    start = end;
                    flags = *entry & PAGE_FLAGS_MASK;
                        flags = *entry & ~PAGE_MASK & mask;
                        start = end;
                    }
                }
            }
            else if (flags) {
                end = entry_to_virt(entry, level);
                print(start, end, flags);
                flags = 0;
            }
        }
        else if (flags) {
            end = entry_to_virt(entry, level);
            print(start, end, flags);
            flags = 0;
        }

        return 0;
    }

    // lock tables
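
The traversal above starts at current_map, declared earlier in this file as
(page_entry_t*) (-1*PAGE_SIZE). The reason this address reaches the PML4
itself: once the last PML4 slot points back at the PML4, an address with all
index bits set selects entry 511 at every level, so the translation never
leaves the PML4 frame. A standalone sketch of the index arithmetic (assuming
PAGE_MAP_BITS = 9, i.e. nine index bits per level; not code from this commit):

    /* illustrative sketch, not part of the commit */
    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t va = (uint64_t) -4096L; // 0xFFFFFFFFFFFFF000 == current_map
        // PML4/PDPT/PGD/PGT indices of va -- each one is 511, the self-reference
        for (int shift = 39; shift >= 12; shift -= 9)
            printf("%u\n", (unsigned) ((va >> shift) & 0x1FF)); // prints 511 four times
        return 0;
    }
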
@@ -176,18 +121,18 @@ void page_dump(size_t from, size_t to)
    spinlock_irqsave_lock(&task->page_lock);

    kprintf("%-18s-%18s %14s %-6s\n", "start", "end", "size", "flags"); // header
    page_iterate(from, to, cb, NULL);

    traverse(PAGE_MAP_LEVELS-1, current_map);

    if (flags) // workaround to print last mapping
        print(start, 0L, flags);

    // unlock tables
    spinlock_unlock(&kslock);
    spinlock_irqsave_unlock(&task->page_lock);

    // workaround to print last mapping
    if (flags)
        print(start, PAGE_FLOOR(to), flags);
    spinlock_unlock(&kslock);
}

void page_stats(size_t from, size_t to, int reset)
void page_stats(int reset)
{
    task_t* task = per_core(current_task);

@@ -197,40 +142,42 @@ void page_stats(size_t from, size_t to, int reset)
        [12] = "exec disabled" // IA-32e / PAE bits
    };

    int cb(page_entry_t* entry, int level) {
        if (*entry & PG_PRESENT) {
            if (!level || (*entry & PG_PSE)) {
                // increment stat counters
                int i;
                for (i=0; i<12; i++) { // IA-32 "legacy" bits
                    if (*entry & (1 << i))
                        stats[i]++;
                }
                for (i=0; i<1; i++) { // IA-32e / PAE bits
                    if (*entry & (1 << (63-i)))
                        stats[i+PAGE_BITS]++;
                }
            }
    void traverse(int level, page_entry_t* entry) {
        page_entry_t* stop = entry + PAGE_MAP_ENTRIES;
        for (; entry != stop; entry++) {
            if (*entry & PG_PRESENT) {
                if (level && !(*entry & PG_PSE))
                    traverse(level-1, get_child_entry(entry));
                else {
                    // increment stat counters
                    int i;
                    for (i=0; i<12; i++) { // IA-32 "legacy" bits
                        if (*entry & (1 << i))
                            stats[i]++;
                    }
                    for (i=0; i<1; i++) { // IA-32e / PAE bits
                        if (*entry & (1 << (63-i)))
                            stats[i+PAGE_BITS]++;
                    }

                    // reset accessed and dirty bits
                    if (reset) {
                        *entry &= ~(PG_ACCESSED|PG_DIRTY);
                        tlb_flush_one_page(entry_to_virt(entry, level)); // see IA32 Vol3 4.8
                    if (reset) { // reset accessed and dirty bits
                        *entry &= ~(PG_ACCESSED|PG_DIRTY);
                        tlb_flush_one_page(entry_to_virt(entry, level)); // see IA32 Vol3 4.8
                    }
                }
            }
        }

        return 0;
    }

    // lock tables
    spinlock_lock(&kslock);
    spinlock_irqsave_lock(&task->page_lock);

    page_iterate(from, to, cb, NULL);
    traverse(PAGE_MAP_LEVELS-1, current_map);

    // unlock tables
    spinlock_unlock(&kslock);
    spinlock_irqsave_unlock(&task->page_lock);
    spinlock_unlock(&kslock);

    kprintf("total pages:\n");
    for (i=0; i<13; i++)
@@ -241,64 +188,77 @@ int copy_page_map(task_t* new_task, int copy)
{
    task_t* cur_task = per_core(current_task);

    size_t phyaddr;
    size_t ret;
    int traverse(int level, page_entry_t* src, page_entry_t* dest) {
        page_entry_t* stop = src + PAGE_MAP_ENTRIES;
        for (; src != stop; src++, dest++) {
            if (*src & PG_PRESENT) {
                if (*src & PG_USER) { // deep copy page frame
                    kprintf("copy_page_map: deep src = %p, dest = %p, level = %u\n", src, dest, level); // TODO: remove

    int cb(page_entry_t* src, int level) {
        page_entry_t* dest = src - (1L<<36); // TODO

        if (*src & PG_PRESENT) {
            if (*src & PG_USER) {
                if (copy) { // deep copy page frame
                    size_t phyaddr = get_page();
                    if (BUILTIN_EXPECT(!phyaddr, 0))
                        return -ENOMEM;

                    atomic_int32_inc(&cur_task->user_usage);

                    copy_page(phyaddr, *src & ~PAGE_FLAGS_MASK);
                    *dest = phyaddr | (*src & PAGE_FLAGS_MASK);
                    copy_page(phyaddr, *src & PAGE_MASK);
                    *dest = phyaddr | (*src & ~PAGE_MASK);

                    // do "pre-order" traversal
                    if (level && !(*src & PG_PSE)) {
                        int ret = traverse(level-1, get_child_entry(src),
                                           get_child_entry(dest));
                        if (ret < 0)
                            return ret;
                    }
                }
                else // shallow copy kernel table
                    *dest = *src;
            }
            else // shallow copy kernel table
                *dest = *src;
            else // table does not exist
                *dest = 0;
        }

        return 0;
    }

    // fixed mapping for paging structures
    page_map_t *current = (page_map_t*) PAGE_MAP_PML4;
    page_map_t *new = palloc(PAGE_SIZE, 0);
    if (BUILTIN_EXPECT(!new, 0))

    page_entry_t* src_virt = (copy) ? cur_task->page_map : get_boot_page_map();
    page_entry_t* dest_virt = (page_entry_t*) palloc(PAGE_SIZE, MAP_KERNEL_SPACE);
    if (BUILTIN_EXPECT(!dest_virt, 0))
        return -ENOMEM;

    phyaddr = virt_to_phys((size_t) new);
    size_t src_phys = virt_to_phys((size_t) src_virt);
    size_t dest_phys = virt_to_phys((size_t) dest_virt);

    // lock tables
    spinlock_lock(&kslock);
    spinlock_irqsave_lock(&cur_task->page_lock);

    // map new table
    current->entries[PAGE_MAP_ENTRIES-2] = phyaddr | PG_TABLE;
    kprintf("copy_page_map: copy = %u, src = %p (%p, %p), dest = %p (%p, %p)\n",
            copy, src_virt, src_phys, src_map, dest_virt, dest_phys, dest_map); // TODO: remove

    // temporarily map src and dest tables
    current_map[PAGE_MAP_ENTRIES-2] = (src_phys & PAGE_MASK) | (PG_TABLE & ~PG_RW); // source is read-only!
    current_map[PAGE_MAP_ENTRIES-3] = (dest_phys & PAGE_MASK) | PG_TABLE;

    tlb_flush(); // ouch :(

    // setup self reference for new table
    new->entries[PAGE_MAP_ENTRIES-1] = phyaddr | PG_TABLE;
    int ret = traverse(PAGE_MAP_LEVELS-1, src_map, dest_map);

    ret = page_iterate(0, PAGE_MAP_PGT - (1L<<39), cb, NULL); // TODO: check boundaries
    // setup self reference for new table
    dest_map[PAGE_MAP_ENTRIES-1] = dest_phys | PG_TABLE;

    // unmap temporary tables
    current_map[PAGE_MAP_ENTRIES-2] = 0;
    current_map[PAGE_MAP_ENTRIES-3] = 0;
    tlb_flush(); // ouch :(

    // unlock tables
    spinlock_irqsave_unlock(&cur_task->page_lock);
    spinlock_unlock(&kslock);

    // unmap new tables
    current->entries[PAGE_MAP_ENTRIES-2] = 0;
    tlb_flush(); // ouch :(

    new_task->page_map = new;

    kprintf("copy_page_map: allocated %i page tables\n", ret); // TODO: remove
    new_task->page_map = dest_virt;

    return ret;
}
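
copy_page_map() temporarily installs the source and destination PML4 frames in
slots PAGE_MAP_ENTRIES-2 and PAGE_MAP_ENTRIES-3 of the current PML4. Through
the self-reference, a frame placed in PML4 slot n becomes visible at the
virtual address whose four page map indices are 511/511/511/n; for n = 510 and
509 that is exactly src_map (-2*PAGE_SIZE) and dest_map (-3*PAGE_SIZE). A
standalone sketch of that arithmetic (assuming PAGE_MAP_BITS = 9; not code
from this commit):

    /* illustrative sketch, not part of the commit */
    #include <stdio.h>
    #include <stdint.h>

    static uint64_t self_mapped(unsigned pml4_slot)
    {
        uint64_t va = (511ULL << 39) | (511ULL << 30) | (511ULL << 21)
                    | ((uint64_t) pml4_slot << 12);
        return va | 0xFFFF000000000000ULL; // sign-extend: bit 47 is always set here
    }

    int main(void)
    {
        printf("%llx\n", (unsigned long long) self_mapped(511)); // fffffffffffff000 = current_map
        printf("%llx\n", (unsigned long long) self_mapped(510)); // ffffffffffffe000 = src_map
        printf("%llx\n", (unsigned long long) self_mapped(509)); // ffffffffffffd000 = dest_map
        return 0;
    }
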
@@ -307,15 +267,21 @@ int drop_page_map(void)
{
    task_t* task = per_core(current_task);

    int cb(page_entry_t* entry, int level) {
        if (*entry & PG_USER) {
            kprintf("drop_page_map:cb: entry = %p, level = %u\n", entry, level); // TODO: remove
    void traverse(int level, page_entry_t* entry) {
        page_entry_t* stop = entry + PAGE_MAP_ENTRIES;
        for (; entry != stop; entry++) {
            if (*entry & PG_PRESENT) {
                // do "post-order" traversal
                if (level && !(*entry & PG_PSE))
                    traverse(level-1, get_child_entry(entry));

                if (put_page(*entry & ~PAGE_FLAGS_MASK))
                    atomic_int32_dec(&task->user_usage);
                if (*entry & PG_USER) {
                    kprintf("drop_page_map: entry = %p, level = %u\n", entry, level);
                    if (put_page(*entry & PAGE_MASK))
                        atomic_int32_dec(&task->user_usage);
                }
            }
        }
    }

        return 0;
    }

    kprintf("drop_page_map: task = %u\n", task->id); // TODO: remove
@@ -329,9 +295,15 @@ int drop_page_map(void)
    // lock tables
    spinlock_irqsave_lock(&task->page_lock);

    page_iterate(0, PAGE_MAP_PGT, NULL, cb);
    kprintf("user_usage: %u (task = %u)\n", atomic_int32_read(&task->user_usage), task->id);

    pfree(task->page_map, PAGE_SIZE);
    traverse(PAGE_MAP_LEVELS-1, current_map);

    put_page((size_t) task->page_map);

    // we replace the page table
    task->page_map = get_boot_page_map();
    tlb_flush();

    // unlock tables
    spinlock_irqsave_unlock(&task->page_lock);
@@ -339,54 +311,77 @@ int drop_page_map(void)
    return 0;
}

static int set_page_flags(size_t viraddr, uint32_t npages, int flags)
int set_page_flags(size_t viraddr, uint32_t npages, int flags)
{
    task_t* task = per_core(current_task);
    page_entry_t* first[PAGE_MAP_LEVELS];
    page_entry_t* last[PAGE_MAP_LEVELS];

    size_t bits = page_bits(flags);
    size_t start = viraddr;
    size_t end = start + npages * PAGE_SIZE;

    int cb(page_entry_t* entry, int level) {
        if (level) {
            if (flags & MAP_USER_SPACE)
                *entry |= PG_USER;
    void traverse(int level, page_entry_t* entry) {
        page_entry_t* stop = entry + PAGE_MAP_ENTRIES;
        for (; entry != stop; entry++) {
            if (entry < last[level] && entry >= first[level]) {
                if ((*entry & PG_PRESENT) && !(*entry & PG_PSE)) {
                    if (level) {
                        if (flags & MAP_USER_SPACE)
                            *entry |= PG_USER;

#ifdef CONFIG_X86_64
                        if (flags & MAP_CODE)
                            *entry &= ~PG_XD;
#endif

                        // do "pre-order" traversal
                        traverse(level-1, get_child_entry(entry));
                    }
                    else
                        *entry = (*entry & PAGE_MASK) | bits;

                    tlb_flush_one_page(entry_to_virt(entry, level));
                }
            }
        }
    }
        else
            *entry = (*entry & ~PAGE_FLAGS_MASK) | bits;

        tlb_flush_one_page(entry_to_virt(entry, level));

        return 0;
    }

    // check assertions
    if (BUILTIN_EXPECT(start < KERNEL_SPACE && end >= KERNEL_SPACE, 0))
        return 0;
    if (BUILTIN_EXPECT(!task || !task->page_map, 0))
        return 0;

    // calc page tree boundaries
    int i;
    for (i=0; i<PAGE_MAP_LEVELS; i++) {
        first[i] = virt_to_entry(start, i);
        last[i] = virt_to_entry(end - 1, i) + 1; // exclusive
    }

    // lock tables
    if (viraddr < KERNEL_SPACE)
    if (start < KERNEL_SPACE)
        spinlock_lock(&kslock);
    else
    if (end >= KERNEL_SPACE)
        spinlock_irqsave_lock(&task->page_lock);

    int ret = page_iterate(start, end, cb, NULL);
    traverse(PAGE_MAP_LEVELS-1, current_map);

    // unlock tables
    if (viraddr < KERNEL_SPACE)
        spinlock_lock(&kslock);
    else
        spinlock_irqsave_lock(&task->page_lock);
    if (start < KERNEL_SPACE)
        spinlock_unlock(&kslock);
    if (end >= KERNEL_SPACE)
        spinlock_irqsave_unlock(&task->page_lock);

    return ret;
    return 0;
}

size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flags)
{
    task_t* task = per_core(current_task);
    page_entry_t* first[PAGE_MAP_LEVELS];
    page_entry_t* last[PAGE_MAP_LEVELS];

    // TODO: this behaviour should be deprecated
    if (!viraddr) {
        int vma_flags = VMA_HEAP;
        if (flags & MAP_USER_SPACE)
@@ -399,57 +394,66 @@ size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flag
    size_t start = viraddr;
    size_t end = start + npages * PAGE_SIZE;

    int cb(page_entry_t* entry, int level) {
        if (level) { // PGD, PDPT, PML4..
            if (*entry & PG_PRESENT) {
                if (flags & MAP_USER_SPACE) {
                    /*
                     * We are changing page map entries which cover
                     * the kernel. So before altering them we need to
                     * make a private copy for the task
                     */
                    if (!(*entry & PG_USER)) {
    int traverse(int level, page_entry_t* entry) {
        page_entry_t* stop = entry + PAGE_MAP_ENTRIES;
        for (; entry != stop; entry++) {
            if (entry < last[level] && entry >= first[level]) {
                if (level) { // PGD, PDPT, PML4..
                    if (*entry & PG_PRESENT) {
                        if ((flags & MAP_USER_SPACE) && !(*entry & PG_USER)) {
                            /* We are changing page map entries which cover
                             * the kernel. So before altering them we need to
                             * make a private copy for the task */
                            size_t phyaddr = get_page();
                            if (BUILTIN_EXPECT(!phyaddr, 0))
                                return -ENOMEM;

                            atomic_int32_inc(&task->user_usage);

                            copy_page(phyaddr, *entry & PAGE_MASK);
                            *entry = phyaddr | (*entry & ~PAGE_MASK) | PG_USER;

                            /* We just need to flush the table itself.
                             * TLB entries for the kernel remain valid
                             * because we've not changed them. */
                            tlb_flush_one_page(entry_to_virt(entry, 0));
                        }
                    }
                    else {
                        size_t phyaddr = get_page();
                        if (BUILTIN_EXPECT(!phyaddr, 0))
                            return -ENOMEM;

                        atomic_int32_inc(&task->user_usage);
                        if (flags & MAP_USER_SPACE)
                            atomic_int32_inc(&task->user_usage);

                        copy_page(phyaddr, *entry & ~PAGE_FLAGS_MASK);
                        *entry = phyaddr | (*entry & PAGE_FLAGS_MASK) | PG_USER;
                        *entry = phyaddr | bits;

                        /*
                         * We just need to flush the table itself.
                         * TLB entries for the kernel remain valid
                         * because we've not changed them.
                         */
                        tlb_flush_one_page(entry_to_virt(entry, 0));
                        memset(get_child_entry(entry), 0x00, PAGE_SIZE); // fill with zeros
                    }

                    // do "pre-order" traversal if no hugepage
                    if (!(*entry & PG_PSE)) {
                        int ret = traverse(level-1, get_child_entry(entry));
                        if (ret < 0)
                            return ret;
                    }
                }
                else { // PGT
                    if ((*entry & PG_PRESENT) && !(flags & MAP_REMAP))
                        return -EINVAL;

                    *entry = phyaddr | bits;

                    if (flags & MAP_USER_SPACE)
                        atomic_int32_inc(&task->user_usage);

                    if (flags & MAP_REMAP)
                        tlb_flush_one_page(entry_to_virt(entry, level));

                    phyaddr += PAGE_SIZE;
                }
            }
        }
        else {
            size_t phyaddr = get_page();
            if (BUILTIN_EXPECT(!phyaddr, 0))
                return -ENOMEM;

            atomic_int32_inc(&task->user_usage);

            *entry = phyaddr | bits;
        }
    }
    else { // PGT
        if ((*entry & PG_PRESENT) && !(flags & MAP_REMAP))
            return -EINVAL;

        *entry = phyaddr | bits;

        if (flags & MAP_USER_SPACE)
            atomic_int32_inc(&task->user_usage);

        if (flags & MAP_REMAP)
            tlb_flush_one_page(entry_to_virt(entry, level));

        phyaddr += PAGE_SIZE;
    }

    return 0;
@@ -457,92 +461,109 @@ size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flag

    kprintf("map_region: map %u pages from %#lx to %#lx with flags: %#x\n", npages, viraddr, phyaddr, flags); // TODO: remove

    // check assertions
    if (BUILTIN_EXPECT(start < KERNEL_SPACE && end >= KERNEL_SPACE, 0))
        return 0;
    if (BUILTIN_EXPECT(!task || !task->page_map, 0))
        return 0;
    if (BUILTIN_EXPECT(!viraddr, 0))
        return 0;

    // calc page tree boundaries
    int i;
    for (i=0; i<PAGE_MAP_LEVELS; i++) {
        first[i] = virt_to_entry(start, i);
        last[i] = virt_to_entry(end - 1, i) + 1; // exclusive
    }

    // lock tables
    if (viraddr < KERNEL_SPACE)
    if (start < KERNEL_SPACE)
        spinlock_lock(&kslock);
    else
    if (end >= KERNEL_SPACE)
        spinlock_irqsave_lock(&task->page_lock);

    int ret = page_iterate(start, end, cb, NULL);
    int ret = traverse(PAGE_MAP_LEVELS-1, current_map);

    // unlock tables
    if (viraddr < KERNEL_SPACE)
    if (start < KERNEL_SPACE)
        spinlock_unlock(&kslock);
    else
    if (end >= KERNEL_SPACE)
        spinlock_irqsave_unlock(&task->page_lock);

    return (ret == 0) ? viraddr : 0;
    return (ret) ? 0 : viraddr;
}

int unmap_region(size_t viraddr, uint32_t npages)
{
    task_t* task = per_core(current_task);
    page_entry_t* first[PAGE_MAP_LEVELS];
    page_entry_t* last[PAGE_MAP_LEVELS];

    size_t start = viraddr;
    size_t end = start + npages * PAGE_SIZE;

    kprintf("unmap_region: unmap %u pages from %#lx\n", npages, viraddr); // TODO: remove

    int cb(page_entry_t* entry, int level) {
        if (level) { // PGD, PDPT, PML4
            page_map_t* map = (page_map_t*) entry_to_virt(entry, 0);
            int used = 0;
    /** @return number of page table entries which are present */
    int traverse(int level, page_entry_t* entry) {
        int used = 0;
        page_entry_t* stop = entry + PAGE_MAP_ENTRIES;
        for (; entry != stop; entry++) {
            if (entry < last[level] && entry >= first[level]) {
                if (level) { // PGD, PDPT, PML4
                    if ((*entry & PG_PRESENT) && !(*entry & PG_PSE)) {
                        // do "post-order" traversal if table is present and no hugepage
                        if (traverse(level-1, get_child_entry(entry)))
                            used++;
                        else { // child table is empty => delete it
                            *entry &= ~PG_PRESENT;
                            tlb_flush_one_page(entry_to_virt(entry, 0));

            int i;
            for (i=0; i<PAGE_MAP_ENTRIES; i++) {
                if (map->entries[i] & PG_PRESENT)
                            if (*entry & PG_USER) {
                                if (put_page(*entry & PAGE_MASK))
                                    atomic_int32_dec(&task->user_usage);
                            }
                        }
                    }
                }
                else { // PGT
                    *entry &= ~PG_PRESENT;

                    tlb_flush_one_page(entry_to_virt(entry, level));

                    if (*entry & PG_USER)
                        atomic_int32_dec(&task->user_usage);
                }
            }
            else {
                if (*entry & PG_PRESENT)
                    used++;
            }

            if (!used) {
                *entry &= ~PG_PRESENT;
                tlb_flush_one_page(entry_to_virt(entry, 0));

                if (put_page(*entry & ~PAGE_FLAGS_MASK))
                    atomic_int32_dec(&task->user_usage);
            }
        }
        else { // PGT
            *entry = 0;

            tlb_flush_one_page(entry_to_virt(entry, level));

            if (viraddr >= KERNEL_SPACE)
                atomic_int32_dec(&task->user_usage);
        }

        return 0;
        return used;
    }

    // check assertions
    if (BUILTIN_EXPECT(start < KERNEL_SPACE && end >= KERNEL_SPACE, 0))
        return 0;
    if (BUILTIN_EXPECT(!task || !task->page_map, 0))
        return 0;

    // calc page tree boundaries
    int i;
    for (i=0; i<PAGE_MAP_LEVELS; i++) {
        first[i] = virt_to_entry(start, i);
        last[i] = virt_to_entry(end - 1, i) + 1; // exclusive
    }

    // lock tables
    if (viraddr < KERNEL_SPACE)
    if (start < KERNEL_SPACE)
        spinlock_lock(&kslock);
    else
    if (end >= KERNEL_SPACE)
        spinlock_irqsave_lock(&task->page_lock);

    int ret = page_iterate(start, end, NULL, cb);
    traverse(PAGE_MAP_LEVELS-1, current_map);

    // unlock tables
    if (viraddr < KERNEL_SPACE)
    if (start < KERNEL_SPACE)
        spinlock_unlock(&kslock);
    else
    if (end > KERNEL_SPACE)
        spinlock_irqsave_unlock(&task->page_lock);

    return ret;
    return 0;
}

static void pagefault_handler(struct state *s)
@@ -597,7 +618,8 @@ int arch_paging_init(void)
    irq_install_handler(14, pagefault_handler);

    // setup recursive paging
    boot_pml4.entries[PAGE_MAP_ENTRIES-1] = (size_t) &boot_pml4 | PG_TABLE;
    page_entry_t* boot_map = get_boot_page_map();
    boot_map[PAGE_MAP_ENTRIES-1] = (size_t) boot_map | PG_TABLE;

    /*
     * In long mode the kernel is already mapped into the kernel space (see entry64.asm)
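
The self-reference installed above is what gives the PAGE_MAP_* base addresses
from the header their meaning: each pass through the last PML4 slot shifts the
translation up by one level, so the PGT entry mapping a virtual address va
shows up at a fixed location in the top 512GB. A standalone sketch (assuming
4-level paging, PAGE_MAP_BITS = 9 and the header's PAGE_MAP_PGT base; not code
from this commit):

    /* illustrative sketch, not part of the commit */
    #include <stdio.h>
    #include <stdint.h>

    #define PAGE_MAP_PGT 0xFFFFFF8000000000ULL

    static uint64_t pgt_entry_addr(uint64_t va)
    {
        // the old PGT index becomes a byte offset (8 bytes per entry);
        // everything else shifts right by PAGE_MAP_BITS = 9
        return PAGE_MAP_PGT | ((va >> 9) & 0x7FFFFFFFF8ULL);
    }

    int main(void)
    {
        // PGT entry for the kernel's first page (va = 0x100000, PGT index 256):
        printf("%llx\n", (unsigned long long) pgt_entry_addr(0x100000)); // ffffff8000000800
        return 0;
    }
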
@@ -658,3 +680,4 @@ int arch_paging_init(void)

    return 0;
}

@@ -90,8 +90,8 @@ typedef struct task {
    atomic_int32_t user_usage;
    /// locks access to all page maps with PG_USER flag set
    spinlock_irqsave_t page_lock;
    /// pointer to page directory (32bit) or page map level 4 (64bit) table respectively
    page_map_t* page_map;
    /// virtual address of page map for CR3
    page_entry_t* page_map;
    /// lock for the VMA_list
    spinlock_t vma_lock;
    /// list of VMAs