That's a huge commit: reimplemented all remaining page map functions in terms of the new page_iterate()

Steffen Vogel 2014-01-09 13:44:20 +01:00
parent 269bffc208
commit ab67350783


@@ -98,58 +98,58 @@ int page_iterate(size_t start, size_t end, page_cb_t pre, page_cb_t post)
 	page_entry_t* entry[PAGE_MAP_LEVELS];
 	page_entry_t* last[PAGE_MAP_LEVELS];
 	if (BUILTIN_EXPECT(start >= end, 0))
 		return -EINVAL;
 	// setup subtree boundaries
 	int i;
 	for (i=0; i<PAGE_MAP_LEVELS; i++) {
 		entry[i] = virt_to_entry(start, i);
-		// this last addresses are exclusive!
-		// for end == 0 we take the whole address space (overflow => 0)
-		last[i] = (end) ? virt_to_entry(end - PAGE_SIZE, i) + 1 : 0;
+		last[i] = virt_to_entry(end - 1, i);
 	}
 	// nested iterator function (sees the scope of parent)
 	int iterate(int level) {
 		int ret;
-		while (entry[level] != last[level]) {
-			//if (*entry[level] && level) kprintf("page_iterate: level=%u, entry[level]=%p, last[level]=%p\n", level, entry[level], last[level]);
-			// pre-order callback
-			if (pre)
+		while (entry[level] <= last[level]) {
+			if (pre) { // call pre-order callback if available
 				ret = pre(entry[level], level);
-			if (BUILTIN_EXPECT(ret < 0, 0))
-				return ret;
+				if (BUILTIN_EXPECT(ret < 0, 0))
+					return ret;
+			}
 			// recurse if
 			// - we are not in the PGT
 			// - and the inferior page table is present
-			// - and the current entry is no huge page
-			if (level && (*entry[level] & PG_PRESENT) && !(*entry[level] & PG_PSE))
-				iterate(level-1);
+			// - and the current entry represents no huge page
+			if (level && (*entry[level] & PG_PRESENT) && !(*entry[level] & PG_PSE)) {
+				ret = iterate(level-1);
+				if (BUILTIN_EXPECT(ret < 0, 0))
+					return ret;
+			}
 			// or skip the entries we've omit...
 			else {
 				size_t next = (size_t) (entry[level]+1);
 				for (i=0; i<level; i++)
-					entry[i] = (page_entry_t*) (next << (PAGE_MAP_SHIFT*(level-i)));
+					entry[i] = (page_entry_t*) (next << (PAGE_MAP_BITS*(level-i)));
 			}
-			// post-order callback
-			if (post)
+			if (post) { // call post-order callback if available
 				ret = post(entry[level], level);
-			if (BUILTIN_EXPECT(ret < 0, 0))
-				return ret;
-			// return if
-			// - we are not at the root table
-			// - and we've reached the end of the current table
+				if (BUILTIN_EXPECT(ret < 0, 0))
+					return ret;
+			}
+			// return if we've reached the end of table
 			entry[level]++;
-			if (((size_t) entry[level] & ~PAGE_MASK) == 0x000)
+			if (((size_t) entry[level] & ~PAGE_MASK) == 0x000) // TODO
 				return 0;
 		}
 		return 0;
 	}
 	// we start at the highest order table (PML4 or PGD)
 	return iterate(PAGE_MAP_LEVELS-1);
 }
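
The rest of the commit reimplements each page map function on top of this iterator: a nested callback inspects or rewrites single entries while page_iterate() drives the multi-level table walk. A minimal sketch of that pattern (not part of the commit; it assumes the declarations visible above, i.e. page_entry_t, page_cb_t and PG_PRESENT, and GCC's nested functions just like the kernel code does) could count the mapped pages of a range:

	static size_t count_mapped_pages(size_t start, size_t end)
	{
		size_t counter = 0;

		// pre-order callback: only level 0 (PGT) entries point to page frames
		int cb(page_entry_t* entry, int level) {
			if (!level && (*entry & PG_PRESENT))
				counter++;
			return 0;
		}

		// walk all tables covering [start, end)
		if (page_iterate(start, end, cb, NULL) < 0)
			return 0;

		return counter;
	}
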
@@ -265,240 +265,329 @@ void page_stats(size_t from, size_t to, int reset)
kprintf(" - %s:%*lu\n", labels[i], 25-strlen(labels[i]), stats[i]);
}
/** @brief Copy a single page frame
*
* @param src virtual address of source page frame
* @return physical addr to copied page frame
*/
static size_t copy_page_frame(size_t *src)
int copy_page_map(task_t* new_task, int copy)
{
kprintf("copy_page_frame(%p)\n", src);
#if 1 // TODO: untested
size_t phyaddr, viraddr;
task_t* cur_task = per_core(current_task);
// allocate and map an empty page
phyaddr = get_page();
if (BUILTIN_EXPECT(!phyaddr, 0))
return 0;
viraddr = vma_alloc(PAGE_SIZE, VMA_HEAP);
if (BUILTIN_EXPECT(!viraddr, 0))
return 0;
viraddr = map_region(viraddr, phyaddr, 1, MAP_KERNEL_SPACE);
if (BUILTIN_EXPECT(!viraddr, 0))
return 0;
// copy the whole page
strncpy((void*) viraddr, (void*) src, PAGE_SIZE);
// unmap and free page
unmap_region(viraddr, 1);
vma_free(viraddr, viraddr+PAGE_SIZE);
return phyaddr;
#else
kprintf("TODO: copy_page_frame(%lx)\n", source);
return 0;
#endif
}
/*
* Copy page maps using recursion
*
* @param from pointer to virtual address of source page tables
* @param to pointer to virtual address of destination page tables
* @param copy flags what should be copied (see #define COPY_*)
* @return number of new allocated page frames (for tables only)
*/
static int copy_page_map(page_map_t *src, page_map_t *dest, int copy)
{
page_map_t* next_src, * next_dest;
int ret = 0;
uint32_t i;
for(i=0; i<PAGE_MAP_ENTRIES; i++) {
if (!(src->entries[i] & PG_PRESENT))
// skip empty entries
dest->entries[i] = 0;
else if (src->entries[i] & PG_USER) {
size_t phys;
// deep copy user tables
if ((size_t) src >= PAGE_MAP_PGT) {
phys = get_page();
if (BUILTIN_EXPECT(!phys, 0))
return -ENOMEM;
dest->entries[i] = phys|(src->entries[i] & ~PAGE_MASK);
// reuse pointers to next lower page map tables
next_src = (page_map_t*) ((size_t) &src->entries[i] << 9);
next_dest = (page_map_t*) ((size_t) &dest->entries[i] << 9);
ret += 1 + copy_page_map(next_src, next_dest, copy);
}
// deep copy page frame
else {
if (copy) {
phys = copy_page_frame((size_t*) src->entries[i]);
dest->entries[i] = phys|(src->entries[i] & ~PAGE_MASK);
}
kprintf("c: %p (%lx)\n", &src->entries[i], src->entries[i]);
}
}
// shallow copy kernel only tables
else
dest->entries[i] = src->entries[i];
}
kputs("r\n");
return ret;
}
int create_page_map(task_t* task, int copy)
{
size_t phys;
size_t phyaddr;
uint32_t ret;
int cb(page_entry_t* src, int level) {
page_entry_t* dest = src - (1L<<36); // TODO
if (*src & PG_PRESENT) {
if (*src & PG_USER) {
kprintf("cb: src=%p, dest=%p, *src=%#lx, level=%u ", src, dest, *src, level); // TODO: remove
if (level) { // deep copy user table
kputs("deep copy\n");
size_t phyaddr = get_page();
if (BUILTIN_EXPECT(!phyaddr, 0))
return -ENOMEM;
atomic_int32_inc(&cur_task->user_usage);
*dest = phyaddr | (*src & ~PAGE_FLAGS_MASK);
// TODO: copy_page?
// TODO: memset(*dest, 0)?
}
else if (copy) { // deep copy page frame
kputs("deep copy frame\n");
size_t phyaddr = get_page();
if (BUILTIN_EXPECT(!phyaddr, 0))
return -ENOMEM;
atomic_int32_inc(&cur_task->user_usage);
copy_page(phyaddr, *src & ~PAGE_FLAGS_MASK);
*dest = phyaddr | (*src & PAGE_FLAGS_MASK);
}
else
kputs("???\n");
}
else // shallow copy kernel table
*dest = *src;
}
tlb_flush(); // ouch :(
return 0;
}
// fixed mapping for paging structures
page_map_t *current = (page_map_t*) PAGE_MAP_PML4;
page_map_t *new = (page_map_t*) (PAGE_MAP_PML4 - 0x1000);
page_map_t *new = palloc(PAGE_SIZE, 0);
if (BUILTIN_EXPECT(!new, 0))
return -ENOMEM;
// get new pml4 table
phys = get_page();
if (!phys) return -ENOMEM;
phyaddr = virt_to_phys(new);
current->entries[PAGE_MAP_ENTRIES-2] = phys|KERN_TABLE;
new->entries[PAGE_MAP_ENTRIES-1] = phys|KERN_TABLE;
// lock tables
spinlock_lock(&kslock);
spinlock_irqsave_lock(&cur_task->page_lock);
// map new table
current->entries[PAGE_MAP_ENTRIES-2] = phyaddr | PG_TABLE;
tlb_flush(); // ouch :(
spinlock_lock(&kslock);
ret = copy_page_map(current, new, copy);
// setup self reference for new table
new->entries[PAGE_MAP_ENTRIES-1] = phyaddr | PG_TABLE;
ret = page_iterate(0, PAGE_MAP_PGT - (1L<<39), cb, NULL); // TODO: check boundaries
// unlock tables
spinlock_irqsave_unlock(&cur_task->page_lock);
spinlock_unlock(&kslock);
new->entries[PAGE_MAP_ENTRIES-1] = phys|KERN_TABLE;
// unmap new tables
current->entries[PAGE_MAP_ENTRIES-2] = 0;
tlb_flush(); // ouch :(
task->page_map = (page_map_t*) phys;
new_task->page_map = new;
kprintf("copy_page_map: allocated %i page tables\n", ret); // TODO: remove
kprintf("create_page_map: allocated %u page tables\n", ret);
return ret;
}
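
The `dest = src - (1L<<36)` step in the callback above leans on the recursive page table mapping: the running hierarchy is self-referenced through the last PML4 slot, while the new task's hierarchy is temporarily mapped one slot below it (PAGE_MAP_ENTRIES-2). One PML4 slot spans 2^39 bytes of virtual address space, i.e. 2^36 eight-byte entries, which is where the constant comes from. Written as a hypothetical helper (not in the commit):

	// hypothetical helper, not part of the commit
	static inline page_entry_t* entry_in_new_map(page_entry_t* src)
	{
		// the temporary self-mapping sits one PML4 slot (2^39 bytes) below the
		// current one; in units of 8-byte entries that is 2^39 / 8 = 2^36
		return src - (1L << 36);
	}
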
int drop_page_map(void)
{
#if 1
kprintf("TODO: test drop_page_map()\n");
return -EINVAL; // TODO
#else
task_t* task = per_core(current_task);
page_map_t* pml4, * pdpt, * pgd, * pgt;
size_t phys;
uint32_t i, j, k, l;
pml4 = task->page_map;
int cb(page_entry_t* entry, int level) {
if (*entry & PG_USER) {
kprintf("drop_page_map:cb: entry = %p, level = %u\n", entry, level); // TODO: remove
if (BUILTIN_EXPECT(pml4 == &boot_pml4, 0))
return -EINVAL;
spinlock_lock(&task->page_lock);
// delete all user pages and tables
for(i=0; i<PAGE_MAP_ENTRIES; i++) { // pml4
if (pml4->entries[i] & PG_USER) {
for(j=0; j<PAGE_MAP_ENTRIES; j++) { // pdpt
if (pdpt->entries[j] & PG_USER) {
for(k=0; k<PAGE_MAP_ENTRIES; k++) { // pgd
if (pgd->entries[k] & PG_USER) {
for(l=0; l<PAGE_MAP_ENTRIES; l++) { // pgt
if (pgt->entries[l] & PG_USER)
put_page(pgt->entries[l] & PAGE_MASK);
}
// TODO: put pgt
}
}
// TODO: put pgd
}
}
// TODO: put pdpt
if (put_page(*entry & ~PAGE_FLAGS_MASK))
atomic_int32_dec(&task->user_usage);
}
return 0;
}
put_page(virt_to_phys((size_t) pml4));
task->page_map = NULL;
kprintf("drop_page_map: task = %u\n", task->id); // TODO: remove
spinlock_unlock(&task->page_lock);
// check assertions
if (BUILTIN_EXPECT(task->page_map == get_boot_page_map(), 0))
return -EINVAL;
if (BUILTIN_EXPECT(!task || !task->page_map, 0))
return -EINVAL;
// lock tables
spinlock_irqsave_lock(&task->page_lock);
int ret = page_iterate(0, PAGE_MAP_PGT, NULL, cb); // TODO: check boundaries
pfree(task->page_map, PAGE_SIZE);
// unlock tables
spinlock_irqsave_unlock(&task->page_lock);
kprintf("drop_page_map: finished\n"); // TODO: remove
return 0;
#endif
}
static int set_page_flags(size_t viraddr, uint32_t npages, int flags)
{
task_t* task = per_core(current_task);
size_t bits = page_bits(flags);
size_t start = viraddr;
size_t end = start + npages * PAGE_SIZE;
int cb(page_entry_t* entry, int level) {
if (level) {
if (flags & MAP_USER_SPACE)
*entry |= PG_USER;
}
else
*entry = (*entry & ~PAGE_FLAGS_MASK) | bits;
tlb_flush_one_page(entry_to_virt(entry, level));
return 0;
}
// check assertions
if (BUILTIN_EXPECT(start < KERNEL_SPACE && end >= KERNEL_SPACE, 0))
return 0;
if (BUILTIN_EXPECT(!task || !task->page_map, 0))
return 0;
// lock tables
if (viraddr < KERNEL_SPACE)
spinlock_lock(&kslock);
else
spinlock_irqsave_lock(&task->page_lock);
int ret = page_iterate(start, end, cb, NULL);
// unlock tables
if (viraddr < KERNEL_SPACE)
spinlock_unlock(&kslock);
else
spinlock_irqsave_unlock(&task->page_lock);
return ret;
}
size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flags)
{
task_t* task = per_core(current_task);
size_t i, ret;
if (!viraddr) {
int vma_flags = VMA_HEAP;
if (flags & MAP_USER_SPACE)
vma_flags |= VMA_USER;
viraddr = vma_alloc(npages * PAGE_SIZE, vma_flags);
}
size_t bits = page_bits(flags);
size_t start = viraddr;
size_t end = start + npages * PAGE_SIZE;
int cb(page_entry_t* entry, int level) {
if (level) { // PGD, PDPT, PML4..
if (*entry & PG_PRESENT) {
if (flags & MAP_USER_SPACE) {
/*
* We are changing page map entries which cover
* the kernel. So before altering them we need to
* make a private copy for the task
*/
if (!(*entry & PG_USER)) {
size_t phyaddr = get_page();
if (BUILTIN_EXPECT(!phyaddr, 0))
return -ENOMEM;
atomic_int32_inc(&task->user_usage);
copy_page(phyaddr, *entry & ~PAGE_FLAGS_MASK);
*entry = phyaddr | (*entry & PAGE_FLAGS_MASK) | PG_USER;
/*
* We just need to flush the table itself.
* TLB entries for the kernel remain valid
* because we've not changed them.
*/
tlb_flush_one_page(entry_to_virt(entry, 0));
}
}
}
else {
size_t phyaddr = get_page();
if (BUILTIN_EXPECT(!phyaddr, 0))
return -ENOMEM;
atomic_int32_inc(&task->user_usage);
*entry = phyaddr | bits;
}
}
else { // PGT
if ((*entry & PG_PRESENT) && !(flags & MAP_REMAP))
return -EINVAL;
*entry = phyaddr | bits;
if (flags & MAP_USER_SPACE)
atomic_int32_inc(&task->user_usage);
if (flags & MAP_REMAP)
tlb_flush_one_page(entry_to_virt(entry, level));
phyaddr += PAGE_SIZE;
}
return 0;
}
kprintf("map_region: map %u pages from %#lx to %#lx with flags: %#x\n", npages, viraddr, phyaddr, flags); // TODO: remove
// check assertions
if (BUILTIN_EXPECT(start < KERNEL_SPACE && end >= KERNEL_SPACE, 0))
return 0;
if (BUILTIN_EXPECT(!task || !task->page_map, 0))
return 0;
if (BUILTIN_EXPECT(!viraddr, 0))
return 0;
// lock tables
if (viraddr < KERNEL_SPACE)
spinlock_lock(&kslock);
else
spinlock_irqsave_lock(&task->page_lock);
int ret = page_iterate(start, end, cb, NULL);
// unlock tables
if (viraddr < KERNEL_SPACE)
spinlock_unlock(&kslock);
else
spinlock_irqsave_unlock(&task->page_lock);
return (ret == 0) ? viraddr : 0;
}
int unmap_region(size_t viraddr, uint32_t npages)
{
task_t* task = per_core(current_task);
size_t start = viraddr;
size_t end = start + npages * PAGE_SIZE;
kprintf("unmap_region: unmap %u pages from %#lx\n", npages, viraddr); // TODO: remove
int cb(page_entry_t* entry, int level) {
if (level) { // PGD, PDPT, PML4
page_map_t* map = (page_map_t*) entry_to_virt(entry, 0);
int used = 0;
int i;
for (i=0; i<PAGE_MAP_ENTRIES; i++) {
if (map->entries[i] & PG_PRESENT)
used++;
}
if (!used) {
*entry &= ~PG_PRESENT;
tlb_flush_one_page(entry_to_virt(entry, 0));
if (put_page(*entry & ~PAGE_FLAGS_MASK))
atomic_int32_dec(&task->user_usage);
}
}
else { // PGT
*entry = 0;
tlb_flush_one_page(entry_to_virt(entry, level));
if (viraddr >= KERNEL_SPACE)
atomic_int32_dec(&task->user_usage);
}
return 0;
}
// check assertions
if (BUILTIN_EXPECT(start < KERNEL_SPACE && end >= KERNEL_SPACE, 0))
return 0;
if (BUILTIN_EXPECT(!task || !task->page_map, 0))
return 0;
if (!viraddr) {
kputs("map_region: deprecated vma_alloc() call from within map_region\n");
viraddr = vma_alloc(npages*PAGE_SIZE, VMA_HEAP);
if (BUILTIN_EXPECT(!viraddr, 0)) {
kputs("map_region: found no valid virtual address\n");
ret = 0;
goto out;
}
}
kprintf("map_region: map %u pages from 0x%lx to 0x%lx with flags: 0x%x\n", npages, viraddr, phyaddr, flags); // TODO: remove
// correct alignment
phyaddr &= PAGE_MASK;
viraddr &= PAGE_MASK;
ret = viraddr;
if (flags & MAP_KERNEL_SPACE)
// lock tables
if (viraddr < KERNEL_SPACE)
spinlock_lock(&kslock);
else
else
spinlock_irqsave_lock(&task->page_lock);
for(i=0; i<npages; i++, viraddr+=PAGE_SIZE, phyaddr+=PAGE_SIZE) {
// page table entry
size_t* pte = (size_t *) (PAGE_MAP_PGT|(viraddr >> PAGE_MAP_SHIFT));
int ret = page_iterate(start, end, NULL, cb);
if (*pte && !(flags & MAP_REMAP)) {
kprintf("map_region: 0x%lx is already mapped\n", viraddr);
ret = 0;
goto out;
}
if (flags & MAP_USER_SPACE)
*pte = phyaddr | USER_PAGE;
else
*pte = phyaddr | KERN_PAGE;
if (flags & MAP_NO_CACHE)
*pte |= PG_PCD;
if (flags & MAP_NO_ACCESS)
*pte &= ~PG_PRESENT;
if (flags & MAP_WT)
*pte |= PG_PWT;
if (flags & MAP_USER_SPACE)
atomic_int32_inc(&task->user_usage);
tlb_flush_one_page(viraddr);
}
out:
if (flags & MAP_KERNEL_SPACE)
// unlock tables
if (viraddr < KERNEL_SPACE)
spinlock_unlock(&kslock);
else
spinlock_irqsave_unlock(&task->page_lock);
@@ -506,131 +595,6 @@ out:
return ret;
}
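
Seen from a caller, the rewritten map_region() keeps its old interface; a hedged usage sketch (dev_mmio_phys and npages are placeholders, and the MAP_* flags are assumed to keep their pre-commit meaning):

	static size_t map_device_window(size_t dev_mmio_phys, uint32_t npages)
	{
		// viraddr == 0 lets map_region() allocate a virtual range via vma_alloc()
		size_t viraddr = map_region(0, dev_mmio_phys, npages,
		                            MAP_KERNEL_SPACE | MAP_NO_CACHE);
		if (BUILTIN_EXPECT(!viraddr, 0))
			return 0; // out of memory or range already mapped

		return viraddr;
	}
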
int change_page_permissions(size_t start, size_t end, uint32_t flags)
{
#if 0
uint32_t index1, index2, newflags;
size_t viraddr = start & PAGE_MASK;
size_t phyaddr;
page_map_t* pgt;
page_map_t* pgd;
task_t* task = per_core(current_task);
pgd = per_core(current_task)->page_map;
if (BUILTIN_EXPECT(!pgd, 0))
return -EINVAL;
spinlock_lock(&task->page_lock);
while (viraddr < end)
{
index1 = viraddr >> 22;
index2 = (viraddr >> 12) & 0x3FF;
while ((viraddr < end) && (index2 < 1024)) {
pgt = (page_map_t*) (page_map_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
if (pgt && pgt->entries[index2]) {
phyaddr = pgt->entries[index2] & PAGE_MASK;
newflags = pgt->entries[index2] & 0xFFF; // get old flags
if (!(newflags & PG_SVM_INIT)) {
if ((newflags & PG_SVM_STRONG) && !(newflags & PG_PRESENT) && (flags & (VMA_READ|VMA_WRITE) && !(flags & VMA_NOACCESS)))
newflags |= PG_PRESENT;
else if ((newflags & PG_SVM_STRONG) && (newflags & PG_PRESENT) && (flags & VMA_NOACCESS))
newflags &= ~PG_PRESENT;
}
// update flags
if (!(flags & VMA_WRITE)) {
newflags &= ~PG_RW;
} else {
newflags |= PG_RW;
}
pgt->entries[index2] = (newflags & 0xFFF) | (phyaddr & PAGE_MASK);
tlb_flush_one_page(viraddr);
}
index2++;
viraddr += PAGE_SIZE;
}
}
spinlock_unlock(&task->page_lock);
#endif
return -EINVAL;
}
int unmap_region(size_t viraddr, uint32_t npages)
{
task_t* task = per_core(current_task);
page_map_t* pdpt, * pgd, * pgt;
size_t i;
uint16_t index_pml4, index_pdpt;
uint16_t index_pgd, index_pgt;
if (BUILTIN_EXPECT(!task || !task->page_map, 0))
return -EINVAL;
if (viraddr <= KERNEL_SPACE)
spinlock_lock(&kslock);
else
spinlock_irqsave_lock(&task->page_lock);
i = 0;
while(i<npages)
{
index_pml4 = (viraddr >> 39) & 0x1FF;
index_pdpt = (viraddr >> 30) & 0x1FF;
index_pgd = (viraddr >> 21) & 0x1FF;
index_pgt = (viraddr >> 12) & 0x1FF;
// currently, we allocate pages only in kernel space.
// => physical address of the page table is identical of the virtual address
pdpt = (page_map_t*) (task->page_map->entries[index_pml4] & PAGE_MASK);
if (!pdpt) {
viraddr += (size_t) PAGE_MAP_ENTRIES*PAGE_MAP_ENTRIES*PAGE_MAP_ENTRIES*PAGE_SIZE;
i += PAGE_MAP_ENTRIES*PAGE_MAP_ENTRIES*PAGE_MAP_ENTRIES;
continue;
}
pgd = (page_map_t*) (pdpt->entries[index_pdpt] & PAGE_MASK);
if (!pgd) {
viraddr += PAGE_MAP_ENTRIES*PAGE_MAP_ENTRIES*PAGE_SIZE;
i += PAGE_MAP_ENTRIES*PAGE_MAP_ENTRIES;
continue;
}
pgt = (page_map_t*) (pgd->entries[index_pgd] & PAGE_MASK);
if (!pgt) {
viraddr += PAGE_MAP_ENTRIES*PAGE_SIZE;
i += PAGE_MAP_ENTRIES;
continue;
}
if (pgt->entries[index_pgt])
pgt->entries[index_pgt] &= ~PG_PRESENT;
viraddr +=PAGE_SIZE;
i++;
if (viraddr > KERNEL_SPACE)
atomic_int32_dec(&task->user_usage);
tlb_flush_one_page(viraddr);
}
if (viraddr <= KERNEL_SPACE)
spinlock_unlock(&kslock);
else
spinlock_irqsave_unlock(&task->page_lock);
return 0;
}
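
The hand-unrolled shifts in this removed unmap_region() can be expressed generically; a sketch (not part of the commit) for the four-level x86_64 layout with a 12-bit page offset and 9 bits per level:

	// level 3 = PML4, 2 = PDPT, 1 = PGD, 0 = PGT
	static inline uint16_t map_index(size_t viraddr, int level)
	{
		return (viraddr >> (12 + 9*level)) & 0x1FF;
	}
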
static void pagefault_handler(struct state *s)
{
task_t* task = per_core(current_task);