diff --git a/arch/x86/mm/page64.c b/arch/x86/mm/page64.c
index 29f61594..88d0a948 100644
--- a/arch/x86/mm/page64.c
+++ b/arch/x86/mm/page64.c
@@ -98,58 +98,58 @@ int page_iterate(size_t start, size_t end, page_cb_t pre, page_cb_t post)
     page_entry_t* entry[PAGE_MAP_LEVELS];
     page_entry_t* last[PAGE_MAP_LEVELS];
 
+    if (BUILTIN_EXPECT(start >= end, 0))
+        return -EINVAL;
+
     // setup subtree boundaries
     int i;
     for (i=0; i<PAGE_MAP_LEVELS; i++) {
         entry[i] = virt_to_entry(start, i);
-        last[i] = (end) ? virt_to_entry(end - PAGE_SIZE, i) + 1 : 0;
+        last[i] = virt_to_entry(end - 1, i);
     }
 
     // nested iterator function (sees the scope of parent)
     int iterate(int level) {
         int ret;
-        while (entry[level] != last[level]) {
-            //if (*entry[level] && level) kprintf("page_iterate: level=%u, entry[level]=%p, last[level]=%p\n", level, entry[level], last[level]);
-
-            // pre-order callback
-            if (pre)
+        while (entry[level] <= last[level]) {
+            if (pre) { // call pre-order callback if available
                 ret = pre(entry[level], level);
-
-            if (BUILTIN_EXPECT(ret < 0, 0))
-                return ret;
+                if (BUILTIN_EXPECT(ret < 0, 0))
+                    return ret;
+            }
 
             // recurse if
             // - we are not in the PGT
             // - and the inferior page table is present
-            // - and the current entry is no huge page
-            if (level && (*entry[level] & PG_PRESENT) && !(*entry[level] & PG_PSE))
-                iterate(level-1);
+            // - and the current entry represents no huge page
+            if (level && (*entry[level] & PG_PRESENT) && !(*entry[level] & PG_PSE)) {
+                ret = iterate(level-1);
+                if (BUILTIN_EXPECT(ret < 0, 0))
+                    return ret;
+            }
             // or skip the entries we've omitted...
            else {
                size_t next = (size_t) (entry[level]+1);
[...]
-        if (!(src->entries[i] & PG_PRESENT))
-            // skip empty entries
-            dest->entries[i] = 0;
-        else if (src->entries[i] & PG_USER) {
-            size_t phys;
-
-            // deep copy user tables
-            if ((size_t) src >= PAGE_MAP_PGT) {
-                phys = get_page();
-                if (BUILTIN_EXPECT(!phys, 0))
-                    return -ENOMEM;
-
-                dest->entries[i] = phys|(src->entries[i] & ~PAGE_MASK);
-
-                // reuse pointers to next lower page map tables
-                next_src = (page_map_t*) ((size_t) &src->entries[i] << 9);
-                next_dest = (page_map_t*) ((size_t) &dest->entries[i] << 9);
-
-                ret += 1 + copy_page_map(next_src, next_dest, copy);
-            }
-            // deep copy page frame
-            else {
-                if (copy) {
-                    phys = copy_page_frame((size_t*) src->entries[i]);
-                    dest->entries[i] = phys|(src->entries[i] & ~PAGE_MASK);
-                }
-                kprintf("c: %p (%lx)\n", &src->entries[i], src->entries[i]);
-            }
-        }
-        // shallow copy kernel only tables
-        else
-            dest->entries[i] = src->entries[i];
-    }
-
-    kputs("r\n");
-    return ret;
-}
-
-int create_page_map(task_t* task, int copy)
-{
-    size_t phys;
+    size_t phyaddr;
     uint32_t ret;
 
+    int cb(page_entry_t* src, int level) {
+        page_entry_t* dest = src - (1L<<36); // TODO
+
+        if (*src & PG_PRESENT) {
+            if (*src & PG_USER) {
+                kprintf("cb: src=%p, dest=%p, *src=%#lx, level=%u ", src, dest, *src, level); // TODO: remove
+
+                if (level) { // deep copy user table
+                    kputs("deep copy\n");
+
+                    size_t phyaddr = get_page();
+                    if (BUILTIN_EXPECT(!phyaddr, 0))
+                        return -ENOMEM;
+
+                    atomic_int32_inc(&cur_task->user_usage);
+
+                    *dest = phyaddr | (*src & ~PAGE_FLAGS_MASK);
+
+                    // TODO: copy_page?
+                    // TODO: memset(*dest, 0)?
+                }
+                else if (copy) { // deep copy page frame
+                    kputs("deep copy frame\n");
+
+                    size_t phyaddr = get_page();
+                    if (BUILTIN_EXPECT(!phyaddr, 0))
+                        return -ENOMEM;
+
+                    atomic_int32_inc(&cur_task->user_usage);
+
+                    copy_page(phyaddr, *src & ~PAGE_FLAGS_MASK);
+                    *dest = phyaddr | (*src & PAGE_FLAGS_MASK);
+                }
+                else
+                    kputs("???\n");
+            }
+            else // shallow copy kernel table
+                *dest = *src;
+        }
+        tlb_flush(); // ouch :(
+        return 0;
+    }
+
     // fixed mapping for paging structures
     page_map_t *current = (page_map_t*) PAGE_MAP_PML4;
-    page_map_t *new = (page_map_t*) (PAGE_MAP_PML4 - 0x1000);
+    page_map_t *new = palloc(PAGE_SIZE, 0);
+    if (BUILTIN_EXPECT(!new, 0))
+        return -ENOMEM;
 
-    // get new pml4 table
-    phys = get_page();
-    if (!phys) return -ENOMEM;
+    phyaddr = virt_to_phys(new);
 
-    current->entries[PAGE_MAP_ENTRIES-2] = phys|KERN_TABLE;
-    new->entries[PAGE_MAP_ENTRIES-1] = phys|KERN_TABLE;
+    // lock tables
+    spinlock_lock(&kslock);
+    spinlock_irqsave_lock(&cur_task->page_lock);
 
+    // map new table
+    current->entries[PAGE_MAP_ENTRIES-2] = phyaddr | PG_TABLE;
     tlb_flush(); // ouch :(
 
-    spinlock_lock(&kslock);
-    ret = copy_page_map(current, new, copy);
+    // setup self reference for new table
+    new->entries[PAGE_MAP_ENTRIES-1] = phyaddr | PG_TABLE;
+
+    ret = page_iterate(0, PAGE_MAP_PGT - (1L<<39), cb, NULL); // TODO: check boundaries
+
+    // unlock tables
+    spinlock_irqsave_unlock(&cur_task->page_lock);
     spinlock_unlock(&kslock);
 
-    new->entries[PAGE_MAP_ENTRIES-1] = phys|KERN_TABLE;
+    // unmap new tables
     current->entries[PAGE_MAP_ENTRIES-2] = 0;
+    tlb_flush(); // ouch :(
 
-    task->page_map = (page_map_t*) phys;
+    new_task->page_map = new;
+
+    kprintf("copy_page_map: allocated %i page tables\n", ret); // TODO: remove
-    kprintf("create_page_map: allocated %u page tables\n", ret);
 
     return ret;
 }
 
 int drop_page_map(void)
 {
-#if 1
-    kprintf("TODO: test drop_page_map()\n");
-    return -EINVAL; // TODO
-#else
     task_t* task = per_core(current_task);
-    page_map_t* pml4, * pdpt, * pgd, * pgt;
-    size_t phys;
-    uint32_t i, j, k, l;
-    pml4 = task->page_map;
 
+    int cb(page_entry_t* entry, int level) {
+        if (*entry & PG_USER) {
+            kprintf("drop_page_map:cb: entry = %p, level = %u\n", entry, level); // TODO: remove
 
-    if (BUILTIN_EXPECT(pml4 == &boot_pml4, 0))
-        return -EINVAL;
-
-    spinlock_lock(&task->page_lock);
-
-    // delete all user pages and tables
-    for(i=0; i<PAGE_MAP_ENTRIES; i++) {
-        if (pml4->entries[i] & PG_USER) {
-            for(j=0; j<PAGE_MAP_ENTRIES; j++) {
-                if (pdpt->entries[j] & PG_USER) {
-                    for(k=0; k<PAGE_MAP_ENTRIES; k++) {
-                        if (pgd->entries[k] & PG_USER) {
-                            for(l=0; l<PAGE_MAP_ENTRIES; l++)
-                                if (pgt->entries[l] & PG_USER)
-                                    put_page(pgt->entries[l] & PAGE_MASK);
-                        }
-                        // TODO: put pgt
-                    }
-                }
-                // TODO: put pgd
-            }
-        }
-        // TODO: put pdpt
+            if (put_page(*entry & ~PAGE_FLAGS_MASK))
+                atomic_int32_dec(&task->user_usage);
         }
+
+        return 0;
+    }
 
-    put_page(virt_to_phys((size_t) pml4));
-    task->page_map = NULL;
+    kprintf("drop_page_map: task = %u\n", task->id); // TODO: remove
 
-    spinlock_unlock(&task->page_lock);
+    // check assertions
+    if (BUILTIN_EXPECT(task->page_map == get_boot_page_map(), 0))
+        return -EINVAL;
+    if (BUILTIN_EXPECT(!task || !task->page_map, 0))
+        return -EINVAL;
+
+    // lock tables
+    spinlock_irqsave_lock(&task->page_lock);
+
+    int ret = page_iterate(0, PAGE_MAP_PGT, NULL, cb); // TODO: check boundaries
+
+    pfree(task->page_map, PAGE_SIZE);
+
+    // unlock tables
+    spinlock_irqsave_unlock(&task->page_lock);
+
+    kprintf("drop_page_map: finished\n"); // TODO: remove
 
     return 0;
-#endif
 }
 
+static int set_page_flags(size_t viraddr, uint32_t npages, int flags)
+{
+    task_t* task = per_core(current_task);
+
+    size_t bits = page_bits(flags);
+    size_t start = viraddr;
+    size_t end = start + npages * PAGE_SIZE;
+
+    int cb(page_entry_t* entry, int level) {
+        if (level) {
+            if (flags & MAP_USER_SPACE)
+                *entry |= PG_USER;
+        }
+        else
+            *entry = (*entry & ~PAGE_FLAGS_MASK) | bits;
+
+        tlb_flush_one_page(entry_to_virt(entry, level));
+
+        return 0;
+    }
+
+    // check assertions
+    if (BUILTIN_EXPECT(start < KERNEL_SPACE && end >= KERNEL_SPACE, 0))
+        return 0;
+    if (BUILTIN_EXPECT(!task || !task->page_map, 0))
+        return 0;
+
+    // lock tables
+    if (viraddr < KERNEL_SPACE)
+        spinlock_lock(&kslock);
+    else
+        spinlock_irqsave_lock(&task->page_lock);
+
+    int ret = page_iterate(start, end, cb, NULL);
+
+    // unlock tables
+    if (viraddr < KERNEL_SPACE)
+        spinlock_unlock(&kslock);
+    else
+        spinlock_irqsave_unlock(&task->page_lock);
+
+    return ret;
+}
 
 size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flags)
 {
     task_t* task = per_core(current_task);
-    size_t i, ret;
 
+    if (!viraddr) {
+        int vma_flags = VMA_HEAP;
+        if (flags & MAP_USER_SPACE)
+            vma_flags |= VMA_USER;
+
+        viraddr = vma_alloc(npages * PAGE_SIZE, vma_flags);
+    }
+
+    size_t bits = page_bits(flags);
+    size_t start = viraddr;
+    size_t end = start + npages * PAGE_SIZE;
+
+    int cb(page_entry_t* entry, int level) {
+        if (level) { // PGD, PDPT, PML4..
+            if (*entry & PG_PRESENT) {
+                if (flags & MAP_USER_SPACE) {
+                    /*
+                     * We are changing page map entries which cover
+                     * the kernel. So before altering them we need to
+                     * make a private copy for the task
+                     */
+                    if (!(*entry & PG_USER)) {
+                        size_t phyaddr = get_page();
+                        if (BUILTIN_EXPECT(!phyaddr, 0))
+                            return -ENOMEM;
+
+                        atomic_int32_inc(&task->user_usage);
+
+                        copy_page(phyaddr, *entry & ~PAGE_FLAGS_MASK);
+                        *entry = phyaddr | (*entry & PAGE_FLAGS_MASK) | PG_USER;
+
+                        /*
+                         * We just need to flush the table itself.
+                         * TLB entries for the kernel remain valid
+                         * because we've not changed them.
+                         */
+                        tlb_flush_one_page(entry_to_virt(entry, 0));
+                    }
+                }
+            }
+            else {
+                size_t phyaddr = get_page();
+                if (BUILTIN_EXPECT(!phyaddr, 0))
+                    return -ENOMEM;
+
+                atomic_int32_inc(&task->user_usage);
+
+                *entry = phyaddr | bits;
+            }
+        }
+        else { // PGT
+            if ((*entry & PG_PRESENT) && !(flags & MAP_REMAP))
+                return -EINVAL;
+
+            *entry = phyaddr | bits;
+
+            if (flags & MAP_USER_SPACE)
+                atomic_int32_inc(&task->user_usage);
+
+            if (flags & MAP_REMAP)
+                tlb_flush_one_page(entry_to_virt(entry, level));
+
+            phyaddr += PAGE_SIZE;
+        }
+
+        return 0;
+    }
+
+    kprintf("map_region: map %u pages from %#lx to %#lx with flags: %#x\n", npages, viraddr, phyaddr, flags); // TODO: remove
+
+    // check assertions
+    if (BUILTIN_EXPECT(start < KERNEL_SPACE && end >= KERNEL_SPACE, 0))
+        return 0;
+    if (BUILTIN_EXPECT(!task || !task->page_map, 0))
+        return 0;
+    if (BUILTIN_EXPECT(!viraddr, 0))
+        return 0;
+
+    // lock tables
+    if (viraddr < KERNEL_SPACE)
+        spinlock_lock(&kslock);
+    else
+        spinlock_irqsave_lock(&task->page_lock);
+
+    int ret = page_iterate(start, end, cb, NULL);
+
+    // unlock tables
+    if (viraddr < KERNEL_SPACE)
+        spinlock_unlock(&kslock);
+    else
+        spinlock_irqsave_unlock(&task->page_lock);
+
+    return (ret == 0) ? viraddr : 0;
+}
+
+int unmap_region(size_t viraddr, uint32_t npages)
+{
+    task_t* task = per_core(current_task);
+
+    size_t start = viraddr;
+    size_t end = start + npages * PAGE_SIZE;
+
+    kprintf("unmap_region: unmap %u pages from %#lx\n", npages, viraddr); // TODO: remove
+
+    int cb(page_entry_t* entry, int level) {
+        if (level) { // PGD, PDPT, PML4
+            page_map_t* map = (page_map_t*) entry_to_virt(entry, 0);
+            int used = 0;
+
+            int i;
+            for (i=0; i<PAGE_MAP_ENTRIES; i++) {
+                if (map->entries[i] & PG_PRESENT)
+                    used++;
+            }
+
+            if (!used) {
+                *entry &= ~PG_PRESENT;
+                tlb_flush_one_page(entry_to_virt(entry, 0));
+
+                if (put_page(*entry & ~PAGE_FLAGS_MASK))
+                    atomic_int32_dec(&task->user_usage);
+            }
+        }
+        else { // PGT
+            *entry = 0;
+
+            tlb_flush_one_page(entry_to_virt(entry, level));
+
+            if (viraddr >= KERNEL_SPACE)
+                atomic_int32_dec(&task->user_usage);
+        }
+
+        return 0;
+    }
+
+    // check assertions
+    if (BUILTIN_EXPECT(start < KERNEL_SPACE && end >= KERNEL_SPACE, 0))
+        return 0;
     if (BUILTIN_EXPECT(!task || !task->page_map, 0))
         return 0;
 
-    if (!viraddr) {
-        kputs("map_region: deprecated vma_alloc() call from within map_region\n");
-        viraddr = vma_alloc(npages*PAGE_SIZE, VMA_HEAP);
-        if (BUILTIN_EXPECT(!viraddr, 0)) {
-            kputs("map_region: found no valid virtual address\n");
-            ret = 0;
-            goto out;
-        }
-    }
-
-    kprintf("map_region: map %u pages from 0x%lx to 0x%lx with flags: 0x%x\n", npages, viraddr, phyaddr, flags); // TODO: remove
-
-    // correct alignment
-    phyaddr &= PAGE_MASK;
-    viraddr &= PAGE_MASK;
-    ret = viraddr;
-
-    if (flags & MAP_KERNEL_SPACE)
+    // lock tables
+    if (viraddr < KERNEL_SPACE)
         spinlock_lock(&kslock);
-    else
+    else
         spinlock_irqsave_lock(&task->page_lock);
 
-    for(i=0; i<npages; i++, viraddr+=PAGE_SIZE, phyaddr+=PAGE_SIZE) {
-        page_entry_t* pte = (page_entry_t*) (PAGE_MAP_PGT | (viraddr >> PAGE_MAP_SHIFT));
+    int ret = page_iterate(start, end, NULL, cb);
 
-        if (*pte && !(flags & MAP_REMAP)) {
-            kprintf("map_region: 0x%lx is already mapped\n", viraddr);
-            ret = 0;
-            goto out;
-        }
-
-        if (flags & MAP_USER_SPACE)
-            *pte = phyaddr | USER_PAGE;
-        else
-            *pte = phyaddr | KERN_PAGE;
-
-        if (flags & MAP_NO_CACHE)
-            *pte |= PG_PCD;
-
-        if (flags & MAP_NO_ACCESS)
-            *pte &= ~PG_PRESENT;
-
-        if (flags & MAP_WT)
-            *pte |= PG_PWT;
-
-        if (flags & MAP_USER_SPACE)
-            atomic_int32_inc(&task->user_usage);
-
-        tlb_flush_one_page(viraddr);
-    }
-
-out:
-    if (flags & MAP_KERNEL_SPACE)
+    // unlock tables
+    if (viraddr < KERNEL_SPACE)
         spinlock_unlock(&kslock);
     else
         spinlock_irqsave_unlock(&task->page_lock);
@@ -506,131 +595,6 @@ out:
     return ret;
 }
 
-int change_page_permissions(size_t start, size_t end, uint32_t flags)
-{
-#if 0
-    uint32_t index1, index2, newflags;
-    size_t viraddr = start & PAGE_MASK;
-    size_t phyaddr;
-    page_map_t* pgt;
-    page_map_t* pgd;
-    task_t* task = per_core(current_task);
-
-    pgd = per_core(current_task)->page_map;
-    if (BUILTIN_EXPECT(!pgd, 0))
-        return -EINVAL;
-
-    spinlock_lock(&task->page_lock);
-
-    while (viraddr < end)
-    {
-        index1 = viraddr >> 22;
-        index2 = (viraddr >> 12) & 0x3FF;
-
-        while ((viraddr < end) && (index2 < 1024)) {
-            pgt = (page_map_t*) (page_map_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
-            if (pgt && pgt->entries[index2]) {
-                phyaddr = pgt->entries[index2] & PAGE_MASK;
-                newflags = pgt->entries[index2] & 0xFFF; // get old flags
-
-                if (!(newflags & PG_SVM_INIT)) {
-                    if ((newflags & PG_SVM_STRONG) && !(newflags & PG_PRESENT) && (flags & (VMA_READ|VMA_WRITE) && !(flags & VMA_NOACCESS)))
-                        newflags |= PG_PRESENT;
-                    else if ((newflags & PG_SVM_STRONG) && (newflags & PG_PRESENT) && (flags & VMA_NOACCESS))
-                        newflags &= ~PG_PRESENT;
-                }
-
-                // update flags
-                if (!(flags & VMA_WRITE)) {
-                    newflags &= ~PG_RW;
-                } else {
-                    newflags |= PG_RW;
-                }
-
-                pgt->entries[index2] = (newflags & 0xFFF) | (phyaddr & PAGE_MASK);
-
-                tlb_flush_one_page(viraddr);
-            }
-
-            index2++;
-            viraddr += PAGE_SIZE;
-        }
-    }
-
-    spinlock_unlock(&task->page_lock);
-#endif
-
-    return -EINVAL;
-}
-
-
-int unmap_region(size_t viraddr, uint32_t npages)
-{
-    task_t* task = per_core(current_task);
-    page_map_t* pdpt, * pgd, * pgt;
-    size_t i;
-    uint16_t index_pml4, index_pdpt;
-    uint16_t index_pgd, index_pgt;
-
-    if (BUILTIN_EXPECT(!task || !task->page_map, 0))
-        return -EINVAL;
-
-    if (viraddr <= KERNEL_SPACE)
-        spinlock_lock(&kslock);
-    else
-        spinlock_irqsave_lock(&task->page_lock);
-
-    i = 0;
-    while(i<npages) {
-        index_pml4 = (viraddr >> 39) & 0x1FF;
-        index_pdpt = (viraddr >> 30) & 0x1FF;
-        index_pgd = (viraddr >> 21) & 0x1FF;
-        index_pgt = (viraddr >> 12) & 0x1FF;
-
-        // currently, we allocate pages only in kernel space.
-        // => physical address of the page table is identical of the virtual address
-        pdpt = (page_map_t*) (task->page_map->entries[index_pml4] & PAGE_MASK);
-        if (!pdpt) {
-            viraddr += (size_t) PAGE_MAP_ENTRIES*PAGE_MAP_ENTRIES*PAGE_MAP_ENTRIES*PAGE_SIZE;
-            i += PAGE_MAP_ENTRIES*PAGE_MAP_ENTRIES*PAGE_MAP_ENTRIES;
-            continue;
-        }
-
-        pgd = (page_map_t*) (pdpt->entries[index_pdpt] & PAGE_MASK);
-        if (!pgd) {
-            viraddr += PAGE_MAP_ENTRIES*PAGE_MAP_ENTRIES*PAGE_SIZE;
-            i += PAGE_MAP_ENTRIES*PAGE_MAP_ENTRIES;
-            continue;
-        }
-
-        pgt = (page_map_t*) (pgd->entries[index_pgd] & PAGE_MASK);
-        if (!pgt) {
-            viraddr += PAGE_MAP_ENTRIES*PAGE_SIZE;
-            i += PAGE_MAP_ENTRIES;
-            continue;
-        }
-
-        if (pgt->entries[index_pgt])
-            pgt->entries[index_pgt] &= ~PG_PRESENT;
-
-        viraddr += PAGE_SIZE;
-        i++;
-
-        if (viraddr > KERNEL_SPACE)
-            atomic_int32_dec(&task->user_usage);
-
-        tlb_flush_one_page(viraddr);
-    }
-
-    if (viraddr <= KERNEL_SPACE)
-        spinlock_unlock(&kslock);
-    else
-        spinlock_irqsave_unlock(&task->page_lock);
-
-    return 0;
-}
-
 static void pagefault_handler(struct state *s)
 {
     task_t* task = per_core(current_task);