From ed2186ee03f50364aea42a42c0fcf689d8fa84f8 Mon Sep 17 00:00:00 2001
From: Stefan Lankes
Date: Tue, 29 May 2012 20:47:45 +0200
Subject: [PATCH] add init code, which jumps to 64-bit mode and calls main

---
 Makefile.example                           |    3 +-
 arch/x86/kernel/Makefile                   |    2 +-
 arch/x86/kernel/{entry.asm => entry32.asm} |    0
 arch/x86/kernel/entry64.asm                | 1025 ++++++++++++++++++++
 arch/x86/kernel/gdt.c                      |    2 +-
 arch/x86/mm/Makefile                       |    2 +-
 arch/x86/mm/{page.c => page32.c}           |    0
 arch/x86/mm/page64.c                       |  871 +++++++++++++++++
 kernel/main.c                              |    7 +-
 9 files changed, 1907 insertions(+), 5 deletions(-)
 rename arch/x86/kernel/{entry.asm => entry32.asm} (100%)
 create mode 100644 arch/x86/kernel/entry64.asm
 rename arch/x86/mm/{page.c => page32.c} (100%)
 create mode 100644 arch/x86/mm/page64.c

diff --git a/Makefile.example b/Makefile.example
index db58a6cc..da2779da 100644
--- a/Makefile.example
+++ b/Makefile.example
@@ -46,6 +46,7 @@ ARFLAGS = rsv
 LDFLAGS = -T link$(BIT).ld -z max-page-size=4096 --defsym __BUILD_DATE=$(shell date +'%Y%m%d') --defsym __BUILD_TIME=$(shell date +'%H%M%S')
 STRIP_DEBUG = --strip-debug
 KEEP_DEBUG = --only-keep-debug
+OUTPUT_FORMAT = -O elf32-i386
 CFLAGS_FOR_NEWLIB = -m32 -march=i586 -O2 $(STACKPROT)
 LDFLAGS_FOR_NEWLIB = -m32 -march=i586
 NASMFLAGS_FOR_NEWLIB = -felf32
@@ -83,7 +84,7 @@ $(NAME).elf:
 	@echo [OBJCOPY] $(NAME).sym
 	$Q$(OBJCOPY_FOR_TARGET) $(KEEP_DEBUG) $(NAME).elf $(NAME).sym
 	@echo [OBJCOPY] $(NAME).elf
-	$Q$(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $(NAME).elf
+	$Q$(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $(OUTPUT_FORMAT) $(NAME).elf
 
 qemu: newlib tools $(NAME).elf
 	$(QEMU) -monitor stdio -smp 2 -net nic,model=rtl8139 -net user,hostfwd=tcp::12345-:4711 -net dump -kernel metalsvm.elf -initrd tools/initrd.img
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 95fd49cf..1145b742 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -1,5 +1,5 @@
 C_source := gdt.c kb.c timer.c irq.c isrs.c idt.c vga.c multiboot.c apic.c pci.c processor.c
-ASM_source := entry.asm string$(BIT).asm
+ASM_source := entry$(BIT).asm string$(BIT).asm
 MODULE := arch_x86_kernel
 
 include $(TOPDIR)/Makefile.inc
diff --git a/arch/x86/kernel/entry.asm b/arch/x86/kernel/entry32.asm
similarity index 100%
rename from arch/x86/kernel/entry.asm
rename to arch/x86/kernel/entry32.asm
diff --git a/arch/x86/kernel/entry64.asm b/arch/x86/kernel/entry64.asm
new file mode 100644
index 00000000..bb06c3b1
--- /dev/null
+++ b/arch/x86/kernel/entry64.asm
@@ -0,0 +1,1025 @@
+;
+; Copyright 2010 Stefan Lankes, Chair for Operating Systems,
+;                RWTH Aachen University
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+;    http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+;
+; This file is part of MetalSVM.

+; This is the kernel's entry point. We could either call main here,
+; or we can use this to set up the stack or other nice stuff, like
+; perhaps setting up the GDT and segments. Please note that interrupts
+; are disabled at this point: more on interrupts later!
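The new OUTPUT_FORMAT objcopy step above exists because GRUB's Multiboot (version 1) loader only accepts 32-bit ELF images, so the 64-bit kernel is converted to elf32-i386 after linking. GRUB then searches the image for the Multiboot header defined just below, whose three words must satisfy magic + flags + checksum == 0 (mod 2^32). A minimal host-side sketch of that invariant, with the constants copied from the header below (the program itself is illustrative and not part of the patch):

```c
/* Check the Multiboot header invariant: the 32-bit sum of magic,
 * flags and checksum must be zero, which is what GRUB verifies
 * before it agrees to boot the image. */
#include <stdint.h>
#include <assert.h>

#define MULTIBOOT_HEADER_MAGIC 0x1BADB002u
#define MULTIBOOT_PAGE_ALIGN   (1u << 0)
#define MULTIBOOT_MEMORY_INFO  (1u << 1)
#define MULTIBOOT_HEADER_FLAGS (MULTIBOOT_PAGE_ALIGN | MULTIBOOT_MEMORY_INFO)
#define MULTIBOOT_CHECKSUM     (-(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS))

int main(void)
{
	/* wraps around modulo 2^32, exactly like the `dd` words in the asm */
	assert((uint32_t)(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS + MULTIBOOT_CHECKSUM) == 0);
	return 0;
}
```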
+
+%include "config.inc"
+
+[BITS 32]
+; We use a special name to map this section at the beginning of our kernel
+; => Multiboot needs its magic number at the beginning of the kernel
+SECTION .mboot
+global start
+start:
+    jmp stublet
+
+; This part MUST be 4-byte aligned, so we solve that issue using 'ALIGN 4'
+ALIGN 4
+mboot:
+    ; Multiboot macros to make a few lines more readable later
+    MULTIBOOT_PAGE_ALIGN    equ 1<<0
+    MULTIBOOT_MEMORY_INFO   equ 1<<1
+    ; MULTIBOOT_AOUT_KLUDGE equ 1<<16
+    MULTIBOOT_HEADER_MAGIC  equ 0x1BADB002
+    MULTIBOOT_HEADER_FLAGS  equ MULTIBOOT_PAGE_ALIGN | MULTIBOOT_MEMORY_INFO ; | MULTIBOOT_AOUT_KLUDGE
+    MULTIBOOT_CHECKSUM      equ -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS)
+    EXTERN code, bss, end
+
+    ; This is the GRUB Multiboot header. A boot signature
+    dd MULTIBOOT_HEADER_MAGIC
+    dd MULTIBOOT_HEADER_FLAGS
+    dd MULTIBOOT_CHECKSUM
+
+    ; AOUT kludge - must be physical addresses. Make a note of these:
+    ; The linker script fills in the data for these ones!
+    ; dd mboot
+    ; dd code
+    ; dd bss
+    ; dd end
+    ; dd start
+
+ALIGN 4
+; we already need a valid GDT to switch to 64-bit mode
+GDT64:                   ; Global Descriptor Table (64-bit).
+    .Null: equ $ - GDT64 ; The null descriptor.
+    dw 0                 ; Limit (low).
+    dw 0                 ; Base (low).
+    db 0                 ; Base (middle).
+    db 0                 ; Access.
+    db 0                 ; Granularity.
+    db 0                 ; Base (high).
+    .Code: equ $ - GDT64 ; The code descriptor.
+    dw 0                 ; Limit (low).
+    dw 0                 ; Base (low).
+    db 0                 ; Base (middle).
+    db 10011000b         ; Access.
+    db 00100000b         ; Granularity.
+    db 0                 ; Base (high).
+    .Data: equ $ - GDT64 ; The data descriptor.
+    dw 0                 ; Limit (low).
+    dw 0                 ; Base (low).
+    db 0                 ; Base (middle).
+    db 10010010b         ; Access.
+    db 00000000b         ; Granularity.
+    db 0                 ; Base (high).
+    .Pointer:            ; The GDT-pointer.
+    dw $ - GDT64 - 1     ; Limit.
+    dq GDT64             ; Base.
+
+times 256 DD 0
+startup_stack:
+
+SECTION .data
+; create the default page tables for the 64-bit kernel
+global boot_pgd ; aka PML4
+ALIGN 4096      ; of course, the page tables have to be page aligned
+NOPTS equ 2
+boot_pgd times 512 DQ 0
+pdpt     times 512 DQ 0
+pd       times 512 DQ 0
+pt       times (NOPTS*512) DQ 0
+
+SECTION .text
+ALIGN 4
+stublet:
+    mov esp, startup_stack-4
+    push ebx ; save pointer to multiboot structure
+    mov eax, cr0
+; enable caching, disable paging and fpu emulation
+    and eax, 0x1ffffffb
+; ...and turn on FPU exceptions
+    or eax, 0x22
+    mov cr0, eax
+; clear the current pgd entry
+    xor eax, eax
+    mov cr3, eax
+; at this stage, we disable the SSE support
+    mov eax, cr4
+    and eax, 0xfffbf9ff
+    mov cr4, eax
+; do we have the CPUID instruction?
+    pushfd
+    pop eax
+    mov ecx, eax
+    xor eax, 1 << 21
+    push eax
+    popfd
+    pushfd
+    pop eax
+    push ecx
+    popfd
+    xor eax, ecx
+    jz Linvalid
+; is extended CPUID (> 0x80000000) supported?
+    mov eax, 0x80000000
+    cpuid
+    cmp eax, 0x80000001
+    jb Linvalid ; it is less, so there is no long mode
+; do we have long mode?
+    mov eax, 0x80000001
+    cpuid
+    test edx, 1 << 29 ; test whether the LM bit (bit 29) is set in EDX
+    jz Linvalid       ; it isn't, so there is no long mode
+
+; initialize page table
+    mov edi, boot_pgd
+    mov cr3, edi
+
+; So let's make PML4T[0] point to the PDPT and so on:
+    mov DWORD [edi], pdpt       ; Set the double word at the destination index to pdpt.
+    or  DWORD [edi], 0x00000003 ; Set present and writeable bit
+    mov edi, pdpt
+    mov DWORD [edi], pd         ; Set the double word at the destination index to pd.
+    or  DWORD [edi], 0x00000003 ; Set present and writeable bit
+    mov edi, pd
+    mov ebx, pt
+    mov ecx, NOPTS
+L0:
+    mov DWORD [edi], ebx        ; Set the double word at the destination index to pt.
+    or  DWORD [edi], 0x00000003 ; Set present and writeable bit
+    add edi, 8
+    add ebx, 0x1000
+    loop L0
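The identity mapping built here and below relies on simple index arithmetic: with 4 KiB pages and 8-byte entries, the byte offset of a page-table entry is (addr >> 12) * 8, which is exactly what the `shr edi, 9` in the next hunk computes for page-aligned addresses. A small illustrative program (not part of the patch) that decomposes an address into the four 9-bit table indices used by long-mode paging:

```c
/* Sketch of the index arithmetic behind the identity mapping:
 * split a virtual address into PML4/PDPT/PD/PT indices and show
 * that (addr >> 12) * 8 equals addr >> 9 for page-aligned addresses. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t addr = 0xB8000; /* the VGA buffer mapped below */

	unsigned pml4 = (addr >> 39) & 0x1FF;
	unsigned pdpt = (addr >> 30) & 0x1FF;
	unsigned pd   = (addr >> 21) & 0x1FF;
	unsigned pt   = (addr >> 12) & 0x1FF;

	/* byte offset of the PT entry; the asm uses `shr edi, 9` for this */
	printf("indices %u/%u/%u/%u, PT entry byte offset %llu\n",
	       pml4, pdpt, pd, pt, (unsigned long long)((addr >> 12) * 8));
	return 0;
}
```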
+
+    ; map the VGA address into the virtual address space
+    mov edi, 0xB8000
+    shr edi, 9 ; (edi >> 12) * 8
+    add edi, pt
+    mov ebx, 0xB8000
+    or  ebx, 0x00000003
+    mov DWORD [edi], ebx
+
+    extern kernel_start ; defined in linker script
+    extern kernel_end
+    mov edi, kernel_start
+    shr edi, 9 ; (kernel_start >> 12) * 8
+    add edi, pt
+    mov ebx, kernel_start
+    or  ebx, 0x00000003
+    mov ecx, kernel_end ; determine kernel size in number of pages
+    sub ecx, kernel_start
+    shr ecx, 12
+    inc ecx
+
+L1:
+    mov DWORD [edi], ebx ; Set the double word at the destination index to EBX.
+    add edi, 8
+    add ebx, 0x1000
+    loop L1
+
+; we need to enable PAE mode
+    mov eax, cr4
+    or  eax, 1 << 5
+    mov cr4, eax
+
+; enable long mode: set the LME bit in the EFER MSR
+; (we run in compatibility mode until the far jump below)
+    mov ecx, 0xC0000080
+    rdmsr
+    or  eax, 1 << 8
+    wrmsr
+
+; enable paging
+    mov eax, cr0
+    or  eax, 1 << 31 | 1 << 0 ; Set the PG bit (bit 31) and the PE bit (bit 0).
+    mov cr0, eax
+
+    pop ebx ; restore pointer to multiboot structure
+    lgdt [GDT64.Pointer]   ; Load the 64-bit global descriptor table.
+    jmp GDT64.Code:start64 ; Set the code segment and enter 64-bit long mode.
+
+Linvalid:
+    jmp $
+
+[BITS 64]
+start64:
+; initialize segment registers
+    mov ax, GDT64.Data
+    mov ds, ax
+    mov es, ax
+    mov fs, ax
+    mov gs, ax
+    mov ss, ax
+; set default stack pointer
+    extern default_stack_pointer
+    mov rsp, default_stack_pointer
+; interpret multiboot information
+    extern multiboot_init
+    mov rdi, rbx
+    call multiboot_init
+; jump to the boot processor's C code
+    extern main
+    call main
+    jmp $
+
+global cpu_init
+cpu_init:
+;    mov eax, cr0
+; enable caching, disable paging and fpu emulation
+;    and eax, 0x1ffffffb
+; ...and turn on FPU exceptions
+;    or eax, 0x22
+;    mov cr0, eax
+; clear the current pgd entry
+;    xor eax, eax
+;    mov cr3, eax
+; at this stage, we disable the SSE support
+;    mov eax, cr4
+;    and eax, 0xfffbf9ff
+;    mov cr4, eax
+    ret ; the body above is still disabled; return explicitly instead of falling through
+
+; This will set up our new segment registers. We need to do
+; something special in order to set CS. We do what is called a
+; far jump: a jump that includes a segment as well as an offset.
+; This is declared in C as 'extern void gdt_flush();'
+global gdt_flush
+extern gp
+gdt_flush:
+;    lgdt [gp]
+;    mov ax, 0x10
+;    mov ds, ax
+;    mov es, ax
+;    mov fs, ax
+;    mov gs, ax
+;    mov ss, ax
+;    jmp 0x08:flush2
+flush2:
+    ret
+
+; determines the current instruction pointer (after the jmp)
+global read_eip
+read_eip:
+    pop rax ; Get the return address
+    jmp rax ; Return. Can't use RET, because the return
+            ; address was popped off the stack.
+
+; The Interrupt Service Routines (ISRs) and their stubs follow.
+global isr0
+global isr1
+global isr2
+global isr3
+global isr4
+global isr5
+global isr6
+global isr7
+global isr8
+global isr9
+global isr10
+global isr11
+global isr12
+global isr13
+global isr14
+global isr15
+global isr16
+global isr17
+global isr18
+global isr19
+global isr20
+global isr21
+global isr22
+global isr23
+global isr24
+global isr25
+global isr26
+global isr27
+global isr28
+global isr29
+global isr30
+global isr31
+global isrsyscall
+
+; 0: Divide By Zero Exception
+isr0:
+    ; isr0 - isr31 are registered as "Interrupt Gate"
+    ; Therefore, the interrupt flag (IF) is already cleared.
+ ; cli + push byte 0 ; pseudo error code + push byte 0 + jmp common_stub + +; 1: Debug Exception +isr1: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 1 + jmp common_stub + +; 2: Non Maskable Interrupt Exception +isr2: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 2 + jmp common_stub + +; 3: Int 3 Exception +isr3: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 3 + jmp common_stub + +; 4: INTO Exception +isr4: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 4 + jmp common_stub + +; 5: Out of Bounds Exception +isr5: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 5 + jmp common_stub + +; 6: Invalid Opcode Exception +isr6: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 6 + jmp common_stub + +; 7: Coprocessor Not Available Exception +isr7: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 7 + jmp common_stub + +; 8: Double Fault Exception (With Error Code!) +isr8: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 8 + jmp common_stub + +; 9: Coprocessor Segment Overrun Exception +isr9: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 9 + jmp common_stub + +; 10: Bad TSS Exception (With Error Code!) +isr10: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 10 + jmp common_stub + +; 11: Segment Not Present Exception (With Error Code!) +isr11: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 11 + jmp common_stub + +; 12: Stack Fault Exception (With Error Code!) +isr12: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 12 + jmp common_stub + +; 13: General Protection Fault Exception (With Error Code!) +isr13: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 13 + jmp common_stub + +; 14: Page Fault Exception (With Error Code!) +isr14: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 14 + jmp common_stub + +; 15: Reserved Exception +isr15: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 15 + jmp common_stub + +; 16: Floating Point Exception +isr16: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. 
+ ; cli + push byte 0 ; pseudo error code + push byte 16 + jmp common_stub + +; 17: Alignment Check Exception +isr17: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 17 + jmp common_stub + +; 18: Machine Check Exception +isr18: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 18 + jmp common_stub + +; 19: Reserved +isr19: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 19 + jmp common_stub + +; 20: Reserved +isr20: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 20 + jmp common_stub + +; 21: Reserved +isr21: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 21 + jmp common_stub + +; 22: Reserved +isr22: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 22 + jmp common_stub + +; 23: Reserved +isr23: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 23 + jmp common_stub + +; 24: Reserved +isr24: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 24 + jmp common_stub + +; 25: Reserved +isr25: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 25 + jmp common_stub + +; 26: Reserved +isr26: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 26 + jmp common_stub + +; 27: Reserved +isr27: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 27 + jmp common_stub + +; 28: Reserved +isr28: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 28 + jmp common_stub + +; 29: Reserved +isr29: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 29 + jmp common_stub + +; 30: Reserved +isr30: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 30 + jmp common_stub + +; 31: Reserved +isr31: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. 
+    ; cli
+    push byte 0 ; pseudo error code
+    push byte 31
+    jmp common_stub
+
+extern syscall_handler
+
+; used to implement system calls
+isrsyscall:
+    push r15
+    push r14
+    push r13
+    push r12
+    push r11
+    push r10
+    push r9
+    push r8
+    push rdi
+    push rsi
+    push rbp
+    push QWORD 0
+    push rbx
+    push rdx
+    push rcx
+    push rax
+
+    mov rdi, rsp
+    call syscall_handler
+
+    pop rax
+    pop rcx
+    pop rdx
+    pop rbx
+    add rsp, 1*8
+    pop rbp
+    pop rsi
+    pop rdi
+    pop r8
+    pop r9
+    pop r10
+    pop r11
+    pop r12
+    pop r13
+    pop r14
+    pop r15 ; restore r15 as well, it was pushed first
+    iretq   ; 64-bit interrupt return
+
+global irq0
+global irq1
+global irq2
+global irq3
+global irq4
+global irq5
+global irq6
+global irq7
+global irq8
+global irq9
+global irq10
+global irq11
+global irq12
+global irq13
+global irq14
+global irq15
+global irq16
+global irq17
+global irq18
+global irq19
+global irq20
+global irq21
+global irq22
+global irq23
+global apic_timer
+global apic_lint0
+global apic_lint1
+global apic_error
+global apic_svr
+
+; 32: IRQ0
+irq0:
+    ; irq0 - irq15 are registered as "Interrupt Gate"
+    ; Therefore, the interrupt flag (IF) is already cleared.
+    ; cli
+    push byte 0 ; pseudo error code
+    push byte 32
+    jmp common_stub
+
+; 33: IRQ1
+irq1:
+    ; irq0 - irq15 are registered as "Interrupt Gate"
+    ; Therefore, the interrupt flag (IF) is already cleared.
+    ; cli
+    push byte 0 ; pseudo error code
+    push byte 33
+    jmp common_stub
+
+; 34: IRQ2
+irq2:
+    ; irq0 - irq15 are registered as "Interrupt Gate"
+    ; Therefore, the interrupt flag (IF) is already cleared.
+    ; cli
+    push byte 0 ; pseudo error code
+    push byte 34
+    jmp common_stub
+
+; 35: IRQ3
+irq3:
+    ; irq0 - irq15 are registered as "Interrupt Gate"
+    ; Therefore, the interrupt flag (IF) is already cleared.
+    ; cli
+    push byte 0 ; pseudo error code
+    push byte 35
+    jmp common_stub
+
+; 36: IRQ4
+irq4:
+    ; irq0 - irq15 are registered as "Interrupt Gate"
+    ; Therefore, the interrupt flag (IF) is already cleared.
+    ; cli
+    push byte 0 ; pseudo error code
+    push byte 36
+    jmp common_stub
+
+; 37: IRQ5
+irq5:
+    ; irq0 - irq15 are registered as "Interrupt Gate"
+    ; Therefore, the interrupt flag (IF) is already cleared.
+    ; cli
+    push byte 0 ; pseudo error code
+    push byte 37
+    jmp common_stub
+
+; 38: IRQ6
+irq6:
+    ; irq0 - irq15 are registered as "Interrupt Gate"
+    ; Therefore, the interrupt flag (IF) is already cleared.
+    ; cli
+    push byte 0 ; pseudo error code
+    push byte 38
+    jmp common_stub
+
+; 39: IRQ7
+irq7:
+    ; irq0 - irq15 are registered as "Interrupt Gate"
+    ; Therefore, the interrupt flag (IF) is already cleared.
+    ; cli
+    push byte 0 ; pseudo error code
+    push byte 39
+    jmp common_stub
+
+; 40: IRQ8
+irq8:
+    ; irq0 - irq15 are registered as "Interrupt Gate"
+    ; Therefore, the interrupt flag (IF) is already cleared.
+    ; cli
+    push byte 0 ; pseudo error code
+    push byte 40
+    jmp common_stub
+
+; 41: IRQ9
+irq9:
+    ; irq0 - irq15 are registered as "Interrupt Gate"
+    ; Therefore, the interrupt flag (IF) is already cleared.
+    ; cli
+    push byte 0 ; pseudo error code
+    push byte 41
+    jmp common_stub
+
+; 42: IRQ10
+irq10:
+    ; irq0 - irq15 are registered as "Interrupt Gate"
+    ; Therefore, the interrupt flag (IF) is already cleared.
+    ; cli
+    push byte 0 ; pseudo error code
+    push byte 42
+    jmp common_stub
+
+; 43: IRQ11
+irq11:
+    ; irq0 - irq15 are registered as "Interrupt Gate"
+    ; Therefore, the interrupt flag (IF) is already cleared.
+ ; cli + push byte 0 ; pseudo error code + push byte 43 + jmp common_stub + +; 44: IRQ12 +irq12: + ; irq0 - irq15 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 44 + jmp common_stub + +; 45: IRQ13 +irq13: + ; irq0 - irq15 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 45 + jmp common_stub + +; 46: IRQ14 +irq14: + ; irq0 - irq15 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 46 + jmp common_stub + +; 47: IRQ15 +irq15: + ; irq0 - irq15 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 47 + jmp common_stub + +; 48: IRQ16 +irq16: + ; irq16 - irq23 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 48 + jmp common_stub + +; 49: IRQ17 +irq17: + ; irq16- irq23 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 49 + jmp common_stub + +; 50: IRQ18 +irq18: + ; irq16 - irq23 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 50 + jmp common_stub + +; 51: IRQ19 +irq19: + ; irq16 - irq23 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 51 + jmp common_stub + +; 52: IRQ20 +irq20: + ; irq16- irq23 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 52 + jmp common_stub + +; 53: IRQ21 +irq21: + ; irq16 - irq23 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; error code + push byte 53 + jmp common_stub + +; 54: IRQ22 +irq22: + ; irq16- irq23 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 54 + jmp common_stub + +; 55: IRQ23 +irq23: + ; irq16 - irq23 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 55 + jmp common_stub + +apic_timer: + ; apic timer is registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 123 + jmp common_stub + +apic_lint0: + ; lint0 is registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 124 + jmp common_stub + +apic_lint1: + ; lint1 is registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 125 + jmp common_stub + +apic_error: + ; LVT error interrupt is registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 126 + jmp common_stub + +apic_svr: + ; SVR is registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. 
+    ; cli
+    push byte 0 ; pseudo error code
+    push byte 127
+    jmp common_stub
+
+extern irq_handler
+extern get_current_stack
+extern finish_task_switch
+extern apic_cpu_id
+extern task_state_segments
+
+global switch_context
+ALIGN 8
+switch_context:
+    ; create a pseudo interrupt frame on the stack;
+    ; afterwards, we switch to the task with iretq
+    mov rax, [rdi]       ; rdi contains the address to store the old rsp
+    pushf                ; RFLAGS
+    push QWORD 0x8       ; CS
+    push QWORD rollback  ; RIP
+    push QWORD 0         ; Interrupt number
+    push QWORD 0x0edbabe ; Error code
+    push rax
+    push rcx
+    push rdx
+    push rbx
+    push QWORD 0
+    push rbp
+    push rsi
+    push rdi
+    push r8
+    push r9
+    push r10
+    push r11
+    push r12
+    push r13
+    push r14
+    push r15
+
+    jmp common_switch
+
+ALIGN 8
+rollback:
+    ret
+
+ALIGN 8
+common_stub:
+    push rax
+    push rcx
+    push rdx
+    push rbx
+    push QWORD 0
+    push rbp
+    push rsi
+    push rdi
+    push r8
+    push r9
+    push r10
+    push r11
+    push r12
+    push r13
+    push r14
+    push r15
+
+    ; use the same handler for interrupts and exceptions
+    mov rdi, rsp
+    call irq_handler
+
+    cmp rax, 0
+    je no_context_switch
+
+common_switch:
+    mov [rax], rsp         ; store old rsp
+    call get_current_stack ; get new rsp
+    xchg rax, rsp
+
+    ; determine TSS
+%if MAX_CORES > 1
+    call apic_cpu_id
+    mov rcx, QWORD 0x68 ; sizeof(tss_t)
+    mul rcx
+%else
+    xor rax, rax
+%endif
+    add eax, task_state_segments
+    ; set rsp0 in TSS
+    mov [rax+4], rsp
+
+    ; call cleanup code
+    call finish_task_switch
+
+no_context_switch:
+    pop r15
+    pop r14
+    pop r13
+    pop r12
+    pop r11
+    pop r10
+    pop r9
+    pop r8
+    pop rdi
+    pop rsi
+    pop rbp
+    add rsp, 1*8
+    pop rbx
+    pop rdx
+    pop rcx
+    pop rax
+
+    add rsp, 16 ; drop interrupt number and error code
+    iretq       ; 64-bit interrupt return
+
+SECTION .note.GNU-stack noalloc noexec nowrite progbits
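irq_handler receives RSP in RDI, i.e. a pointer to the register frame that common_stub just pushed. Because pushes grow the stack downward, the C view of that frame begins with the last register pushed. A sketch of the layout this push order implies (field names are illustrative; the actual 64-bit struct state is not part of this patch):

```c
/* Hypothetical sketch: the frame common_stub's push sequence produces,
 * as irq_handler sees it via RDI. The LAST register pushed (r15) sits
 * at the LOWEST address, i.e. first in the struct. */
#include <stdint.h>

struct state64 {
	/* pushed by common_stub, in reverse push order */
	uint64_t r15, r14, r13, r12, r11, r10, r9, r8;
	uint64_t rdi, rsi, rbp;
	uint64_t rsp_dummy; /* the `push QWORD 0` placeholder slot */
	uint64_t rbx, rdx, rcx, rax;
	/* pushed by the individual stubs */
	uint64_t int_no, error;
	/* pushed by the CPU on interrupt entry */
	uint64_t rip, cs, rflags, userrsp, ss;
};

_Static_assert(sizeof(struct state64) == 23 * 8, "frame is 23 quadwords");
```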
diff --git a/arch/x86/kernel/gdt.c b/arch/x86/kernel/gdt.c
index d946bb20..84251c3b 100644
--- a/arch/x86/kernel/gdt.c
+++ b/arch/x86/kernel/gdt.c
@@ -29,7 +29,7 @@ gdt_ptr_t gp;
 tss_t task_state_segments[MAX_CORES] __attribute__ ((aligned (PAGE_SIZE)));
 static unsigned char kstacks[MAX_TASKS][KERNEL_STACK_SIZE] __attribute__ ((aligned (PAGE_SIZE))) = {[0 ... MAX_TASKS-1][0 ... KERNEL_STACK_SIZE-1] = 0xCD};
-size_t default_stack_pointer = (size_t) kstacks[0] + KERNEL_STACK_SIZE - sizeof(size_t); 
+size_t default_stack_pointer = (size_t) kstacks[0] + KERNEL_STACK_SIZE - sizeof(size_t);
 
 // currently, our kernel has full access to the ioports
 static gdt_entry_t gdt[GDT_ENTRIES] = {[0 ... GDT_ENTRIES-1] = {0, 0, 0, 0, 0, 0}};
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 09392539..c5b4f064 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,4 +1,4 @@
-C_source := page.c svm.c
+C_source := page$(BIT).c svm.c
 MODULE := arch_x86_mm
 
 include $(TOPDIR)/Makefile.inc
diff --git a/arch/x86/mm/page.c b/arch/x86/mm/page32.c
similarity index 100%
rename from arch/x86/mm/page.c
rename to arch/x86/mm/page32.c
diff --git a/arch/x86/mm/page64.c b/arch/x86/mm/page64.c
new file mode 100644
index 00000000..5312b413
--- /dev/null
+++ b/arch/x86/mm/page64.c
@@ -0,0 +1,871 @@
+/*
+ * Copyright 2012 Stefan Lankes, Chair for Operating Systems,
+ *                RWTH Aachen University
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This file is part of MetalSVM.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#ifdef CONFIG_ROCKCREEK
+#include
+#include
+#include
+#include
+#endif
+
+/*
+ * Virtual Memory Layout of the standard configuration
+ * (1 GB kernel space)
+ *
+ * 0x00000000 - 0x000FFFFF: reserved for IO devices (1MB)
+ * 0x00100000 - 0x0DEADFFF: Kernel (size depends on the configuration) (~221MB)
+ * 0x0DEAE000 - 0x3FFFEFFF: Kernel heap (801MB)
+ * 0x3FFFF000 - 0x3FFFFFFF: Page Tables are mapped in this region (4KB)
+ *                          (The last 256 entries belong to kernel space)
+ */
+
+/*
+ * Note that linker symbols are not variables; they have no memory allocated
+ * for maintaining a value, rather their address is their value.
+ */
+extern const void kernel_start;
+extern const void kernel_end;
+
+// boot task's page directory and page directory lock
+static page_dir_t boot_pgd = {{[0 ... PGT_ENTRIES-1] = 0}};
+static spinlock_t kslock = SPINLOCK_INIT;
+//static int paging_enabled = 0;
+
+page_dir_t* get_boot_pgd(void)
+{
+	return &boot_pgd;
+}
+
+#if 0
+/*
+ * TODO: We create a full copy of the current task. Copy-on-access would be the better solution.
+ *
+ * No PGD locking is needed, because only create_pgd uses this function,
+ * and it already holds the PGD lock.
+ */ +inline static size_t copy_page_table(task_t* task, uint32_t pgd_index, page_table_t* pgt, int* counter) +{ + uint32_t i; + page_table_t* new_pgt; + size_t phyaddr; + +#ifdef CONFIG_X86_32 + if (BUILTIN_EXPECT(!pgt, 0)) + return 0; + + new_pgt = kmalloc(sizeof(page_table_t)); + if (!new_pgt) + return 0; + memset(new_pgt, 0x00, sizeof(page_table_t)); + if (counter) + (*counter)++; + + for(i=0; i<1024; i++) { + if (pgt->entries[i] & PAGE_MASK) { + if (!(pgt->entries[i] & PG_USER)) { + // Kernel page => copy only page entries + new_pgt->entries[i] = pgt->entries[i]; + continue; + } + + phyaddr = get_page(); + if (!phyaddr) + continue; + if (counter) + (*counter)++; + + copy_page_physical((void*)phyaddr, (void*) (pgt->entries[i] & PAGE_MASK)); + + new_pgt->entries[i] = phyaddr | (pgt->entries[i] & 0xFFF); + + atomic_int32_inc(&task->user_usage); + } + } + + phyaddr = virt_to_phys((size_t)new_pgt); + + return phyaddr; +#else +#warning Currently, not supported + return 0; +#endif +} +#endif + +int create_pgd(task_t* task, int copy) +{ +#if 0 + page_dir_t* pgd; + page_table_t* pgt; + page_table_t* pgt_container; + uint32_t i; + uint32_t index1, index2; + size_t viraddr, phyaddr; + int counter = 0; + task_t* curr_task = per_core(current_task); + + if (BUILTIN_EXPECT(!paging_enabled, 0)) + return -EINVAL; + + // we already know the virtual address of the "page table container" + // (see file header) + pgt_container = (page_table_t*) ((KERNEL_SPACE - PAGE_SIZE) & PAGE_MASK); + + // create new page directory for the new task + pgd = kmalloc(sizeof(page_dir_t)); + if (!pgd) + return -ENOMEM; + memset(pgd, 0x00, sizeof(page_dir_t)); + + // create a new "page table container" for the new task + pgt = kmalloc(sizeof(page_table_t)); + if (!pgt) { + kfree(pgd, sizeof(page_dir_t)); + return -ENOMEM; + } + memset(pgt, 0x00, sizeof(page_table_t)); + + spinlock_lock(&kslock); + + for(i=0; i<1024; i++) { + pgd->entries[i] = boot_pgd.entries[i]; + // only kernel entries will be copied + if (pgd->entries[i] && !(pgd->entries[i] & PG_USER)) + pgt->entries[i] = pgt_container->entries[i]; + } + + spinlock_unlock(&kslock); + + // map page table container at the end of the kernel space + viraddr = (KERNEL_SPACE - PAGE_SIZE) & PAGE_MASK; + index1 = viraddr >> 22; + index2 = (viraddr >> 12) & 0x3FF; + + // now, we create a self reference + pgd->entries[index1] = ((size_t) virt_to_phys((size_t) pgt) & PAGE_MASK)|KERN_TABLE; + pgt->entries[index2] = ((size_t) virt_to_phys((size_t) pgt) & PAGE_MASK)|KERN_PAGE; + + task->pgd = pgd; + + if (copy) { + spinlock_lock(&curr_task->pgd_lock); + + for (i=KERNEL_SPACE/(1024*PAGE_SIZE); i<1024; i++) { + if (!(curr_task->pgd->entries[i])) + continue; + if (!(curr_task->pgd->entries[i] & PG_USER)) + continue; + + phyaddr = copy_page_table(task, i, (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + i*PAGE_SIZE) & PAGE_MASK), &counter); + if (phyaddr) { + pgd->entries[i] = (phyaddr & PAGE_MASK) | (curr_task->pgd->entries[i] & 0xFFF); + pgt->entries[i] = (phyaddr & PAGE_MASK) | KERN_PAGE; + } + } + + spinlock_unlock(&curr_task->pgd_lock); + } + + return counter; +#endif + + return 0; +} + +/* + * drops all page frames and the PGD of a user task + */ +int drop_pgd(void) +{ +#if 0 + page_dir_t* pgd = per_core(current_task)->pgd; + size_t phy_pgd = virt_to_phys((size_t) pgd); + task_t* task = per_core(current_task); + uint32_t i; + + if (BUILTIN_EXPECT(pgd == &boot_pgd, 0)) + return -EINVAL; + + spinlock_lock(&task->pgd_lock); + + for(i=0; i<1024; i++) { + if (pgd->entries[i] & PG_USER) 
{
+			put_page(pgd->entries[i] & PAGE_MASK);
+			pgd->entries[i] = 0;
+		}
+	}
+
+	// freeing the page directory
+	put_page(phy_pgd);
+
+	task->pgd = NULL;
+
+	spinlock_unlock(&task->pgd_lock);
+#endif
+
+	return 0;
+}
+
+size_t virt_to_phys(size_t viraddr)
+{
+#if 0
+	task_t* task = per_core(current_task);
+	uint32_t index1, index2;
+	page_table_t* pgt;
+	size_t ret = 0;
+
+	if (!paging_enabled)
+		return viraddr;
+
+	if (BUILTIN_EXPECT(!task || !task->pgd, 0))
+		return 0;
+
+	spinlock_lock(&task->pgd_lock);
+
+	index1 = viraddr >> 22;
+	index2 = (viraddr >> 12) & 0x3FF;
+
+	if (!(task->pgd->entries[index1] & PAGE_MASK))
+		goto out;
+
+	pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
+	if (!pgt || !(pgt->entries[index2]))
+		goto out;
+
+	ret = pgt->entries[index2] & PAGE_MASK; // determine page frame
+	ret = ret | (viraddr & 0xFFF);          // add page offset
+out:
+	//kprintf("vir %p to phy %p\n", viraddr, ret);
+
+	spinlock_unlock(&task->pgd_lock);
+
+	return ret;
+#endif
+	return 0;
+}
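virt_to_phys above still carries the disabled two-level walk inherited from the 32-bit code. For reference, the 64-bit version will have to traverse four levels. A self-contained sketch (illustrative only: it ignores the NX bit and large pages, and assumes the tables are reachable at their physical addresses, as with the identity mapping built in entry64.asm):

```c
/* Illustrative 4-level page-table walk, not the patch's implementation. */
#include <stdint.h>
#include <stddef.h>

#define PAGE_BITS  12
#define INDEX_BITS 9
#define INDEX_MASK 0x1FFu
#define PG_PRESENT (1u << 0)

static size_t walk(const uint64_t *pml4, size_t viraddr)
{
	const uint64_t *table = pml4;
	int level;

	/* levels: PML4 (shift 39), PDPT (30), PD (21), PT (12) */
	for (level = 3; level >= 0; level--) {
		uint64_t entry = table[(viraddr >> (PAGE_BITS + level*INDEX_BITS)) & INDEX_MASK];

		if (!(entry & PG_PRESENT))
			return 0; /* not mapped */
		if (level == 0)
			return (size_t)((entry & ~0xFFFULL) | (viraddr & 0xFFF));

		table = (const uint64_t *)(uintptr_t)(entry & ~0xFFFULL);
	}
	return 0; /* not reached */
}
```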
+
+size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flags)
+{
+#if 0
+	task_t* task = per_core(current_task);
+	spinlock_t* pgd_lock;
+	page_table_t* pgt;
+	size_t index, i;
+	size_t ret;
+
+	if (BUILTIN_EXPECT(!task || !task->pgd, 0))
+		return 0;
+
+	if (BUILTIN_EXPECT(!paging_enabled && (viraddr != phyaddr), 0))
+		return 0;
+
+	if (flags & MAP_KERNEL_SPACE)
+		pgd_lock = &kslock;
+	else
+		pgd_lock = &task->pgd_lock;
+
+	spinlock_lock(pgd_lock);
+
+	if (!viraddr) {
+		viraddr = vm_alloc(npages, flags);
+		if (BUILTIN_EXPECT(!viraddr, 0)) {
+			spinlock_unlock(pgd_lock);
+			kputs("map_address: found no valid virtual address\n");
+			return 0;
+		}
+	}
+
+	ret = viraddr;
+	//kprintf("map %d pages from %p to %p\n", npages, phyaddr, ret);
+	for(i=0; i<npages; i++, viraddr+=PAGE_SIZE, phyaddr+=PAGE_SIZE) {
+		index = viraddr >> 22;
+
+		if (!(task->pgd->entries[index])) {
+			page_table_t* pgt_container;
+
+			pgt = (page_table_t*) get_pages(1);
+			if (BUILTIN_EXPECT(!pgt, 0)) {
+				spinlock_unlock(pgd_lock);
+				kputs("map_address: out of memory\n");
+				return 0;
+			}
+
+			// set the new page table into the directory
+			if (flags & MAP_USER_SPACE)
+				task->pgd->entries[index] = (size_t)pgt|USER_TABLE;
+			else
+				task->pgd->entries[index] = (size_t)pgt|KERN_TABLE;
+
+			// if paging is already enabled, we need to use the virtual address
+			if (paging_enabled)
+				// we already know the virtual address of the "page table container"
+				// (see file header)
+				pgt_container = (page_table_t*) ((KERNEL_SPACE - PAGE_SIZE) & PAGE_MASK);
+			else
+				pgt_container = (page_table_t*) (task->pgd->entries[(KERNEL_SPACE - PAGE_SIZE) >> 22] & PAGE_MASK);
+
+			if (BUILTIN_EXPECT(!pgt_container, 0)) {
+				spinlock_unlock(pgd_lock);
+				kputs("map_address: internal error\n");
+				return 0;
+			}
+
+			// map the new table into the address space of the kernel space
+			pgt_container->entries[index] = ((size_t) pgt)|KERN_PAGE;
+
+			// clear the page table
+			if (paging_enabled)
+				memset((void*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index*PAGE_SIZE) & PAGE_MASK), 0x00, PAGE_SIZE);
+			else
+				memset(pgt, 0x00, PAGE_SIZE);
+		} else pgt = (page_table_t*) (task->pgd->entries[index] & PAGE_MASK);
+
+		/* convert physical address to virtual */
+		if (paging_enabled)
+			pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index*PAGE_SIZE) & PAGE_MASK);
+
+		index = (viraddr >> 12) & 0x3FF;
+		if (pgt->entries[index] && !(flags & MAP_REMAP)) {
+			spinlock_unlock(pgd_lock);
+			kprintf("0x%x is already mapped\n", viraddr);
+			return 0;
+		}
+
+		if (flags & MAP_USER_SPACE)
+			pgt->entries[index] = USER_PAGE|(phyaddr & PAGE_MASK);
+		else
+			pgt->entries[index] = KERN_PAGE|(phyaddr & PAGE_MASK);
+
+		if (flags & MAP_NO_CACHE)
+			pgt->entries[index] |= PG_PCD;
+#ifdef CONFIG_ROCKCREEK
+		if (flags & MAP_MPE)
+			pgt->entries[index] |= PG_MPE;
+#endif
+		if (flags & MAP_SVM_STRONG)
+#ifndef SVM_WB
+			pgt->entries[index] |= PG_SVM_STRONG|PG_PWT;
+#else
+			pgt->entries[index] |= PG_SVM;
+#endif
+		if (flags & MAP_SVM_LAZYRELEASE)
+			pgt->entries[index] |= PG_SVM_LAZYRELEASE|PG_PWT;
+
+		if (flags & MAP_SVM_INIT)
+			pgt->entries[index] |= PG_SVM_INIT;
+
+		if (flags & MAP_NO_ACCESS)
+			pgt->entries[index] &= ~PG_PRESENT;
+
+		if (flags & MAP_WT)
+			pgt->entries[index] |= PG_PWT;
+
+		if (flags & MAP_USER_SPACE)
+			atomic_int32_inc(&task->user_usage);
+
+		tlb_flush_one_page(viraddr);
+	}
+
+	spinlock_unlock(pgd_lock);
+
+	return ret;
+#endif
+
+	return 0;
+}
+
+int change_page_permissions(size_t start, size_t end, uint32_t flags)
+{
+#if 0
+	uint32_t index1, index2, newflags;
+	size_t viraddr = start & PAGE_MASK;
+	size_t phyaddr;
+	page_table_t* pgt;
+	page_dir_t* pgd;
+	task_t* task = per_core(current_task);
+
+	if (BUILTIN_EXPECT(!paging_enabled, 0))
+		return -EINVAL;
+
+	pgd = per_core(current_task)->pgd;
+	if (BUILTIN_EXPECT(!pgd, 0))
+		return -EINVAL;
+
+	spinlock_lock(&task->pgd_lock);
+
+	while (viraddr < end)
+	{
+		index1 = viraddr >> 22;
+		index2 = (viraddr >> 12) & 0x3FF;
+
+		while ((viraddr < end) && (index2 < 1024)) {
+			pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
+			if (pgt && pgt->entries[index2]) {
+				phyaddr = pgt->entries[index2] & PAGE_MASK;
+				newflags = pgt->entries[index2] & 0xFFF; // get old flags
+
+				if (!(newflags & PG_SVM_INIT)) {
+					if ((newflags & PG_SVM_STRONG) && !(newflags & PG_PRESENT) && (flags & (VMA_READ|VMA_WRITE) && !(flags & VMA_NOACCESS)))
+						newflags |= PG_PRESENT;
+					else if ((newflags & PG_SVM_STRONG) && (newflags & PG_PRESENT) && (flags & VMA_NOACCESS))
+						newflags &= ~PG_PRESENT;
+				}
+
+				// update flags
+				if (!(flags & VMA_WRITE)) {
+					newflags &= ~PG_RW;
+#ifdef CONFIG_ROCKCREEK
+					if (newflags & (PG_SVM_STRONG|PG_SVM_LAZYRELEASE))
+						newflags &= ~PG_MPE;
+#endif
+				} else {
+					newflags |= PG_RW;
+#ifdef CONFIG_ROCKCREEK
+					if (newflags & (PG_SVM_STRONG|PG_SVM_LAZYRELEASE))
+						newflags |= PG_MPE;
+#endif
+				}
+
+				pgt->entries[index2] = (newflags & 0xFFF) | (phyaddr & PAGE_MASK);
+
+				tlb_flush_one_page(viraddr);
+			}
+
+			index2++;
+			viraddr += PAGE_SIZE;
+		}
+	}
+
+	spinlock_unlock(&task->pgd_lock);
+#endif
+
+	return 0;
+}
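vm_alloc below scans the page tables with a first-fit strategy: it counts consecutive free entries and restarts the count behind every used one, which is the O(n) behavior the TODO in the file mentions. The same logic, condensed over a plain bitmap (illustrative only, not part of the patch):

```c
/* Condensed first-fit search, mirroring vm_alloc's counter/restart
 * logic over a bitmap where 1 means "page in use". */
#include <stdint.h>
#include <stddef.h>

static size_t first_fit(const uint8_t *used, size_t total, size_t npages)
{
	size_t start = 0, run = 0, i;

	for (i = 0; i < total && run < npages; i++) {
		if (!used[i]) {
			run++;          /* extend the current free run */
		} else {
			run = 0;        /* restart the search behind the hit */
			start = i + 1;
		}
	}
	return (run >= npages) ? start : (size_t)-1; /* -1: no hole found */
}
```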
+
+/*
+ * Use the first-fit algorithm to find a valid address range
+ *
+ * TODO: O(n) => bad performance, we need a better approach
+ */
+size_t vm_alloc(uint32_t npages, uint32_t flags)
+{
+#if 0
+	task_t* task = per_core(current_task);
+	spinlock_t* pgd_lock;
+	uint32_t index1, index2, j;
+	size_t viraddr, i, ret = 0;
+	size_t start, end;
+	page_table_t* pgt;
+
+	if (BUILTIN_EXPECT(!task || !task->pgd || !paging_enabled, 0))
+		return 0;
+
+	if (flags & MAP_KERNEL_SPACE) {
+		pgd_lock = &kslock;
+		start = (((size_t) &kernel_end) + PAGE_SIZE) & PAGE_MASK;
+		end = (KERNEL_SPACE - 2*PAGE_SIZE) & PAGE_MASK; // we need 1 PAGE for our PGTs
+	} else {
+		pgd_lock = &task->pgd_lock;
+		start = KERNEL_SPACE & PAGE_MASK;
+		end = PAGE_MASK;
+	}
+
+	if (BUILTIN_EXPECT(!npages, 0))
+		return 0;
+
+	spinlock_lock(pgd_lock);
+
+	viraddr = i = start;
+	j = 0;
+	do {
+		index1 = i >> 22;
+		index2 = (i >> 12) & 0x3FF;
+
+		pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
+		if (!pgt || !(pgt->entries[index2])) {
+			i += PAGE_SIZE;
+			j++;
+		} else {
+			// restart search
+			j = 0;
+			viraddr = i + PAGE_SIZE;
+			i = i + PAGE_SIZE;
+		}
+	} while((j < npages) && (i <= end));
+
+	if ((j >= npages) && (viraddr < end))
+		ret = viraddr;
+
+	spinlock_unlock(pgd_lock);
+
+	return ret;
+#endif
+	return 0;
+}
+
+int unmap_region(size_t viraddr, uint32_t npages)
+{
+#if 0
+	task_t* task = per_core(current_task);
+	spinlock_t* pgd_lock;
+	uint32_t i;
+	uint32_t index1, index2;
+	page_table_t* pgt;
+
+	if (BUILTIN_EXPECT(!task || !task->pgd || !paging_enabled, 0))
+		return -EINVAL;
+
+	if (viraddr <= KERNEL_SPACE)
+		pgd_lock = &kslock;
+	else
+		pgd_lock = &task->pgd_lock;
+
+	spinlock_lock(pgd_lock);
+
+	for(i=0; i<npages; i++, viraddr+=PAGE_SIZE) {
+		index1 = viraddr >> 22;
+		index2 = (viraddr >> 12) & 0x3FF;
+
+		pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
+		if (!pgt)
+			continue;
+		pgt->entries[index2] &= ~PG_PRESENT;
+
+		if (viraddr > KERNEL_SPACE)
+			atomic_int32_dec(&task->user_usage);
+
+		tlb_flush_one_page(viraddr);
+	}
+
+	spinlock_unlock(pgd_lock);
+#endif
+
+	return 0;
+}
+
+int vm_free(size_t viraddr, uint32_t npages)
+{
+#if 0
+	task_t* task = per_core(current_task);
+	spinlock_t* pgd_lock;
+	uint32_t i;
+	uint32_t index1, index2;
+	page_table_t* pgt;
+
+	if (BUILTIN_EXPECT(!task || !task->pgd || !paging_enabled, 0))
+		return -EINVAL;
+
+	if (viraddr <= KERNEL_SPACE)
+		pgd_lock = &kslock;
+	else
+		pgd_lock = &task->pgd_lock;
+
+	spinlock_lock(pgd_lock);
+
+	for(i=0; i<npages; i++, viraddr+=PAGE_SIZE) {
+		index1 = viraddr >> 22;
+		index2 = (viraddr >> 12) & 0x3FF;
+
+		pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
+		if (!pgt)
+			continue;
+		pgt->entries[index2] = 0;
+
+		tlb_flush_one_page(viraddr);
+	}
+
+	spinlock_unlock(pgd_lock);
+#endif
+
+	return 0;
+}
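The disabled pagefault_handler below implements demand paging for the heap: a fault inside the task's heap range silently gets a fresh zeroed frame; everything else escalates to the fatal default handler. Its happy path, condensed into one helper (names follow the surrounding file and it assumes that kernel context; illustrative only, not part of the patch):

```c
/* Condensed sketch of the demand-paging path in pagefault_handler. */
static int handle_heap_fault(task_t* task, size_t viraddr)
{
	size_t phyaddr;

	if (viraddr < task->start_heap || viraddr > task->end_heap)
		return -1;                        /* not a lazy-heap fault */

	viraddr &= PAGE_MASK;                     /* round down to the page */
	phyaddr = get_page();                     /* grab a physical frame */
	if (!phyaddr)
		return -1;

	if (map_region(viraddr, phyaddr, 1, MAP_USER_SPACE) != viraddr) {
		put_page(phyaddr);                /* mapping failed, give it back */
		return -1;
	}

	memset((void*) viraddr, 0x00, PAGE_SIZE); /* hand out zeroed memory */
	return 0;
}
```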
+
+#if 0
+int print_paging_tree(size_t viraddr)
+{
+	task_t* task = per_core(current_task);
+	uint32_t index1, index2;
+	page_dir_t* pgd = NULL;
+	page_table_t* pgt = NULL;
+
+	if (BUILTIN_EXPECT(!viraddr, 0))
+		return -EINVAL;
+
+	index1 = viraddr >> 22;
+	index2 = (viraddr >> 12) & 0x3FF;
+
+	spinlock_lock(&task->pgd_lock);
+
+	kprintf("Paging dump of address 0x%x\n", viraddr);
+	pgd = task->pgd;
+	kprintf("\tPage directory entry %u: ", index1);
+	if (pgd) {
+		kprintf("0x%0x\n", pgd->entries[index1]);
+		pgt = (page_table_t*) (pgd->entries[index1] & PAGE_MASK);
+	} else
+		kputs("invalid page directory\n");
+
+	/* convert physical address to virtual */
+	if (paging_enabled && pgt)
+		pgt = (page_table_t*) (KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE);
+
+	kprintf("\tPage table entry %u: ", index2);
+	if (pgt)
+		kprintf("0x%x\n", pgt->entries[index2]);
+	else
+		kputs("invalid page table\n");
+
+	spinlock_unlock(&task->pgd_lock);
+
+	return 0;
+}
+
+static void pagefault_handler(struct state *s)
+{
+	task_t* task = per_core(current_task);
+	page_dir_t* pgd = task->pgd;
+	page_table_t* pgt = NULL;
+	size_t viraddr = read_cr2();
+	size_t phyaddr;
+#ifdef CONFIG_ROCKCREEK
+	uint32_t index1, index2;
+#endif
+
+	if ((viraddr >= task->start_heap) && (viraddr <= task->end_heap) && (viraddr > KERNEL_SPACE)) {
+		viraddr = viraddr & PAGE_MASK;
+
+		phyaddr = get_page();
+		if (BUILTIN_EXPECT(!phyaddr, 0))
+			goto default_handler;
+
+		if (map_region(viraddr, phyaddr, 1, MAP_USER_SPACE) == viraddr) {
+			memset((void*) viraddr, 0x00, PAGE_SIZE);
+			return;
+		}
+
+		kprintf("Could not map 0x%x at 0x%x\n", phyaddr, viraddr);
+		put_page(phyaddr);
+	}
+
+#ifdef CONFIG_ROCKCREEK
+	// does our SVM system need to handle this page fault?
+	index1 = viraddr >> 22;
+	index2 = (viraddr >> 12) & 0x3FF;
+	if (!pgd || !(pgd->entries[index1] & PAGE_MASK))
+		goto default_handler;
+	pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
+	if (!pgt || !(pgt->entries[index2]))
+		goto default_handler;
+	if (pgt->entries[index2] & PG_SVM_INIT) {
+		if (BUILTIN_EXPECT(!svm_alloc_page(viraddr, pgt), 1))
+			return;
+		else
+			goto default_handler;
+	}
+	if (pgt->entries[index2] & PG_SVM_STRONG)
+		if (BUILTIN_EXPECT(!svm_access_request(viraddr), 1))
+			return;
+#endif
+
+default_handler:
+	kprintf("PAGE FAULT: Task %u got page fault at %p (irq %d, cs:eip 0x%x:0x%x)\n", task->id, viraddr, s->int_no, s->cs, s->eip);
+	kprintf("Register state: eax = 0x%x, ebx = 0x%x, ecx = 0x%x, edx = 0x%x, edi = 0x%x, esi = 0x%x, ebp = 0x%x, esp = 0x%x\n",
+		s->eax, s->ebx, s->ecx, s->edx, s->edi, s->esi, s->ebp, s->esp);
+
+	irq_enable();
+	abort();
+}
+#endif
+
+int arch_paging_init(void)
+{
+#if 0
+	uint32_t i, npages, index1, index2;
+	page_table_t* pgt;
+	size_t viraddr;
+
+	// uninstall default handler and install our own
+	irq_uninstall_handler(14);
+	irq_install_handler(14, pagefault_handler);
+
+	// Create a page table to reference the other page tables
+	pgt = (page_table_t*) get_page();
+	if (!pgt) {
+		kputs("arch_paging_init: Not enough memory!\n");
+		return -ENOMEM;
+	}
+	memset(pgt, 0, PAGE_SIZE);
+
+	// map this table at the end of the kernel space
+	viraddr = KERNEL_SPACE - PAGE_SIZE;
+	index1 = viraddr >> 22;
+	index2 = (viraddr >> 12) & 0x3FF;
+
+	// now, we create a self reference
+	per_core(current_task)->pgd->entries[index1] = (((size_t) pgt) & PAGE_MASK)|KERN_TABLE;
+	pgt->entries[index2] = ((size_t) pgt & PAGE_MASK)|KERN_PAGE;
+
+	// create the other PGTs for the kernel space
+	for(i=0; i<KERNEL_SPACE/(1024*PAGE_SIZE)-1; i++) {
+		size_t phyaddr = get_page();
+
+		if (!phyaddr)
+			return -ENOMEM;
+		memset((void*) phyaddr, 0x00, PAGE_SIZE);
+		per_core(current_task)->pgd->entries[i] = (phyaddr & PAGE_MASK)|KERN_TABLE;
+		pgt->entries[i] = (phyaddr & PAGE_MASK)|KERN_PAGE;
+	}
+
+	/*
+	 * Set the page table and page directory entries for the kernel. We map the kernel's physical address
+	 * to the same virtual address.
+	 */
+	npages = ((size_t) &kernel_end - (size_t) &kernel_start) >> PAGE_SHIFT;
+	if ((size_t)&kernel_end & (PAGE_SIZE-1))
+		npages++;
+	map_region((size_t)&kernel_start, (size_t)&kernel_start, npages, MAP_KERNEL_SPACE);
+
+#if MAX_CORES > 1
+	// Reserve page for smp boot code
+	if (!map_region(SMP_SETUP_ADDR, SMP_SETUP_ADDR, 1, MAP_KERNEL_SPACE|MAP_NO_CACHE)) {
+		kputs("could not reserve page for smp boot code\n");
+		return -ENOMEM;
+	}
+#endif
+
+#ifdef CONFIG_VGA
+	// map the video memory into the kernel space
+	map_region(VIDEO_MEM_ADDR, VIDEO_MEM_ADDR, 1, MAP_KERNEL_SPACE|MAP_NO_CACHE);
+#endif
+
+#ifdef CONFIG_MULTIBOOT
+	/*
+	 * Of course, mb_info has to be mapped into the kernel space.
+	 */
+	if (mb_info)
+		map_region((size_t) mb_info & PAGE_MASK, (size_t) mb_info & PAGE_MASK, 1, MAP_KERNEL_SPACE);
+
+#if 0
+	/*
+	 * Map reserved memory regions into the kernel space
+	 */
+	if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MEM_MAP)) {
+		multiboot_memory_map_t* mmap = (multiboot_memory_map_t*) mb_info->mmap_addr;
+		multiboot_memory_map_t* mmap_end = (void*) ((size_t) mb_info->mmap_addr + mb_info->mmap_length);
+
+		while (mmap < mmap_end) {
+			if (mmap->type != MULTIBOOT_MEMORY_AVAILABLE) {
+				npages = mmap->len / PAGE_SIZE;
+				if ((mmap->addr+mmap->len) % PAGE_SIZE)
+					npages++;
+				map_region(mmap->addr, mmap->addr, npages, MAP_KERNEL_SPACE|MAP_NO_CACHE);
+			}
+			mmap++;
+		}
+	}
+#endif
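Throughout this file, page counts are derived with a shift plus a conditional increment. For a page-aligned start address that equals the usual round-up formula; note that the code's variant rounds on the end address, so for unaligned module starts the two can differ by one page. A tiny self-contained check of the closed form (illustrative, not part of the patch):

```c
/* Round a byte length up to whole pages, as the kernel does piecewise. */
#include <assert.h>
#include <stddef.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1u << PAGE_SHIFT)

static size_t npages_of(size_t len)
{
	return (len + PAGE_SIZE - 1) >> PAGE_SHIFT; /* round up */
}

int main(void)
{
	assert(npages_of(0) == 0);
	assert(npages_of(1) == 1);
	assert(npages_of(PAGE_SIZE) == 1);
	assert(npages_of(PAGE_SIZE + 1) == 2);
	return 0;
}
```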
+	/*
+	 * Modules like the initial ramdisk are already loaded.
+	 * Therefore, we map these modules into the kernel space.
+	 */
+	if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MODS)) {
+		multiboot_module_t* mmodule = (multiboot_module_t*) ((size_t) mb_info->mods_addr);
+
+		npages = mb_info->mods_count * sizeof(multiboot_module_t) >> PAGE_SHIFT;
+		if (mb_info->mods_count * sizeof(multiboot_module_t) & (PAGE_SIZE-1))
+			npages++;
+		map_region((size_t) (mb_info->mods_addr), (size_t) (mb_info->mods_addr), npages, MAP_KERNEL_SPACE);
+
+		for(i=0; i<mb_info->mods_count; i++, mmodule++) {
+			// map physical address to the same virtual address
+			npages = (mmodule->mod_end - mmodule->mod_start) >> PAGE_SHIFT;
+			if (mmodule->mod_end & (PAGE_SIZE-1))
+				npages++;
+			map_region((size_t) (mmodule->mod_start), (size_t) (mmodule->mod_start), npages, MAP_KERNEL_SPACE);
+		}
+	}
+#endif
+
+#ifdef CONFIG_ROCKCREEK
+	// map SCC's bootinfo
+	viraddr = map_region(SCC_BOOTINFO, SCC_BOOTINFO, 1, MAP_KERNEL_SPACE);
+	kprintf("Map SCC's bootinfo at 0x%x\n", viraddr);
+
+	// map SCC's configuration registers
+	viraddr = map_region(CRB_X0_Y0, CRB_X0_Y0, (CRB_OWN-CRB_X0_Y0+16*1024*1024) >> PAGE_SHIFT, MAP_KERNEL_SPACE|MAP_NO_CACHE);
+	kprintf("Map configuration registers at 0x%x\n", viraddr);
+
+	// map SCC's message passing buffers
+	viraddr = map_region(MPB_X0_Y0, MPB_X0_Y0, (MPB_OWN-MPB_X0_Y0+16*1024*1024) >> PAGE_SHIFT, MAP_KERNEL_SPACE|MAP_MPE);
+	kprintf("Map message passing buffers at 0x%x\n", viraddr);
+
+	// map the FPGA registers
+	viraddr = map_region(FPGA_BASE, FPGA_BASE, 0x10000 >> PAGE_SHIFT, MAP_KERNEL_SPACE|MAP_NO_CACHE);
+	kprintf("Map FPGA registers at 0x%x\n", viraddr);
+#endif
+
+	/* enable paging */
+	write_cr3((size_t) &boot_pgd);
+	i = read_cr0();
+	i = i | (1 << 31);
+	write_cr0(i);
+	paging_enabled = 1;
+
+#ifdef CONFIG_ROCKCREEK
+	// map the initial ramdisk
+	npages = bootinfo->size >> PAGE_SHIFT;
+	if (bootinfo->size & (PAGE_SIZE-1))
+		npages++;
+	viraddr = map_region(0, bootinfo->addr, npages, MAP_KERNEL_SPACE);
+	kprintf("Map initrd from 0x%x to 0x%x (size %u bytes)\n", bootinfo->addr, viraddr, bootinfo->size);
+	bootinfo->addr = viraddr;
+#endif
+
+	/*
+	 * we turned on paging
+	 * => now, we are able to register our task for Task State Switching
+	 */
+	register_task(per_core(current_task));
+
+	// map the APIC registers into the kernel address space
+	map_apic();
+#endif
+
+	return 0;
+}
diff --git a/kernel/main.c b/kernel/main.c
index 62753e41..f1017ef1 100644
--- a/kernel/main.c
+++ b/kernel/main.c
@@ -62,12 +62,17 @@ int main(void)
 {
 	tid_t id;
 
-	lowlevel_init();
+
+vga_init();
+vga_puts("aaa");
+	//lowlevel_init();
 
 	pushbg(COL_BLUE);
 	kprintf("This is MetalSVM %s Build %u, %u\n", METALSVM_VERSION, &__BUILD_DATE, &__BUILD_TIME);
 	popbg();
+
+while(1);
 	system_init();
 	irq_init();
 	timer_init();
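A closing note on the TSS arithmetic in common_switch (entry64.asm above): the stubs scale the CPU id by 0x68 and then write the new kernel stack to [rax+4]. Both constants follow from the hardware layout of the 64-bit TSS, which is 104 (0x68) bytes with rsp0 at byte offset 4. A sketch of that layout, assuming gdt.c's tss_t matches the hardware format (field names illustrative, not taken from the patch):

```c
/* Hardware layout of the 64-bit TSS; explains the 0x68 scale factor
 * and the [rax+4] store in common_switch. */
#include <stdint.h>

struct tss64 {
	uint32_t reserved0;    /* offset 0x00 */
	uint64_t rsp0;         /* offset 0x04: stack pointer for ring 0 */
	uint64_t rsp1, rsp2;   /* 0x0C, 0x14 */
	uint64_t reserved1;    /* 0x1C */
	uint64_t ist[7];       /* 0x24 - 0x5B: interrupt stack table */
	uint64_t reserved2;    /* 0x5C */
	uint16_t reserved3;    /* 0x64 */
	uint16_t iomap_base;   /* 0x66 */
} __attribute__((packed));

_Static_assert(sizeof(struct tss64) == 0x68, "matches the asm constant");
```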