add alpha version of x64 support

New features:
- support for kernel tasks in 64bit mode
- support for LwIP in 64bit mode

Missing features in 64bit mode:
- user-level support
- APIC support (and, as a consequence, SMP support)

To create a 64bit version of the MetalSVM kernel, the compiler flags “-m64 -mno-red-zone” and the assembler flag “-felf64” have to be used (see the Makefile changes below). Please use qemu-system-x86_64 as the test platform.

Note that metalsvm.elf is a 32bit ELF file. However, it contains (besides the startup code) only 64bit code. This is required because GRUB doesn’t boot 64bit ELF kernels. Therefore, the flag “-M x86-64” has to be used when disassembling via objdump.
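
For reference, the build and inspection steps might look as follows. This is only a sketch: the Makefile edits mirror the comments added in the diff below, and the QEMU invocation is an assumed example, not part of this commit:

    # in the Makefile: set BIT=64, NASMFLAGS = -felf64 ..., CFLAGS = -g -m64 -mno-red-zone ..., QEMU = qemu-system-x86_64
    make
    qemu-system-x86_64 -kernel metalsvm.elf    # assumed invocation for testing
    objdump -d -M x86-64 metalsvm.elf          # disassemble the 64bit code inside the 32bit ELF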
Stefan Lankes 2012-06-10 08:05:24 +02:00
parent ed2186ee03
commit 227cc19890
23 changed files with 517 additions and 660 deletions

View file

@@ -1,5 +1,6 @@
 TOPDIR = $(shell pwd)
 ARCH = x86
+# For 64bit support, you have to define BIT as 64
 BIT=32
 NAME = metalsvm
 LWIPDIRS = lwip/src/arch lwip/src/api lwip/src/core lwip/src/core/ipv4 lwip/src/netif
@@ -32,23 +33,29 @@ READELF_FOR_TARGET = $(CROSSCOMPREFIX)readelf
 MAKE = make
 RM = rm -rf
 NASM = nasm
+# For 64bit code, you have to use qemu-system-x86_64
 QEMU = qemu-system-i386
 EMU = qemu
 GDB = gdb
+# For 64bit support, you have to define -felf64 instead of -felf32
 NASMFLAGS = -felf32 -g -i$(TOPDIR)/include/metalsvm/
 INCLUDE = -I$(TOPDIR)/include -I$(TOPDIR)/arch/$(ARCH)/include -I$(TOPDIR)/lwip/src/include -I$(TOPDIR)/lwip/src/include/ipv4 -I$(TOPDIR)/drivers
+# For 64bit support, you have to define "-m64 -mno-red-zone" instead of "-m32 -march=i586"
 # Compiler options for final code
-CFLAGS = -g -m32 -march=i586 -Wall -O2 -fno-builtin -fstrength-reduce -fomit-frame-pointer -finline-functions -nostdinc $(INCLUDE) $(STACKPROT)
+CFLAGS = -g -m32 -march=i586 -Wall -O2 -fstrength-reduce -fomit-frame-pointer -finline-functions -ffreestanding $(INCLUDE) $(STACKPROT)
 # Compiler options for debugging
-#CFLAGS = -g -O -m32 -march=i586 -Wall -fno-builtin -DWITH_FRAME_POINTER -nostdinc $(INCLUDE) $(STACKPROT)
+#CFLAGS = -g -O -m32 -march=i586 -Wall -DWITH_FRAME_POINTER -ffreestanding $(INCLUDE) $(STACKPROT)
 ARFLAGS = rsv
 LDFLAGS = -T link$(BIT).ld -z max-page-size=4096 --defsym __BUILD_DATE=$(shell date +'%Y%m%d') --defsym __BUILD_TIME=$(shell date +'%H%M%S')
 STRIP_DEBUG = --strip-debug
 KEEP_DEBUG = --only-keep-debug
 OUTPUT_FORMAT = -O elf32-i386
+# For 64bit support, you have to define -m64 instead of "-m32 -march=i586"
 CFLAGS_FOR_NEWLIB = -m32 -march=i586 -O2 $(STACKPROT)
+# For 64bit support, you have to define -m64 instead of "-m32 -march=i586"
 LDFLAGS_FOR_NEWLIB = -m32 -march=i586
+# For 64bit support, you have to define -felf64 instead of -felf32
 NASMFLAGS_FOR_NEWLIB = -felf32
 # Prettify output

View file

@ -36,35 +36,37 @@ extern "C" {
#endif #endif
/// This segment is a data segment /// This segment is a data segment
#define GDT_FLAG_DATASEG 0x02 #define GDT_FLAG_DATASEG 0x02
/// This segment is a code segment /// This segment is a code segment
#define GDT_FLAG_CODESEG 0x0a #define GDT_FLAG_CODESEG 0x0a
#define GDT_FLAG_TSS 0x09 #define GDT_FLAG_TSS 0x09
#define GDT_FLAG_TSS_BUSY 0x02 #define GDT_FLAG_TSS_BUSY 0x02
#define GDT_FLAG_SEGMENT 0x10 #define GDT_FLAG_SEGMENT 0x10
/// Privilege level: Ring 0 /// Privilege level: Ring 0
#define GDT_FLAG_RING0 0x00 #define GDT_FLAG_RING0 0x00
/// Privilege level: Ring 1 /// Privilege level: Ring 1
#define GDT_FLAG_RING1 0x20 #define GDT_FLAG_RING1 0x20
/// Privilege level: Ring 2 /// Privilege level: Ring 2
#define GDT_FLAG_RING2 0x40 #define GDT_FLAG_RING2 0x40
/// Privilege level: Ring 3 /// Privilege level: Ring 3
#define GDT_FLAG_RING3 0x60 #define GDT_FLAG_RING3 0x60
/// Segment is present /// Segment is present
#define GDT_FLAG_PRESENT 0x80 #define GDT_FLAG_PRESENT 0x80
/** /**
* @brief Granularity of segment limit * @brief Granularity of segment limit
* - set: segment limit unit is 4 KB (page size) * - set: segment limit unit is 4 KB (page size)
* - not set: unit is bytes * - not set: unit is bytes
*/ */
#define GDT_FLAG_4K_GRAN 0x80 #define GDT_FLAG_4K_GRAN 0x80
/** /**
* @brief Default operand size * @brief Default operand size
* - set: 32 bit * - set: 32 bit
* - not set: 16 bit * - not set: 16 bit
*/ */
#define GDT_FLAG_32_BIT 0x40 #define GDT_FLAG_16_BIT 0x00
#define GDT_FLAG_32_BIT 0x40
#define GDT_FLAG_64_BIT 0x20
/** @brief Defines a GDT entry /** @brief Defines a GDT entry
* *
@ -74,17 +76,17 @@ extern "C" {
*/ */
typedef struct { typedef struct {
/// Lower 16 bits of limit range /// Lower 16 bits of limit range
unsigned short limit_low; uint16_t limit_low;
/// Lower 16 bits of base address /// Lower 16 bits of base address
unsigned short base_low; uint16_t base_low;
/// middle 8 bits of base address /// middle 8 bits of base address
unsigned char base_middle; uint8_t base_middle;
/// Access bits /// Access bits
unsigned char access; uint8_t access;
/// Granularity bits /// Granularity bits
unsigned char granularity; uint8_t granularity;
/// Higher 8 bits of base address /// Higher 8 bits of base address
unsigned char base_high; uint8_t base_high;
} __attribute__ ((packed)) gdt_entry_t; } __attribute__ ((packed)) gdt_entry_t;
/** @brief defines the GDT pointer structure /** @brief defines the GDT pointer structure
@ -93,9 +95,9 @@ typedef struct {
*/ */
typedef struct { typedef struct {
/// Size of the table in bytes (not the number of entries!) /// Size of the table in bytes (not the number of entries!)
unsigned short limit; uint16_t limit;
/// Address of the table /// Address of the table
unsigned int base; size_t base;
} __attribute__ ((packed)) gdt_ptr_t; } __attribute__ ((packed)) gdt_ptr_t;
/// Defines the maximum number of GDT entries /// Defines the maximum number of GDT entries
@ -115,16 +117,6 @@ typedef struct {
*/ */
void gdt_install(void); void gdt_install(void);
/** @brief Configures and returns a GDT descriptor with chosen attributes
*
* Just feed this function with address, limit and the flags
* you have seen in idt.h
*
* @return a preconfigured gdt descriptor
*/
gdt_entry_t configure_gdt_entry(unsigned long base, unsigned long limit,
unsigned char access, unsigned char gran);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

View file

@@ -71,15 +71,21 @@
  */
 typedef struct {
     /// Handler function's lower 16 address bits
-    unsigned short base_lo;
+    uint16_t base_lo;
     /// Handler function's segment selector.
-    unsigned short sel;
+    uint16_t sel;
     /// These bits are reserved by Intel
-    unsigned char always0;
+    uint8_t always0;
     /// These 8 bits contain flags. Exact use depends on the type of interrupt gate.
-    unsigned char flags;
+    uint8_t flags;
     /// Higher 16 bits of handler function's base address
-    unsigned short base_hi;
+    uint16_t base_hi;
+#ifdef CONFIG_X86_64
+    /// In 64 bit mode, the "highest" 32 bits of the handler function's base address
+    uint32_t base_hi64;
+    /// reserved entries
+    uint32_t reserved;
+#endif
 } __attribute__ ((packed)) idt_entry_t;

 /** @brief Defines the idt pointer structure.
@@ -89,9 +95,9 @@ typedef struct {
  */
 typedef struct {
     /// Size of the IDT in bytes (not the number of entries!)
-    unsigned short limit;
+    uint16_t limit;
     /// Base address of the IDT
-    unsigned int base;
+    size_t base;
 } __attribute__ ((packed)) idt_ptr_t;

 /** @brief Installs IDT
@@ -113,16 +119,6 @@ void idt_install(void);
 void idt_set_gate(unsigned char num, size_t base, unsigned short sel,
         unsigned char flags);

-/** @brief Configures and returns a IDT entry with chosen attributes
- *
- * Just feed this function with base, selector and the flags
- * you have seen in idt.h
- *
- * @return a preconfigured idt descriptor
- */
-idt_entry_t configure_idt_entry(size_t base, unsigned short sel,
-        unsigned char flags);
-
 #ifdef __cplusplus
 }
 #endif

View file

@@ -93,7 +93,7 @@
 /** @brief Page table structure
  *
  * This structure keeps page table entries.\n
- * A page table consists of 1024 entries.
+ * On a 32bit system, a page table normally consists of 1024 entries.
  */
 typedef struct page_table
 {
@@ -104,7 +104,7 @@ typedef struct page_table
 /** @brief Page directory structure
  *
  * This structure keeps page directory entries.\
- * A page directory consists of 1024 entries.
+ * On a 32bit system, a page directory normally consists of 1024 entries.
  */
 typedef struct page_dir
 {

View file

@@ -335,7 +335,9 @@ uint32_t read_eip(void);
 inline static int system_init(void)
 {
     gdt_install();
+#ifdef CONFIG_X86_32
     apic_init();
+#endif
 #ifdef CONFIG_PCI
     pci_init();
 #endif

View file

@@ -85,33 +85,79 @@ typedef unsigned int wint_t;
  * All the interrupt handler routines use this type for their only parameter.
  */
 struct state {
+#ifdef CONFIG_X86_32
     /// EDI register
-    unsigned int edi;
+    uint32_t edi;
     /// ESI register
-    unsigned int esi;
+    uint32_t esi;
     /// EBP register
-    unsigned int ebp;
+    uint32_t ebp;
     /// ESP register
-    unsigned int esp;
+    uint32_t esp;
     /// EBX register
-    unsigned int ebx;
+    uint32_t ebx;
     /// EDX register
-    unsigned int edx;
+    uint32_t edx;
     /// ECX register
-    unsigned int ecx;
+    uint32_t ecx;
     /// EAX register
-    unsigned int eax; /* pushed by 'pusha' */
+    uint32_t eax; /* pushed by 'pusha' */
     /// Interrupt number
-    unsigned int int_no;
+    uint32_t int_no;

     // pushed by the processor automatically
-    unsigned int error;
-    unsigned int eip;
-    unsigned int cs;
-    unsigned int eflags;
-    unsigned int useresp;
-    unsigned int ss;
+    uint32_t error;
+    uint32_t eip;
+    uint32_t cs;
+    uint32_t eflags;
+    uint32_t useresp;
+    uint32_t ss;
+#elif defined(CONFIG_X86_64)
+    /// R15 register
+    uint64_t r15;
+    /// R14 register
+    uint64_t r14;
+    /// R13 register
+    uint64_t r13;
+    /// R12 register
+    uint64_t r12;
+    /// R11 register
+    uint64_t r11;
+    /// R10 register
+    uint64_t r10;
+    /// R9 register
+    uint64_t r9;
+    /// R8 register
+    uint64_t r8;
+    /// RDI register
+    uint64_t rdi;
+    /// RSI register
+    uint64_t rsi;
+    /// RBP register
+    uint64_t rbp;
+    /// (pseudo) RSP register
+    uint64_t rsp;
+    /// RBX register
+    uint64_t rbx;
+    /// RDX register
+    uint64_t rdx;
+    /// RCX register
+    uint64_t rcx;
+    /// RAX register
+    uint64_t rax;
+    /// Interrupt number
+    uint64_t int_no;
+
+    // pushed by the processor automatically
+    uint64_t error;
+    uint64_t rip;
+    uint64_t cs;
+    uint64_t rflags;
+    uint64_t userrsp;
+    uint64_t ss;
+#endif
 };

 uint32_t apic_cpu_id(void);

View file

@@ -96,11 +96,12 @@ inline static void *memcpy(void *dest, const void *src, size_t count)
  */
 inline static void *memcpy(void* dest, const void *src, size_t count)
 {
-    int32_t i, j, k;
+    size_t i, j, k;

     if (BUILTIN_EXPECT(!dest || !src, 0))
         return dest;

+#ifdef CONFIG_X86_32
     asm volatile (
         "cld; rep movsl\n\t"
         "movl %4, %%ecx\n\t"
@@ -108,6 +109,15 @@ inline static void *memcpy(void* dest, const void *src, size_t count)
         "rep movsb\n\t"
         : "=&c"(i), "=&D"(j), "=&S"(k)
         : "0"(count/4), "g"(count), "1"(dest), "2"(src) : "memory","cc");
+#elif defined(CONFIG_X86_64)
+    asm volatile (
+        "cld; rep movsq\n\t"
+        "movq %4, %%rcx\n\t"
+        "andq $7, %%rcx\n\t"
+        "rep movsb\n\t"
+        : "=&c"(i), "=&D"(j), "=&S"(k)
+        : "0"(count/8), "g"(count), "1"(dest), "2"(src) : "memory","cc");
+#endif

     return dest;
 }
@@ -128,7 +138,7 @@ inline static void *memcpy(void* dest, const void *src, size_t count)
  */
 inline static void *memset(void* dest, int val, size_t count)
 {
-    int32_t i, j;
+    size_t i, j;

     if (BUILTIN_EXPECT(!dest, 0))
         return dest;
@@ -155,7 +165,7 @@ inline static void *memset(void* dest, int val, size_t count)
  */
 inline static void *memset(void* dest, int val, size_t count)
 {
-    int32_t i, j;
+    size_t i, j;

     if (BUILTIN_EXPECT(!dest, 0))
         return dest;
@@ -184,15 +194,22 @@ inline static void *memset(void* dest, int val, size_t count)
 inline static size_t strlen(const char* str)
 {
     size_t len = 0;
-    uint32_t i, j;
+    size_t i, j;

     if (BUILTIN_EXPECT(!str, 0))
         return len;

+#ifdef CONFIG_X86_32
     asm volatile("not %%ecx; cld; repne scasb; not %%ecx; dec %%ecx"
         : "=&c"(len), "=&D"(i), "=&a"(j)
         : "2"(0), "1"(str), "0"(len)
         : "memory","cc");
+#elif defined(CONFIG_X86_64)
+    asm volatile("not %%rcx; cld; repne scasb; not %%rcx; dec %%rcx"
+        : "=&c"(len), "=&D"(i), "=&a"(j)
+        : "2"(0), "1"(str), "0"(len)
+        : "memory","cc");
+#endif

     return len;
 }

View file

@@ -95,11 +95,11 @@ static inline int jump_to_user_code(uint32_t ep, uint32_t stack)
     asm volatile ("mov %0, %%ds; mov %0, %%fs; mov %0, %%gs; mov %0, %%es" :: "r"(0x23));
     asm volatile ("push $0x23; push %0; push $0x1B; push %1" :: "r"(stack), "r"(ep));
     asm volatile ("lret" ::: "cc");
-#else
-#warning Currently, not supported!
-#endif
     return 0;
+#else
+    return -22;
+#endif
 }

 /** @brief determines the stack of a specific task

View file

@ -35,6 +35,7 @@ extern "C" {
/** @brief The tast state segment structure /** @brief The tast state segment structure
*/ */
typedef struct { typedef struct {
#ifdef CONFIG_X86_32
uint16_t backlink, __blh; uint16_t backlink, __blh;
uint32_t esp0; uint32_t esp0;
uint16_t ss0, __ss0h; uint16_t ss0, __ss0h;
@ -55,6 +56,24 @@ typedef struct {
uint16_t gs, __gsh; uint16_t gs, __gsh;
uint16_t ldt, __ldth; uint16_t ldt, __ldth;
uint16_t trace, bitmap; uint16_t trace, bitmap;
#endif
#ifdef CONFIG_X86_64
uint16_t res0, res1; // reserved entries
uint64_t rsp0;
uint64_t rsp1;
uint64_t rsp2;
uint32_t res2, res3; // reserved entries
uint64_t ist_rsp1;
uint64_t ist_rsp2;
uint64_t ist_rsp3;
uint64_t ist_rsp4;
uint64_t ist_rsp5;
uint64_t ist_rsp6;
uint64_t ist_rsp7;
uint32_t res4; // reserved entries
uint32_t res5; // reserved entries
uint16_t res6, bitmap;
#endif
} __attribute__ ((packed)) tss_t; } __attribute__ ((packed)) tss_t;
#ifdef __cplusplus #ifdef __cplusplus

View file

@@ -66,11 +66,13 @@ ALIGN 4
 stublet:
     ; initialize stack pointer.
     mov esp, [default_stack_pointer]
+    ; save pointer to the multiboot structure
+    push ebx
     ; initialize cpu features
     call cpu_init
     ; interpret multiboot information
     extern multiboot_init
-    push ebx
+    ; pointer to the multiboot structure is already pushed
     call multiboot_init
     add esp, 4
@@ -769,8 +771,8 @@ switch_context:
     pushf ; EFLAGS
     push DWORD 0x8 ; CS
     push DWORD rollback ; EIP
-    push DWORD 0 ; Interrupt number
-    push DWORD 0xc0edbabe ; Error code
+    push DWORD 0x0 ; Interrupt number
+    push DWORD 0x00edbabe ; Error code
     pusha ; Registers...

     jmp common_switch

View file

@@ -91,17 +91,17 @@ SECTION .data
 ; create default page tables for the 64bit kernel
 global boot_pgd ; aka PML4
 ALIGN 4096 ; of course, the page tables have to be page aligned
-NOPTS equ 2
+NOPTS equ 512
 boot_pgd times 512 DQ 0
-pdpt times 512 DQ 0
-pd times 512 DQ 0
-pt times (NOPTS*512) DQ 0
+boot_pdpt times 512 DQ 0
+boot_pd times 512 DQ 0
+boot_pt times (NOPTS*512) DQ 0

 SECTION .text
 ALIGN 4
 stublet:
     mov esp, startup_stack-4
-    push ebx ; save pointer to multiboot structure
+    push ebx ; save pointer to the multiboot structure
     mov eax, cr0
     ; enable caching, disable paging and fpu emulation
     and eax, 0x1ffffffb
@@ -144,13 +144,13 @@ stublet:
     mov cr3, edi
     ; So let's make PML4T[0] point to the PDPT and so on:
-    mov DWORD [edi], pdpt ; Set the double word at the destination index to pdpt.
+    mov DWORD [edi], boot_pdpt ; Set the double word at the destination index to pdpt.
     or DWORD [edi], 0x00000003 ; Set present and writeable bit
-    mov edi, pdpt
-    mov DWORD [edi], pd ; Set the double word at the destination index to pd.
+    mov edi, boot_pdpt
+    mov DWORD [edi], boot_pd ; Set the double word at the destination index to pd.
     or DWORD [edi], 0x00000003 ; Set present and writeable bit
-    mov edi, pd
-    mov ebx, pt
+    mov edi, boot_pd
+    mov ebx, boot_pt
     mov ecx, NOPTS
 L0:
     mov DWORD [edi], ebx ; Set the double word at the destination index to pt.
@@ -162,16 +162,26 @@ L0:
     ; map the VGA address into the virtual address space
     mov edi, 0xB8000
     shr edi, 9 ; (edi >> 12) * 8
-    add edi, pt
+    add edi, boot_pt
     mov ebx, 0xB8000
     or ebx, 0x00000003
     mov DWORD [edi], ebx

+    ; map multiboot structure into the virtual address space
+    mov edi, [esp]
+    and edi, 0xFFFFF000
+    shr edi, 9 ; (edi >> 12) * 8
+    add edi, boot_pt
+    mov ebx, [esp]
+    and ebx, 0xFFFFF000
+    or ebx, 0x00000003
+    mov DWORD [edi], ebx

     extern kernel_start ; defined in linker script
     extern kernel_end
     mov edi, kernel_start
     shr edi, 9 ; (kernel_start >> 12) * 8
-    add edi, pt
+    add edi, boot_pt
     mov ebx, kernel_start
     or ebx, 0x00000003
     mov ecx, kernel_end ; determine kernel size in number of pages
@@ -246,22 +256,12 @@ cpu_init:
 ;   mov cr4, eax
 ;   ret

-; This will set up our new segment registers. We need to do
-; something special in order to set CS. We do what is called a
-; far jump. A jump that includes a segment as well as an offset.
-; This is declared in C as 'extern void gdt_flush();'
+; This will set up our new segment registers and is declared in
+; C as 'extern void gdt_flush();'
 global gdt_flush
 extern gp
 gdt_flush:
-;   lgdt [gp]
-;   mov ax, 0x10
-;   mov ds, ax
-;   mov es, ax
-;   mov fs, ax
-;   mov gs, ax
-;   mov ss, ax
-;   jmp 0x08:flush2
-flush2:
+    lgdt [gp]
     ret

 ; determines the current instruction pointer (after the jmp)
@@ -603,7 +603,7 @@ isrsyscall:
     push rdi
     push rsi
     push rbp
-    push Qword 0
+    push rsp
     push rbx
     push rdx
     push rcx
@@ -616,7 +616,7 @@ isrsyscall:
     pop rcx
     pop rdx
     pop rbx
-    add rsp, 1*8
+    add rsp, 8
     pop rbp
     pop rsi
     pop rdi
@@ -627,7 +627,7 @@ isrsyscall:
     pop r12
     pop r13
    pop r14
-    iret
+    iretq

 global irq0
 global irq1
@@ -926,17 +926,20 @@ ALIGN 8
 switch_context:
     ; create on the stack a pseudo interrupt
     ; afterwards, we switch to the task with iret
-    mov rax, [rdi] ; rdi contains the address to store the old rsp
-    pushf ; EFLAGS
-    push QWORD 0x8 ; CS
+    mov rax, rdi ; rdi contains the address to store the old rsp
+    push QWORD 0x10 ; SS
+    push rsp ; RSP
+    add QWORD [rsp], 8*1
+    pushf ; RFLAGS
+    push QWORD 0x08 ; CS
     push QWORD rollback ; RIP
-    push QWORD 0 ; Interrupt number
-    push QWORD 0x0edbabe ; Error code
+    push QWORD 0x00 ; Interrupt number
+    push QWORD 0x00edbabe ; Error code
     push rax
     push rcx
     push rdx
     push rbx
-    push Qword 0
+    push rsp
     push rbp
     push rsi
     push rdi
@@ -961,7 +964,7 @@ common_stub:
     push rcx
     push rdx
     push rbx
-    push Qword 0
+    push rsp
     push rbp
     push rsi
     push rdi
@@ -994,7 +997,7 @@ common_switch:
 %else
     xor rax, rax
 %endif
-    add eax, task_state_segments
+    add rax, task_state_segments
     ; set rsp0 in TSS
     mov [rax+4], rsp
@@ -1013,13 +1016,13 @@ no_context_switch:
     pop rdi
     pop rsi
     pop rbp
-    add rsp, 1*8
+    add rsp, 8
     pop rbx
     pop rdx
     pop rcx
     pop rax
     add rsp, 16
-    iret
+    iretq

 SECTION .note.GNU-stack noalloc noexec nowrite progbits

View file

@@ -61,8 +61,9 @@ size_t get_stack(uint32_t id)
     return (size_t) kstacks[id] + KERNEL_STACK_SIZE - sizeof(size_t);
 }

-int register_task(task_t* task) {
-    uint16_t sel;
+int register_task(task_t* task)
+{
+    uint16_t sel;

     sel = (CORE_ID+5) << 3;
     asm volatile ("mov %0, %%ax; ltr %%ax" : : "ir"(sel) : "%eax");
@@ -128,8 +129,9 @@ int create_default_frame(task_t* task, entry_point_t ep, void* arg)
 {
     uint16_t cs = 0x08;
     uint32_t id;
-    uint32_t *stack;
+    size_t *stack;
     struct state *stptr;
+    size_t state_size;

     if (BUILTIN_EXPECT(!task, 0))
         return -EINVAL;
@@ -137,17 +139,18 @@ int create_default_frame(task_t* task, entry_point_t ep, void* arg)

     memset(kstacks[id], 0xCD, KERNEL_STACK_SIZE);

-#ifdef CONFIG_X86_32
     /* The difference between setting up a task for SW-task-switching
      * and not for HW-task-switching is setting up a stack and not a TSS.
      * This is the stack which will be activated and popped off for iret later.
      */
-    stack = (uint32_t*) (kstacks[id] + KERNEL_STACK_SIZE - sizeof(uint32_t));
+    stack = (size_t*) (kstacks[id] + KERNEL_STACK_SIZE - sizeof(size_t));

     /* The next three things on the stack are a marker for debugging purposes, ... */
     *stack-- = 0xDEADBEEF;
+#ifdef CONFIG_X86_32
     /* the first-function-to-be-called's arguments, ... */
     *stack-- = (size_t) arg;
+#endif
     /* and the "caller" we shall return to.
      * This procedure cleans the task after exit. */
     *stack = (size_t) leave_kernel_task;
@@ -156,58 +159,71 @@ int create_default_frame(task_t* task, entry_point_t ep, void* arg)
      * The stack must look like the stack of a task which was
      * scheduled away previously. */

-    stack = (uint32_t*) ((size_t) stack - sizeof(struct state) + 2*sizeof(size_t));
+    /* In 64bit mode, the stack pointer (SS:RSP) is pushed unconditionally on interrupts.
+     * In legacy modes, this push is conditional and based on a change in current privilege level (CPL). */
+#ifdef CONFIG_X86_32
+    state_size = sizeof(struct state) - 2*sizeof(size_t);
+#else
+    state_size = sizeof(struct state);
+#endif
+    stack = (size_t*) ((size_t) stack - state_size);

     stptr = (struct state *) stack;
-    memset(stptr, 0x00, sizeof(struct state) - 2*sizeof(size_t));
+    memset(stptr, 0x00, state_size);

-    stptr->esp = (size_t)stack + sizeof(struct state) - 2*sizeof(size_t);
+#ifdef CONFIG_X86_32
+    stptr->esp = (size_t)stack + state_size;
+#else
+    stptr->rsp = (size_t)stack + state_size;
+    /* the first-function-to-be-called's arguments, ... */
+    stptr->rdi = (size_t) arg;
+#endif
     stptr->int_no = 0xB16B00B5;
     stptr->error = 0xC03DB4B3;

     /* The instruction pointer shall be set on the first function to be called
      * after IRETing */
-    stptr->eip = (uint32_t)ep;
+#ifdef CONFIG_X86_32
+    stptr->eip = (size_t)ep;
+#else
+    stptr->rip = (size_t)ep;
+#endif
     stptr->cs = cs;
+#ifdef CONFIG_X86_32
     stptr->eflags = 0x1202;
     // the creation of a kernel task didn't change the IOPL level
     // => useresp & ss is not required
+#else
+    stptr->rflags = 0x1202;
+    stptr->ss = 0x10;
+    stptr->userrsp = stptr->rsp;
+#endif

     /* Set the task's stack pointer entry to the stack we have crafted right now. */
     task->stack = (size_t*)stack;
-#else
-#warning Currently, not supported
-    return -1;
-#endif

     return 0;
 }

-/* Setup a descriptor in the Global Descriptor Table */
+/** @brief Configures GDT descriptor with chosen attributes
+ *
+ * Just feed this function with address, limit and the flags
+ * you have seen in gdt.h
+ */
 static void gdt_set_gate(int num, unsigned long base, unsigned long limit,
         unsigned char access, unsigned char gran)
 {
-    gdt[num] = configure_gdt_entry(base, limit, access, gran);
-}
-
-gdt_entry_t configure_gdt_entry(unsigned long base, unsigned long limit,
-        unsigned char access, unsigned char gran)
-{
-    gdt_entry_t desc;
-
     /* Setup the descriptor base address */
-    desc.base_low = (base & 0xFFFF);
-    desc.base_middle = (base >> 16) & 0xFF;
-    desc.base_high = (base >> 24) & 0xFF;
+    gdt[num].base_low = (base & 0xFFFF);
+    gdt[num].base_middle = (base >> 16) & 0xFF;
+    gdt[num].base_high = (base >> 24) & 0xFF;

     /* Setup the descriptor limits */
-    desc.limit_low = (limit & 0xFFFF);
-    desc.granularity = ((limit >> 16) & 0x0F);
+    gdt[num].limit_low = (limit & 0xFFFF);
+    gdt[num].granularity = ((limit >> 16) & 0x0F);

     /* Finally, set up the granularity and access flags */
-    desc.granularity |= (gran & 0xF0);
-    desc.access = access;
-
-    return desc;
+    gdt[num].granularity |= (gran & 0xF0);
+    gdt[num].access = access;
 }

 /*
@@ -219,13 +235,21 @@ gdt_entry_t configure_gdt_entry(unsigned long base, unsigned long limit,
  */
 void gdt_install(void)
 {
-    unsigned int i;
+    unsigned int i, mode;

     memset(task_state_segments, 0x00, MAX_CORES*sizeof(tss_t));

+#ifdef CONFIG_X86_32
+    mode = GDT_FLAG_32_BIT;
+#elif defined(CONFIG_X86_64)
+    mode = GDT_FLAG_64_BIT;
+#else
+#error invalid mode
+#endif
+
     /* Setup the GDT pointer and limit */
     gp.limit = (sizeof(gdt_entry_t) * GDT_ENTRIES) - 1;
-    gp.base = (unsigned int) &gdt;
+    gp.base = (size_t) &gdt;

     /* Our NULL descriptor */
     gdt_set_gate(0, 0, 0, 0, 0);
@@ -237,7 +261,7 @@ void gdt_install(void)
      */
     gdt_set_gate(1, 0, 0xFFFFFFFF,
         GDT_FLAG_RING0 | GDT_FLAG_SEGMENT | GDT_FLAG_CODESEG | GDT_FLAG_PRESENT,
-        GDT_FLAG_4K_GRAN | GDT_FLAG_32_BIT);
+        GDT_FLAG_4K_GRAN | mode);

     /*
      * The third entry is our Data Segment. It's EXACTLY the
@@ -246,33 +270,37 @@ void gdt_install(void)
      */
     gdt_set_gate(2, 0, 0xFFFFFFFF,
         GDT_FLAG_RING0 | GDT_FLAG_SEGMENT | GDT_FLAG_DATASEG | GDT_FLAG_PRESENT,
-        GDT_FLAG_4K_GRAN | GDT_FLAG_32_BIT);
+        GDT_FLAG_4K_GRAN | mode);

     /*
      * Create code segment for userspace applications (ring 3)
      */
     gdt_set_gate(3, 0, 0xFFFFFFFF,
         GDT_FLAG_RING3 | GDT_FLAG_SEGMENT | GDT_FLAG_CODESEG | GDT_FLAG_PRESENT,
-        GDT_FLAG_4K_GRAN | GDT_FLAG_32_BIT);
+        GDT_FLAG_4K_GRAN | mode);

     /*
      * Create data segment for userspace applications (ring 3)
      */
     gdt_set_gate(4, 0, 0xFFFFFFFF,
         GDT_FLAG_RING3 | GDT_FLAG_SEGMENT | GDT_FLAG_DATASEG | GDT_FLAG_PRESENT,
-        GDT_FLAG_4K_GRAN | GDT_FLAG_32_BIT);
+        GDT_FLAG_4K_GRAN | mode);

     /*
      * Create TSS for each task at ring0 (we use these segments for task switching)
      */
     for(i=0; i<MAX_CORES; i++) {
+#ifdef CONFIG_X86_32
         /* set default values */
         task_state_segments[i].eflags = 0x1202;
         task_state_segments[i].ss0 = 0x10; // data segment
         task_state_segments[i].esp0 = 0xDEADBEEF; // invalid pseudo address
+#elif defined(CONFIG_X86_64)
+        task_state_segments[i].rsp0 = 0xDEADBEEF; // invalid pseudo address
+#endif

         gdt_set_gate(5+i, (unsigned long) (task_state_segments+i), sizeof(tss_t)-1,
-            GDT_FLAG_PRESENT | GDT_FLAG_TSS | GDT_FLAG_RING0, GDT_FLAG_32_BIT);
+            GDT_FLAG_PRESENT | GDT_FLAG_TSS | GDT_FLAG_RING0, 0);
     }

     /* Flush out the old GDT and install the new changes! */

View file

@@ -42,24 +42,6 @@
 static idt_entry_t idt[256] = {[0 ... 255] = {0, 0, 0, 0, 0}};
 static idt_ptr_t idtp;

-idt_entry_t configure_idt_entry(size_t base, unsigned short sel,
-        unsigned char flags)
-{
-    idt_entry_t desc;
-
-    /* The interrupt routine's base address */
-    desc.base_lo = (base & 0xFFFF);
-    desc.base_hi = (base >> 16) & 0xFFFF;
-
-    /* The segment or 'selector' that this IDT entry will use
-     * is set here, along with any access flags */
-    desc.sel = sel;
-    desc.always0 = 0;
-    desc.flags = flags;
-
-    return desc;
-}
-
 /*
  * Use this function to set an entry in the IDT. A lot simpler
  * than twiddling with the GDT ;)
@@ -67,7 +49,19 @@ idt_entry_t configure_idt_entry(size_t base, unsigned short sel,
 void idt_set_gate(unsigned char num, size_t base, unsigned short sel,
         unsigned char flags)
 {
-    idt[num] = configure_idt_entry(base, sel, flags);
+    /* The interrupt routine's base address */
+    idt[num].base_lo = (base & 0xFFFF);
+    idt[num].base_hi = (base >> 16) & 0xFFFF;
+#ifdef CONFIG_X86_64
+    idt[num].base_hi64 = (base >> 32) & 0xFFFFFFFF;
+    idt[num].reserved = 0x0;
+#endif
+
+    /* The segment or 'selector' that this IDT entry will use
+     * is set here, along with any access flags */
+    idt[num].sel = sel;
+    idt[num].always0 = 0x0;
+    idt[num].flags = flags;
 }

 extern void isrsyscall(void);
@@ -82,7 +76,7 @@ void idt_install(void)
     /* Sets the special IDT pointer up, just like in 'gdt.c' */
     idtp.limit = (sizeof(idt_entry_t) * 256) - 1;
-    idtp.base = (unsigned int)&idt;
+    idtp.base = (size_t)&idt;

     /* Add any new ISRs to the IDT here using idt_set_gate */
     idt_set_gate(INT_SYSCALL, (size_t)isrsyscall, KERNEL_CODE_SELECTOR,

View file

@@ -230,9 +230,14 @@ static void fault_handler(struct state *s)
 {
     if (s->int_no < 32) {
         kputs(exception_messages[s->int_no]);
+#ifdef CONFIG_X86_32
         kprintf(" Exception (%d) at 0x%x:0x%x on core %u, error code 0x%x, eflags 0x%x\n",
             s->int_no, s->cs, s->eip, CORE_ID, s->error, s->eflags);
+#elif defined(CONFIG_X86_64)
+        kprintf(" Exception (%d) at 0x%llx:0x%llx on core %u, error code 0x%llx, rflags 0x%llx\n",
+            s->int_no, s->cs, s->rip, CORE_ID, s->error, s->rflags);
+#endif

         /* Now, we signal that we have handled the interrupt */
         if (apic_is_enabled())
             apic_eoi();

View file

@@ -8,20 +8,11 @@
 ; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ;

-; TODO
-%if 0
-[BITS 32]
+[BITS 64]
 SECTION .text

 global strcpy
 strcpy:
-    push ebp
-    mov ebp, esp
-    push edi
-    push esi
-    mov esi, [ebp+12]
-    mov edi, [ebp+8]
+    push rdi

 L1:
     lodsb
@@ -29,25 +20,16 @@ L1:
     test al, al
     jne L1

-    mov eax, [ebp+8]
-    pop esi
-    pop edi
-    pop ebp
+    pop rax
     ret

 global strncpy
 strncpy:
-    push ebp
-    mov ebp, esp
-    push edi
-    push esi
-    mov ecx, [ebp+16]
-    mov esi, [ebp+12]
-    mov edi, [ebp+8]
+    push rdi
+    mov rcx, rdx

 L2:
-    dec ecx
+    dec rcx
     js L3
     lodsb
     stosb
@@ -57,12 +39,10 @@ L2:
     stosb
 L3:
-    mov eax, [ebp+8]
-    pop esi
-    pop edi
-    pop ebp
+    pop rax
     ret

+%if 0
 ; The following function is derived from JamesM's kernel development tutorials
 ; (http://www.jamesmolloy.co.uk/tutorial_html/)
 global copy_page_physical

View file

@@ -78,7 +78,6 @@ inline static size_t copy_page_table(task_t* task, uint32_t pgd_index, page_tabl
     page_table_t* new_pgt;
     size_t phyaddr;

-#ifdef CONFIG_X86_32
     if (BUILTIN_EXPECT(!pgt, 0))
         return 0;

@@ -89,7 +88,7 @@ inline static size_t copy_page_table(task_t* task, uint32_t pgd_index, page_tabl
     if (counter)
         (*counter)++;

-    for(i=0; i<1024; i++) {
+    for(i=0; i<PGT_ENTRIES; i++) {
         if (pgt->entries[i] & PAGE_MASK) {
             if (!(pgt->entries[i] & PG_USER)) {
                 // Kernel page => copy only page entries
@@ -114,10 +113,6 @@ inline static size_t copy_page_table(task_t* task, uint32_t pgd_index, page_tabl
     phyaddr = virt_to_phys((size_t)new_pgt);

     return phyaddr;
-#else
-#warning Currently, not supported
-    return 0;
-#endif
 }

 int create_pgd(task_t* task, int copy)
@@ -154,7 +149,7 @@ int create_pgd(task_t* task, int copy)
     spinlock_lock(&kslock);

-    for(i=0; i<1024; i++) {
+    for(i=0; i<PGT_ENTRIES; i++) {
         pgd->entries[i] = boot_pgd.entries[i];
         // only kernel entries will be copied
         if (pgd->entries[i] && !(pgd->entries[i] & PG_USER))
@@ -211,7 +206,7 @@ int drop_pgd(void)
     spinlock_lock(&task->pgd_lock);

-    for(i=0; i<1024; i++) {
+    for(i=0; i<PGT_ENTRIES; i++) {
         if (pgd->entries[i] & PG_USER) {
             put_page(pgd->entries[i] & PAGE_MASK);
             pgd->entries[i] = 0;
@@ -310,9 +305,9 @@ size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flag
         // set the new page table into the directory
         if (flags & MAP_USER_SPACE)
-            task->pgd->entries[index] = (size_t)pgt|USER_TABLE;
+            task->pgd->entries[index] = (uint32_t)pgt|USER_TABLE;
         else
-            task->pgd->entries[index] = (size_t)pgt|KERN_TABLE;
+            task->pgd->entries[index] = (uint32_t)pgt|KERN_TABLE;

         // if paging is already enabled, we need to use the virtual address
         if (paging_enabled)
@@ -392,7 +387,7 @@ size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flag
 int change_page_permissions(size_t start, size_t end, uint32_t flags)
 {
     uint32_t index1, index2, newflags;
-    size_t viraddr = start & PAGE_MASK;
+    size_t viraddr = start & 0xFFFFF000;
     size_t phyaddr;
     page_table_t* pgt;
     page_dir_t* pgd;
@@ -708,7 +703,7 @@ int arch_paging_init(void)
     // now, we create a self reference
     per_core(current_task)->pgd->entries[index1] = (((size_t) pgt) & PAGE_MASK)|KERN_TABLE;
-    pgt->entries[index2] = ((size_t) pgt & PAGE_MASK)|KERN_PAGE;
+    pgt->entries[index2] = ((size_t) pgt & 0xFFFFF000)|KERN_PAGE;

     // create the other PGTs for the kernel space
     for(i=0; i<KERNEL_SPACE/(1024*PAGE_SIZE)-1; i++) {
@@ -783,14 +778,15 @@ int arch_paging_init(void)
         npages = mb_info->mods_count * sizeof(multiboot_module_t) >> PAGE_SHIFT;
         if (mb_info->mods_count * sizeof(multiboot_module_t) & (PAGE_SIZE-1))
             npages++;
-        map_region((size_t) (mb_info->mods_addr), (size_t) (mb_info->mods_addr), npages, MAP_KERNEL_SPACE);
+        map_region((size_t) mb_info->mods_addr & PAGE_MASK, (size_t) mb_info->mods_addr & PAGE_MASK, npages, MAP_KERNEL_SPACE);

         for(i=0; i<mb_info->mods_count; i++, mmodule++) {
             // map physical address to the same virtual address
             npages = (mmodule->mod_end - mmodule->mod_start) >> PAGE_SHIFT;
             if (mmodule->mod_end & (PAGE_SIZE-1))
                 npages++;
-            map_region((size_t) (mmodule->mod_start), (size_t) (mmodule->mod_start), npages, MAP_KERNEL_SPACE);
+            kprintf("Map module %s at 0x%x (%u pages)\n", (char*) mmodule->cmdline, mmodule->mod_start, npages);
+            map_region((size_t) mmodule->mod_start & PAGE_MASK, (size_t) mmodule->mod_start & PAGE_MASK, npages, MAP_KERNEL_SPACE);
         }
     }
 #endif
@@ -814,7 +810,7 @@ int arch_paging_init(void)
 #endif

     /* enable paging */
-    write_cr3((size_t) &boot_pgd);
+    write_cr3((uint32_t) &boot_pgd);
     i = read_cr0();
     i = i | (1 << 31);
     write_cr0(i);

View file

@@ -44,9 +44,8 @@
  *
  * 0x00000000 - 0x000FFFFF: reserved for IO devices (16MB)
  * 0x00100000 - 0x0DEADFFF: Kernel (size depends on the configuration) (221MB)
- * 0x0DEAE000 - 0x3FFFEFFF: Kernel heap (801MB)
- * 0x3FFFF000 - 0x3FFFFFFF: Page Tables are mapped in this region (4KB)
- *             (The last 256 entries belongs to kernel space)
+ * 0x0DEAE000 - 0x3FFFFFFF: Kernel heap
+ *
  */

 /*
@@ -57,147 +56,24 @@ extern const void kernel_start;
 extern const void kernel_end;

 // boot task's page directory and page directory lock
-static page_dir_t boot_pgd = {{[0 ... PGT_ENTRIES-1] = 0}};
+extern page_dir_t boot_pgd;
 static spinlock_t kslock = SPINLOCK_INIT;
-//static int paging_enabled = 0;
+static int paging_enabled = 0;

 page_dir_t* get_boot_pgd(void)
 {
     return &boot_pgd;
 }

-#if 0
-/*
- * TODO: We create a full copy of the current task. Copy-On-Access will be the better solution.
- *
- * No PGD locking is needed because onls create_pgd use this function and holds already the
- * PGD lock.
- */
-inline static size_t copy_page_table(task_t* task, uint32_t pgd_index, page_table_t* pgt, int* counter)
-{
-    uint32_t i;
-    page_table_t* new_pgt;
-    size_t phyaddr;
-
-#ifdef CONFIG_X86_32
-    if (BUILTIN_EXPECT(!pgt, 0))
-        return 0;
-
-    new_pgt = kmalloc(sizeof(page_table_t));
-    if (!new_pgt)
-        return 0;
-    memset(new_pgt, 0x00, sizeof(page_table_t));
-    if (counter)
-        (*counter)++;
-
-    for(i=0; i<1024; i++) {
-        if (pgt->entries[i] & PAGE_MASK) {
-            if (!(pgt->entries[i] & PG_USER)) {
-                // Kernel page => copy only page entries
-                new_pgt->entries[i] = pgt->entries[i];
-                continue;
-            }
-
-            phyaddr = get_page();
-            if (!phyaddr)
-                continue;
-            if (counter)
-                (*counter)++;
-
-            copy_page_physical((void*)phyaddr, (void*) (pgt->entries[i] & PAGE_MASK));
-
-            new_pgt->entries[i] = phyaddr | (pgt->entries[i] & 0xFFF);
-
-            atomic_int32_inc(&task->user_usage);
-        }
-    }
-
-    phyaddr = virt_to_phys((size_t)new_pgt);
-
-    return phyaddr;
-#else
-#warning Currently, not supported
-    return 0;
-#endif
-}
-#endif
-
 int create_pgd(task_t* task, int copy)
 {
-#if 0
-    page_dir_t* pgd;
-    page_table_t* pgt;
-    page_table_t* pgt_container;
-    uint32_t i;
-    uint32_t index1, index2;
-    size_t viraddr, phyaddr;
-    int counter = 0;
-    task_t* curr_task = per_core(current_task);
+    // Currently, we support only kernel tasks
+    // => all tasks are able to use the same pgd

     if (BUILTIN_EXPECT(!paging_enabled, 0))
         return -EINVAL;

-    // we already know the virtual address of the "page table container"
-    // (see file header)
-    pgt_container = (page_table_t*) ((KERNEL_SPACE - PAGE_SIZE) & PAGE_MASK);
-
-    // create new page directory for the new task
-    pgd = kmalloc(sizeof(page_dir_t));
-    if (!pgd)
-        return -ENOMEM;
-    memset(pgd, 0x00, sizeof(page_dir_t));
-
-    // create a new "page table container" for the new task
-    pgt = kmalloc(sizeof(page_table_t));
-    if (!pgt) {
-        kfree(pgd, sizeof(page_dir_t));
-        return -ENOMEM;
-    }
-    memset(pgt, 0x00, sizeof(page_table_t));
-
-    spinlock_lock(&kslock);
-
-    for(i=0; i<1024; i++) {
-        pgd->entries[i] = boot_pgd.entries[i];
-        // only kernel entries will be copied
-        if (pgd->entries[i] && !(pgd->entries[i] & PG_USER))
-            pgt->entries[i] = pgt_container->entries[i];
-    }
-
-    spinlock_unlock(&kslock);
-
-    // map page table container at the end of the kernel space
-    viraddr = (KERNEL_SPACE - PAGE_SIZE) & PAGE_MASK;
-    index1 = viraddr >> 22;
-    index2 = (viraddr >> 12) & 0x3FF;
-
-    // now, we create a self reference
-    pgd->entries[index1] = ((size_t) virt_to_phys((size_t) pgt) & PAGE_MASK)|KERN_TABLE;
-    pgt->entries[index2] = ((size_t) virt_to_phys((size_t) pgt) & PAGE_MASK)|KERN_PAGE;
-
-    task->pgd = pgd;
-
-    if (copy) {
-        spinlock_lock(&curr_task->pgd_lock);
-
-        for (i=KERNEL_SPACE/(1024*PAGE_SIZE); i<1024; i++) {
-            if (!(curr_task->pgd->entries[i]))
-                continue;
-            if (!(curr_task->pgd->entries[i] & PG_USER))
-                continue;
-
-            phyaddr = copy_page_table(task, i, (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + i*PAGE_SIZE) & PAGE_MASK), &counter);
-            if (phyaddr) {
-                pgd->entries[i] = (phyaddr & PAGE_MASK) | (curr_task->pgd->entries[i] & 0xFFF);
-                pgt->entries[i] = (phyaddr & PAGE_MASK) | KERN_PAGE;
-            }
-        }
-
-        spinlock_unlock(&curr_task->pgd_lock);
-    }
-
-    return counter;
-#endif
+    task->pgd = get_boot_pgd();
+
     return 0;
 }
@@ -238,9 +114,11 @@ int drop_pgd(void)
 size_t virt_to_phys(size_t viraddr)
 {
-#if 0
     task_t* task = per_core(current_task);
-    uint32_t index1, index2;
+    uint16_t idx_pd4 = (viraddr >> 39) & 0x1FF;
+    uint16_t idx_dirp = (viraddr >> 30) & 0x1FF;
+    uint16_t idx_dir = (viraddr >> 21) & 0x1FF;
+    uint16_t idx_table = (viraddr >> 12) & 0x1FF;
     page_table_t* pgt;
     size_t ret = 0;
@@ -252,17 +130,24 @@ size_t virt_to_phys(size_t viraddr)
     spinlock_lock(&task->pgd_lock);

-    index1 = viraddr >> 22;
-    index2 = (viraddr >> 12) & 0x3FF;
-
-    if (!(task->pgd->entries[index1] & PAGE_MASK))
-        goto out;
-
-    pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
-    if (!pgt || !(pgt->entries[index2]))
-        goto out;
-
-    ret = pgt->entries[index2] & PAGE_MASK;
+    // Currently, we allocate pages only in kernel space.
+    // => physical address of the page table is identical to the virtual address
+    pgt = (page_table_t*) (task->pgd->entries[idx_pd4] & PAGE_MASK);
+    if (!pgt)
+        goto out;
+
+    pgt = (page_table_t*) (pgt->entries[idx_dirp] & PAGE_MASK);
+    if (!pgt)
+        goto out;
+
+    pgt = (page_table_t*) (pgt->entries[idx_dir] & PAGE_MASK);
+    if (!pgt)
+        goto out;
+
+    ret = (size_t) (pgt->entries[idx_table] & PAGE_MASK);
+    if (!ret)
+        goto out;
+
+    // determine page frame
     ret = ret | (viraddr & 0xFFF); // add page offset
 out:
     //kprintf("vir %p to phy %p\n", viraddr, ret);
@@ -270,18 +155,14 @@ out:
     spinlock_unlock(&task->pgd_lock);

     return ret;
-#endif
-
-    return 0;
 }
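
As an aside: the four idx_* values used throughout the rewritten functions are plain bit slices of the 48bit virtual address (9 bits per table level, 12 bits page offset). The following stand-alone C sketch is not part of the commit; it only illustrates the decomposition and the per-level coverage that explains the skip strides in vm_alloc(), unmap_region() and vm_free() below (PGT_ENTRIES assumed to be 512, as the `times 512 DQ 0` table definitions in entry64.asm suggest):

    #include <stdint.h>
    #include <stdio.h>

    #define PGT_ENTRIES 512 /* entries per table level (assumed) */

    int main(void)
    {
        uint64_t viraddr = 0xB8000; /* the VGA buffer mapped in entry64.asm above */

        /* the same bit slices that the paging code computes */
        uint16_t idx_pd4   = (viraddr >> 39) & 0x1FF; /* PML4 index: one entry spans 512 GiB */
        uint16_t idx_dirp  = (viraddr >> 30) & 0x1FF; /* PDPT index: one entry spans 1 GiB */
        uint16_t idx_dir   = (viraddr >> 21) & 0x1FF; /* PD index: one entry spans 2 MiB */
        uint16_t idx_table = (viraddr >> 12) & 0x1FF; /* PT index: one entry spans 4 KiB */

        /* a missing entry therefore skips PGT_ENTRIES^3, PGT_ENTRIES^2 or
         * PGT_ENTRIES pages -- exactly the strides used in the loops below */
        printf("PML4 %u, PDPT %u, PD %u, PT %u, offset 0x%llx\n",
               (unsigned) idx_pd4, (unsigned) idx_dirp, (unsigned) idx_dir,
               (unsigned) idx_table, (unsigned long long)(viraddr & 0xFFF));
        return 0;
    }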
size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flags) size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flags)
{ {
#if 0
task_t* task = per_core(current_task); task_t* task = per_core(current_task);
spinlock_t* pgd_lock; spinlock_t* pgd_lock;
page_table_t* pgt; page_table_t* pgt;
size_t index, i; size_t i, ret;
size_t ret;
if (BUILTIN_EXPECT(!task || !task->pgd, 0)) if (BUILTIN_EXPECT(!task || !task->pgd, 0))
return 0; return 0;
@ -300,95 +181,64 @@ size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flag
viraddr = vm_alloc(npages, flags); viraddr = vm_alloc(npages, flags);
if (BUILTIN_EXPECT(!viraddr, 0)) { if (BUILTIN_EXPECT(!viraddr, 0)) {
spinlock_unlock(pgd_lock); spinlock_unlock(pgd_lock);
kputs("map_adress: found no valid virtual address\n"); kputs("map_region: found no valid virtual address\n");
return 0; return 0;
} }
} }
ret = viraddr; ret = viraddr;
//kprintf("map %d pages from %p to %p\n", npages, phyaddr, ret);
for(i=0; i<npages; i++, viraddr+=PAGE_SIZE, phyaddr+=PAGE_SIZE) { for(i=0; i<npages; i++, viraddr+=PAGE_SIZE, phyaddr+=PAGE_SIZE) {
index = viraddr >> 22; uint16_t idx_pd4 = (viraddr >> 39) & 0x1FF;
uint16_t idx_dirp = (viraddr >> 30) & 0x1FF;
uint16_t idx_dir = (viraddr >> 21) & 0x1FF;
uint16_t idx_table = (viraddr >> 12) & 0x1FF;
if (!(task->pgd->entries[index])) { pgt = (page_table_t*) (task->pgd->entries[idx_pd4] & PAGE_MASK);
page_table_t* pgt_container; if (!pgt) {
spinlock_unlock(pgd_lock);
kputs("map_region: out of memory\n");
return 0;
}
pgt = (page_table_t*) (pgt->entries[idx_dirp] & PAGE_MASK);
if (!pgt) {
spinlock_unlock(pgd_lock);
kputs("map_region: out of memory\n");
return 0;
}
pgt = (page_table_t*) get_pages(1); pgt = (page_table_t*) (pgt->entries[idx_dir] & PAGE_MASK);
if (BUILTIN_EXPECT(!pgt, 0)) { if (!pgt) {
spinlock_unlock(pgd_lock); spinlock_unlock(pgd_lock);
kputs("map_address: out of memory\n"); kputs("map_region: out of memory\n");
return 0; return 0;
} }
// set the new page table into the directory
if (flags & MAP_USER_SPACE)
task->pgd->entries[index] = (size_t)pgt|USER_TABLE;
else
task->pgd->entries[index] = (size_t)pgt|KERN_TABLE;
// if paging is already enabled, we need to use the virtual address
if (paging_enabled)
// we already know the virtual address of the "page table container"
// (see file header)
pgt_container = (page_table_t*) ((KERNEL_SPACE - PAGE_SIZE) & PAGE_MASK);
else
pgt_container = (page_table_t*) (task->pgd->entries[(KERNEL_SPACE - PAGE_SIZE) >> 22] & PAGE_MASK);
if (BUILTIN_EXPECT(!pgt_container, 0)) {
spinlock_unlock(pgd_lock);
kputs("map_address: internal error\n");
return 0;
}
// map the new table into the address space of the kernel space
pgt_container->entries[index] = ((size_t) pgt)|KERN_PAGE;
// clear the page table
if (paging_enabled)
memset((void*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index*PAGE_SIZE) & PAGE_MASK), 0x00, PAGE_SIZE);
else
memset(pgt, 0x00, PAGE_SIZE);
} else pgt = (page_table_t*) (task->pgd->entries[index] & PAGE_MASK);
/* convert physical address to virtual */ /* convert physical address to virtual */
if (paging_enabled) // Currently, we allocate pages only in kernel space.
pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index*PAGE_SIZE) & PAGE_MASK); // => physical address of the page table is identical of the virtual address
//if (paging_enabled)
// pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index*PAGE_SIZE) & PAGE_MASK);
index = (viraddr >> 12) & 0x3FF; if (pgt->entries[idx_table] && !(flags & MAP_REMAP)) {
if (pgt->entries[index] && !(flags & MAP_REMAP)) {
spinlock_unlock(pgd_lock); spinlock_unlock(pgd_lock);
kprintf("0x%x is already mapped\n", viraddr); kprintf("0x%x is already mapped\n", viraddr);
return 0; return 0;
} }
if (flags & MAP_USER_SPACE) if (flags & MAP_USER_SPACE)
pgt->entries[index] = USER_PAGE|(phyaddr & PAGE_MASK); pgt->entries[idx_table] = USER_PAGE|(phyaddr & PAGE_MASK);
else else
pgt->entries[index] = KERN_PAGE|(phyaddr & PAGE_MASK); pgt->entries[idx_table] = KERN_PAGE|(phyaddr & PAGE_MASK);
if (flags & MAP_NO_CACHE) if (flags & MAP_NO_CACHE)
pgt->entries[index] |= PG_PCD; pgt->entries[idx_table] |= PG_PCD;
#ifdef CONFIG_ROCKCREEK
if (flags & MAP_MPE)
pgt->entries[index] |= PG_MPE;
#endif
if (flags & MAP_SVM_STRONG)
#ifndef SVM_WB
pgt->entries[index] |= PG_SVM_STRONG|PG_PWT;
#else
pgt->entries[index] |= PG_SVM;
#endif
if (flags & MAP_SVM_LAZYRELEASE)
pgt->entries[index] |= PG_SVM_LAZYRELEASE|PG_PWT;
if (flags & MAP_SVM_INIT)
pgt->entries[index] |= PG_SVM_INIT;
if (flags & MAP_NO_ACCESS) if (flags & MAP_NO_ACCESS)
pgt->entries[index] &= ~PG_PRESENT; pgt->entries[idx_table] &= ~PG_PRESENT;
if (flags & MAP_WT) if (flags & MAP_WT)
pgt->entries[index] |= PG_PWT; pgt->entries[idx_table] |= PG_PWT;
if (flags & MAP_USER_SPACE) if (flags & MAP_USER_SPACE)
atomic_int32_inc(&task->user_usage); atomic_int32_inc(&task->user_usage);
@ -399,9 +249,6 @@ size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flag
spinlock_unlock(pgd_lock); spinlock_unlock(pgd_lock);
return ret; return ret;
#endif
return 0;
} }
int change_page_permissions(size_t start, size_t end, uint32_t flags) int change_page_permissions(size_t start, size_t end, uint32_t flags)
@ -469,7 +316,7 @@ int change_page_permissions(size_t start, size_t end, uint32_t flags)
spinlock_unlock(&task->pgd_lock); spinlock_unlock(&task->pgd_lock);
#endif #endif
return 0; return -EINVAL;
} }
/* /*
@ -479,11 +326,9 @@ int change_page_permissions(size_t start, size_t end, uint32_t flags)
*/ */
size_t vm_alloc(uint32_t npages, uint32_t flags) size_t vm_alloc(uint32_t npages, uint32_t flags)
{ {
#if 0
task_t* task = per_core(current_task); task_t* task = per_core(current_task);
spinlock_t* pgd_lock; spinlock_t* pgd_lock;
uint32_t index1, index2, j; size_t viraddr, i, j, ret = 0;
size_t viraddr, i, ret = 0;
size_t start, end; size_t start, end;
page_table_t* pgt; page_table_t* pgt;
@ -508,11 +353,35 @@ size_t vm_alloc(uint32_t npages, uint32_t flags)
viraddr = i = start; viraddr = i = start;
j = 0; j = 0;
do { do {
index1 = i >> 22; uint16_t idx_pd4 = (viraddr >> 39) & 0x1FF;
index2 = (i >> 12) & 0x3FF; uint16_t idx_dirp = (viraddr >> 30) & 0x1FF;
uint16_t idx_dir = (viraddr >> 21) & 0x1FF;
uint16_t idx_table = (viraddr >> 12) & 0x1FF;
pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK); // Currently, we allocate pages only in kernel space.
if (!pgt || !(pgt->entries[index2])) { // => physical address of the page table is identical of the virtual address
pgt = (page_table_t*) (task->pgd->entries[idx_pd4] & PAGE_MASK);
if (!pgt) {
i += (size_t)PGT_ENTRIES*PGT_ENTRIES*PGT_ENTRIES*PAGE_SIZE;
j += PGT_ENTRIES*PGT_ENTRIES*PGT_ENTRIES;
continue;
}
pgt = (page_table_t*) (pgt->entries[idx_dirp] & PAGE_MASK);
if (!pgt) {
i += PGT_ENTRIES*PGT_ENTRIES*PAGE_SIZE;
j += PGT_ENTRIES*PGT_ENTRIES;
continue;
}
pgt = (page_table_t*) (pgt->entries[idx_dir] & PAGE_MASK);
if (!pgt) {
i += PGT_ENTRIES*PAGE_SIZE;
j += PGT_ENTRIES;
continue;
}
if (!(pgt->entries[idx_table])) {
i+=PAGE_SIZE; i+=PAGE_SIZE;
j++; j++;
} else { } else {
@ -529,21 +398,19 @@ size_t vm_alloc(uint32_t npages, uint32_t flags)
spinlock_unlock(pgd_lock); spinlock_unlock(pgd_lock);
return ret; return ret;
#endif
return 0;
} }
int unmap_region(size_t viraddr, uint32_t npages) int unmap_region(size_t viraddr, uint32_t npages)
{ {
#if 0
task_t* task = per_core(current_task); task_t* task = per_core(current_task);
spinlock_t* pgd_lock; spinlock_t* pgd_lock;
uint32_t i;
uint32_t index1, index2;
page_table_t* pgt; page_table_t* pgt;
size_t i;
uint16_t idx_pd4, idx_dirp;
uint16_t idx_dir, idx_table;
if (BUILTIN_EXPECT(!task || !task->pgd || !paging_enabled, 0)) if (BUILTIN_EXPECT(!task || !task->pgd || !paging_enabled, 0))
return -EINVAL; return -EINVAL;
if (viraddr <= KERNEL_SPACE) if (viraddr <= KERNEL_SPACE)
pgd_lock = &kslock; pgd_lock = &kslock;
@@ -552,16 +419,43 @@ int unmap_region(size_t viraddr, uint32_t npages)
     spinlock_lock(pgd_lock);

-    for(i=0; i<npages; i++, viraddr+=PAGE_SIZE)
+    i = 0;
+    while(i<npages)
     {
-        index1 = viraddr >> 22;
-        index2 = (viraddr >> 12) & 0x3FF;
+        idx_pd4 = (viraddr >> 39) & 0x1FF;
+        idx_dirp = (viraddr >> 30) & 0x1FF;
+        idx_dir = (viraddr >> 21) & 0x1FF;
+        idx_table = (viraddr >> 12) & 0x1FF;

-        pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
-        if (!pgt)
-            continue;
-        pgt->entries[index2] &= ~PG_PRESENT;
+        // Currently, we allocate pages only in kernel space.
+        // => physical address of the page table is identical of the virtual address
+        pgt = (page_table_t*) (task->pgd->entries[idx_pd4] & PAGE_MASK);
+        if (!pgt) {
+            viraddr += (size_t) PGT_ENTRIES*PGT_ENTRIES*PGT_ENTRIES*PAGE_SIZE;
+            i += PGT_ENTRIES*PGT_ENTRIES*PGT_ENTRIES;
+            continue;
+        }
+
+        pgt = (page_table_t*) (pgt->entries[idx_dirp] & PAGE_MASK);
+        if (!pgt) {
+            viraddr += PGT_ENTRIES*PGT_ENTRIES*PAGE_SIZE;
+            i += PGT_ENTRIES*PGT_ENTRIES;
+            continue;
+        }
+
+        pgt = (page_table_t*) (pgt->entries[idx_dir] & PAGE_MASK);
+        if (!pgt) {
+            viraddr += PGT_ENTRIES*PAGE_SIZE;
+            i += PGT_ENTRIES;
+            continue;
+        }
+
+        if (pgt->entries[idx_table])
+            pgt->entries[idx_table] &= ~PG_PRESENT;
+        viraddr +=PAGE_SIZE;
+        i++;

         if (viraddr > KERNEL_SPACE)
             atomic_int32_dec(&task->user_usage);
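
When an upper-level table is missing, the loop above cannot step one page at a time; it therefore advances viraddr by the full span of the missing subtree. The strides follow directly from the 512-way fan-out at each level. A sketch of the spans, under the same PGT_ENTRIES == 512 / PAGE_SIZE == 4096 assumption as above:

#include <stddef.h>

#define PGT_ENTRIES 512
#define PAGE_SIZE   4096UL

/* bytes of virtual address space covered by one entry at each level */
static const size_t span_pt   = PAGE_SIZE;                              /*   4 KiB */
static const size_t span_pd   = PGT_ENTRIES * PAGE_SIZE;                /*   2 MiB */
static const size_t span_pdpt = PGT_ENTRIES * PGT_ENTRIES * PAGE_SIZE;  /*   1 GiB */
static const size_t span_pml4 = (size_t)PGT_ENTRIES * PGT_ENTRIES * PGT_ENTRIES * PAGE_SIZE; /* 512 GiB */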
@@ -569,19 +463,18 @@ int unmap_region(size_t viraddr, uint32_t npages)
     }

     spinlock_unlock(pgd_lock);
-#endif

     return 0;
 }

 int vm_free(size_t viraddr, uint32_t npages)
 {
-#if 0
     task_t* task = per_core(current_task);
     spinlock_t* pgd_lock;
-    uint32_t i;
-    uint32_t index1, index2;
     page_table_t* pgt;
+    size_t i;
+    uint16_t idx_pd4, idx_dirp;
+    uint16_t idx_dir, idx_table;

     if (BUILTIN_EXPECT(!task || !task->pgd || !paging_enabled, 0))
         return -EINVAL;
@@ -593,61 +486,47 @@ int vm_free(size_t viraddr, uint32_t npages)
     spinlock_lock(pgd_lock);

-    for(i=0; i<npages; i++, viraddr+=PAGE_SIZE)
+    i = 0;
+    while(i<npages)
     {
-        index1 = viraddr >> 22;
-        index2 = (viraddr >> 12) & 0x3FF;
+        idx_pd4 = (viraddr >> 39) & 0x1FF;
+        idx_dirp = (viraddr >> 30) & 0x1FF;
+        idx_dir = (viraddr >> 21) & 0x1FF;
+        idx_table = (viraddr >> 12) & 0x1FF;

-        pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
-        if (!pgt)
-            continue;
-        pgt->entries[index2] = 0;
+        // Currently, we allocate pages only in kernel space.
+        // => physical address of the page table is identical of the virtual address
+        pgt = (page_table_t*) (task->pgd->entries[idx_pd4] & PAGE_MASK);
+        if (!pgt) {
+            viraddr += (size_t) PGT_ENTRIES*PGT_ENTRIES*PGT_ENTRIES*PAGE_SIZE;
+            i += PGT_ENTRIES*PGT_ENTRIES*PGT_ENTRIES;
+            continue;
+        }
+
+        pgt = (page_table_t*) (pgt->entries[idx_dirp] & PAGE_MASK);
+        if (!pgt) {
+            viraddr += PGT_ENTRIES*PGT_ENTRIES*PAGE_SIZE;
+            i += PGT_ENTRIES*PGT_ENTRIES;
+            continue;
+        }
+
+        pgt = (page_table_t*) (pgt->entries[idx_dir] & PAGE_MASK);
+        if (!pgt) {
+            viraddr += PGT_ENTRIES*PAGE_SIZE;
+            i += PGT_ENTRIES;
+            continue;
+        }
+
+        if (pgt->entries[idx_table])
+            pgt->entries[idx_table] = 0;
+        viraddr +=PAGE_SIZE;
+        i++;

         tlb_flush_one_page(viraddr);
     }

     spinlock_unlock(pgd_lock);
-#endif
-    return 0;
-}
-
-#if 0
-int print_paging_tree(size_t viraddr)
-{
-    task_t* task = per_core(current_task);
-    uint32_t index1, index2;
-    page_dir_t* pgd = NULL;
-    page_table_t* pgt = NULL;
-
-    if (BUILTIN_EXPECT(!viraddr, 0))
-        return -EINVAL;
-
-    index1 = viraddr >> 22;
-    index2 = (viraddr >> 12) & 0x3FF;
-
-    spinlock_lock(&task->pgd_lock);
-
-    kprintf("Paging dump of address 0x%x\n", viraddr);
-    pgd = task->pgd;
-    kprintf("\tPage directory entry %u: ", index1);
-    if (pgd) {
-        kprintf("0x%0x\n", pgd->entries[index1]);
-        pgt = (page_table_t*) (pgd->entries[index1] & PAGE_MASK);
-    } else
-        kputs("invalid page directory\n");
-
-    /* convert physical address to virtual */
-    if (paging_enabled && pgt)
-        pgt = (page_table_t*) (KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE);
-
-    kprintf("\tPage table entry %u: ", index2);
-    if (pgt)
-        kprintf("0x%x\n", pgt->entries[index2]);
-    else
-        kputs("invalid page table\n");
-
-    spinlock_unlock(&task->pgd_lock);

     return 0;
 }
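
vm_free above clears the page-table entry and then invalidates the stale TLB entry with tlb_flush_one_page(). The helper is defined elsewhere in MetalSVM; on x86 such a routine typically boils down to a single invlpg instruction, roughly as in the following sketch (an assumption about the usual idiom, not code from this diff):

#include <stddef.h>

static inline void tlb_flush_one_page(size_t viraddr)
{
    /* invalidate the TLB entry covering exactly this one page */
    asm volatile("invlpg (%0)" :: "r"(viraddr) : "memory");
}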
@@ -655,14 +534,12 @@ int print_paging_tree(size_t viraddr)
 static void pagefault_handler(struct state *s)
 {
     task_t* task = per_core(current_task);
-    page_dir_t* pgd = task->pgd;
-    page_table_t* pgt = NULL;
+    //page_dir_t* pgd = task->pgd;
+    //page_table_t* pgt = NULL;
     size_t viraddr = read_cr2();
-    size_t phyaddr;
-#ifdef CONFIG_ROCKCREEK
-    uint32_t index1, index2;
-#endif
+    //size_t phyaddr;

+#if 0
     if ((viraddr >= task->start_heap) && (viraddr <= task->end_heap) && (viraddr > KERNEL_SPACE)) {
         viraddr = viraddr & PAGE_MASK;
@@ -678,87 +555,28 @@ static void pagefault_handler(struct state *s)
             kprintf("Could not map 0x%x at 0x%x\n", phyaddr, viraddr);
             put_page(phyaddr);
     }
-
-#ifdef CONFIG_ROCKCREEK
-    // does our SVM system need to handle this page fault?
-    index1 = viraddr >> 22;
-    index2 = (viraddr >> 12) & 0x3FF;
-    if (!pgd || !(pgd->entries[index1] & PAGE_MASK))
-        goto default_handler;
-    pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
-    if (!pgt || !(pgt->entries[index2]))
-        goto default_handler;
-    if (pgt->entries[index2] & PG_SVM_INIT) {
-        if (BUILTIN_EXPECT(!svm_alloc_page(viraddr, pgt), 1))
-            return;
-        else
-            goto default_handler;
-    }
-    if (pgt->entries[index2] & PG_SVM_STRONG)
-        if (BUILTIN_EXPECT(!svm_access_request(viraddr), 1))
-            return;
 #endif

 default_handler:
-    kprintf("PAGE FAULT: Task %u got page fault at %p (irq %d, cs:eip 0x%x:0x%x)\n", task->id, viraddr, s->int_no, s->cs, s->eip);
-    kprintf("Register state: eax = 0x%x, ebx = 0x%x, ecx = 0x%x, edx = 0x%x, edi = 0x%x, esi = 0x%x, ebp = 0x%x, esp = 0x%x\n",
-        s->eax, s->ebx, s->ecx, s->edx, s->edi, s->esi, s->ebp, s->esp);
+    kprintf("PAGE FAULT: Task %u got page fault at %p (irq %llu, cs:rip 0x%llx:0x%llx)\n", task->id, viraddr, s->int_no, s->cs, s->rip);
+    kprintf("Register state: rax = 0x%llx, rbx = 0x%llx, rcx = 0x%llx, rdx = 0x%llx, rdi = 0x%llx, rsi = 0x%llx, rbp = 0x%llx, rsp = 0x%llx\n",
+        s->rax, s->rbx, s->rcx, s->rdx, s->rdi, s->rsi, s->rbp, s->rsp);
+    while(1);

     irq_enable();
     abort();
 }
-#endif

 int arch_paging_init(void)
 {
-#if 0
-    uint32_t i, npages, index1, index2;
-    page_table_t* pgt;
-    size_t viraddr;
+    uint32_t i, npages;

     // uninstall default handler and install our own
     irq_uninstall_handler(14);
     irq_install_handler(14, pagefault_handler);

-    // Create a page table to reference to the other page tables
-    pgt = (page_table_t*) get_page();
-    if (!pgt) {
-        kputs("arch_paging_init: Not enough memory!\n");
-        return -ENOMEM;
-    }
-    memset(pgt, 0, PAGE_SIZE);
-
-    // map this table at the end of the kernel space
-    viraddr = KERNEL_SPACE - PAGE_SIZE;
-    index1 = viraddr >> 22;
-    index2 = (viraddr >> 12) & 0x3FF;
-
-    // now, we create a self reference
-    per_core(current_task)->pgd->entries[index1] = (((size_t) pgt) & PAGE_MASK)|KERN_TABLE;
-    pgt->entries[index2] = ((size_t) pgt & PAGE_MASK)|KERN_PAGE;
-
-    // create the other PGTs for the kernel space
-    for(i=0; i<KERNEL_SPACE/(1024*PAGE_SIZE)-1; i++) {
-        size_t phyaddr = get_page();
-
-        if (!phyaddr) {
-            kputs("arch_paging_init: Not enough memory!\n");
-            return -ENOMEM;
-        }
-        memset((void*) phyaddr, 0, PAGE_SIZE);
-        per_core(current_task)->pgd->entries[i] = (phyaddr & PAGE_MASK)|KERN_TABLE;
-        pgt->entries[i] = (phyaddr & PAGE_MASK)|KERN_PAGE;
-    }
-
-    /*
-     * Set the page table and page directory entries for the kernel. We map the kernel's physical address
-     * to the same virtual address.
-     */
-    npages = ((size_t) &kernel_end - (size_t) &kernel_start) >> PAGE_SHIFT;
-    if ((size_t)&kernel_end & (PAGE_SIZE-1))
-        npages++;
-    map_region((size_t)&kernel_start, (size_t)&kernel_start, npages, MAP_KERNEL_SPACE);
+    // kernel is already maped into the kernel space (see entry64.asm)
+    // this includes .data, .bss, .text, video memory and the multiboot structure

 #if MAX_CORES > 1
     // Reserve page for smp boot code
@@ -768,18 +586,7 @@ int arch_paging_init(void)
     }
 #endif

-#ifdef CONFIG_VGA
-    // map the video memory into the kernel space
-    map_region(VIDEO_MEM_ADDR, VIDEO_MEM_ADDR, 1, MAP_KERNEL_SPACE|MAP_NO_CACHE);
-#endif
-
 #ifdef CONFIG_MULTIBOOT
-    /*
-     * of course, mb_info has to map into the kernel space
-     */
-    if (mb_info)
-        map_region((size_t) mb_info & PAGE_MASK, (size_t) mb_info & PAGE_MASK, 1, MAP_KERNEL_SPACE);
-
 #if 0
     /*
      * Map reserved memory regions into the kernel space
@@ -817,46 +624,15 @@ int arch_paging_init(void)
             npages = (mmodule->mod_end - mmodule->mod_start) >> PAGE_SHIFT;
             if (mmodule->mod_end & (PAGE_SIZE-1))
                 npages++;
-            //kprintf("Map module %s at 0x%x (%u pages)\n", (char*) mmodule->cmdline, mmodule->mod_start, npages);
             map_region((size_t) (mmodule->mod_start), (size_t) (mmodule->mod_start), npages, MAP_KERNEL_SPACE);
         }
     }
 #endif

-#ifdef CONFIG_ROCKCREEK
-    // map SCC's bootinfo
-    viraddr = map_region(SCC_BOOTINFO, SCC_BOOTINFO, 1, MAP_KERNEL_SPACE);
-    kprintf("Map SCC's bootinfos at 0x%x\n", viraddr);
-    // map SCC's configuration registers
-    viraddr = map_region(CRB_X0_Y0, CRB_X0_Y0, (CRB_OWN-CRB_X0_Y0+16*1024*1024) >> PAGE_SHIFT, MAP_KERNEL_SPACE|MAP_NO_CACHE);
-    kprintf("Map configuration registers at 0x%x\n", viraddr);
-    // map SCC's message passing buffers
-    viraddr = map_region(MPB_X0_Y0, MPB_X0_Y0, (MPB_OWN-MPB_X0_Y0+16*1024*1024) >> PAGE_SHIFT, MAP_KERNEL_SPACE|MAP_MPE);
-    kprintf("Map message passing buffers at 0x%x\n", viraddr);
-    // map the FPGA registers
-    viraddr = map_region(FPGA_BASE, FPGA_BASE, 0x10000 >> PAGE_SHIFT, MAP_KERNEL_SPACE|MAP_NO_CACHE);
-    kprintf("Map FPGA regsiters at 0x%x\n", viraddr);
-#endif
-
-    /* enable paging */
-    write_cr3((size_t) &boot_pgd);
-    i = read_cr0();
-    i = i | (1 << 31);
-    write_cr0(i);
+    /* signalize that we are able to use paging */
     paging_enabled = 1;

-#ifdef CONFIG_ROCKCREEK
-    // map the initial ramdisk
-    npages = bootinfo->size >> PAGE_SHIFT;
-    if (bootinfo->size & (PAGE_SIZE-1))
-        npages++;
-    viraddr = map_region(0, bootinfo->addr, npages, MAP_KERNEL_SPACE);
-    kprintf("Map initrd from 0x%x to 0x%x (size %u bytes)\n", bootinfo->addr, viraddr, bootinfo->size);
-    bootinfo->addr = viraddr;
-#endif
-
     /*
      * we turned on paging
      * => now, we are able to register our task for Task State Switching
@@ -865,7 +641,6 @@ int arch_paging_init(void)
     // APIC registers into the kernel address space
     map_apic();
-#endif

     return 0;
 }
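
Two 64-bit details are visible in this file's hunks. First, the page-fault dump now prints the 64-bit register file (rax through rsp, rip) with %llx, which implies a widened struct state. A hypothetical layout consistent with the fields the handler touches; the actual field order and the error-code/rflags slots are defined by MetalSVM's ISR stubs, not by this diff, so treat everything here as an assumption:

#include <stdint.h>

struct state {
    uint64_t rdi, rsi, rbp, rsp;
    uint64_t rax, rbx, rcx, rdx;
    uint64_t int_no, error;     /* presumably pushed by the ISR stub */
    uint64_t rip, cs, rflags;   /* pushed by the CPU on the exception */
};

Second, arch_paging_init no longer writes CR3 and sets CR0 bit 31: on x86-64 paging must already be active before long mode can be entered at all, so entry64.asm has to build the boot page tables and enable paging before any C code runs, and the function merely flips the paging_enabled flag.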

View file

@@ -456,8 +456,8 @@ int initrd_init(void)
             initrd_file_desc_t* file_desc;
             vfs_node_t* new_node;

-            if (header->magic != INITRD_MAGIC_NUMBER) {
-                kprintf("Invalid magic number for a init ram disk\n");
+            if (BUILTIN_EXPECT(header->magic != INITRD_MAGIC_NUMBER, 0)) {
+                kprintf("Invalid magic number for a init ram disk: 0x%x\n", header->magic);
                 continue;
             }
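
The initrd check now wraps the comparison in BUILTIN_EXPECT, steering GCC's static branch prediction toward the common case, and prints the offending magic value. A sketch of the pattern, assuming the macro follows the usual wrapper around __builtin_expect (its definition is not part of this diff, and the magic constant below is a placeholder):

#include <stdint.h>

#define BUILTIN_EXPECT(exp, b)  __builtin_expect((exp), (b))
#define INITRD_MAGIC_NUMBER     0x4711  /* placeholder, not the real constant */

static int check_header(uint32_t magic)
{
    /* error path marked unlikely => GCC keeps the hot path as straight-line code */
    if (BUILTIN_EXPECT(magic != INITRD_MAGIC_NUMBER, 0))
        return -1;
    return 0;
}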

View file

@@ -62,17 +62,12 @@ int main(void)
 {
     tid_t id;

-    vga_init();
-    vga_puts("aaa");
-    //lowlevel_init();
+    lowlevel_init();

     pushbg(COL_BLUE);
     kprintf("This is MetalSVM %s Build %u, %u\n",
         METALSVM_VERSION, &__BUILD_DATE, &__BUILD_TIME);
     popbg();
-
-    while(1);
-
     system_init();
     irq_init();
     timer_init();

View file

@@ -719,7 +719,6 @@ invalid:
     return -EINVAL;
 #else
-#warning Currently, not supported!
     return -EINVAL;
 #endif
 }
@@ -786,7 +785,6 @@ int create_user_task(tid_t* id, const char* fname, char** argv)
             while ((*dest++ = *src++) != 0);
     }

-
     /* create new task */
     return create_task(id, user_entry, load_args, NORMAL_PRIO);
 }

View file

@@ -20,8 +20,8 @@ SECTIONS
   .data ALIGN(4096) : AT(ADDR(.data)) {
     *(.data)
   }
-  bss_start = .;
   .bss ALIGN(4096) : AT(ADDR(.bss)) {
+    bss_start = .;
     *(.bss)
   }
   bss_end = .;

View file

@@ -20,8 +20,8 @@ SECTIONS
   .data ALIGN(4096) : AT(ADDR(.data)) {
     *(.data)
   }
-  bss_start = .;
   .bss ALIGN(4096) : AT(ADDR(.bss)) {
+    bss_start = .;
     *(.bss)
   }
   bss_end = .;
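
Both linker-script hunks above move bss_start inside the .bss output section. Previously the symbol took the location counter right after .data, i.e. before the ALIGN(4096) padding, so bss_start could point below the real, page-aligned start of .bss; now it equals ADDR(.bss). That matters to whatever zeroes the BSS by symbol range, typically something along these lines (a sketch of the consumer under that assumption, not code from this commit):

#include <stdint.h>

extern uint8_t bss_start, bss_end;  /* symbols provided by the linker scripts above */

static void clear_bss(void)
{
    uint8_t* p;

    /* zero everything between the (now page-aligned) BSS bounds */
    for (p = &bss_start; p < &bss_end; p++)
        *p = 0;
}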

View file

@@ -96,7 +96,7 @@ int mmu_init(void)
     size_t kernel_size;
     unsigned int i;
     size_t addr;
-    int ret;
+    int ret = 0;

     // at first, set default value of the bitmap
     memset(bitmap, 0xFF, sizeof(uint8_t)*BITMAP_SIZE);
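
mmu_init tracks physical memory in a bitmap, one bit per page frame: the memset above starts with every frame marked used, and known-free regions are cleared afterwards. A sketch of the mark/unmark helpers this implies, assuming the usual one-bit-per-page encoding (MetalSVM's real page_set_mark, used in the hunks below, may differ in detail):

#include <stddef.h>
#include <stdint.h>

extern uint8_t bitmap[];    /* the frame bitmap initialized above */

static inline void page_set_mark(size_t pfn)
{
    bitmap[pfn >> 3] |= (uint8_t)(1 << (pfn & 7));   /* frame pfn -> used */
}

static inline void page_clear_mark(size_t pfn)
{
    bitmap[pfn >> 3] &= (uint8_t)~(1 << (pfn & 7));  /* frame pfn -> free */
}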
@@ -128,35 +128,6 @@ int mmu_init(void)
             HALT;
         }
     }
-
-    /*
-     * Modules like the init ram disk are already loaded.
-     * Therefore, we set these pages as used.
-     */
-    if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MODS)) {
-        multiboot_module_t* mmodule = (multiboot_module_t*) ((size_t) mb_info->mods_addr);
-
-        /*
-         * Mark the mb_info as used.
-         */
-        page_set_mark((size_t)mb_info >> PAGE_SHIFT);
-        atomic_int32_inc(&total_allocated_pages);
-        atomic_int32_dec(&total_available_pages);
-
-        for(addr = mb_info->mods_addr; addr < mb_info->mods_addr + mb_info->mods_count * sizeof(multiboot_module_t); addr += PAGE_SIZE) {
-            page_set_mark(addr >> PAGE_SHIFT);
-            atomic_int32_inc(&total_allocated_pages);
-            atomic_int32_dec(&total_available_pages);
-        }
-
-        for(i=0; i<mb_info->mods_count; i++, mmodule++) {
-            for(addr=mmodule->mod_start; addr<mmodule->mod_end; addr+=PAGE_SIZE) {
-                page_set_mark(addr >> PAGE_SHIFT);
-                atomic_int32_inc(&total_allocated_pages);
-                atomic_int32_dec(&total_available_pages);
-            }
-        }
-    }
 #elif defined(CONFIG_ROCKCREEK)
     /* of course, the first slots belong to the private memory */
     for(addr=0x00; addr<1*0x1000000; addr+=PAGE_SIZE) {
@@ -210,8 +181,39 @@ int mmu_init(void)
     atomic_int32_sub(&total_available_pages, 1);
 #endif

     ret = paging_init();
+    if (ret)
+        return ret;

-#ifdef CONFIG_ROCKCREEK
+#ifdef CONFIG_MULTIBOOT
+    /*
+     * Modules like the init ram disk are already loaded.
+     * Therefore, we set these pages as used.
+     */
+    if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MODS)) {
+        multiboot_module_t* mmodule = (multiboot_module_t*) ((size_t) mb_info->mods_addr);
+
+        /*
+         * Mark the mb_info as used.
+         */
+        page_set_mark((size_t)mb_info >> PAGE_SHIFT);
+        atomic_int32_inc(&total_allocated_pages);
+        atomic_int32_dec(&total_available_pages);
+
+        for(addr = mb_info->mods_addr; addr < mb_info->mods_addr + mb_info->mods_count * sizeof(multiboot_module_t); addr += PAGE_SIZE) {
+            page_set_mark(addr >> PAGE_SHIFT);
+            atomic_int32_inc(&total_allocated_pages);
+            atomic_int32_dec(&total_available_pages);
+        }
+
+        for(i=0; i<mb_info->mods_count; i++, mmodule++) {
+            for(addr=mmodule->mod_start; addr<mmodule->mod_end; addr+=PAGE_SIZE) {
+                page_set_mark(addr >> PAGE_SHIFT);
+                atomic_int32_inc(&total_allocated_pages);
+                atomic_int32_dec(&total_available_pages);
+            }
+        }
+    }
+#elif defined(CONFIG_ROCKCREEK)
     /*
      * Now, we are able to read the FPGA registers and to
      * determine the number of slots for private memory.