add alpha version of x64 support

New features:
- support for kernel tasks in 64bit mode
- support for LwIP in 64bit mode

Missing features in 64bit mode:
- user-level support
- APIC support (and consequently SMP support)

To create a 64bit version of the MetalSVM kernel, the compiler flags “-m64 -mno-red-zone” and the assembler flag “-felf64” have to be used. Please use qemu-system-x86_64 as the test platform.
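In the top-level Makefile these settings correspond to the following variable values (a sketch assembled from the comments in the Makefile hunk below; BIT, CFLAGS, NASMFLAGS and QEMU are the variables defined there):

BIT = 64
CFLAGS = -g -m64 -mno-red-zone -Wall -O2 -fstrength-reduce -fomit-frame-pointer -finline-functions -ffreestanding $(INCLUDE) $(STACKPROT)
NASMFLAGS = -felf64 -g -i$(TOPDIR)/include/metalsvm/
QEMU = qemu-system-x86_64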

Note that metalsvm.elf is a 32bit ELF file. However, it contains (besides the startup code) only 64bit code. This is required because GRUB doesn’t boot 64bit ELF kernels. Therefore, the flag “-M x86-64” has to be used when disassembling via objdump.
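For example, a disassembly of the kernel image could look like this (metalsvm.elf is the build output named above):

objdump -d -M x86-64 metalsvm.elf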
Stefan Lankes 2012-06-10 08:05:24 +02:00
parent ed2186ee03
commit 227cc19890
23 changed files with 517 additions and 660 deletions

View File

@ -1,5 +1,6 @@
TOPDIR = $(shell pwd)
ARCH = x86
# For 64bit support, you have to define BIT as 64
BIT=32
NAME = metalsvm
LWIPDIRS = lwip/src/arch lwip/src/api lwip/src/core lwip/src/core/ipv4 lwip/src/netif
@ -32,23 +33,29 @@ READELF_FOR_TARGET = $(CROSSCOMPREFIX)readelf
MAKE = make
RM = rm -rf
NASM = nasm
# For 64bit code, you have to use qemu-system-x86_64
QEMU = qemu-system-i386
EMU = qemu
GDB = gdb
# For 64bit support, you have to define -felf64 instead of -felf32
NASMFLAGS = -felf32 -g -i$(TOPDIR)/include/metalsvm/
INCLUDE = -I$(TOPDIR)/include -I$(TOPDIR)/arch/$(ARCH)/include -I$(TOPDIR)/lwip/src/include -I$(TOPDIR)/lwip/src/include/ipv4 -I$(TOPDIR)/drivers
# For 64bit support, you have to define "-m64 -mno-red-zone" instead of "-m32 -march=i586"
# Compiler options for final code
CFLAGS = -g -m32 -march=i586 -Wall -O2 -fno-builtin -fstrength-reduce -fomit-frame-pointer -finline-functions -nostdinc $(INCLUDE) $(STACKPROT)
CFLAGS = -g -m32 -march=i586 -Wall -O2 -fstrength-reduce -fomit-frame-pointer -finline-functions -ffreestanding $(INCLUDE) $(STACKPROT)
# Compiler options for debugging
#CFLAGS = -g -O -m32 -march=i586 -Wall -fno-builtin -DWITH_FRAME_POINTER -nostdinc $(INCLUDE) $(STACKPROT)
#CFLAGS = -g -O -m32 -march=i586 -Wall -DWITH_FRAME_POINTER -ffreestanding $(INCLUDE) $(STACKPROT)
ARFLAGS = rsv
LDFLAGS = -T link$(BIT).ld -z max-page-size=4096 --defsym __BUILD_DATE=$(shell date +'%Y%m%d') --defsym __BUILD_TIME=$(shell date +'%H%M%S')
STRIP_DEBUG = --strip-debug
KEEP_DEBUG = --only-keep-debug
OUTPUT_FORMAT = -O elf32-i386
# For 64bit support, you have to define -m64 instead of "-m32 -march=i586"
CFLAGS_FOR_NEWLIB = -m32 -march=i586 -O2 $(STACKPROT)
# For 64bit support, you have to define -m64 instead of "-m32 -march=i586"
LDFLAGS_FOR_NEWLIB = -m32 -march=i586
# For 64bit support, you have to define -felf64 instead of -felf32
NASMFLAGS_FOR_NEWLIB = -felf32
# Prettify output

View File

@ -36,35 +36,37 @@ extern "C" {
#endif
/// This segment is a data segment
#define GDT_FLAG_DATASEG 0x02
#define GDT_FLAG_DATASEG 0x02
/// This segment is a code segment
#define GDT_FLAG_CODESEG 0x0a
#define GDT_FLAG_TSS 0x09
#define GDT_FLAG_CODESEG 0x0a
#define GDT_FLAG_TSS 0x09
#define GDT_FLAG_TSS_BUSY 0x02
#define GDT_FLAG_SEGMENT 0x10
#define GDT_FLAG_SEGMENT 0x10
/// Privilege level: Ring 0
#define GDT_FLAG_RING0 0x00
#define GDT_FLAG_RING0 0x00
/// Privilege level: Ring 1
#define GDT_FLAG_RING1 0x20
#define GDT_FLAG_RING1 0x20
/// Privilege level: Ring 2
#define GDT_FLAG_RING2 0x40
#define GDT_FLAG_RING2 0x40
/// Privilege level: Ring 3
#define GDT_FLAG_RING3 0x60
#define GDT_FLAG_RING3 0x60
/// Segment is present
#define GDT_FLAG_PRESENT 0x80
#define GDT_FLAG_PRESENT 0x80
/**
* @brief Granularity of segment limit
* - set: segment limit unit is 4 KB (page size)
* - not set: unit is bytes
*/
#define GDT_FLAG_4K_GRAN 0x80
#define GDT_FLAG_4K_GRAN 0x80
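/* Illustration (not part of this patch): with GDT_FLAG_4K_GRAN set, the 20bit
 * segment limit counts 4 KB units, so the maximum limit of 0xFFFFF covers
 * 0x100000 * 4 KB = 4 GB - exactly the flat segments set up in gdt.c below. */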
/**
* @brief Default operand size
* - set: 32 bit
* - not set: 16 bit
*/
#define GDT_FLAG_32_BIT 0x40
#define GDT_FLAG_16_BIT 0x00
#define GDT_FLAG_32_BIT 0x40
#define GDT_FLAG_64_BIT 0x20
/** @brief Defines a GDT entry
*
@ -74,17 +76,17 @@ extern "C" {
*/
typedef struct {
/// Lower 16 bits of limit range
unsigned short limit_low;
uint16_t limit_low;
/// Lower 16 bits of base address
unsigned short base_low;
uint16_t base_low;
/// middle 8 bits of base address
unsigned char base_middle;
uint8_t base_middle;
/// Access bits
unsigned char access;
uint8_t access;
/// Granularity bits
unsigned char granularity;
uint8_t granularity;
/// Higher 8 bits of base address
unsigned char base_high;
uint8_t base_high;
} __attribute__ ((packed)) gdt_entry_t;
/** @brief defines the GDT pointer structure
@ -93,9 +95,9 @@ typedef struct {
*/
typedef struct {
/// Size of the table in bytes (not the number of entries!)
unsigned short limit;
uint16_t limit;
/// Address of the table
unsigned int base;
size_t base;
} __attribute__ ((packed)) gdt_ptr_t;
/// Defines the maximum number of GDT entries
@ -115,16 +117,6 @@ typedef struct {
*/
void gdt_install(void);
/** @brief Configures and returns a GDT descriptor with chosen attributes
*
* Just feed this function with address, limit and the flags
* you have seen in idt.h
*
* @return a preconfigured gdt descriptor
*/
gdt_entry_t configure_gdt_entry(unsigned long base, unsigned long limit,
unsigned char access, unsigned char gran);
#ifdef __cplusplus
}
#endif

View File

@ -71,15 +71,21 @@ extern "C" {
*/
typedef struct {
/// Handler function's lower 16 address bits
unsigned short base_lo;
uint16_t base_lo;
/// Handler function's segment selector.
unsigned short sel;
uint16_t sel;
/// These bits are reserved by Intel
unsigned char always0;
uint8_t always0;
/// These 8 bits contain flags. Exact use depends on the type of interrupt gate.
unsigned char flags;
uint8_t flags;
/// Higher 16 bits of handler function's base address
unsigned short base_hi;
uint16_t base_hi;
#ifdef CONFIG_X86_64
/// In 64 bit mode, the "highest" 32 bits of the handler function's base address
uint32_t base_hi64;
/// reserved entries
uint32_t reserved;
#endif
} __attribute__ ((packed)) idt_entry_t;
/** @brief Defines the idt pointer structure.
@ -89,9 +95,9 @@ typedef struct {
*/
typedef struct {
/// Size of the IDT in bytes (not the number of entries!)
unsigned short limit;
uint16_t limit;
/// Base address of the IDT
unsigned int base;
size_t base;
} __attribute__ ((packed)) idt_ptr_t;
/** @brief Installs IDT
@ -113,16 +119,6 @@ void idt_install(void);
void idt_set_gate(unsigned char num, size_t base, unsigned short sel,
unsigned char flags);
/** @brief Configures and returns a IDT entry with chosen attributes
*
* Just feed this function with base, selector and the flags
* you have seen in idt.h
*
* @return a preconfigured idt descriptor
*/
idt_entry_t configure_idt_entry(size_t base, unsigned short sel,
unsigned char flags);
#ifdef __cplusplus
}
#endif

View File

@ -93,7 +93,7 @@
/** @brief Page table structure
*
* This structure keeps page table entries.\n
* A page table consists of 1024 entries.
* On a 32bit system, a page table normally consists of 1024 entries.
*/
typedef struct page_table
{
@ -104,7 +104,7 @@ typedef struct page_table
/** @brief Page directory structure
*
* This structure keeps page directory entries.\
* A page directory consists of 1024 entries.
* On a 32bit system, a page directory normally consists of 1024 entries.
*/
typedef struct page_dir
{
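/* Illustration (not part of this patch): a paging structure occupies one
 * 4 KB page, so its entry count is PAGE_SIZE / sizeof(entry): 4096 / 4 = 1024
 * entries with 4 byte entries on a 32bit system, and 4096 / 8 = 512 entries
 * with 8 byte entries in 64bit mode. */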

View File

@ -335,7 +335,9 @@ uint32_t read_eip(void);
inline static int system_init(void)
{
gdt_install();
#ifdef CONFIG_X86_32
apic_init();
#endif
#ifdef CONFIG_PCI
pci_init();
#endif

View File

@ -85,33 +85,79 @@ typedef unsigned int wint_t;
* All the interrupt handler routines use this type for their only parameter.
*/
struct state {
#ifdef CONFIG_X86_32
/// EDI register
unsigned int edi;
uint32_t edi;
/// ESI register
unsigned int esi;
uint32_t esi;
/// EBP register
unsigned int ebp;
uint32_t ebp;
/// ESP register
unsigned int esp;
uint32_t esp;
/// EBX register
unsigned int ebx;
uint32_t ebx;
/// EDX register
unsigned int edx;
uint32_t edx;
/// ECX register
unsigned int ecx;
uint32_t ecx;
/// EAX register
unsigned int eax; /* pushed by 'pusha' */
uint32_t eax; /* pushed by 'pusha' */
/// Interrupt number
unsigned int int_no;
uint32_t int_no;
// pushed by the processor automatically
unsigned int error;
unsigned int eip;
unsigned int cs;
unsigned int eflags;
unsigned int useresp;
unsigned int ss;
uint32_t error;
uint32_t eip;
uint32_t cs;
uint32_t eflags;
uint32_t useresp;
uint32_t ss;
#elif defined(CONFIG_X86_64)
/// R15 register
uint64_t r15;
/// R14 register
uint64_t r14;
/// R13 register
uint64_t r13;
/// R12 register
uint64_t r12;
/// R11 register
uint64_t r11;
/// R10 register
uint64_t r10;
/// R9 register
uint64_t r9;
/// R8 register
uint64_t r8;
/// RDI register
uint64_t rdi;
/// RSI register
uint64_t rsi;
/// RBP register
uint64_t rbp;
/// (pseudo) RSP register
uint64_t rsp;
/// RBX register
uint64_t rbx;
/// RDX register
uint64_t rdx;
/// RCX register
uint64_t rcx;
/// RAX register
uint64_t rax;
/// Interrupt number
uint64_t int_no;
// pushed by the processor automatically
uint64_t error;
uint64_t rip;
uint64_t cs;
uint64_t rflags;
uint64_t userrsp;
uint64_t ss;
#endif
};
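/* Note (illustration, not part of this patch): the member order mirrors the
 * push sequence of the interrupt stubs in entry.asm/entry64.asm. The register
 * pushed last ends up at the lowest address and thus first in the struct;
 * the values pushed by the CPU itself (error code, instruction pointer, code
 * segment, flags, stack pointer, stack segment) follow at the end. */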
uint32_t apic_cpu_id(void);

View File

@ -96,11 +96,12 @@ inline static void *memcpy(void *dest, const void *src, size_t count)
*/
inline static void *memcpy(void* dest, const void *src, size_t count)
{
int32_t i, j, k;
size_t i, j, k;
if (BUILTIN_EXPECT(!dest || !src, 0))
return dest;
#ifdef CONFIG_X86_32
asm volatile (
"cld; rep movsl\n\t"
"movl %4, %%ecx\n\t"
@ -108,6 +109,15 @@ inline static void *memcpy(void* dest, const void *src, size_t count)
"rep movsb\n\t"
: "=&c"(i), "=&D"(j), "=&S"(k)
: "0"(count/4), "g"(count), "1"(dest), "2"(src) : "memory","cc");
#elif defined(CONFIG_X86_64)
asm volatile (
"cld; rep movsq\n\t"
"movq %4, %%rcx\n\t"
"andq $7, %%rcx\n\t"
"rep movsb\n\t"
: "=&c"(i), "=&D"(j), "=&S"(k)
: "0"(count/8), "g"(count), "1"(dest), "2"(src) : "memory","cc");
#endif
return dest;
}
@ -128,7 +138,7 @@ inline static void *memcpy(void* dest, const void *src, size_t count)
*/
inline static void *memset(void* dest, int val, size_t count)
{
int32_t i, j;
size_t i, j;
if (BUILTIN_EXPECT(!dest, 0))
return dest;
@ -155,7 +165,7 @@ inline static void *memset(void* dest, int val, size_t count)
*/
inline static void *memset(void* dest, int val, size_t count)
{
int32_t i, j;
size_t i, j;
if (BUILTIN_EXPECT(!dest, 0))
return dest;
@ -184,15 +194,22 @@ inline static void *memset(void* dest, int val, size_t count)
inline static size_t strlen(const char* str)
{
size_t len = 0;
uint32_t i, j;
size_t i, j;
if (BUILTIN_EXPECT(!str, 0))
return len;
#ifdef CONFIG_X86_32
asm volatile("not %%ecx; cld; repne scasb; not %%ecx; dec %%ecx"
: "=&c"(len), "=&D"(i), "=&a"(j)
: "2"(0), "1"(str), "0"(len)
: "memory","cc");
#elif defined(CONFIG_X86_64)
asm volatile("not %%rcx; cld; repne scasb; not %%rcx; dec %%rcx"
: "=&c"(len), "=&D"(i), "=&a"(j)
: "2"(0), "1"(str), "0"(len)
: "memory","cc");
#endif
return len;
}

View File

@ -95,11 +95,11 @@ static inline int jump_to_user_code(uint32_t ep, uint32_t stack)
asm volatile ("mov %0, %%ds; mov %0, %%fs; mov %0, %%gs; mov %0, %%es" :: "r"(0x23));
asm volatile ("push $0x23; push %0; push $0x1B; push %1" :: "r"(stack), "r"(ep));
asm volatile ("lret" ::: "cc");
#else
#warning Currently, not supported!
#endif
return 0;
#else
return -22;
#endif
}
/** @brief determines the stack of a specific task

View File

@ -35,6 +35,7 @@ extern "C" {
/** @brief The task state segment structure
*/
typedef struct {
#ifdef CONFIG_X86_32
uint16_t backlink, __blh;
uint32_t esp0;
uint16_t ss0, __ss0h;
@ -55,6 +56,24 @@ typedef struct {
uint16_t gs, __gsh;
uint16_t ldt, __ldth;
uint16_t trace, bitmap;
#endif
#ifdef CONFIG_X86_64
uint16_t res0, res1; // reserved entries
uint64_t rsp0;
uint64_t rsp1;
uint64_t rsp2;
uint32_t res2, res3; // reserved entries
uint64_t ist_rsp1;
uint64_t ist_rsp2;
uint64_t ist_rsp3;
uint64_t ist_rsp4;
uint64_t ist_rsp5;
uint64_t ist_rsp6;
uint64_t ist_rsp7;
uint32_t res4; // reserved entries
uint32_t res5; // reserved entries
uint16_t res6, bitmap;
#endif
} __attribute__ ((packed)) tss_t;
#ifdef __cplusplus

View File

@ -66,11 +66,13 @@ ALIGN 4
stublet:
; initialize stack pointer.
mov esp, [default_stack_pointer]
; save pointer to the multiboot structure
push ebx
; initialize cpu features
call cpu_init
; interpret multiboot information
extern multiboot_init
push ebx
; pointer to the multiboot structure is already pushed
call multiboot_init
add esp, 4
@ -769,8 +771,8 @@ switch_context:
pushf ; EFLAGS
push DWORD 0x8 ; CS
push DWORD rollback ; EIP
push DWORD 0 ; Interrupt number
push DWORD 0xc0edbabe ; Error code
push DWORD 0x0 ; Interrupt number
push DWORD 0x00edbabe ; Error code
pusha ; Registers...
jmp common_switch

View File

@ -91,17 +91,17 @@ SECTION .data
; create default page tables for the 64bit kernel
global boot_pgd ; aka PML4
ALIGN 4096 ; of course, the page tables have to be page aligned
NOPTS equ 2
NOPTS equ 512
boot_pgd times 512 DQ 0
pdpt times 512 DQ 0
pd times 512 DQ 0
pt times (NOPTS*512) DQ 0
boot_pdpt times 512 DQ 0
boot_pd times 512 DQ 0
boot_pt times (NOPTS*512) DQ 0
SECTION .text
ALIGN 4
stublet:
mov esp, startup_stack-4
push ebx ; save pointer to multiboot structure
push ebx ; save pointer to the multiboot structure
mov eax, cr0
; enable caching, disable paging and fpu emulation
and eax, 0x1ffffffb
@ -144,13 +144,13 @@ stublet:
mov cr3, edi
; So let's make PML4T[0] point to the PDPT and so on:
mov DWORD [edi], pdpt ; Set the double word at the destination index to pdpt.
mov DWORD [edi], boot_pdpt ; Set the double word at the destination index to pdpt.
or DWORD [edi], 0x00000003 ; Set present and writeable bit
mov edi, pdpt
mov DWORD [edi], pd ; Set the double word at the destination index to pd.
mov edi, boot_pdpt
mov DWORD [edi], boot_pd ; Set the double word at the destination index to pd.
or DWORD [edi], 0x00000003 ; Set present and writeable bit
mov edi, pd
mov ebx, pt
mov edi, boot_pd
mov ebx, boot_pt
mov ecx, NOPTS
L0:
mov DWORD [edi], ebx ; Set the double word at the destination index to pt.
@ -162,16 +162,26 @@ L0:
; map the VGA address into the virtual address space
mov edi, 0xB8000
shr edi, 9 ; (edi >> 12) * 8
add edi, pt
add edi, boot_pt
mov ebx, 0xB8000
or ebx, 0x00000003
mov DWORD [edi], ebx
; map multiboot structure into the virtual address space
mov edi, [esp]
and edi, 0xFFFFF000
shr edi, 9 ; (edi >> 12) * 8
add edi, boot_pt
mov ebx, [esp]
and ebx, 0xFFFFF000
or ebx, 0x00000003
mov DWORD [edi], ebx
extern kernel_start ; defined in linker script
extern kernel_end
mov edi, kernel_start
shr edi, 9 ; (kernel_start >> 12) * 8
add edi, pt
add edi, boot_pt
mov ebx, kernel_start
or ebx, 0x00000003
mov ecx, kernel_end ; determine kernel size in number of pages
@ -246,22 +256,12 @@ cpu_init:
; mov cr4, eax
; ret
; This will set up our new segment registers. We need to do
; something special in order to set CS. We do what is called a
; far jump. A jump that includes a segment as well as an offset.
; This is declared in C as 'extern void gdt_flush();'
; This will set up our new segment registers and is declared in
; C as 'extern void gdt_flush();'
global gdt_flush
extern gp
gdt_flush:
; lgdt [gp]
; mov ax, 0x10
; mov ds, ax
; mov es, ax
; mov fs, ax
; mov gs, ax
; mov ss, ax
; jmp 0x08:flush2
flush2:
lgdt [gp]
ret
; determines the current instruction pointer (after the jmp)
@ -603,7 +603,7 @@ isrsyscall:
push rdi
push rsi
push rbp
push Qword 0
push rsp
push rbx
push rdx
push rcx
@ -616,7 +616,7 @@ isrsyscall:
pop rcx
pop rdx
pop rbx
add rsp, 1*8
add rsp, 8
pop rbp
pop rsi
pop rdi
@ -627,7 +627,7 @@ isrsyscall:
pop r12
pop r13
pop r14
iret
iretq
global irq0
global irq1
@ -926,17 +926,20 @@ ALIGN 8
switch_context:
; create on the stack a pseudo interrupt
; afterwards, we switch to the task with iret
mov rax, [rdi] ; rdi contains the address to store the old rsp
pushf ; EFLAGS
push QWORD 0x8 ; CS
mov rax, rdi ; rdi contains the address to store the old rsp
push QWORD 0x10 ; SS
push rsp ; RSP
add QWORD [rsp], 8*1
pushf ; RFLAGS
push QWORD 0x08 ; CS
push QWORD rollback ; RIP
push QWORD 0 ; Interrupt number
push QWORD 0x0edbabe ; Error code
push QWORD 0x00 ; Interrupt number
push QWORD 0x00edbabe ; Error code
push rax
push rcx
push rdx
push rbx
push Qword 0
push rsp
push rbp
push rsi
push rdi
@ -961,7 +964,7 @@ common_stub:
push rcx
push rdx
push rbx
push Qword 0
push rsp
push rbp
push rsi
push rdi
@ -994,7 +997,7 @@ common_switch:
%else
xor rax, rax
%endif
add eax, task_state_segments
add rax, task_state_segments
; set rsp0 in TSS
mov [rax+4], rsp
@ -1013,13 +1016,13 @@ no_context_switch:
pop rdi
pop rsi
pop rbp
add rsp, 1*8
add rsp, 8
pop rbx
pop rdx
pop rcx
pop rax
add rsp, 16
iret
iretq
SECTION .note.GNU-stack noalloc noexec nowrite progbits

View File

@ -61,8 +61,9 @@ size_t get_stack(uint32_t id)
return (size_t) kstacks[id] + KERNEL_STACK_SIZE - sizeof(size_t);
}
int register_task(task_t* task) {
uint16_t sel;
int register_task(task_t* task)
{
uint16_t sel;
sel = (CORE_ID+5) << 3;
asm volatile ("mov %0, %%ax; ltr %%ax" : : "ir"(sel) : "%eax");
@ -128,8 +129,9 @@ int create_default_frame(task_t* task, entry_point_t ep, void* arg)
{
uint16_t cs = 0x08;
uint32_t id;
uint32_t *stack;
size_t *stack;
struct state *stptr;
size_t state_size;
if (BUILTIN_EXPECT(!task, 0))
return -EINVAL;
@ -137,17 +139,18 @@ int create_default_frame(task_t* task, entry_point_t ep, void* arg)
memset(kstacks[id], 0xCD, KERNEL_STACK_SIZE);
#ifdef CONFIG_X86_32
/* The difference between setting up a task for SW-task-switching
* and not for HW-task-switching is setting up a stack and not a TSS.
* This is the stack which will be activated and popped off for iret later.
*/
stack = (uint32_t*) (kstacks[id] + KERNEL_STACK_SIZE - sizeof(uint32_t));
stack = (size_t*) (kstacks[id] + KERNEL_STACK_SIZE - sizeof(size_t));
/* The next three things on the stack are a marker for debugging purposes, ... */
*stack-- = 0xDEADBEEF;
#ifdef CONFIG_X86_32
/* the first-function-to-be-called's arguments, ... */
*stack-- = (size_t) arg;
#endif
/* and the "caller" we shall return to.
* This procedure cleans the task after exit. */
*stack = (size_t) leave_kernel_task;
@ -156,58 +159,71 @@ int create_default_frame(task_t* task, entry_point_t ep, void* arg)
* The stack must look like the stack of a task which was
* scheduled away previously. */
stack = (uint32_t*) ((size_t) stack - sizeof(struct state) + 2*sizeof(size_t));
/* In 64bit mode, the stack pointer (SS:RSP) is pushed unconditionally on interrupts.
* In legacy modes, this push is conditional and based on a change in current privilege level (CPL). */
#ifdef CONFIG_X86_32
state_size = sizeof(struct state) - 2*sizeof(size_t);
#else
state_size = sizeof(struct state);
#endif
stack = (size_t*) ((size_t) stack - state_size);
stptr = (struct state *) stack;
memset(stptr, 0x00, sizeof(struct state) - 2*sizeof(size_t));
stptr->esp = (size_t)stack + sizeof(struct state) - 2*sizeof(size_t);
memset(stptr, 0x00, state_size);
#ifdef CONFIG_X86_32
stptr->esp = (size_t)stack + state_size;
#else
stptr->rsp = (size_t)stack + state_size;
/* the first-function-to-be-called's arguments, ... */
stptr->rdi = (size_t) arg;
#endif
stptr->int_no = 0xB16B00B5;
stptr->error = 0xC03DB4B3;
/* The instruction pointer shall be set on the first function to be called
* after IRETing */
stptr->eip = (uint32_t)ep;
#ifdef CONFIG_X86_32
stptr->eip = (size_t)ep;
#else
stptr->rip = (size_t)ep;
#endif
stptr->cs = cs;
#ifdef CONFIG_X86_32
stptr->eflags = 0x1202;
// the creation of a kernel task doesn't change the IOPL level
// => useresp & ss are not required
#else
stptr->rflags = 0x1202;
stptr->ss = 0x10;
stptr->userrsp = stptr->rsp;
#endif
/* Set the task's stack pointer entry to the stack we have crafted right now. */
task->stack = (size_t*)stack;
#else
#warning Currently, not supported
return -1;
#endif
return 0;
}
/* Setup a descriptor in the Global Descriptor Table */
/** @brief Configures GDT descriptor with chosen attributes
*
* Just feed this function with address, limit and the flags
* you have seen in gdt.h
*/
static void gdt_set_gate(int num, unsigned long base, unsigned long limit,
unsigned char access, unsigned char gran)
unsigned char access, unsigned char gran)
{
gdt[num] = configure_gdt_entry(base, limit, access, gran);
}
gdt_entry_t configure_gdt_entry(unsigned long base, unsigned long limit,
unsigned char access, unsigned char gran)
{
gdt_entry_t desc;
/* Setup the descriptor base address */
desc.base_low = (base & 0xFFFF);
desc.base_middle = (base >> 16) & 0xFF;
desc.base_high = (base >> 24) & 0xFF;
gdt[num].base_low = (base & 0xFFFF);
gdt[num].base_middle = (base >> 16) & 0xFF;
gdt[num].base_high = (base >> 24) & 0xFF;
/* Setup the descriptor limits */
desc.limit_low = (limit & 0xFFFF);
desc.granularity = ((limit >> 16) & 0x0F);
gdt[num].limit_low = (limit & 0xFFFF);
gdt[num].granularity = ((limit >> 16) & 0x0F);
/* Finally, set up the granularity and access flags */
desc.granularity |= (gran & 0xF0);
desc.access = access;
return desc;
gdt[num].granularity |= (gran & 0xF0);
gdt[num].access = access;
}
/*
@ -219,13 +235,21 @@ gdt_entry_t configure_gdt_entry(unsigned long base, unsigned long limit,
*/
void gdt_install(void)
{
unsigned int i;
unsigned int i, mode;
memset(task_state_segments, 0x00, MAX_CORES*sizeof(tss_t));
#ifdef CONFIG_X86_32
mode = GDT_FLAG_32_BIT;
#elif defined(CONFIG_X86_64)
mode = GDT_FLAG_64_BIT;
#else
#error invalid mode
#endif
/* Setup the GDT pointer and limit */
gp.limit = (sizeof(gdt_entry_t) * GDT_ENTRIES) - 1;
gp.base = (unsigned int) &gdt;
gp.base = (size_t) &gdt;
/* Our NULL descriptor */
gdt_set_gate(0, 0, 0, 0, 0);
@ -237,7 +261,7 @@ void gdt_install(void)
*/
gdt_set_gate(1, 0, 0xFFFFFFFF,
GDT_FLAG_RING0 | GDT_FLAG_SEGMENT | GDT_FLAG_CODESEG | GDT_FLAG_PRESENT,
GDT_FLAG_4K_GRAN | GDT_FLAG_32_BIT);
GDT_FLAG_4K_GRAN | mode);
/*
* The third entry is our Data Segment. It's EXACTLY the
@ -246,33 +270,37 @@ void gdt_install(void)
*/
gdt_set_gate(2, 0, 0xFFFFFFFF,
GDT_FLAG_RING0 | GDT_FLAG_SEGMENT | GDT_FLAG_DATASEG | GDT_FLAG_PRESENT,
GDT_FLAG_4K_GRAN | GDT_FLAG_32_BIT);
GDT_FLAG_4K_GRAN | mode);
/*
* Create code segment for userspace applications (ring 3)
*/
gdt_set_gate(3, 0, 0xFFFFFFFF,
GDT_FLAG_RING3 | GDT_FLAG_SEGMENT | GDT_FLAG_CODESEG | GDT_FLAG_PRESENT,
GDT_FLAG_4K_GRAN | GDT_FLAG_32_BIT);
GDT_FLAG_4K_GRAN | mode);
/*
* Create data segment for userspace applications (ring 3)
*/
gdt_set_gate(4, 0, 0xFFFFFFFF,
GDT_FLAG_RING3 | GDT_FLAG_SEGMENT | GDT_FLAG_DATASEG | GDT_FLAG_PRESENT,
GDT_FLAG_4K_GRAN | GDT_FLAG_32_BIT);
GDT_FLAG_4K_GRAN | mode);
/*
* Create TSS for each task at ring0 (we use these segments for task switching)
*/
for(i=0; i<MAX_CORES; i++) {
#ifdef CONFIG_X86_32
/* set default values */
task_state_segments[i].eflags = 0x1202;
task_state_segments[i].ss0 = 0x10; // data segment
task_state_segments[i].esp0 = 0xDEADBEEF; // invalid pseudo address
#elif defined(CONFIG_X86_64)
task_state_segments[i].rsp0 = 0xDEADBEEF; // invalid pseudo address
#endif
gdt_set_gate(5+i, (unsigned long) (task_state_segments+i), sizeof(tss_t)-1,
GDT_FLAG_PRESENT | GDT_FLAG_TSS | GDT_FLAG_RING0, GDT_FLAG_32_BIT);
GDT_FLAG_PRESENT | GDT_FLAG_TSS | GDT_FLAG_RING0, 0);
}
/* Flush out the old GDT and install the new changes! */

View File

@ -42,24 +42,6 @@
static idt_entry_t idt[256] = {[0 ... 255] = {0, 0, 0, 0, 0}};
static idt_ptr_t idtp;
idt_entry_t configure_idt_entry(size_t base, unsigned short sel,
unsigned char flags)
{
idt_entry_t desc;
/* The interrupt routine's base address */
desc.base_lo = (base & 0xFFFF);
desc.base_hi = (base >> 16) & 0xFFFF;
/* The segment or 'selector' that this IDT entry will use
* is set here, along with any access flags */
desc.sel = sel;
desc.always0 = 0;
desc.flags = flags;
return desc;
}
/*
* Use this function to set an entry in the IDT. A lot simpler
* than twiddling with the GDT ;)
@ -67,7 +49,19 @@ idt_entry_t configure_idt_entry(size_t base, unsigned short sel,
void idt_set_gate(unsigned char num, size_t base, unsigned short sel,
unsigned char flags)
{
idt[num] = configure_idt_entry(base, sel, flags);
/* The interrupt routine's base address */
idt[num].base_lo = (base & 0xFFFF);
idt[num].base_hi = (base >> 16) & 0xFFFF;
#ifdef CONFIG_X86_64
idt[num].base_hi64 = (base >> 32) & 0xFFFFFFFF;
idt[num].reserved = 0x0;
#endif
/* The segment or 'selector' that this IDT entry will use
* is set here, along with any access flags */
idt[num].sel = sel;
idt[num].always0 = 0x0;
idt[num].flags = flags;
}
extern void isrsyscall(void);
@ -82,7 +76,7 @@ void idt_install(void)
/* Sets the special IDT pointer up, just like in 'gdt.c' */
idtp.limit = (sizeof(idt_entry_t) * 256) - 1;
idtp.base = (unsigned int)&idt;
idtp.base = (size_t)&idt;
/* Add any new ISRs to the IDT here using idt_set_gate */
idt_set_gate(INT_SYSCALL, (size_t)isrsyscall, KERNEL_CODE_SELECTOR,

View File

@ -230,9 +230,14 @@ static void fault_handler(struct state *s)
{
if (s->int_no < 32) {
kputs(exception_messages[s->int_no]);
#ifdef CONFIG_X86_32
kprintf(" Exception (%d) at 0x%x:0x%x on core %u, error code 0x%x, eflags 0x%x\n",
s->int_no, s->cs, s->eip, CORE_ID, s->error, s->eflags);
#elif defined(CONFIG_X86_64)
kprintf(" Exception (%d) at 0x%llx:0x%llx on core %u, error code 0x%llx, rflags 0x%llx\n",
s->int_no, s->cs, s->rip, CORE_ID, s->error, s->rflags);
#endif
/* Now, we signal that we have handled the interrupt */
if (apic_is_enabled())
apic_eoi();

View File

@ -8,20 +8,11 @@
; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;
; TODO
%if 0
[BITS 32]
[BITS 64]
SECTION .text
global strcpy
strcpy:
push ebp
mov ebp, esp
push edi
push esi
mov esi, [ebp+12]
mov edi, [ebp+8]
push rdi
L1:
lodsb
@ -29,25 +20,16 @@ L1:
test al, al
jne L1
mov eax, [ebp+8]
pop esi
pop edi
pop ebp
pop rax
ret
global strncpy
strncpy:
push ebp
mov ebp, esp
push edi
push esi
mov ecx, [ebp+16]
mov esi, [ebp+12]
mov edi, [ebp+8]
push rdi
mov rcx, rdx
L2:
dec ecx
dec rcx
js L3
lodsb
stosb
@ -57,12 +39,10 @@ L2:
stosb
L3:
mov eax, [ebp+8]
pop esi
pop edi
pop ebp
pop rax
ret
%if 0
; The following function is derived from JamesM's kernel development tutorials
; (http://www.jamesmolloy.co.uk/tutorial_html/)
global copy_page_physical

View File

@ -78,7 +78,6 @@ inline static size_t copy_page_table(task_t* task, uint32_t pgd_index, page_tabl
page_table_t* new_pgt;
size_t phyaddr;
#ifdef CONFIG_X86_32
if (BUILTIN_EXPECT(!pgt, 0))
return 0;
@ -89,7 +88,7 @@ inline static size_t copy_page_table(task_t* task, uint32_t pgd_index, page_tabl
if (counter)
(*counter)++;
for(i=0; i<1024; i++) {
for(i=0; i<PGT_ENTRIES; i++) {
if (pgt->entries[i] & PAGE_MASK) {
if (!(pgt->entries[i] & PG_USER)) {
// Kernel page => copy only page entries
@ -114,10 +113,6 @@ inline static size_t copy_page_table(task_t* task, uint32_t pgd_index, page_tabl
phyaddr = virt_to_phys((size_t)new_pgt);
return phyaddr;
#else
#warning Currently, not supported
return 0;
#endif
}
int create_pgd(task_t* task, int copy)
@ -154,7 +149,7 @@ int create_pgd(task_t* task, int copy)
spinlock_lock(&kslock);
for(i=0; i<1024; i++) {
for(i=0; i<PGT_ENTRIES; i++) {
pgd->entries[i] = boot_pgd.entries[i];
// only kernel entries will be copied
if (pgd->entries[i] && !(pgd->entries[i] & PG_USER))
@ -211,7 +206,7 @@ int drop_pgd(void)
spinlock_lock(&task->pgd_lock);
for(i=0; i<1024; i++) {
for(i=0; i<PGT_ENTRIES; i++) {
if (pgd->entries[i] & PG_USER) {
put_page(pgd->entries[i] & PAGE_MASK);
pgd->entries[i] = 0;
@ -310,9 +305,9 @@ size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flag
// set the new page table into the directory
if (flags & MAP_USER_SPACE)
task->pgd->entries[index] = (size_t)pgt|USER_TABLE;
task->pgd->entries[index] = (uint32_t)pgt|USER_TABLE;
else
task->pgd->entries[index] = (size_t)pgt|KERN_TABLE;
task->pgd->entries[index] = (uint32_t)pgt|KERN_TABLE;
// if paging is already enabled, we need to use the virtual address
if (paging_enabled)
@ -392,7 +387,7 @@ size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flag
int change_page_permissions(size_t start, size_t end, uint32_t flags)
{
uint32_t index1, index2, newflags;
size_t viraddr = start & PAGE_MASK;
size_t viraddr = start & 0xFFFFF000;
size_t phyaddr;
page_table_t* pgt;
page_dir_t* pgd;
@ -708,7 +703,7 @@ int arch_paging_init(void)
// now, we create a self reference
per_core(current_task)->pgd->entries[index1] = (((size_t) pgt) & PAGE_MASK)|KERN_TABLE;
pgt->entries[index2] = ((size_t) pgt & PAGE_MASK)|KERN_PAGE;
pgt->entries[index2] = ((size_t) pgt & 0xFFFFF000)|KERN_PAGE;
// create the other PGTs for the kernel space
for(i=0; i<KERNEL_SPACE/(1024*PAGE_SIZE)-1; i++) {
@ -783,14 +778,15 @@ int arch_paging_init(void)
npages = mb_info->mods_count * sizeof(multiboot_module_t) >> PAGE_SHIFT;
if (mb_info->mods_count * sizeof(multiboot_module_t) & (PAGE_SIZE-1))
npages++;
map_region((size_t) (mb_info->mods_addr), (size_t) (mb_info->mods_addr), npages, MAP_KERNEL_SPACE);
map_region((size_t) mb_info->mods_addr & PAGE_MASK, (size_t) mb_info->mods_addr & PAGE_MASK, npages, MAP_KERNEL_SPACE);
for(i=0; i<mb_info->mods_count; i++, mmodule++) {
// map physical address to the same virtual address
npages = (mmodule->mod_end - mmodule->mod_start) >> PAGE_SHIFT;
if (mmodule->mod_end & (PAGE_SIZE-1))
npages++;
map_region((size_t) (mmodule->mod_start), (size_t) (mmodule->mod_start), npages, MAP_KERNEL_SPACE);
kprintf("Map module %s at 0x%x (%u pages)\n", (char*) mmodule->cmdline, mmodule->mod_start, npages);
map_region((size_t) mmodule->mod_start & PAGE_MASK, (size_t) mmodule->mod_start & PAGE_MASK, npages, MAP_KERNEL_SPACE);
}
}
#endif
@ -814,7 +810,7 @@ int arch_paging_init(void)
#endif
/* enable paging */
write_cr3((size_t) &boot_pgd);
write_cr3((uint32_t) &boot_pgd);
i = read_cr0();
i = i | (1 << 31);
write_cr0(i);

View File

@ -44,9 +44,8 @@
*
* 0x00000000 - 0x000FFFFF: reserved for IO devices (16MB)
* 0x00100000 - 0x0DEADFFF: Kernel (size depends on the configuration) (221MB)
* 0x0DEAE000 - 0x3FFFEFFF: Kernel heap (801MB)
* 0x3FFFF000 - 0x3FFFFFFF: Page Tables are mapped in this region (4KB)
* (The last 256 entries belongs to kernel space)
* 0x0DEAE000 - 0x3FFFFFFF: Kernel heap
*
*/
/*
@ -57,147 +56,24 @@ extern const void kernel_start;
extern const void kernel_end;
// boot task's page directory and page directory lock
static page_dir_t boot_pgd = {{[0 ... PGT_ENTRIES-1] = 0}};
extern page_dir_t boot_pgd;
static spinlock_t kslock = SPINLOCK_INIT;
//static int paging_enabled = 0;
static int paging_enabled = 0;
page_dir_t* get_boot_pgd(void)
{
return &boot_pgd;
}
#if 0
/*
* TODO: We create a full copy of the current task. Copy-On-Access will be the better solution.
*
* No PGD locking is needed because only create_pgd uses this function and it
* already holds the PGD lock.
*/
inline static size_t copy_page_table(task_t* task, uint32_t pgd_index, page_table_t* pgt, int* counter)
{
uint32_t i;
page_table_t* new_pgt;
size_t phyaddr;
#ifdef CONFIG_X86_32
if (BUILTIN_EXPECT(!pgt, 0))
return 0;
new_pgt = kmalloc(sizeof(page_table_t));
if (!new_pgt)
return 0;
memset(new_pgt, 0x00, sizeof(page_table_t));
if (counter)
(*counter)++;
for(i=0; i<1024; i++) {
if (pgt->entries[i] & PAGE_MASK) {
if (!(pgt->entries[i] & PG_USER)) {
// Kernel page => copy only page entries
new_pgt->entries[i] = pgt->entries[i];
continue;
}
phyaddr = get_page();
if (!phyaddr)
continue;
if (counter)
(*counter)++;
copy_page_physical((void*)phyaddr, (void*) (pgt->entries[i] & PAGE_MASK));
new_pgt->entries[i] = phyaddr | (pgt->entries[i] & 0xFFF);
atomic_int32_inc(&task->user_usage);
}
}
phyaddr = virt_to_phys((size_t)new_pgt);
return phyaddr;
#else
#warning Currently, not supported
return 0;
#endif
}
#endif
int create_pgd(task_t* task, int copy)
{
#if 0
page_dir_t* pgd;
page_table_t* pgt;
page_table_t* pgt_container;
uint32_t i;
uint32_t index1, index2;
size_t viraddr, phyaddr;
int counter = 0;
task_t* curr_task = per_core(current_task);
// Currently, we support only kernel tasks
// => all tasks are able to use the same pgd
if (BUILTIN_EXPECT(!paging_enabled, 0))
return -EINVAL;
// we already know the virtual address of the "page table container"
// (see file header)
pgt_container = (page_table_t*) ((KERNEL_SPACE - PAGE_SIZE) & PAGE_MASK);
// create new page directory for the new task
pgd = kmalloc(sizeof(page_dir_t));
if (!pgd)
return -ENOMEM;
memset(pgd, 0x00, sizeof(page_dir_t));
// create a new "page table container" for the new task
pgt = kmalloc(sizeof(page_table_t));
if (!pgt) {
kfree(pgd, sizeof(page_dir_t));
return -ENOMEM;
}
memset(pgt, 0x00, sizeof(page_table_t));
spinlock_lock(&kslock);
for(i=0; i<1024; i++) {
pgd->entries[i] = boot_pgd.entries[i];
// only kernel entries will be copied
if (pgd->entries[i] && !(pgd->entries[i] & PG_USER))
pgt->entries[i] = pgt_container->entries[i];
}
spinlock_unlock(&kslock);
// map page table container at the end of the kernel space
viraddr = (KERNEL_SPACE - PAGE_SIZE) & PAGE_MASK;
index1 = viraddr >> 22;
index2 = (viraddr >> 12) & 0x3FF;
// now, we create a self reference
pgd->entries[index1] = ((size_t) virt_to_phys((size_t) pgt) & PAGE_MASK)|KERN_TABLE;
pgt->entries[index2] = ((size_t) virt_to_phys((size_t) pgt) & PAGE_MASK)|KERN_PAGE;
task->pgd = pgd;
if (copy) {
spinlock_lock(&curr_task->pgd_lock);
for (i=KERNEL_SPACE/(1024*PAGE_SIZE); i<1024; i++) {
if (!(curr_task->pgd->entries[i]))
continue;
if (!(curr_task->pgd->entries[i] & PG_USER))
continue;
phyaddr = copy_page_table(task, i, (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + i*PAGE_SIZE) & PAGE_MASK), &counter);
if (phyaddr) {
pgd->entries[i] = (phyaddr & PAGE_MASK) | (curr_task->pgd->entries[i] & 0xFFF);
pgt->entries[i] = (phyaddr & PAGE_MASK) | KERN_PAGE;
}
}
spinlock_unlock(&curr_task->pgd_lock);
}
return counter;
#endif
task->pgd = get_boot_pgd();
return 0;
}
@ -238,9 +114,11 @@ int drop_pgd(void)
size_t virt_to_phys(size_t viraddr)
{
#if 0
task_t* task = per_core(current_task);
uint32_t index1, index2;
uint16_t idx_pd4 = (viraddr >> 39) & 0x1FF;
uint16_t idx_dirp = (viraddr >> 30) & 0x1FF;
uint16_t idx_dir = (viraddr >> 21) & 0x1FF;
uint16_t idx_table = (viraddr >> 12) & 0x1FF;
page_table_t* pgt;
size_t ret = 0;
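/* Illustration (not part of this patch): in long mode a virtual address is
 * split into four 9bit table indices plus a 12bit page offset, hence the
 * shifts by 39 (PML4), 30 (PDPT), 21 (PD) and 12 (PT) and the 0x1FF masks. */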
@ -252,17 +130,24 @@ size_t virt_to_phys(size_t viraddr)
spinlock_lock(&task->pgd_lock);
index1 = viraddr >> 22;
index2 = (viraddr >> 12) & 0x3FF;
if (!(task->pgd->entries[index1] & PAGE_MASK))
// Currently, we allocate pages only in kernel space.
// => the physical address of the page table is identical to the virtual address
pgt = (page_table_t*) (task->pgd->entries[idx_pd4] & PAGE_MASK);
if (!pgt)
goto out;
pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
if (!pgt || !(pgt->entries[index2]))
pgt = (page_table_t*) (pgt->entries[idx_dirp] & PAGE_MASK);
if (!pgt)
goto out;
pgt = (page_table_t*) (pgt->entries[idx_dir] & PAGE_MASK);
if (!pgt)
goto out;
ret = (size_t) (pgt->entries[idx_table] & PAGE_MASK);
if (!ret)
goto out;
ret = pgt->entries[index2] & PAGE_MASK; // determine page frame
ret = ret | (viraddr & 0xFFF); // add page offset
out:
//kprintf("vir %p to phy %p\n", viraddr, ret);
@ -270,18 +155,14 @@ out:
spinlock_unlock(&task->pgd_lock);
return ret;
#endif
return 0;
}
size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flags)
{
#if 0
task_t* task = per_core(current_task);
spinlock_t* pgd_lock;
page_table_t* pgt;
size_t index, i;
size_t ret;
size_t i, ret;
if (BUILTIN_EXPECT(!task || !task->pgd, 0))
return 0;
@ -300,95 +181,64 @@ size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flag
viraddr = vm_alloc(npages, flags);
if (BUILTIN_EXPECT(!viraddr, 0)) {
spinlock_unlock(pgd_lock);
kputs("map_adress: found no valid virtual address\n");
kputs("map_region: found no valid virtual address\n");
return 0;
}
}
ret = viraddr;
//kprintf("map %d pages from %p to %p\n", npages, phyaddr, ret);
for(i=0; i<npages; i++, viraddr+=PAGE_SIZE, phyaddr+=PAGE_SIZE) {
index = viraddr >> 22;
uint16_t idx_pd4 = (viraddr >> 39) & 0x1FF;
uint16_t idx_dirp = (viraddr >> 30) & 0x1FF;
uint16_t idx_dir = (viraddr >> 21) & 0x1FF;
uint16_t idx_table = (viraddr >> 12) & 0x1FF;
if (!(task->pgd->entries[index])) {
page_table_t* pgt_container;
pgt = (page_table_t*) (task->pgd->entries[idx_pd4] & PAGE_MASK);
if (!pgt) {
spinlock_unlock(pgd_lock);
kputs("map_region: out of memory\n");
return 0;
}
pgt = (page_table_t*) (pgt->entries[idx_dirp] & PAGE_MASK);
if (!pgt) {
spinlock_unlock(pgd_lock);
kputs("map_region: out of memory\n");
return 0;
}
pgt = (page_table_t*) get_pages(1);
if (BUILTIN_EXPECT(!pgt, 0)) {
spinlock_unlock(pgd_lock);
kputs("map_address: out of memory\n");
return 0;
}
// set the new page table into the directory
if (flags & MAP_USER_SPACE)
task->pgd->entries[index] = (size_t)pgt|USER_TABLE;
else
task->pgd->entries[index] = (size_t)pgt|KERN_TABLE;
// if paging is already enabled, we need to use the virtual address
if (paging_enabled)
// we already know the virtual address of the "page table container"
// (see file header)
pgt_container = (page_table_t*) ((KERNEL_SPACE - PAGE_SIZE) & PAGE_MASK);
else
pgt_container = (page_table_t*) (task->pgd->entries[(KERNEL_SPACE - PAGE_SIZE) >> 22] & PAGE_MASK);
if (BUILTIN_EXPECT(!pgt_container, 0)) {
spinlock_unlock(pgd_lock);
kputs("map_address: internal error\n");
return 0;
}
// map the new table into the address space of the kernel space
pgt_container->entries[index] = ((size_t) pgt)|KERN_PAGE;
// clear the page table
if (paging_enabled)
memset((void*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index*PAGE_SIZE) & PAGE_MASK), 0x00, PAGE_SIZE);
else
memset(pgt, 0x00, PAGE_SIZE);
} else pgt = (page_table_t*) (task->pgd->entries[index] & PAGE_MASK);
pgt = (page_table_t*) (pgt->entries[idx_dir] & PAGE_MASK);
if (!pgt) {
spinlock_unlock(pgd_lock);
kputs("map_region: out of memory\n");
return 0;
}
/* convert physical address to virtual */
if (paging_enabled)
pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index*PAGE_SIZE) & PAGE_MASK);
// Currently, we allocate pages only in kernel space.
// => the physical address of the page table is identical to the virtual address
//if (paging_enabled)
// pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index*PAGE_SIZE) & PAGE_MASK);
index = (viraddr >> 12) & 0x3FF;
if (pgt->entries[index] && !(flags & MAP_REMAP)) {
if (pgt->entries[idx_table] && !(flags & MAP_REMAP)) {
spinlock_unlock(pgd_lock);
kprintf("0x%x is already mapped\n", viraddr);
return 0;
}
if (flags & MAP_USER_SPACE)
pgt->entries[index] = USER_PAGE|(phyaddr & PAGE_MASK);
pgt->entries[idx_table] = USER_PAGE|(phyaddr & PAGE_MASK);
else
pgt->entries[index] = KERN_PAGE|(phyaddr & PAGE_MASK);
pgt->entries[idx_table] = KERN_PAGE|(phyaddr & PAGE_MASK);
if (flags & MAP_NO_CACHE)
pgt->entries[index] |= PG_PCD;
#ifdef CONFIG_ROCKCREEK
if (flags & MAP_MPE)
pgt->entries[index] |= PG_MPE;
#endif
if (flags & MAP_SVM_STRONG)
#ifndef SVM_WB
pgt->entries[index] |= PG_SVM_STRONG|PG_PWT;
#else
pgt->entries[index] |= PG_SVM;
#endif
if (flags & MAP_SVM_LAZYRELEASE)
pgt->entries[index] |= PG_SVM_LAZYRELEASE|PG_PWT;
if (flags & MAP_SVM_INIT)
pgt->entries[index] |= PG_SVM_INIT;
pgt->entries[idx_table] |= PG_PCD;
if (flags & MAP_NO_ACCESS)
pgt->entries[index] &= ~PG_PRESENT;
pgt->entries[idx_table] &= ~PG_PRESENT;
if (flags & MAP_WT)
pgt->entries[index] |= PG_PWT;
pgt->entries[idx_table] |= PG_PWT;
if (flags & MAP_USER_SPACE)
atomic_int32_inc(&task->user_usage);
@ -399,9 +249,6 @@ size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flag
spinlock_unlock(pgd_lock);
return ret;
#endif
return 0;
}
int change_page_permissions(size_t start, size_t end, uint32_t flags)
@ -469,7 +316,7 @@ int change_page_permissions(size_t start, size_t end, uint32_t flags)
spinlock_unlock(&task->pgd_lock);
#endif
return 0;
return -EINVAL;
}
/*
@ -479,11 +326,9 @@ int change_page_permissions(size_t start, size_t end, uint32_t flags)
*/
size_t vm_alloc(uint32_t npages, uint32_t flags)
{
#if 0
task_t* task = per_core(current_task);
spinlock_t* pgd_lock;
uint32_t index1, index2, j;
size_t viraddr, i, ret = 0;
size_t viraddr, i, j, ret = 0;
size_t start, end;
page_table_t* pgt;
@ -508,11 +353,35 @@ size_t vm_alloc(uint32_t npages, uint32_t flags)
viraddr = i = start;
j = 0;
do {
index1 = i >> 22;
index2 = (i >> 12) & 0x3FF;
uint16_t idx_pd4 = (viraddr >> 39) & 0x1FF;
uint16_t idx_dirp = (viraddr >> 30) & 0x1FF;
uint16_t idx_dir = (viraddr >> 21) & 0x1FF;
uint16_t idx_table = (viraddr >> 12) & 0x1FF;
pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
if (!pgt || !(pgt->entries[index2])) {
// Currently, we allocate pages only in kernel space.
// => the physical address of the page table is identical to the virtual address
pgt = (page_table_t*) (task->pgd->entries[idx_pd4] & PAGE_MASK);
if (!pgt) {
i += (size_t)PGT_ENTRIES*PGT_ENTRIES*PGT_ENTRIES*PAGE_SIZE;
j += PGT_ENTRIES*PGT_ENTRIES*PGT_ENTRIES;
continue;
}
pgt = (page_table_t*) (pgt->entries[idx_dirp] & PAGE_MASK);
if (!pgt) {
i += PGT_ENTRIES*PGT_ENTRIES*PAGE_SIZE;
j += PGT_ENTRIES*PGT_ENTRIES;
continue;
}
pgt = (page_table_t*) (pgt->entries[idx_dir] & PAGE_MASK);
if (!pgt) {
i += PGT_ENTRIES*PAGE_SIZE;
j += PGT_ENTRIES;
continue;
}
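/* Illustration (not part of this patch): the skip distances above follow from
 * the coverage of one entry at each level: a PML4 entry covers 512*512*512
 * pages (512 GB), a PDPT entry 512*512 pages (1 GB) and a PD entry 512 pages
 * (2 MB). */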
if (!(pgt->entries[idx_table])) {
i+=PAGE_SIZE;
j++;
} else {
@ -529,21 +398,19 @@ size_t vm_alloc(uint32_t npages, uint32_t flags)
spinlock_unlock(pgd_lock);
return ret;
#endif
return 0;
}
int unmap_region(size_t viraddr, uint32_t npages)
{
#if 0
task_t* task = per_core(current_task);
spinlock_t* pgd_lock;
uint32_t i;
uint32_t index1, index2;
page_table_t* pgt;
size_t i;
uint16_t idx_pd4, idx_dirp;
uint16_t idx_dir, idx_table;
if (BUILTIN_EXPECT(!task || !task->pgd || !paging_enabled, 0))
return -EINVAL;
return -EINVAL;
if (viraddr <= KERNEL_SPACE)
pgd_lock = &kslock;
@ -552,16 +419,43 @@ int unmap_region(size_t viraddr, uint32_t npages)
spinlock_lock(pgd_lock);
for(i=0; i<npages; i++, viraddr+=PAGE_SIZE)
i = 0;
while(i<npages)
{
index1 = viraddr >> 22;
index2 = (viraddr >> 12) & 0x3FF;
idx_pd4 = (viraddr >> 39) & 0x1FF;
idx_dirp = (viraddr >> 30) & 0x1FF;
idx_dir = (viraddr >> 21) & 0x1FF;
idx_table = (viraddr >> 12) & 0x1FF;
pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
if (!pgt)
// Currently, we allocate pages only in kernel space.
// => the physical address of the page table is identical to the virtual address
pgt = (page_table_t*) (task->pgd->entries[idx_pd4] & PAGE_MASK);
if (!pgt) {
viraddr += (size_t) PGT_ENTRIES*PGT_ENTRIES*PGT_ENTRIES*PAGE_SIZE;
i += PGT_ENTRIES*PGT_ENTRIES*PGT_ENTRIES;
continue;
pgt->entries[index2] &= ~PG_PRESENT;
}
pgt = (page_table_t*) (pgt->entries[idx_dirp] & PAGE_MASK);
if (!pgt) {
viraddr += PGT_ENTRIES*PGT_ENTRIES*PAGE_SIZE;
i += PGT_ENTRIES*PGT_ENTRIES;
continue;
}
pgt = (page_table_t*) (pgt->entries[idx_dir] & PAGE_MASK);
if (!pgt) {
viraddr += PGT_ENTRIES*PAGE_SIZE;
i += PGT_ENTRIES;
continue;
}
if (pgt->entries[idx_table])
pgt->entries[idx_table] &= ~PG_PRESENT;
viraddr +=PAGE_SIZE;
i++;
if (viraddr > KERNEL_SPACE)
atomic_int32_dec(&task->user_usage);
@ -569,19 +463,18 @@ int unmap_region(size_t viraddr, uint32_t npages)
}
spinlock_unlock(pgd_lock);
#endif
return 0;
}
int vm_free(size_t viraddr, uint32_t npages)
{
#if 0
task_t* task = per_core(current_task);
spinlock_t* pgd_lock;
uint32_t i;
uint32_t index1, index2;
page_table_t* pgt;
size_t i;
uint16_t idx_pd4, idx_dirp;
uint16_t idx_dir, idx_table;
if (BUILTIN_EXPECT(!task || !task->pgd || !paging_enabled, 0))
return -EINVAL;
@ -593,61 +486,47 @@ int vm_free(size_t viraddr, uint32_t npages)
spinlock_lock(pgd_lock);
for(i=0; i<npages; i++, viraddr+=PAGE_SIZE)
i = 0;
while(i<npages)
{
index1 = viraddr >> 22;
index2 = (viraddr >> 12) & 0x3FF;
idx_pd4 = (viraddr >> 39) & 0x1FF;
idx_dirp = (viraddr >> 30) & 0x1FF;
idx_dir = (viraddr >> 21) & 0x1FF;
idx_table = (viraddr >> 12) & 0x1FF;
pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
if (!pgt)
// Currently, we allocate pages only in kernel space.
// => the physical address of the page table is identical to the virtual address
pgt = (page_table_t*) (task->pgd->entries[idx_pd4] & PAGE_MASK);
if (!pgt) {
viraddr += (size_t) PGT_ENTRIES*PGT_ENTRIES*PGT_ENTRIES*PAGE_SIZE;
i += PGT_ENTRIES*PGT_ENTRIES*PGT_ENTRIES;
continue;
pgt->entries[index2] = 0;
}
pgt = (page_table_t*) (pgt->entries[idx_dirp] & PAGE_MASK);
if (!pgt) {
viraddr += PGT_ENTRIES*PGT_ENTRIES*PAGE_SIZE;
i += PGT_ENTRIES*PGT_ENTRIES;
continue;
}
pgt = (page_table_t*) (pgt->entries[idx_dir] & PAGE_MASK);
if (!pgt) {
viraddr += PGT_ENTRIES*PAGE_SIZE;
i += PGT_ENTRIES;
continue;
}
if (pgt->entries[idx_table])
pgt->entries[idx_table] = 0;
viraddr +=PAGE_SIZE;
i++;
tlb_flush_one_page(viraddr);
}
spinlock_unlock(pgd_lock);
#endif
return 0;
}
#if 0
int print_paging_tree(size_t viraddr)
{
task_t* task = per_core(current_task);
uint32_t index1, index2;
page_dir_t* pgd = NULL;
page_table_t* pgt = NULL;
if (BUILTIN_EXPECT(!viraddr, 0))
return -EINVAL;
index1 = viraddr >> 22;
index2 = (viraddr >> 12) & 0x3FF;
spinlock_lock(&task->pgd_lock);
kprintf("Paging dump of address 0x%x\n", viraddr);
pgd = task->pgd;
kprintf("\tPage directory entry %u: ", index1);
if (pgd) {
kprintf("0x%0x\n", pgd->entries[index1]);
pgt = (page_table_t*) (pgd->entries[index1] & PAGE_MASK);
} else
kputs("invalid page directory\n");
/* convert physical address to virtual */
if (paging_enabled && pgt)
pgt = (page_table_t*) (KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE);
kprintf("\tPage table entry %u: ", index2);
if (pgt)
kprintf("0x%x\n", pgt->entries[index2]);
else
kputs("invalid page table\n");
spinlock_unlock(&task->pgd_lock);
return 0;
}
@ -655,14 +534,12 @@ int print_paging_tree(size_t viraddr)
static void pagefault_handler(struct state *s)
{
task_t* task = per_core(current_task);
page_dir_t* pgd = task->pgd;
page_table_t* pgt = NULL;
//page_dir_t* pgd = task->pgd;
//page_table_t* pgt = NULL;
size_t viraddr = read_cr2();
size_t phyaddr;
#ifdef CONFIG_ROCKCREEK
uint32_t index1, index2;
#endif
//size_t phyaddr;
#if 0
if ((viraddr >= task->start_heap) && (viraddr <= task->end_heap) && (viraddr > KERNEL_SPACE)) {
viraddr = viraddr & PAGE_MASK;
@ -678,87 +555,28 @@ static void pagefault_handler(struct state *s)
kprintf("Could not map 0x%x at 0x%x\n", phyaddr, viraddr);
put_page(phyaddr);
}
#ifdef CONFIG_ROCKCREEK
// does our SVM system need to handle this page fault?
index1 = viraddr >> 22;
index2 = (viraddr >> 12) & 0x3FF;
if (!pgd || !(pgd->entries[index1] & PAGE_MASK))
goto default_handler;
pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
if (!pgt || !(pgt->entries[index2]))
goto default_handler;
if (pgt->entries[index2] & PG_SVM_INIT) {
if (BUILTIN_EXPECT(!svm_alloc_page(viraddr, pgt), 1))
return;
else
goto default_handler;
}
if (pgt->entries[index2] & PG_SVM_STRONG)
if (BUILTIN_EXPECT(!svm_access_request(viraddr), 1))
return;
#endif
default_handler:
kprintf("PAGE FAULT: Task %u got page fault at %p (irq %d, cs:eip 0x%x:0x%x)\n", task->id, viraddr, s->int_no, s->cs, s->eip);
kprintf("Register state: eax = 0x%x, ebx = 0x%x, ecx = 0x%x, edx = 0x%x, edi = 0x%x, esi = 0x%x, ebp = 0x%x, esp = 0x%x\n",
s->eax, s->ebx, s->ecx, s->edx, s->edi, s->esi, s->ebp, s->esp);
kprintf("PAGE FAULT: Task %u got page fault at %p (irq %llu, cs:rip 0x%llx:0x%llx)\n", task->id, viraddr, s->int_no, s->cs, s->rip);
kprintf("Register state: rax = 0x%llx, rbx = 0x%llx, rcx = 0x%llx, rdx = 0x%llx, rdi = 0x%llx, rsi = 0x%llx, rbp = 0x%llx, rsp = 0x%llx\n",
s->rax, s->rbx, s->rcx, s->rdx, s->rdi, s->rsi, s->rbp, s->rsp);
while(1);
irq_enable();
abort();
}
#endif
int arch_paging_init(void)
{
#if 0
uint32_t i, npages, index1, index2;
page_table_t* pgt;
size_t viraddr;
uint32_t i, npages;
// uninstall default handler and install our own
irq_uninstall_handler(14);
irq_install_handler(14, pagefault_handler);
// Create a page table to reference to the other page tables
pgt = (page_table_t*) get_page();
if (!pgt) {
kputs("arch_paging_init: Not enough memory!\n");
return -ENOMEM;
}
memset(pgt, 0, PAGE_SIZE);
// map this table at the end of the kernel space
viraddr = KERNEL_SPACE - PAGE_SIZE;
index1 = viraddr >> 22;
index2 = (viraddr >> 12) & 0x3FF;
// now, we create a self reference
per_core(current_task)->pgd->entries[index1] = (((size_t) pgt) & PAGE_MASK)|KERN_TABLE;
pgt->entries[index2] = ((size_t) pgt & PAGE_MASK)|KERN_PAGE;
// create the other PGTs for the kernel space
for(i=0; i<KERNEL_SPACE/(1024*PAGE_SIZE)-1; i++) {
size_t phyaddr = get_page();
if (!phyaddr) {
kputs("arch_paging_init: Not enough memory!\n");
return -ENOMEM;
}
memset((void*) phyaddr, 0, PAGE_SIZE);
per_core(current_task)->pgd->entries[i] = (phyaddr & PAGE_MASK)|KERN_TABLE;
pgt->entries[i] = (phyaddr & PAGE_MASK)|KERN_PAGE;
}
/*
* Set the page table and page directory entries for the kernel. We map the kernel's physical address
* to the same virtual address.
*/
npages = ((size_t) &kernel_end - (size_t) &kernel_start) >> PAGE_SHIFT;
if ((size_t)&kernel_end & (PAGE_SIZE-1))
npages++;
map_region((size_t)&kernel_start, (size_t)&kernel_start, npages, MAP_KERNEL_SPACE);
// kernel is already mapped into the kernel space (see entry64.asm)
// this includes .data, .bss, .text, video memory and the multiboot structure
#if MAX_CORES > 1
// Reserve page for smp boot code
@ -768,18 +586,7 @@ int arch_paging_init(void)
}
#endif
#ifdef CONFIG_VGA
// map the video memory into the kernel space
map_region(VIDEO_MEM_ADDR, VIDEO_MEM_ADDR, 1, MAP_KERNEL_SPACE|MAP_NO_CACHE);
#endif
#ifdef CONFIG_MULTIBOOT
/*
* of course, mb_info has to be mapped into the kernel space
*/
if (mb_info)
map_region((size_t) mb_info & PAGE_MASK, (size_t) mb_info & PAGE_MASK, 1, MAP_KERNEL_SPACE);
#if 0
/*
* Map reserved memory regions into the kernel space
@ -817,46 +624,15 @@ int arch_paging_init(void)
npages = (mmodule->mod_end - mmodule->mod_start) >> PAGE_SHIFT;
if (mmodule->mod_end & (PAGE_SIZE-1))
npages++;
//kprintf("Map module %s at 0x%x (%u pages)\n", (char*) mmodule->cmdline, mmodule->mod_start, npages);
map_region((size_t) (mmodule->mod_start), (size_t) (mmodule->mod_start), npages, MAP_KERNEL_SPACE);
}
}
#endif
#ifdef CONFIG_ROCKCREEK
// map SCC's bootinfo
viraddr = map_region(SCC_BOOTINFO, SCC_BOOTINFO, 1, MAP_KERNEL_SPACE);
kprintf("Map SCC's bootinfos at 0x%x\n", viraddr);
// map SCC's configuration registers
viraddr = map_region(CRB_X0_Y0, CRB_X0_Y0, (CRB_OWN-CRB_X0_Y0+16*1024*1024) >> PAGE_SHIFT, MAP_KERNEL_SPACE|MAP_NO_CACHE);
kprintf("Map configuration registers at 0x%x\n", viraddr);
// map SCC's message passing buffers
viraddr = map_region(MPB_X0_Y0, MPB_X0_Y0, (MPB_OWN-MPB_X0_Y0+16*1024*1024) >> PAGE_SHIFT, MAP_KERNEL_SPACE|MAP_MPE);
kprintf("Map message passing buffers at 0x%x\n", viraddr);
// map the FPGA registers
viraddr = map_region(FPGA_BASE, FPGA_BASE, 0x10000 >> PAGE_SHIFT, MAP_KERNEL_SPACE|MAP_NO_CACHE);
kprintf("Map FPGA regsiters at 0x%x\n", viraddr);
#endif
/* enable paging */
write_cr3((size_t) &boot_pgd);
i = read_cr0();
i = i | (1 << 31);
write_cr0(i);
/* signal that we are able to use paging */
paging_enabled = 1;
#ifdef CONFIG_ROCKCREEK
// map the initial ramdisk
npages = bootinfo->size >> PAGE_SHIFT;
if (bootinfo->size & (PAGE_SIZE-1))
npages++;
viraddr = map_region(0, bootinfo->addr, npages, MAP_KERNEL_SPACE);
kprintf("Map initrd from 0x%x to 0x%x (size %u bytes)\n", bootinfo->addr, viraddr, bootinfo->size);
bootinfo->addr = viraddr;
#endif
/*
* we turned on paging
* => now, we are able to register our task for Task State Switching
@ -865,7 +641,6 @@ int arch_paging_init(void)
// APIC registers into the kernel address space
map_apic();
#endif
return 0;
}

View File

@ -456,8 +456,8 @@ int initrd_init(void)
initrd_file_desc_t* file_desc;
vfs_node_t* new_node;
if (header->magic != INITRD_MAGIC_NUMBER) {
kprintf("Invalid magic number for a init ram disk\n");
if (BUILTIN_EXPECT(header->magic != INITRD_MAGIC_NUMBER, 0)) {
kprintf("Invalid magic number for a init ram disk: 0x%x\n", header->magic);
continue;
}

View File

@ -62,17 +62,12 @@ int main(void)
{
tid_t id;
vga_init();
vga_puts("aaa");
//lowlevel_init();
lowlevel_init();
pushbg(COL_BLUE);
kprintf("This is MetalSVM %s Build %u, %u\n",
METALSVM_VERSION, &__BUILD_DATE, &__BUILD_TIME);
popbg();
while(1);
system_init();
irq_init();
timer_init();

View File

@ -719,7 +719,6 @@ invalid:
return -EINVAL;
#else
#warning Currently, not supported!
return -EINVAL;
#endif
}
@ -786,7 +785,6 @@ int create_user_task(tid_t* id, const char* fname, char** argv)
while ((*dest++ = *src++) != 0);
}
/* create new task */
return create_task(id, user_entry, load_args, NORMAL_PRIO);
}

View File

@ -20,8 +20,8 @@ SECTIONS
.data ALIGN(4096) : AT(ADDR(.data)) {
*(.data)
}
bss_start = .;
.bss ALIGN(4096) : AT(ADDR(.bss)) {
bss_start = .;
*(.bss)
}
bss_end = .;

View File

@ -20,8 +20,8 @@ SECTIONS
.data ALIGN(4096) : AT(ADDR(.data)) {
*(.data)
}
bss_start = .;
.bss ALIGN(4096) : AT(ADDR(.bss)) {
bss_start = .;
*(.bss)
}
bss_end = .;

View File

@ -96,7 +96,7 @@ int mmu_init(void)
size_t kernel_size;
unsigned int i;
size_t addr;
int ret;
int ret = 0;
// at first, set default value of the bitmap
memset(bitmap, 0xFF, sizeof(uint8_t)*BITMAP_SIZE);
@ -128,35 +128,6 @@ int mmu_init(void)
HALT;
}
}
/*
* Modules like the init ram disk are already loaded.
* Therefore, we set these pages as used.
*/
if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MODS)) {
multiboot_module_t* mmodule = (multiboot_module_t*) ((size_t) mb_info->mods_addr);
/*
* Mark the mb_info as used.
*/
page_set_mark((size_t)mb_info >> PAGE_SHIFT);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
for(addr = mb_info->mods_addr; addr < mb_info->mods_addr + mb_info->mods_count * sizeof(multiboot_module_t); addr += PAGE_SIZE) {
page_set_mark(addr >> PAGE_SHIFT);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
}
for(i=0; i<mb_info->mods_count; i++, mmodule++) {
for(addr=mmodule->mod_start; addr<mmodule->mod_end; addr+=PAGE_SIZE) {
page_set_mark(addr >> PAGE_SHIFT);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
}
}
}
#elif defined(CONFIG_ROCKCREEK)
/* of course, the first slots belong to the private memory */
for(addr=0x00; addr<1*0x1000000; addr+=PAGE_SIZE) {
@ -210,8 +181,39 @@ int mmu_init(void)
atomic_int32_sub(&total_available_pages, 1);
#endif
ret = paging_init();
if (ret)
return ret;
#ifdef CONFIG_ROCKCREEK
#ifdef CONFIG_MULTIBOOT
/*
* Modules like the init ram disk are already loaded.
* Therefore, we set these pages as used.
*/
if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MODS)) {
multiboot_module_t* mmodule = (multiboot_module_t*) ((size_t) mb_info->mods_addr);
/*
* Mark the mb_info as used.
*/
page_set_mark((size_t)mb_info >> PAGE_SHIFT);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
for(addr = mb_info->mods_addr; addr < mb_info->mods_addr + mb_info->mods_count * sizeof(multiboot_module_t); addr += PAGE_SIZE) {
page_set_mark(addr >> PAGE_SHIFT);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
}
for(i=0; i<mb_info->mods_count; i++, mmodule++) {
for(addr=mmodule->mod_start; addr<mmodule->mod_end; addr+=PAGE_SIZE) {
page_set_mark(addr >> PAGE_SHIFT);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
}
}
}
#elif defined(CONFIG_ROCKCREEK)
/*
* Now, we are able to read the FPGA registers and to
* determine the number of slots for private memory.