/* I/O ports of the CMOS: index (register select) port and data port. */
#ifndef CMOS_PORT_ADDRESS
#define CMOS_PORT_ADDRESS	0x70
#endif
#ifndef CMOS_PORT_DATA
#define CMOS_PORT_DATA		0x71
#endif

/** @brief Write one byte into the CMOS
 *
 * The register is selected through CMOS_PORT_ADDRESS first, afterwards the
 * value is stored through CMOS_PORT_DATA. The order of the two port writes
 * must not be changed.
 *
 * @param offset CMOS offset (register index) to write to
 * @param val The value you want to write
 */
inline static void cmos_write(uint8_t offset, uint8_t val)
{
	const unsigned short index_port = CMOS_PORT_ADDRESS;
	const unsigned short data_port = CMOS_PORT_DATA;

	outportb(index_port, offset);	// select the CMOS register
	outportb(data_port, val);	// store the new content
}
-41,6 +41,7 @@ #include #include #include +#include "boot.h" /* * Note that linker symbols are not variables, they have no memory allocated for @@ -51,6 +52,7 @@ extern const void kernel_start; #define IOAPIC_ADDR ((size_t) &kernel_start - 2*PAGE_SIZE) #define LAPIC_ADDR ((size_t) &kernel_start - 1*PAGE_SIZE) #define MAX_APIC_CORES MAX_CORES +#define SMP_SETUP_ADDR 0x8000ULL // IO APIC MMIO structure: write reg, then read or write data. typedef struct { @@ -65,6 +67,8 @@ extern uint32_t cpu_freq; extern atomic_int32_t cpu_online; extern int32_t isle; extern int32_t possible_isles; +extern int32_t possible_cpus; +extern atomic_int32_t current_boot_id; apic_mp_t* apic_mp __attribute__ ((section (".data"))) = NULL; static apic_config_table_t* apic_config = NULL; static size_t lapic = 0; @@ -80,8 +84,6 @@ spinlock_t bootlock = SPINLOCK_INIT; // forward declaration static int lapic_reset(void); -extern atomic_int32_t cpu_online; - static uint32_t lapic_read_default(uint32_t addr) { return *((const volatile uint32_t*) (lapic+addr)); @@ -270,6 +272,15 @@ static inline uint32_t apic_lvt_entries(void) return 0; } +static inline void set_ipi_dest(uint32_t cpu_id) { + uint32_t tmp; + + tmp = lapic_read(APIC_ICR2); + tmp &= 0x00FFFFFF; + tmp |= (cpu_id << 24); + lapic_write(APIC_ICR2, tmp); +} + int apic_timer_deadline(uint32_t t) { if (BUILTIN_EXPECT(apic_is_enabled() && icr, 1)) { @@ -382,6 +393,135 @@ static int lapic_reset(void) return 0; } +#if MAX_CORES > 1 +/* + * use the universal startup algorithm of Intel's MultiProcessor Specification + */ +static int wakeup_ap(uint32_t start_eip, uint32_t id) +{ + static char* reset_vector = 0; + uint32_t i; + + kprintf("Wakeup application processor %d via IPI\n", id); + + // set shutdown code to 0x0A + cmos_write(0x0F, 0x0A); + + if (!reset_vector) { + reset_vector = (char*) vma_alloc(PAGE_SIZE, VMA_READ|VMA_WRITE); + page_map((size_t)reset_vector, 0x00, 1, PG_RW|PG_GLOBAL|PG_PCD); + reset_vector += 0x467; // add base address of 
the reset vector + kprintf("Map reset vector to %p\n", reset_vector); + } + *((volatile unsigned short *) (reset_vector+2)) = start_eip >> 4; + *((volatile unsigned short *) reset_vector) = 0x00; + + if (lapic_read(APIC_ICR1) & APIC_ICR_BUSY) { + kputs("ERROR: previous send not complete"); + return -EIO; + } + + //kputs("Send IPI\n"); + // send out INIT to AP + if (has_x2apic()) { + uint64_t dest = ((uint64_t)id << 32); + + wrmsr(0x800 + (APIC_ICR1 >> 4), dest|APIC_INT_LEVELTRIG|APIC_INT_ASSERT|APIC_DM_INIT); + udelay(200); + // reset INIT + wrmsr(0x800 + (APIC_ICR1 >> 4), APIC_INT_LEVELTRIG|APIC_DM_INIT); + udelay(10000); + // send out the startup + wrmsr(0x800 + (APIC_ICR1 >> 4), dest|APIC_DM_STARTUP|(start_eip >> 12)); + udelay(200); + // do it again + wrmsr(0x800 + (APIC_ICR1 >> 4), dest|APIC_DM_STARTUP|(start_eip >> 12)); + udelay(200); + + //kputs("IPI done...\n"); + + return 0; + } else { + set_ipi_dest(id); + lapic_write(APIC_ICR1, APIC_INT_LEVELTRIG|APIC_INT_ASSERT|APIC_DM_INIT); + udelay(200); + // reset INIT + lapic_write(APIC_ICR1, APIC_INT_LEVELTRIG|APIC_DM_INIT); + udelay(10000); + // send out the startup + set_ipi_dest(id); + lapic_write(APIC_ICR1, APIC_DM_STARTUP|(start_eip >> 12)); + udelay(200); + // do it again + set_ipi_dest(id); + lapic_write(APIC_ICR1, APIC_DM_STARTUP|(start_eip >> 12)); + udelay(200); + + //kputs("IPI done...\n"); + + i = 0; + while((lapic_read(APIC_ICR1) & APIC_ICR_BUSY) && (i < 1000)) + i++; // wait for it to finish, give up eventualy tho + + return ((lapic_read(APIC_ICR1) & APIC_ICR_BUSY) ? -EIO : 0); // did it fail (still delivering) or succeed ? + } +} + +int smp_init(void) +{ + uint32_t i, j; + int err; + + if (ncores <= 1) + return -EINVAL; + + kprintf("CR0 of core %u: 0x%x\n", apic_cpu_id(), read_cr0()); + + /* + * dirty hack: Reserve memory for the bootup code. + * In a single core enviroment is everythink below 8 MB free. + * + * Copy 16bit startup code to a 16bit address. + * Wakeup the other cores via IPI. 
They start at this address + * in real mode, switch to protected and finally they jump to smp_main. + */ + page_map(SMP_SETUP_ADDR, SMP_SETUP_ADDR, PAGE_FLOOR(sizeof(boot_code)) >> PAGE_BITS, PG_RW|PG_GLOBAL); + vma_add(SMP_SETUP_ADDR, SMP_SETUP_ADDR + PAGE_FLOOR(sizeof(boot_code)), VMA_READ|VMA_WRITE|VMA_CACHEABLE); + memcpy((void*)SMP_SETUP_ADDR, boot_code, sizeof(boot_code)); + + for(i=0; i= atomic_int32_read(&cpu_online)) && (j < 1000); j++) + udelay(1000); + + if (i >= atomic_int32_read(&cpu_online)) { + kprintf("Unable to wakeup processor %d, cpu_online %d\n", i, atomic_int32_read(&cpu_online)); + return -EIO; + } + } + + kprintf("%d cores online\n", atomic_int32_read(&cpu_online)); + + return 0; +} +#endif + /* * detects the timer frequency of the APIC and restart * the APIC timer with the correct period @@ -467,6 +607,9 @@ int apic_calibration(void) } initialized = 1; +#if MAX_CORES > 1 + smp_init(); +#endif irq_nested_enable(flags); return 0; @@ -601,6 +744,8 @@ found_mp: goto no_mp; } ncores = count; + if (is_single_kernel()) + possible_cpus = count; check_lapic: if (apic_config) @@ -658,7 +803,6 @@ no_mp: extern int smp_main(void); extern void gdt_flush(void); extern int set_idle_task(void); -extern atomic_int32_t current_boot_id; #if MAX_CORES > 1 int smp_start(void) @@ -701,15 +845,6 @@ int smp_start(void) return smp_main(); } -static inline void set_ipi_dest(uint32_t cpu_id) { - uint32_t tmp; - - tmp = lapic_read(APIC_ICR2); - tmp &= 0x00FFFFFF; - tmp |= (cpu_id << 24); - lapic_write(APIC_ICR2, tmp); -} - #if 0 int ipi_tlb_flush(void) { diff --git a/hermit/arch/x86/kernel/boot.asm b/hermit/arch/x86/kernel/boot.asm new file mode 100644 index 000000000..ffec666ac --- /dev/null +++ b/hermit/arch/x86/kernel/boot.asm @@ -0,0 +1,223 @@ +; Copyright 2010-2016 Stefan Lankes, RWTH Aachen University +; All rights reserved. 
+; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in the +; documentation and/or other materials provided with the distribution. +; * Neither the name of the University nor the names of its contributors +; may be used to endorse or promote products derived from this software +; without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +; DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +; ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
; Startup (trampoline) code for the application processors.
; The code is assembled as flat binary for the address 0x8000 (see ORG),
; starts in 16bit mode, switches to the protected mode, enables the
; long mode and finally jumps to kernel_start.

KERNEL_STACK_SIZE equ 0x100	; size of boot_stack in bytes
kernel_start equ 0x800000	; address the code jumps to after entering the 64bit mode

[BITS 16]
SECTION .text
GLOBAL _start
ORG 0x8000
_start:
	cli
	lgdt [gdtr]

	; switch to protected mode by setting PE bit
	mov eax, cr0
	or al, 0x1
	mov cr0, eax

	; far jump to the 32bit code
	jmp dword codesel : _pmstart

[BITS 32]
ALIGN 4
_pmstart:
	; load all data segment registers with the flat data selector
	xor eax, eax
	mov ax, datasel
	mov ds, ax
	mov es, ax
	mov fs, ax
	mov gs, ax
	mov ss, ax

	; use the small stack at the end of this file
	mov esp, boot_stack+KERNEL_STACK_SIZE-16
	jmp short stublet	; skip the descriptor tables
	jmp $

; GDT for the protected mode
ALIGN 4
gdtr:				; descriptor table register (operand of lgdt)
	dw gdt_end-gdt-1	; limit
	dd gdt			; base address
gdt:
	dd 0,0			; null descriptor
codesel equ $-gdt		; selector of the code segment
	dw 0xFFFF		; segment size 0..15
	dw 0x0000		; segment address 0..15
	db 0x00			; segment address 16..23
	db 0x9A			; access permissions and type
	db 0xCF			; additional information and segment size 16..19
	db 0x00			; segment address 24..31
datasel equ $-gdt		; selector of the data segment
	dw 0xFFFF		; segment size 0..15
	dw 0x0000		; segment address 0..15
	db 0x00			; segment address 16..23
	db 0x92			; access permissions and type
	db 0xCF			; additional information and segment size 16..19
	db 0x00			; segment address 24..31
gdt_end:

ALIGN 4
GDTR64:
	dw GDT64_end - GDT64 - 1	; Limit.
	dq GDT64			; Base.

; we need a new GDT to switch to the 64bit mode
GDT64:				; Global Descriptor Table (64-bit).
	.Null: equ $ - GDT64	; The null descriptor.
	dw 0			; Limit (low).
	dw 0			; Base (low).
	db 0			; Base (middle)
	db 0			; Access.
	db 0			; Granularity.
	db 0			; Base (high).
	.Code: equ $ - GDT64	; The code descriptor.
	dw 0			; Limit (low).
	dw 0			; Base (low).
	db 0			; Base (middle)
	db 10011010b		; Access.
	db 00100000b		; Granularity.
	db 0			; Base (high).
	.Data: equ $ - GDT64	; The data descriptor.
	dw 0			; Limit (low).
	dw 0			; Base (low).
	db 0			; Base (middle)
	db 10010010b		; Access.
	db 00000000b		; Granularity.
	db 0			; Base (high).
GDT64_end:

ALIGN 4
stublet:

; This will set up the x86 control registers:
; Caching and the floating point unit are enabled.
; Bootstrap page tables are loaded and page size
; extensions (huge pages) enabled.
;
; HermitCore's boot processor maps its kernel into
; the address space of this trampoline code.
; => more information in apic.c
cpu_init:
	; check for long mode

	; do we have the instruction cpuid?
	; => try to toggle bit 21 of EFLAGS and restore the old flags afterwards
	pushfd
	pop eax
	mov ecx, eax
	xor eax, 1 << 21
	push eax
	popfd
	pushfd
	pop eax
	push ecx
	popfd
	xor eax, ecx
	jz $			; bit 21 is not changeable => no cpuid, no long mode => hang

	; cpuid > 0x80000000?
	mov eax, 0x80000000
	cpuid
	cmp eax, 0x80000001
	jb $			; It is less, there is no long mode.

	; do we have a long mode?
	mov eax, 0x80000001
	cpuid
	test edx, 1 << 29	; Test if the LM-bit, which is bit 29, is set in the D-register.
	jz $			; It isn't, there is no long mode.

	; we need to enable PAE mode
	mov eax, cr4
	or eax, 1 << 5
	mov cr4, eax

	; set bit 8 of the MSR 0xC0000080 (EFER => long mode enable)
	mov ecx, 0xC0000080
	rdmsr
	or eax, 1 << 8
	wrmsr

	; Set CR3
	; NOTE(review): 0xDEADBEAF looks like a placeholder for the address of
	; boot_pml4 (the page tables at the end of this file are commented out)
	; and ebp is never initialized in this file => confirm how the value is patched
	mov eax, 0xDEADBEAF	; boot_pml4
	add eax, ebp
	or eax, (1 << 0)	; set present bit
	mov cr3, eax

	; Set CR4 (PAE is already set)
	mov eax, cr4
	and eax, 0xfffbf9ff	; disable SSE (clears the bits 9, 10 and 18)
	or eax, (1 << 7)	; enable PGE
	mov cr4, eax

	; Set CR0 (PM-bit is already set)
	mov eax, cr0
	and eax, ~(1 << 2)	; disable FPU emulation
	or eax, (1 << 1)	; enable FPU monitoring
	and eax, ~(1 << 30)	; enable caching
	and eax, ~(1 << 29)	; disable write through caching
	and eax, ~(1 << 16)	; allow kernel write access to read-only pages
	or eax, (1 << 31)	; enable paging
	mov cr0, eax

	lgdt [GDTR64]		; Load the 64-bit global descriptor table.
	mov ax, GDT64.Data
	mov ss, ax
	mov ds, ax
	mov es, ax

	jmp GDT64.Code:start64	; Set the code segment and enter 64-bit long mode.

[BITS 64]
ALIGN 8
start64:
	; jump to the kernel by "returning" to kernel_start
	push kernel_start
	ret

ALIGN 16
global boot_stack
boot_stack:
	TIMES (KERNEL_STACK_SIZE) DB 0xcd	; stack is filled with the pattern 0xcd

; Bootstrap page tables are used during the initialization.
;ALIGN 4096
;boot_pml4:
;	DQ boot_pdpt + 0x7	; PG_PRESENT | PG_RW | PG_USER
;	times 510 DQ 0		; PAGE_MAP_ENTRIES - 2
;	DQ boot_pml4 + 0x203	; PG_PRESENT | PG_RW | PG_SELF (self-reference)
;boot_pdpt:
;	DQ boot_pgd + 0x7	; PG_PRESENT | PG_RW | PG_USER
;	times 510 DQ 0		; PAGE_MAP_ENTRIES - 2
;	DQ boot_pml4 + 0x203	; PG_PRESENT | PG_RW | PG_SELF (self-reference)
;boot_pgd:
;	DQ boot_pgt + 0x7	; PG_PRESENT | PG_RW | PG_USER
;	times 510 DQ 0		; PAGE_MAP_ENTRIES - 2
;	DQ boot_pml4 + 0x203	; PG_PRESENT | PG_RW | PG_SELF (self-reference)
;boot_pgt:
;%assign i 0
;%rep 512
;	DQ i*0x1000 + 0x103
;%assign i i+1
;%endrep