diff --git a/hermit/arch/x86/include/asm/processor.h b/hermit/arch/x86/include/asm/processor.h index 34a7a250f..277cf883d 100644 --- a/hermit/arch/x86/include/asm/processor.h +++ b/hermit/arch/x86/include/asm/processor.h @@ -309,7 +309,7 @@ static inline void clts(void) /** @brief Read out time stamp counter * - * The rdtsc asm command puts a 64 bit time stamp value + * The rdtsc instruction puts a 64 bit time stamp value * into EDX:EAX. * * @return The 64 bit time stamp value @@ -317,7 +317,28 @@ static inline void clts(void) inline static uint64_t rdtsc(void) { uint64_t lo, hi; + asm volatile ("rdtsc" : "=a"(lo), "=d"(hi) ); + + return (hi << 32 | lo); +} + +/** @brief Read time stamp counter and processor id + * + * The rdtscp instruction puts a 64 bit time stamp value + * into EDX:EAX and the processor id into ECX. + * + * @return The 64 bit time stamp value + */ +inline static unsigned long long rdtscp(uint32_t* cpu_id) +{ + uint64_t lo, hi; + uint32_t id; + + asm volatile ("rdtscp" : "=a"(lo), "=c"(id), "=d"(hi) :: "memory"); + if (cpu_id) + *cpu_id = id; + return (hi << 32 | lo); } @@ -347,8 +368,8 @@ inline static uint64_t rdmsr(uint32_t msr) { */ inline static void wrmsr(uint32_t msr, uint64_t value) { - uint32_t low = value & 0xFFFFFFFF; - uint32_t high = value >> 32; + uint32_t low = (uint32_t) (value & 0xFFFFFFFFULL); + uint32_t high = (uint32_t) (value >> 32); asm volatile("wrmsr" :: "a"(low), "c"(msr), "d"(high)); } @@ -654,8 +675,6 @@ inline static int system_calibration(void) detect_cpu_frequency(); apic_calibration(); - //kprintf("CR0 of core %u: 0x%x\n", apic_cpu_id(), read_cr0()); - return 0; } diff --git a/hermit/arch/x86/include/asm/stddef.h b/hermit/arch/x86/include/asm/stddef.h index ac4b8ce96..f9d21e64e 100644 --- a/hermit/arch/x86/include/asm/stddef.h +++ b/hermit/arch/x86/include/asm/stddef.h @@ -165,10 +165,6 @@ struct state { uint64_t ss; }; -uint32_t apic_cpu_id(void); - -#define smp_id apic_cpu_id - #ifdef __cplusplus } #endif diff 
--git a/hermit/arch/x86/kernel/apic.c b/hermit/arch/x86/kernel/apic.c index abac78958..a5d7fc755 100644 --- a/hermit/arch/x86/kernel/apic.c +++ b/hermit/arch/x86/kernel/apic.c @@ -182,19 +182,19 @@ void apic_eoi(size_t int_no) uint32_t apic_cpu_id(void) { - if (apic_is_enabled()) - return ((lapic_read(APIC_ID)) >> 24); + int32_t id = -1; - if (boot_processor >= 0) + if (apic_is_enabled()) + id = lapic_read(APIC_ID); + + if ((id >= 0) && has_x2apic()) + return id; + else if (id >= 0) + return (id >> 24); + else if (boot_processor >= 0) return boot_processor; - - return 0; -} - -static inline void apic_set_cpu_id(uint32_t id) -{ - if (apic_is_enabled()) - lapic_write(APIC_ID, id << 24); + else + return 0; } static inline uint32_t apic_version(void) @@ -568,17 +568,18 @@ no_mp: extern int smp_main(void); extern void gdt_flush(void); extern int set_idle_task(void); +extern atomic_int32_t current_boot_id; #if MAX_CORES > 1 int smp_start(void) { - if (lapic && has_x2apic()) // enable x2APIC support - wrmsr(0x1B, 0xFEE00C00); + if (has_x2apic()) // enable x2APIC support + wrmsr(MSR_APIC_BASE, lapic | 0xD00); // reset APIC and set id lapic_reset(); - kprintf("Processor %d is entering its idle task\n", apic_cpu_id()); + kprintf("Processor %d (local id %d) is entering its idle task\n", apic_cpu_id(), atomic_int32_read(&current_boot_id)); // use the same gdt like the boot processors gdt_flush(); @@ -595,8 +596,8 @@ int smp_start(void) // enable additional cpu features cpu_detection(); - //kprintf("CR0 of core %u: 0x%x\n", apic_cpu_id(), read_cr0()); - online[apic_cpu_id()] = 1; + //kprintf("CR0 of core %u: 0x%x\n", atomic_int32_read(&current_boot_id), read_cr0()); + online[atomic_int32_read(&current_boot_id)] = 1; set_idle_task(); @@ -616,46 +617,62 @@ static inline void set_ipi_dest(uint32_t cpu_id) { int ipi_tlb_flush(void) { - uint32_t id = smp_id(); + uint32_t id = CORE_ID; uint32_t flags; - uint32_t i, j; + uint32_t j; + uint64_t i; if (atomic_int32_read(&cpu_online) == 1) return 0; - if 
(lapic_read(APIC_ICR1) & APIC_ICR_BUSY) { - kputs("ERROR: previous send not complete"); - return -EIO; + if (BUILTIN_EXPECT(has_x2apic(), 1)) { + flags = irq_nested_disable(); + for(i=0; i 1 Lsmp_main: - ; dirty to hack to determine the cpu id - ; with a temporary stack - mov rsp, tmp_stack-16 - extern apic_cpu_id - call apic_cpu_id + xor rax, rax + mov eax, DWORD [current_boot_id] ; set default stack pointer imul rax, KERNEL_STACK_SIZE @@ -165,12 +166,9 @@ Lsmp_main: extern smp_start call smp_start jmp $ - - DQ 0, 0, 0, 0 - DQ 0, 0, 0, 0 -tmp_stack: %endif +ALIGN 4 global gdt_flush extern gp diff --git a/hermit/arch/x86/kernel/processor.c b/hermit/arch/x86/kernel/processor.c index e90e90963..09da48374 100644 --- a/hermit/arch/x86/kernel/processor.c +++ b/hermit/arch/x86/kernel/processor.c @@ -42,6 +42,7 @@ extern const void percore_end; extern void* Lpatch0; extern void* Lpatch1; extern void* Lpatch2; +extern atomic_int32_t current_boot_id; extern void isrsyscall(void); @@ -268,7 +269,7 @@ int cpu_detection(void) { cr4 &= ~CR4_TSD; // => every privilege level is able to use rdtsc write_cr4(cr4); - if (has_fsgsbase()) + if (first_time && has_fsgsbase()) { readfs = rdfsbase; readgs = rdgsbase; @@ -310,18 +311,16 @@ int cpu_detection(void) { writefs(0); #if MAX_CORES > 1 - writegs(apic_cpu_id() * ((size_t) &percore_end0 - (size_t) &percore_start)); + writegs(atomic_int32_read(&current_boot_id) * ((size_t) &percore_end0 - (size_t) &percore_start)); #else writegs(0); #endif wrmsr(MSR_KERNEL_GS_BASE, 0); - kprintf("Core %d set per_core offset to 0x%x\n", apic_cpu_id(), rdmsr(MSR_GS_BASE)); + kprintf("Core %d set per_core offset to 0x%x\n", atomic_int32_read(&current_boot_id), rdmsr(MSR_GS_BASE)); -#if MAX_CORES > 1 - /* set core id to apic_cpu_id */ - set_per_core(__core_id, apic_cpu_id()); -#endif + /* set core id to the current boot id */ + set_per_core(__core_id, atomic_int32_read(&current_boot_id)); if (first_time && has_sse()) wmb = sfence; @@ -420,14 +419,27 @@ uint32_t 
get_cpu_frequency(void) void udelay(uint32_t usecs) { - uint64_t diff, end, start = rdtsc(); - uint64_t deadline = get_cpu_frequency() * usecs; + if (BUILTIN_EXPECT(has_rdtscp(), 1)) { + uint64_t diff, end, start = rdtscp(NULL); + uint64_t deadline = get_cpu_frequency() * usecs; - do { - mb(); - end = rdtsc(); - diff = end > start ? end - start : start - end; - if ((diff < deadline) && (deadline - diff > 50000)) - check_workqueues(); - } while(diff < deadline); + do { + end = rdtscp(NULL); + rmb(); + diff = end > start ? end - start : start - end; + if ((diff < deadline) && (deadline - diff > 50000)) + check_workqueues(); + } while(diff < deadline); + } else { + uint64_t diff, end, start = rdtsc(); + uint64_t deadline = get_cpu_frequency() * usecs; + + do { + mb(); + end = rdtsc(); + diff = end > start ? end - start : start - end; + if ((diff < deadline) && (deadline - diff > 50000)) + check_workqueues(); + } while(diff < deadline); + } } diff --git a/hermit/arch/x86/kernel/timer.c b/hermit/arch/x86/kernel/timer.c index a69dd76a1..c674dcb40 100644 --- a/hermit/arch/x86/kernel/timer.c +++ b/hermit/arch/x86/kernel/timer.c @@ -49,8 +49,11 @@ static uint64_t last_rdtsc = 0; void start_tickless(void) { use_tickless = 1; + if (BUILTIN_EXPECT(has_rdtscp(), 1)) + last_rdtsc = rdtscp(NULL); + else + last_rdtsc = rdtsc(); rmb(); - last_rdtsc = rdtsc(); } void end_tickless(void) @@ -68,15 +71,28 @@ void check_ticks(void) if (CORE_ID == boot_processor) #endif { - uint64_t curr_rdtsc = rdtsc(); - uint64_t diff; + if (BUILTIN_EXPECT(has_rdtscp(), 1)){ + uint64_t curr_rdtsc = rdtscp(NULL); + uint64_t diff; - rmb(); - diff = ((curr_rdtsc - last_rdtsc) * (uint64_t)TIMER_FREQ) / (1000000ULL*(uint64_t)get_cpu_frequency()); - if (diff > 0) { - timer_ticks += diff; - last_rdtsc = curr_rdtsc; rmb(); + diff = ((curr_rdtsc - last_rdtsc) * (uint64_t)TIMER_FREQ) / (1000000ULL*(uint64_t)get_cpu_frequency()); + if (diff > 0) { + timer_ticks += diff; + last_rdtsc = curr_rdtsc; + rmb(); + } + } 
else { + uint64_t curr_rdtsc = rdtsc(); + uint64_t diff; + + rmb(); + diff = ((curr_rdtsc - last_rdtsc) * (uint64_t)TIMER_FREQ) / (1000000ULL*(uint64_t)get_cpu_frequency()); + if (diff > 0) { + timer_ticks += diff; + last_rdtsc = curr_rdtsc; + rmb(); + } } } } @@ -93,16 +109,19 @@ uint64_t get_clock_tick(void) */ static void timer_handler(struct state *s) { - /* Increment our 'tick counter' */ - timer_ticks++; + if (CORE_ID == boot_processor) + /* Increment our 'tick counter' */ + timer_ticks++; +#if 0 /* * Every TIMER_FREQ clocks (approximately 1 second), we will * display a message on the screen */ - /*if (timer_ticks % TIMER_FREQ == 0) { - kputs("One second has passed\n"); - }*/ + if (timer_ticks % TIMER_FREQ == 0) { + kprintf("One second has passed %d\n", CORE_ID); + } +#endif } int timer_wait(unsigned int ticks) diff --git a/hermit/kernel/main.c b/hermit/kernel/main.c index cc9f7ade5..da72db1ce 100644 --- a/hermit/kernel/main.c +++ b/hermit/kernel/main.c @@ -34,9 +34,9 @@ #include #include #include +#include #include #include -#include #include /* @@ -58,6 +58,7 @@ extern atomic_int64_t total_allocated_pages; extern atomic_int64_t total_available_pages; extern atomic_int32_t cpu_online; +extern atomic_int32_t possible_cpus; static int foo(void* arg) { @@ -94,18 +95,31 @@ static int hermit_init(void) return 0; } +static void print_status(void) +{ + static spinlock_t status_lock = SPINLOCK_INIT; + + spinlock_lock(&status_lock); + kprintf("%d CPU is now online (CR0 0x%zx, CR4 0x%zx)\n", CORE_ID, read_cr0(), read_cr4()); + spinlock_unlock(&status_lock); +} + #if MAX_CORES > 1 int smp_main(void) { int32_t cpu = atomic_int32_inc(&cpu_online); - kprintf("%d CPUs are now online\n", cpu); - #ifdef CONFIG_TICKLESS disable_timer_irq(); #endif - create_kernel_task(NULL, foo, "foo2", NORMAL_PRIO); + /* wait for the other cpus */ + while(atomic_int32_read(&cpu_online) < atomic_int32_read(&possible_cpus)) + PAUSE; + + print_status(); + + //create_kernel_task(NULL, foo, 
"foo2", NORMAL_PRIO); while(1) { check_workqueues(); @@ -160,6 +174,12 @@ int main(void) disable_timer_irq(); #endif + /* wait for the other cpus */ + while(atomic_int32_read(&cpu_online) < atomic_int32_read(&possible_cpus)) + PAUSE; + + print_status(); + create_kernel_task(NULL, initd, NULL, NORMAL_PRIO); while(1) { diff --git a/hermit/usr/examples/jacobi.c b/hermit/usr/examples/jacobi.c index d2aa65563..258dbc7de 100644 --- a/hermit/usr/examples/jacobi.c +++ b/hermit/usr/examples/jacobi.c @@ -44,7 +44,7 @@ extern unsigned int get_cpufreq(); inline static unsigned long long rdtscp(void) { unsigned long long lo, hi; - asm volatile ("rdtscp" : "=a"(lo), "=d"(hi) :: "%rcx"); + asm volatile ("rdtscp; lfence" : "=a"(lo), "=d"(hi) :: "%rcx", "memory"); return (hi << 32 | lo); } diff --git a/hermit/usr/examples/stream.c b/hermit/usr/examples/stream.c index 0337fc42d..e3631b711 100644 --- a/hermit/usr/examples/stream.c +++ b/hermit/usr/examples/stream.c @@ -421,7 +421,7 @@ extern unsigned int get_cpufreq(); inline static unsigned long long rdtscp(void) { unsigned long long lo, hi; - asm volatile ("rdtscp" : "=a"(lo), "=d"(hi) :: "%rcx"); + asm volatile ("rdtscp; lfence" : "=a"(lo), "=d"(hi) :: "%rcx", "memory"); return (hi << 32 | lo); } diff --git a/hermit/usr/newlib b/hermit/usr/newlib index 598908f95..2eea5f979 160000 --- a/hermit/usr/newlib +++ b/hermit/usr/newlib @@ -1 +1 @@ -Subproject commit 598908f955e6a146b14fbda1813874f113965313 +Subproject commit 2eea5f979ee40e23741efcfaf6d863dd594a822f