From fcd1404ccd9d2d23bbddace3b99a73d0b20989fb Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Thu, 9 Jul 2015 10:16:25 +0200 Subject: [PATCH] add version of AVX support, detect more CPU features --- hermit/arch/x86/include/asm/processor.h | 77 +++++++++++++++++++---- hermit/arch/x86/include/asm/tasks_types.h | 15 ++++- hermit/arch/x86/kernel/processor.c | 76 ++++++++++++++++++---- 3 files changed, 144 insertions(+), 24 deletions(-) diff --git a/hermit/arch/x86/include/asm/processor.h b/hermit/arch/x86/include/asm/processor.h index f9f080885..05a13aa93 100644 --- a/hermit/arch/x86/include/asm/processor.h +++ b/hermit/arch/x86/include/asm/processor.h @@ -45,14 +45,13 @@ extern "C" { #endif -// feature list 1 +// feature list 0x00000001 (edx) #define CPU_FEATURE_FPU (1 << 0) #define CPU_FEATUE_PSE (1 << 3) #define CPU_FEATURE_MSR (1 << 5) #define CPU_FEATURE_PAE (1 << 6) #define CPU_FEATURE_APIC (1 << 9) #define CPU_FEATURE_SEP (1 << 11) -#define CPU_FEATURE_FMA (1 << 12) #define CPU_FEATURE_PGE (1 << 13) #define CPU_FEATURE_PAT (1 << 16) #define CPU_FEATURE_PSE36 (1 << 17) @@ -61,19 +60,26 @@ extern "C" { #define CPU_FEATURE_SSE (1 << 25) #define CPU_FEATURE_SSE2 (1 << 26) -// feature list 2 -#define CPU_FEATURE_X2APIC (1 << 21) -#define CPU_FEATURE_AVX (1 << 28) +// feature list 0x00000001 (ecx) +#define CPU_FEATURE_SSE3 (1 << 0) /* ECX bit 0 is SSE3; bit 9 would be SSSE3 */ +#define CPU_FEATURE_FMA (1 << 12) +#define CPU_FEATURE_SSE4_1 (1 << 19) +#define CPU_FEATURE_SSE4_2 (1 << 20) +#define CPU_FEATURE_X2APIC (1 << 21) +#define CPU_FEATURE_MOVBE (1 << 22) +#define CPU_FEATURE_XSAVE (1 << 26) +#define CPU_FEATURE_OSXSAVE (1 << 27) +#define CPU_FEATURE_AVX (1 << 28) #define CPU_FEATURE_HYPERVISOR (1 << 31) // CPUID.80000001H:EDX feature list #define CPU_FEATURE_SYSCALL (1 << 11) -#define CPU_FEATURE_NX (1 << 20) -#define CPU_FEATURE_1GBHP (1 << 26) -#define CPU_FEATURE_LM (1 << 29) +#define CPU_FEATURE_NX (1 << 20) +#define CPU_FEATURE_1GBHP (1 << 26) +#define CPU_FEATURE_LM (1 << 29) -// feature list 4 
-#define CPU_FEATURE_AVX2 (1 << 5) +// feature list 0x00000007:0 +#define CPU_FEATURE_AVX2 (1 << 5) // x86 control registers @@ -212,14 +218,34 @@ inline static uint32_t has_sep(void) { return (cpu_info.feature1 & CPU_FEATURE_SEP); } +inline static uint32_t has_movbe(void) { + return (cpu_info.feature2 & CPU_FEATURE_MOVBE); +} + inline static uint32_t has_fma(void) { - return (cpu_info.feature1 & CPU_FEATURE_FMA); + return (cpu_info.feature2 & CPU_FEATURE_FMA); +} + +inline static uint32_t has_sse3(void) { + return (cpu_info.feature2 & CPU_FEATURE_SSE3); +} + +inline static uint32_t has_sse4_1(void) { + return (cpu_info.feature2 & CPU_FEATURE_SSE4_1); +} + +inline static uint32_t has_sse4_2(void) { + return (cpu_info.feature2 & CPU_FEATURE_SSE4_2); } inline static uint32_t has_x2apic(void) { return (cpu_info.feature2 & CPU_FEATURE_X2APIC); } +inline static uint32_t has_xsave(void) { + return (cpu_info.feature2 & CPU_FEATURE_XSAVE); +} + inline static uint32_t has_avx(void) { return (cpu_info.feature2 & CPU_FEATURE_AVX); } @@ -414,6 +440,35 @@ extern func_memory_barrier rmb; /// Force strict CPU ordering, serializes store operations. extern func_memory_barrier wmb; +/** @brief Get Extended Control Register + * + * Reads the contents of the extended control register (XCR) specified + * in the ECX register. + */ +static inline uint64_t xgetbv(uint32_t index) +{ + uint32_t edx, eax; + + asm volatile ("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index)); + + return (uint64_t) eax | ((uint64_t) edx << 32ULL); +} + +/** @brief Set Extended Control Register + * + * Writes a 64-bit value into the extended control register (XCR) specified + * in the ECX register. 
+ */ +static inline void xsetbv(uint32_t index, uint64_t value) +{ + uint32_t edx, eax; + + edx = (uint32_t) (value >> 32ULL); + eax = (uint32_t) value; + + asm volatile ("xsetbv" :: "a"(eax), "c"(index), "d"(edx)); +} + /** @brief Read out CPU ID * * The cpuid asm-instruction does fill some information into registers and diff --git a/hermit/arch/x86/include/asm/tasks_types.h b/hermit/arch/x86/include/asm/tasks_types.h index 837983c97..87782bc28 100644 --- a/hermit/arch/x86/include/asm/tasks_types.h +++ b/hermit/arch/x86/include/asm/tasks_types.h @@ -30,7 +30,7 @@ * @file arch/x86/include/asm/tasks_types.h * @brief Task related structure definitions * - * This file contains the task_t structure definition + * This file contains the task_t structure definition * and task state define constants */ @@ -76,9 +76,22 @@ typedef struct i387_fxsave_struct { }; } i387_fxsave_t __attribute__ ((aligned (16))); +typedef struct { + uint64_t xstate_bv; + uint64_t xcomp_bv; + uint64_t reserved[6]; +} xsave_header_t; + +typedef struct { + i387_fxsave_t fxsave; + xsave_header_t hdr; + uint32_t ymmh[64]; +} xsave_t; + union fpu_state { i387_fsave_t fsave; i387_fxsave_t fxsave; + xsave_t xsave; }; typedef void (*handle_fpu_state)(union fpu_state* state); diff --git a/hermit/arch/x86/kernel/processor.c b/hermit/arch/x86/kernel/processor.c index 86d83d43b..3601fe269 100644 --- a/hermit/arch/x86/kernel/processor.c +++ b/hermit/arch/x86/kernel/processor.c @@ -76,7 +76,7 @@ handle_fpu_state fpu_init = default_fpu_init; static void save_fpu_state_fxsr(union fpu_state* state) { - asm volatile ("fxsave %0; fnclex" : "=m"((*state).fxsave) :: "memory"); + asm volatile ("fxsave %0; fnclex" : "=m"(state->fxsave) :: "memory"); } static void restore_fpu_state_fxsr(union fpu_state* state) @@ -94,6 +94,30 @@ static void fpu_init_fxsr(union fpu_state* fpu) fx->mxcsr = 0x1f80; } +static void save_fpu_state_xsave(union fpu_state* state) +{ + uint32_t eax = 0x7, edx = 0; /* EDX:EAX component mask: x87 | SSE | AVX */ + + asm volatile ("xsave %0" : 
"=m"(state->xsave) : "a"(eax), "d"(edx) : "memory"); +} + +static void restore_fpu_state_xsave(union fpu_state* state) +{ + uint32_t eax = 0x7, edx = 0; /* must match the mask used by save_fpu_state_xsave */ + + asm volatile ("xrstor %0" :: "m"(state->xsave), "a"(eax), "d"(edx)); +} + +static void fpu_init_xsave(union fpu_state* fpu) +{ + xsave_t* xs = &fpu->xsave; + + memset(xs, 0x00, sizeof(xsave_t)); + xs->fxsave.cwd = 0x37f; + if (BUILTIN_EXPECT(has_sse(), 1)) + xs->fxsave.mxcsr = 0x1f80; +} + uint32_t detect_cpu_frequency(void) { uint64_t start, end, diff; @@ -123,6 +147,7 @@ uint32_t detect_cpu_frequency(void) } int cpu_detection(void) { + uint64_t xcr0; uint32_t a=0, b=0, c=0, d=0; uint32_t family, model, stepping; size_t cr4; @@ -141,7 +166,7 @@ int cpu_detection(void) { cpuid(0x80000001, &a, &b, &c, &cpu_info.feature3); cpuid(0x80000008, &cpu_info.addr_width, &b, &c, &d); - a = c = d = 0; + a = b = c = d = 0; cpuid(7, &a, &cpu_info.feature4, &c, &d); } @@ -167,10 +192,25 @@ int cpu_detection(void) { cr4 |= CR4_OSFXSR; // set the OSFXSR bit if (has_sse()) cr4 |= CR4_OSXMMEXCPT; // set the OSXMMEXCPT bit + if (has_xsave()) + cr4 |= CR4_OSXSAVE; if (has_pge()) cr4 |= CR4_PGE; write_cr4(cr4); + if (has_xsave()) + { + xcr0 = xgetbv(0); + if (has_fpu()) + xcr0 |= 0x1; + if (has_sse()) + xcr0 |= 0x2; + if (has_avx()) + xcr0 |= 0x4; /* XCR0 bit 2 enables AVX (YMM) state; 0x3 only repeated x87|SSE */ + //kprintf("Set XCR to 0x%llx\n", xcr0); + xsetbv(0, xcr0); + } + if (cpu_info.feature3 & CPU_FEATURE_SYSCALL) { wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_LMA | EFER_SCE); wrmsr(MSR_STAR, (0x1BULL << 48) | (0x08ULL << 32)); @@ -189,22 +229,34 @@ int cpu_detection(void) { mb = mfence; } - if (first_time && has_avx()) - kprintf("The CPU owns the Advanced Vector Extensions (AVX). However, HermitCore doesn't support AVX!\n"); - - if (first_time && has_avx2()) - kprintf("The CPU owns the Advanced Vector Extensions (AVX2). 
However, HermitCore doesn't support AVX2!\n"); - - if (first_time && has_fma()) - kprintf("The CPU supports Fused Multiply-Add!\n"); - if (has_fpu()) { if (first_time) kputs("Found and initialized FPU!\n"); asm volatile ("fninit"); } - if (first_time && has_fxsr()) { + if (first_time) { + kprintf("CPU features: %s%s%s%s%s%s%s%s%s%s%s%s%s\n", + has_sse() ? "SSE " : "", + has_sse2() ? "SSE2 " : "", + has_sse3() ? "SSE3 " : "", + has_sse4_1() ? "SSE4.1 " : "", + has_sse4_2() ? "SSE4.2 " : "", + has_avx() ? "AVX " : "", + has_avx2() ? "AVX2 " : "", + has_fma() ? "FMA " : "", + has_movbe() ? "MOVBE " : "", + has_x2apic() ? "X2APIC " : "", + has_fpu() ? "FPU " : "", + has_fxsr() ? "FXSR " : "", + has_xsave() ? "XSAVE " : ""); + } + + if (first_time && has_xsave()) { + save_fpu_state = save_fpu_state_xsave; + restore_fpu_state = restore_fpu_state_xsave; + fpu_init = fpu_init_xsave; + } else if (first_time && has_fxsr()) { save_fpu_state = save_fpu_state_fxsr; restore_fpu_state = restore_fpu_state_fxsr; fpu_init = fpu_init_fxsr;