1
0
Fork 0
mirror of https://github.com/hermitcore/libhermit.git synced 2025-03-09 00:00:03 +01:00

add version of AVX support, detect more CPU features

This commit is contained in:
Stefan Lankes 2015-07-09 10:16:25 +02:00
parent 37f45b70d5
commit fcd1404ccd
3 changed files with 144 additions and 24 deletions

View file

@ -45,14 +45,13 @@
extern "C" {
#endif
// feature list 0x00000001 (edx)
// Bit masks for the EDX feature word returned by CPUID leaf 0x00000001.
#define CPU_FEATURE_FPU (1 << 0)
#define CPU_FEATURE_PSE (1 << 3)
// Misspelled legacy name, kept so that existing users still compile.
#define CPU_FEATUE_PSE CPU_FEATURE_PSE
#define CPU_FEATURE_MSR (1 << 5)
#define CPU_FEATURE_PAE (1 << 6)
#define CPU_FEATURE_APIC (1 << 9)
#define CPU_FEATURE_SEP (1 << 11)
#define CPU_FEATURE_PGE (1 << 13)
#define CPU_FEATURE_PAT (1 << 16)
#define CPU_FEATURE_PSE36 (1 << 17)
@ -61,19 +60,26 @@ extern "C" {
#define CPU_FEATURE_SSE (1 << 25)
#define CPU_FEATURE_SSE2 (1 << 26)
// feature list 2
#define CPU_FEATURE_X2APIC (1 << 21)
#define CPU_FEATURE_AVX (1 << 28)
// feature list 0x00000001 (ecx)
// Bit masks for the ECX feature word returned by CPUID leaf 0x00000001.
// SSE3 is reported in bit 0; bit 9 is SSSE3 (Supplemental SSE3).
#define CPU_FEATURE_SSE3 (1 << 0)
#define CPU_FEATURE_SSSE3 (1 << 9)
#define CPU_FEATURE_FMA (1 << 12)
#define CPU_FEATURE_SSE4_1 (1 << 19)
#define CPU_FEATURE_SSE4_2 (1 << 20)
#define CPU_FEATURE_X2APIC (1 << 21)
#define CPU_FEATURE_MOVBE (1 << 22)
#define CPU_FEATURE_XSAVE (1 << 26)
#define CPU_FEATURE_OSXSAVE (1 << 27)
#define CPU_FEATURE_AVX (1 << 28)
// unsigned constant: shifting a signed 1 into bit 31 is undefined behavior
#define CPU_FEATURE_HYPERVISOR (1U << 31)
// CPUID.80000001H:EDX feature list
#define CPU_FEATURE_SYSCALL (1 << 11)
#define CPU_FEATURE_NX (1 << 20)
#define CPU_FEATURE_1GBHP (1 << 26)
#define CPU_FEATURE_LM (1 << 29)
// feature list 0x00000007:0 (ebx)
#define CPU_FEATURE_AVX2 (1 << 5)
// x86 control registers
@ -212,14 +218,34 @@ inline static uint32_t has_sep(void) {
return (cpu_info.feature1 & CPU_FEATURE_SEP);
}
/** @brief Check the MOVBE feature bit
 *
 * @return The CPU_FEATURE_MOVBE bit of cpu_info.feature2: non-zero if the
 * bit is set, 0 otherwise.
 */
inline static uint32_t has_movbe(void)
{
	return cpu_info.feature2 & CPU_FEATURE_MOVBE;
}
/** @brief Check the FMA (fused multiply-add) feature bit
 *
 * CPU_FEATURE_FMA belongs to the CPUID leaf 0x00000001 ECX feature list,
 * so it has to be tested against cpu_info.feature2 (like the other ECX
 * bits), not against cpu_info.feature1.
 *
 * @return Non-zero if the bit is set, 0 otherwise.
 */
inline static uint32_t has_fma(void) {
	return (cpu_info.feature2 & CPU_FEATURE_FMA);
}
/** @brief Check the SSE3 feature bit
 *
 * @return The CPU_FEATURE_SSE3 bit of cpu_info.feature2: non-zero if the
 * bit is set, 0 otherwise.
 */
inline static uint32_t has_sse3(void)
{
	return cpu_info.feature2 & CPU_FEATURE_SSE3;
}
/** @brief Check the SSE4.1 feature bit
 *
 * @return The CPU_FEATURE_SSE4_1 bit of cpu_info.feature2: non-zero if the
 * bit is set, 0 otherwise.
 */
inline static uint32_t has_sse4_1(void)
{
	return cpu_info.feature2 & CPU_FEATURE_SSE4_1;
}
/** @brief Check the SSE4.2 feature bit
 *
 * @return The CPU_FEATURE_SSE4_2 bit of cpu_info.feature2: non-zero if the
 * bit is set, 0 otherwise.
 */
inline static uint32_t has_sse4_2(void)
{
	return cpu_info.feature2 & CPU_FEATURE_SSE4_2;
}
/** @brief Check the x2APIC feature bit
 *
 * @return The CPU_FEATURE_X2APIC bit of cpu_info.feature2: non-zero if the
 * bit is set, 0 otherwise.
 */
inline static uint32_t has_x2apic(void)
{
	return cpu_info.feature2 & CPU_FEATURE_X2APIC;
}
/** @brief Check the XSAVE feature bit
 *
 * @return The CPU_FEATURE_XSAVE bit of cpu_info.feature2: non-zero if the
 * bit is set, 0 otherwise.
 */
inline static uint32_t has_xsave(void)
{
	return cpu_info.feature2 & CPU_FEATURE_XSAVE;
}
/** @brief Check the AVX feature bit
 *
 * @return The CPU_FEATURE_AVX bit of cpu_info.feature2: non-zero if the
 * bit is set, 0 otherwise.
 */
inline static uint32_t has_avx(void)
{
	return cpu_info.feature2 & CPU_FEATURE_AVX;
}
@ -414,6 +440,35 @@ extern func_memory_barrier rmb;
/// Force strict CPU ordering, serializes store operations.
extern func_memory_barrier wmb;
/** @brief Get Extended Control Register
 *
 * Executes XGETBV with the register number passed in ECX and combines
 * the two 32-bit halves delivered in EDX:EAX into one 64-bit value.
 *
 * @param index Number of the extended control register (XCR) to read
 * @return Content of the selected XCR (EDX in the upper, EAX in the lower 32 bits)
 */
static inline uint64_t xgetbv(uint32_t index)
{
	uint32_t lo, hi;

	asm volatile ("xgetbv" : "=a"(lo), "=d"(hi) : "c"(index));

	return ((uint64_t) hi << 32) | (uint64_t) lo;
}
/** @brief Set Extended Control Register
 *
 * Splits the 64-bit value into two 32-bit halves and executes XSETBV
 * with the halves in EDX:EAX and the register number in ECX.
 *
 * @param index Number of the extended control register (XCR) to write
 * @param value New 64-bit content of the selected XCR
 */
static inline void xsetbv(uint32_t index, uint64_t value)
{
	const uint32_t lo = (uint32_t) value;
	const uint32_t hi = (uint32_t) (value >> 32);

	asm volatile ("xsetbv" :: "a"(lo), "c"(index), "d"(hi));
}
/** @brief Read out CPU ID
*
* The cpuid asm-instruction does fill some information into registers and

View file

@ -30,7 +30,7 @@
* @file arch/x86/include/asm/tasks_types.h
* @brief Task related structure definitions
*
* This file contains the task_t structure definition
* This file contains the task_t structure definition
* and task state define constants
*/
@ -76,9 +76,22 @@ typedef struct i387_fxsave_struct {
};
} i387_fxsave_t __attribute__ ((aligned (16)));
/** @brief Header part of the XSAVE area
 *
 * 64 bytes in total (2 x 8 bytes + 6 x 8 reserved bytes). In xsave_t it
 * directly follows the legacy FXSAVE region.
 */
typedef struct {
// presumably the XSTATE_BV bitmap of the XSAVE header (see Intel SDM) - TODO confirm
uint64_t xstate_bv;
// presumably the XCOMP_BV field of the XSAVE header (see Intel SDM) - TODO confirm
uint64_t xcomp_bv;
// padding that fills the header up to 64 bytes
uint64_t reserved[6];
} xsave_header_t;
/** @brief Memory image used by the XSAVE/XRSTOR based FPU handlers
 *
 * Layout: legacy FXSAVE region, followed by the XSAVE header and 256 bytes
 * (64 x 32 bit) named ymmh - presumably the upper halves of the YMM
 * registers.
 *
 * XSAVE and XRSTOR require their memory operand to be aligned to a 64-byte
 * boundary (otherwise a general-protection exception is raised), so the
 * type requests this alignment explicitly instead of relying on the
 * alignment inherited from i387_fxsave_t.
 */
typedef struct {
	i387_fxsave_t fxsave;
	xsave_header_t hdr;
	uint32_t ymmh[64];
} xsave_t __attribute__ ((aligned (64)));
/** @brief Storage for the saved FPU context
 *
 * One buffer with three alternative layouts. The handlers of type
 * handle_fpu_state receive a pointer to this union and access the member
 * that matches their save/restore instruction.
 */
union fpu_state {
// layout of type i387_fsave_t
i387_fsave_t fsave;
// layout used by the FXSAVE based handlers
i387_fxsave_t fxsave;
// layout used by the XSAVE based handlers
xsave_t xsave;
};
typedef void (*handle_fpu_state)(union fpu_state* state);

View file

@ -76,7 +76,7 @@ handle_fpu_state fpu_init = default_fpu_init;
/** @brief Save the FPU context with FXSAVE
 *
 * Stores the current state into the fxsave member of the given buffer and
 * clears pending x87 exceptions afterwards (fnclex). The state is saved
 * exactly once: a second "fxsave; fnclex" would overwrite the image with
 * a state whose exception flags have already been cleared.
 *
 * @param state Buffer which receives the FPU context
 */
static void save_fpu_state_fxsr(union fpu_state* state)
{
	asm volatile ("fxsave %0; fnclex" : "=m"(state->fxsave) :: "memory");
}
static void restore_fpu_state_fxsr(union fpu_state* state)
@ -94,6 +94,30 @@ static void fpu_init_fxsr(union fpu_state* fpu)
fx->mxcsr = 0x1f80;
}
/** @brief Save the FPU context with XSAVE
 *
 * EDX:EAX is the requested-feature bitmap of XSAVE. Bit 0 selects the x87
 * state, bit 1 the SSE state and bit 2 the AVX state; the processor only
 * stores the components that are also enabled in XCR0. The mask is limited
 * to these three components because xsave_t only provides room for them.
 *
 * @param state Buffer which receives the FPU context
 */
static void save_fpu_state_xsave(union fpu_state* state)
{
	const uint32_t eax = 0x7, edx = 0;	// x87 | SSE | AVX

	asm volatile ("xsave %0" : "=m"(state->xsave) : "a"(eax), "d"(edx) : "memory");
}

/** @brief Restore the FPU context with XRSTOR
 *
 * Uses the same requested-feature bitmap as save_fpu_state_xsave, so that
 * every component written by the save handler is loaded again.
 *
 * @param state Buffer which contains a previously saved FPU context
 */
static void restore_fpu_state_xsave(union fpu_state* state)
{
	const uint32_t eax = 0x7, edx = 0;	// x87 | SSE | AVX

	asm volatile ("xrstor %0" :: "m"(state->xsave), "a"(eax), "d"(edx));
}
/** @brief Prepare an initial FPU context in XSAVE layout
 *
 * Zeroes the complete XSAVE image, then sets the x87 control word to 0x37f
 * and - if the CPU reports SSE - MXCSR to 0x1f80.
 *
 * @param fpu Buffer which receives the initial FPU context
 */
static void fpu_init_xsave(union fpu_state* fpu)
{
	xsave_t* area = &(fpu->xsave);

	memset(area, 0x00, sizeof(*area));

	area->fxsave.cwd = 0x37f;
	if (BUILTIN_EXPECT(has_sse(), 1)) {
		area->fxsave.mxcsr = 0x1f80;
	}
}
uint32_t detect_cpu_frequency(void)
{
uint64_t start, end, diff;
@ -123,6 +147,7 @@ uint32_t detect_cpu_frequency(void)
}
int cpu_detection(void) {
uint64_t xcr0;
uint32_t a=0, b=0, c=0, d=0;
uint32_t family, model, stepping;
size_t cr4;
@ -141,7 +166,7 @@ int cpu_detection(void) {
cpuid(0x80000001, &a, &b, &c, &cpu_info.feature3);
cpuid(0x80000008, &cpu_info.addr_width, &b, &c, &d);
a = c = d = 0;
a = b = c = d = 0;
cpuid(7, &a, &cpu_info.feature4, &c, &d);
}
@ -167,10 +192,25 @@ int cpu_detection(void) {
cr4 |= CR4_OSFXSR; // set the OSFXSR bit
if (has_sse())
cr4 |= CR4_OSXMMEXCPT; // set the OSXMMEXCPT bit
if (has_xsave())
cr4 |= CR4_OSXSAVE;
if (has_pge())
cr4 |= CR4_PGE;
write_cr4(cr4);
if (has_xsave())
{
	// Enable in XCR0 every state component that XSAVE/XRSTOR should manage.
	xcr0 = xgetbv(0);
	if (has_fpu())
		xcr0 |= 0x1;	// bit 0: x87 state
	if (has_sse())
		xcr0 |= 0x2;	// bit 1: SSE state
	if (has_avx())
		xcr0 |= 0x4;	// bit 2: AVX state (0x3 would only repeat bits 0 and 1)
	//kprintf("Set XCR to 0x%llx\n", xcr0);
	xsetbv(0, xcr0);
}
if (cpu_info.feature3 & CPU_FEATURE_SYSCALL) {
wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_LMA | EFER_SCE);
wrmsr(MSR_STAR, (0x1BULL << 48) | (0x08ULL << 32));
@ -189,22 +229,34 @@ int cpu_detection(void) {
mb = mfence;
}
if (first_time && has_avx())
kprintf("The CPU owns the Advanced Vector Extensions (AVX). However, HermitCore doesn't support AVX!\n");
if (first_time && has_avx2())
kprintf("The CPU owns the Advanced Vector Extensions (AVX2). However, HermitCore doesn't support AVX2!\n");
if (first_time && has_fma())
kprintf("The CPU supports Fused Multiply-Add!\n");
if (has_fpu()) {
if (first_time)
kputs("Found and initialized FPU!\n");
asm volatile ("fninit");
}
if (first_time && has_fxsr()) {
if (first_time) {
kprintf("CPU features: %s%s%s%s%s%s%s%s%s%s%s%s%s\n",
has_sse() ? "SSE " : "",
has_sse2() ? "SSE2 " : "",
has_sse3() ? "SSE3 " : "",
has_sse4_1() ? "SSE4.1 " : "",
has_sse4_2() ? "SSE4.2 " : "",
has_avx() ? "AVX " : "",
has_avx2() ? "AVX2 " : "",
has_fma() ? "FMA " : "",
has_movbe() ? "MOVBE " : "",
has_x2apic() ? "X2APIC " : "",
has_fpu() ? "FPU " : "",
has_fxsr() ? "FXSR " : "",
has_xsave() ? "XSAVE " : "");
}
if (first_time && has_xsave()) {
save_fpu_state = save_fpu_state_xsave;
restore_fpu_state = restore_fpu_state_xsave;
fpu_init = fpu_init_xsave;
} else if (first_time && has_fxsr()) {
save_fpu_state = save_fpu_state_fxsr;
restore_fpu_state = restore_fpu_state_fxsr;
fpu_init = fpu_init_fxsr;