mirror of
https://github.com/hermitcore/libhermit.git
synced 2025-03-09 00:00:03 +01:00
add version of AVX support, detect more CPU features
This commit is contained in:
parent
37f45b70d5
commit
fcd1404ccd
3 changed files with 144 additions and 24 deletions
|
@ -45,14 +45,13 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
// feature list 0x00000001 (edx)
// Bit positions of the flags reported in EDX by CPUID leaf 1.
// NOTE: CPU_FEATUE_PSE is misspelled, but the name is kept as-is because
// code outside this block may reference it.
#define CPU_FEATURE_FPU			(1 << 0)
#define CPU_FEATUE_PSE			(1 << 3)
#define CPU_FEATURE_MSR			(1 << 5)
#define CPU_FEATURE_PAE			(1 << 6)
#define CPU_FEATURE_APIC		(1 << 9)
#define CPU_FEATURE_SEP			(1 << 11)
// FMA is not an EDX flag (EDX bit 12 is MTRR); CPU_FEATURE_FMA is defined
// with the ECX feature list instead.
#define CPU_FEATURE_PGE			(1 << 13)
#define CPU_FEATURE_PAT			(1 << 16)
#define CPU_FEATURE_PSE36		(1 << 17)
|
||||
|
@ -61,19 +60,26 @@ extern "C" {
|
|||
#define CPU_FEATURE_SSE			(1 << 25)
#define CPU_FEATURE_SSE2		(1 << 26)

// feature list 0x00000001 (ecx)
// Bit positions of the flags reported in ECX by CPUID leaf 1.
#define CPU_FEATURE_SSE3		(1 << 9)
#define CPU_FEATURE_FMA			(1 << 12)
#define CPU_FEATURE_SSE4_1		(1 << 19)
#define CPU_FEATURE_SSE4_2		(1 << 20)
#define CPU_FEATURE_X2APIC		(1 << 21)
#define CPU_FEATURE_MOVBE		(1 << 22)
#define CPU_FEATURE_XSAVE		(1 << 26)
#define CPU_FEATURE_OSXSAVE		(1 << 27)
#define CPU_FEATURE_AVX			(1 << 28)
// Bit 31 does not fit into a signed int: shifting a signed 1 by 31 is
// undefined behavior and sign-extends when widened, so use an unsigned one.
#define CPU_FEATURE_HYPERVISOR		(1U << 31)
|
||||
|
||||
// CPUID.80000001H:EDX feature list
#define CPU_FEATURE_SYSCALL		(1 << 11)
#define CPU_FEATURE_NX			(1 << 20)
#define CPU_FEATURE_1GBHP		(1 << 26)
#define CPU_FEATURE_LM			(1 << 29)

// feature list 0x00000007:0
#define CPU_FEATURE_AVX2		(1 << 5)
|
||||
|
||||
// x86 control registers
|
||||
|
||||
|
@ -212,14 +218,34 @@ inline static uint32_t has_sep(void) {
|
|||
return (cpu_info.feature1 & CPU_FEATURE_SEP);
|
||||
}
|
||||
|
||||
inline static uint32_t has_movbe(void) {
|
||||
return (cpu_info.feature2 & CPU_FEATURE_MOVBE);
|
||||
}
|
||||
|
||||
inline static uint32_t has_fma(void) {
|
||||
return (cpu_info.feature1 & CPU_FEATURE_FMA);
|
||||
return (cpu_info.feature2 & CPU_FEATURE_FMA);
|
||||
}
|
||||
|
||||
inline static uint32_t has_sse3(void) {
|
||||
return (cpu_info.feature2 & CPU_FEATURE_SSE3);
|
||||
}
|
||||
|
||||
inline static uint32_t has_sse4_1(void) {
|
||||
return (cpu_info.feature2 & CPU_FEATURE_SSE4_1);
|
||||
}
|
||||
|
||||
inline static uint32_t has_sse4_2(void) {
|
||||
return (cpu_info.feature2 & CPU_FEATURE_SSE4_2);
|
||||
}
|
||||
|
||||
inline static uint32_t has_x2apic(void) {
|
||||
return (cpu_info.feature2 & CPU_FEATURE_X2APIC);
|
||||
}
|
||||
|
||||
inline static uint32_t has_xsave(void) {
|
||||
return (cpu_info.feature2 & CPU_FEATURE_XSAVE);
|
||||
}
|
||||
|
||||
inline static uint32_t has_avx(void) {
|
||||
return (cpu_info.feature2 & CPU_FEATURE_AVX);
|
||||
}
|
||||
|
@ -414,6 +440,35 @@ extern func_memory_barrier rmb;
|
|||
/// Force strict CPU ordering, serializes store operations.
|
||||
extern func_memory_barrier wmb;
|
||||
|
||||
/** @brief Get Extended Control Register
|
||||
*
|
||||
* Reads the contents of the extended control register (XCR) specified
|
||||
* in the ECX register.
|
||||
*/
|
||||
static inline uint64_t xgetbv(uint32_t index)
|
||||
{
|
||||
uint32_t edx, eax;
|
||||
|
||||
asm volatile ("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index));
|
||||
|
||||
return (uint64_t) eax | ((uint64_t) edx << 32ULL);
|
||||
}
|
||||
|
||||
/** @brief Set Extended Control Register
|
||||
*
|
||||
* Writes a 64-bit value into the extended control register (XCR) specified
|
||||
* in the ECX register.
|
||||
*/
|
||||
static inline void xsetbv(uint32_t index, uint64_t value)
|
||||
{
|
||||
uint32_t edx, eax;
|
||||
|
||||
edx = (uint32_t) (value >> 32ULL);
|
||||
eax = (uint32_t) value;
|
||||
|
||||
asm volatile ("xsetbv" :: "a"(eax), "c"(index), "d"(edx));
|
||||
}
|
||||
|
||||
/** @brief Read out CPU ID
|
||||
*
|
||||
* The cpuid asm-instruction does fill some information into registers and
|
||||
|
|
|
@ -30,7 +30,7 @@
|
|||
* @file arch/x86/include/asm/tasks_types.h
|
||||
* @brief Task related structure definitions
|
||||
*
|
||||
* This file contains the task_t structure definition
|
||||
* and task state define constants
|
||||
*/
|
||||
|
||||
|
@ -76,9 +76,22 @@ typedef struct i387_fxsave_struct {
|
|||
};
|
||||
} i387_fxsave_t __attribute__ ((aligned (16)));
|
||||
|
||||
/** @brief Header part of an XSAVE area
 *
 * Eight 64-bit words (64 bytes); xsave_t places it directly behind the
 * fxsave region.
 */
typedef struct {
	uint64_t	xstate_bv;	// bitmap of the state components present in the area
	uint64_t	xcomp_bv;	// compaction bitmap; zero in the standard (non-compacted) format
	uint64_t	reserved[6];	// reserved words
} xsave_header_t;
|
||||
|
||||
typedef struct {
|
||||
i387_fxsave_t fxsave;
|
||||
xsave_header_t hdr;
|
||||
uint32_t ymmh[64];
|
||||
} xsave_t;
|
||||
|
||||
union fpu_state {
|
||||
i387_fsave_t fsave;
|
||||
i387_fxsave_t fxsave;
|
||||
xsave_t xsave;
|
||||
};
|
||||
|
||||
/** @brief Signature of the handlers that operate on a saved FPU context
 *
 * Used for the function pointers (e.g. fpu_init) that are bound to a
 * concrete implementation during CPU detection.
 */
typedef void (*handle_fpu_state)(union fpu_state* state);
|
||||
|
|
|
@ -76,7 +76,7 @@ handle_fpu_state fpu_init = default_fpu_init;
|
|||
|
||||
static void save_fpu_state_fxsr(union fpu_state* state)
|
||||
{
|
||||
asm volatile ("fxsave %0; fnclex" : "=m"((*state).fxsave) :: "memory");
|
||||
asm volatile ("fxsave %0; fnclex" : "=m"(state->fxsave) :: "memory");
|
||||
}
|
||||
|
||||
static void restore_fpu_state_fxsr(union fpu_state* state)
|
||||
|
@ -94,6 +94,30 @@ static void fpu_init_fxsr(union fpu_state* fpu)
|
|||
fx->mxcsr = 0x1f80;
|
||||
}
|
||||
|
||||
static void save_fpu_state_xsave(union fpu_state* state)
|
||||
{
|
||||
uint32_t eax = 1, edx = 1;
|
||||
|
||||
asm volatile ("xsave %0" : "=m"(state->xsave) : "a"(eax), "d"(edx) : "memory");
|
||||
}
|
||||
|
||||
static void restore_fpu_state_xsave(union fpu_state* state)
|
||||
{
|
||||
uint32_t eax = 1, edx = 1;
|
||||
|
||||
asm volatile ("xrstor %0" :: "m"(state->xsave), "a"(eax), "d"(edx));
|
||||
}
|
||||
|
||||
static void fpu_init_xsave(union fpu_state* fpu)
|
||||
{
|
||||
xsave_t* xs = &fpu->xsave;
|
||||
|
||||
memset(xs, 0x00, sizeof(xsave_t));
|
||||
xs->fxsave.cwd = 0x37f;
|
||||
if (BUILTIN_EXPECT(has_sse(), 1))
|
||||
xs->fxsave.mxcsr = 0x1f80;
|
||||
}
|
||||
|
||||
uint32_t detect_cpu_frequency(void)
|
||||
{
|
||||
uint64_t start, end, diff;
|
||||
|
@ -123,6 +147,7 @@ uint32_t detect_cpu_frequency(void)
|
|||
}
|
||||
|
||||
int cpu_detection(void) {
|
||||
uint64_t xcr0;
|
||||
uint32_t a=0, b=0, c=0, d=0;
|
||||
uint32_t family, model, stepping;
|
||||
size_t cr4;
|
||||
|
@ -141,7 +166,7 @@ int cpu_detection(void) {
|
|||
cpuid(0x80000001, &a, &b, &c, &cpu_info.feature3);
|
||||
cpuid(0x80000008, &cpu_info.addr_width, &b, &c, &d);
|
||||
|
||||
a = c = d = 0;
|
||||
a = b = c = d = 0;
|
||||
cpuid(7, &a, &cpu_info.feature4, &c, &d);
|
||||
}
|
||||
|
||||
|
@ -167,10 +192,25 @@ int cpu_detection(void) {
|
|||
cr4 |= CR4_OSFXSR; // set the OSFXSR bit
|
||||
if (has_sse())
|
||||
cr4 |= CR4_OSXMMEXCPT; // set the OSXMMEXCPT bit
|
||||
if (has_xsave())
|
||||
cr4 |= CR4_OSXSAVE;
|
||||
if (has_pge())
|
||||
cr4 |= CR4_PGE;
|
||||
write_cr4(cr4);
|
||||
|
||||
if (has_xsave())
{
	// XCR0 selects the state components handled by XSAVE/XRSTOR:
	// bit 0 = x87 state, bit 1 = SSE state, bit 2 = AVX state.
	xcr0 = xgetbv(0);
	if (has_fpu())
		xcr0 |= 0x1;
	if (has_sse())
		xcr0 |= 0x2;
	// The AVX bit may only be set together with the x87 and SSE bits,
	// therefore all three are enabled here (0x3 alone would leave the
	// AVX state disabled).
	if (has_avx())
		xcr0 |= 0x7;
	//kprintf("Set XCR to 0x%llx\n", xcr0);
	xsetbv(0, xcr0);
}
|
||||
|
||||
if (cpu_info.feature3 & CPU_FEATURE_SYSCALL) {
|
||||
wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_LMA | EFER_SCE);
|
||||
wrmsr(MSR_STAR, (0x1BULL << 48) | (0x08ULL << 32));
|
||||
|
@ -189,22 +229,34 @@ int cpu_detection(void) {
|
|||
mb = mfence;
|
||||
}
|
||||
|
||||
if (first_time && has_avx())
|
||||
kprintf("The CPU owns the Advanced Vector Extensions (AVX). However, HermitCore doesn't support AVX!\n");
|
||||
|
||||
if (first_time && has_avx2())
|
||||
kprintf("The CPU owns the Advanced Vector Extensions (AVX2). However, HermitCore doesn't support AVX2!\n");
|
||||
|
||||
if (first_time && has_fma())
|
||||
kprintf("The CPU supports Fused Multiply-Add!\n");
|
||||
|
||||
if (has_fpu()) {
|
||||
if (first_time)
|
||||
kputs("Found and initialized FPU!\n");
|
||||
asm volatile ("fninit");
|
||||
}
|
||||
|
||||
if (first_time && has_fxsr()) {
|
||||
if (first_time) {
|
||||
kprintf("CPU features: %s%s%s%s%s%s%s%s%s%s%s%s%s\n",
|
||||
has_sse() ? "SSE " : "",
|
||||
has_sse2() ? "SSE2 " : "",
|
||||
has_sse3() ? "SSE3 " : "",
|
||||
has_sse4_1() ? "SSE4.1 " : "",
|
||||
has_sse4_2() ? "SSE4.2 " : "",
|
||||
has_avx() ? "AVX " : "",
|
||||
has_avx2() ? "AVX2 " : "",
|
||||
has_fma() ? "FMA " : "",
|
||||
has_movbe() ? "MOVBE " : "",
|
||||
has_x2apic() ? "X2APIC " : "",
|
||||
has_fpu() ? "FPU " : "",
|
||||
has_fxsr() ? "FXSR " : "",
|
||||
has_xsave() ? "XSAVE " : "");
|
||||
}
|
||||
|
||||
if (first_time && has_xsave()) {
|
||||
save_fpu_state = save_fpu_state_xsave;
|
||||
restore_fpu_state = restore_fpu_state_xsave;
|
||||
fpu_init = fpu_init_xsave;
|
||||
} else if (first_time && has_fxsr()) {
|
||||
save_fpu_state = save_fpu_state_fxsr;
|
||||
restore_fpu_state = restore_fpu_state_fxsr;
|
||||
fpu_init = fpu_init_fxsr;
|
||||
|
|
Loading…
Add table
Reference in a new issue