1
0
Fork 0
mirror of https://github.com/hermitcore/libhermit.git synced 2025-03-09 00:00:03 +01:00

revise APIC code, add rdtscp support

- fix bug in the TLB shootdown
- fix bug in x2APIC code, read APIC ID correctly
This commit is contained in:
Stefan Lankes 2015-08-13 22:26:04 +02:00
parent ea68816194
commit 919d1d14aa
10 changed files with 176 additions and 95 deletions

View file

@ -309,7 +309,7 @@ static inline void clts(void)
/** @brief Read out time stamp counter
*
* The rdtsc asm command puts a 64 bit time stamp value
* The rdtsc instruction puts a 64 bit time stamp value
* into EDX:EAX.
*
* @return The 64 bit time stamp value
@ -317,7 +317,28 @@ static inline void clts(void)
inline static uint64_t rdtsc(void)
{
	uint64_t tsc_low, tsc_high;

	/* rdtsc delivers the low half in EAX and the high half in EDX */
	__asm__ __volatile__ ("rdtsc" : "=a"(tsc_low), "=d"(tsc_high));

	/* glue both halves together to the 64 bit counter value */
	return (tsc_high << 32) | tsc_low;
}
/** @brief Read time stamp counter and processor id
 *
 * The rdtscp instruction puts a 64 bit time stamp value
 * into EDX:EAX and the processor id into ECX.
 *
 * @param cpu_id Receives the value delivered in ECX. A null pointer
 *               is accepted if the caller does not need the id.
 * @return The 64 bit time stamp value
 */
inline static unsigned long long rdtscp(uint32_t* cpu_id)
{
	uint64_t tsc_low, tsc_high;
	uint32_t aux;

	__asm__ __volatile__ ("rdtscp" : "=a"(tsc_low), "=c"(aux), "=d"(tsc_high) :: "memory");

	/* hand out the id only on request */
	if (cpu_id)
		*cpu_id = aux;

	return (tsc_high << 32) | tsc_low;
}
@ -347,8 +368,8 @@ inline static uint64_t rdmsr(uint32_t msr) {
*/
inline static void wrmsr(uint32_t msr, uint64_t value)
{
	/*
	 * The instruction takes the register index in ECX and the value
	 * split into EDX:EAX, so both halves are narrowed explicitly.
	 * Each half is declared exactly once.
	 */
	uint32_t low = (uint32_t) (value & 0xFFFFFFFFULL);
	uint32_t high = (uint32_t) (value >> 32);

	__asm__ __volatile__ ("wrmsr" :: "a"(low), "c"(msr), "d"(high));
}
@ -654,8 +675,6 @@ inline static int system_calibration(void)
detect_cpu_frequency();
apic_calibration();
//kprintf("CR0 of core %u: 0x%x\n", apic_cpu_id(), read_cr0());
return 0;
}

View file

@ -165,10 +165,6 @@ struct state {
uint64_t ss;
};
uint32_t apic_cpu_id(void);
#define smp_id apic_cpu_id
#ifdef __cplusplus
}
#endif

View file

@ -182,19 +182,19 @@ void apic_eoi(size_t int_no)
/*
 * Determine the id of the calling core.
 *
 * If the local APIC is enabled, the id is taken from the APIC_ID
 * register: when has_x2apic() reports x2APIC support the register value
 * is returned as it is, otherwise the id is expected in the top byte and
 * shifted down. Without an enabled APIC the id of the boot processor is
 * returned, or 0 as long as boot_processor is still negative.
 */
uint32_t apic_cpu_id(void)
{
	if (apic_is_enabled()) {
		/*
		 * Keep the register value unsigned: in the shifted layout an
		 * id >= 128 sets bit 31 and must not be mistaken for a
		 * "nothing read" marker.
		 */
		uint32_t id = lapic_read(APIC_ID);

		if (has_x2apic())
			return id;

		return id >> 24;
	}

	if (boot_processor >= 0)
		return boot_processor;

	return 0;
}
/*
 * Program the id of the local APIC.
 *
 * The id is shifted into bits 24-31 before it is written to APIC_ID.
 * Nothing is written while the APIC is not enabled, and nothing is
 * returned in either case.
 */
static inline void apic_set_cpu_id(uint32_t id)
{
	if (apic_is_enabled())
		lapic_write(APIC_ID, id << 24);
}
static inline uint32_t apic_version(void)
@ -568,17 +568,18 @@ no_mp:
extern int smp_main(void);
extern void gdt_flush(void);
extern int set_idle_task(void);
extern atomic_int32_t current_boot_id;
#if MAX_CORES > 1
int smp_start(void)
{
if (lapic && has_x2apic()) // enable x2APIC support
wrmsr(0x1B, 0xFEE00C00);
if (has_x2apic()) // enable x2APIC support
wrmsr(MSR_APIC_BASE, lapic | 0xD00);
// reset APIC and set id
lapic_reset();
kprintf("Processor %d is entering its idle task\n", apic_cpu_id());
kprintf("Processor %d (local id %d) is entering its idle task\n", apic_cpu_id(), atomic_int32_read(&current_boot_id));
// use the same gdt like the boot processors
gdt_flush();
@ -595,8 +596,8 @@ int smp_start(void)
// enable additional cpu features
cpu_detection();
//kprintf("CR0 of core %u: 0x%x\n", apic_cpu_id(), read_cr0());
online[apic_cpu_id()] = 1;
//kprintf("CR0 of core %u: 0x%x\n", atomic_int32_read(&current_boot_id), read_cr0());
online[atomic_int32_read(&current_boot_id)] = 1;
set_idle_task();
@ -616,46 +617,62 @@ static inline void set_ipi_dest(uint32_t cpu_id) {
/*
 * Ask every other online core to flush its TLB.
 *
 * An inter-processor interrupt (fixed delivery, value 124 in the vector
 * field) is sent to each core marked in online[], except the caller.
 * If has_x2apic() is true the command is written to MSR 0x830 with the
 * destination in the upper 32 bit; otherwise it is sent through
 * set_ipi_dest() and APIC_ICR1, and every send is polled for completion.
 * Each target receives exactly one interrupt.
 *
 * @return 0 on success, also if the caller is the only online core;
 *         -EIO if APIC_ICR1 still reports a pending send on the
 *         non-x2APIC path
 */
int ipi_tlb_flush(void)
{
	uint32_t id = CORE_ID;
	uint32_t flags;
	uint64_t i;	/* 64 bit wide, because it is shifted into the upper half below */

	if (atomic_int32_read(&cpu_online) == 1)
		return 0;

	if (BUILTIN_EXPECT(has_x2apic(), 1)) {
		flags = irq_nested_disable();
		for(i=0; i<MAX_APIC_CORES; i++)
		{
			if ((i == id) || !online[i])
				continue;

			wrmsr(0x830, (i << 32)|APIC_INT_ASSERT|APIC_DM_FIXED|124);
		}
		irq_nested_enable(flags);
	} else {
		uint32_t j;

		/* the busy flag is only checked on this path, which is the one that reads APIC_ICR1 */
		if (lapic_read(APIC_ICR1) & APIC_ICR_BUSY) {
			kputs("ERROR: previous send not complete");
			return -EIO;
		}

		flags = irq_nested_disable();
		for(i=0; i<MAX_APIC_CORES; i++)
		{
			if ((i == id) || !online[i])
				continue;

			set_ipi_dest(i);
			lapic_write(APIC_ICR1, APIC_INT_ASSERT|APIC_DM_FIXED|124);

			/* wait for the send to finish, but give up after 1000 polls */
			j = 0;
			while((lapic_read(APIC_ICR1) & APIC_ICR_BUSY) && (j < 1000))
				j++;
		}
		irq_nested_enable(flags);
	}

	return 0;
}
/*
 * Interrupt handler for a TLB flush request.
 *
 * Writes the current CR3 value back to CR3 and reports the flush on the
 * console.
 */
static void apic_tlb_handler(struct state *s)
{
	/* keep CR3 in full width, a 32 bit copy would cut off the upper bits */
	size_t val = read_cr3();

	if (val)
		write_cr3(val);
	kputs("Flush TLB!\n");
}
#endif

View file

@ -52,13 +52,17 @@ align 4
global cpu_freq
global boot_processor
global cpu_online
global possible_cpus
global timer_ticks
global current_boot_id
base dq 0
limit dq 0
cpu_freq dd 0
boot_processor dd -1
cpu_online dd 0
possible_cpus dd 0
timer_ticks dq 0
current_boot_id dd 0
SECTION .text
align 4
@ -149,11 +153,8 @@ L1:
%if MAX_CORES > 1
Lsmp_main:
; dirty to hack to determine the cpu id
; with a temporary stack
mov rsp, tmp_stack-16
extern apic_cpu_id
call apic_cpu_id
xor rax, rax
mov eax, DWORD [current_boot_id]
; set default stack pointer
imul rax, KERNEL_STACK_SIZE
@ -165,12 +166,9 @@ Lsmp_main:
extern smp_start
call smp_start
jmp $
DQ 0, 0, 0, 0
DQ 0, 0, 0, 0
tmp_stack:
%endif
ALIGN 4
global gdt_flush
extern gp

View file

@ -42,6 +42,7 @@ extern const void percore_end;
extern void* Lpatch0;
extern void* Lpatch1;
extern void* Lpatch2;
extern atomic_int32_t current_boot_id;
extern void isrsyscall(void);
@ -268,7 +269,7 @@ int cpu_detection(void) {
cr4 &= ~CR4_TSD; // => every privilege level is able to use rdtsc
write_cr4(cr4);
if (has_fsgsbase())
if (first_time && has_fsgsbase())
{
readfs = rdfsbase;
readgs = rdgsbase;
@ -310,18 +311,16 @@ int cpu_detection(void) {
writefs(0);
#if MAX_CORES > 1
writegs(apic_cpu_id() * ((size_t) &percore_end0 - (size_t) &percore_start));
writegs(atomic_int32_read(&current_boot_id) * ((size_t) &percore_end0 - (size_t) &percore_start));
#else
writegs(0);
#endif
wrmsr(MSR_KERNEL_GS_BASE, 0);
kprintf("Core %d set per_core offset to 0x%x\n", apic_cpu_id(), rdmsr(MSR_GS_BASE));
kprintf("Core %d set per_core offset to 0x%x\n", atomic_int32_read(&current_boot_id), rdmsr(MSR_GS_BASE));
#if MAX_CORES > 1
/* set core id to apic_cpu_id */
set_per_core(__core_id, apic_cpu_id());
#endif
/* set core id to the current boot id */
set_per_core(__core_id, atomic_int32_read(&current_boot_id));
if (first_time && has_sse())
wmb = sfence;
@ -420,14 +419,27 @@ uint32_t get_cpu_frequency(void)
/*
 * Busy-wait for the given number of microseconds.
 *
 * The wait ends as soon as get_cpu_frequency() * usecs time stamp ticks
 * have passed (assumes the frequency is given in MHz - TODO confirm).
 * As long as more than 50000 ticks remain, check_workqueues() is called
 * in between. rdtscp is used for the time stamps if has_rdtscp() is
 * true, rdtsc otherwise.
 *
 * @param usecs Number of microseconds to wait
 */
void udelay(uint32_t usecs)
{
	uint64_t diff, end, start;
	/*
	 * Widen before multiplying: both operands are 32 bit, so the
	 * product would otherwise wrap around for long delays.
	 */
	uint64_t deadline = (uint64_t) get_cpu_frequency() * usecs;

	if (BUILTIN_EXPECT(has_rdtscp(), 1)) {
		start = rdtscp(NULL);

		do {
			end = rdtscp(NULL);
			rmb();
			/* absolute difference between both stamps */
			diff = end > start ? end - start : start - end;
			if ((diff < deadline) && (deadline - diff > 50000))
				check_workqueues();
		} while(diff < deadline);
	} else {
		start = rdtsc();

		do {
			mb();
			end = rdtsc();
			/* absolute difference between both stamps */
			diff = end > start ? end - start : start - end;
			if ((diff < deadline) && (deadline - diff > 50000))
				check_workqueues();
		} while(diff < deadline);
	}
}

View file

@ -49,8 +49,11 @@ static uint64_t last_rdtsc = 0;
/*
 * Switch to tickless operation.
 *
 * Sets use_tickless and stores the current time stamp in last_rdtsc,
 * the reference value check_ticks() measures against. The stamp is read
 * once, with rdtscp if has_rdtscp() is true and with rdtsc otherwise,
 * and the read barrier follows the read.
 */
void start_tickless(void)
{
	use_tickless = 1;

	if (BUILTIN_EXPECT(has_rdtscp(), 1))
		last_rdtsc = rdtscp(NULL);
	else
		last_rdtsc = rdtsc();
	rmb();
}
void end_tickless(void)
@ -68,15 +71,28 @@ void check_ticks(void)
if (CORE_ID == boot_processor)
#endif
{
uint64_t curr_rdtsc = rdtsc();
uint64_t diff;
if (BUILTIN_EXPECT(has_rdtscp(), 1)){
uint64_t curr_rdtsc = rdtscp(NULL);
uint64_t diff;
rmb();
diff = ((curr_rdtsc - last_rdtsc) * (uint64_t)TIMER_FREQ) / (1000000ULL*(uint64_t)get_cpu_frequency());
if (diff > 0) {
timer_ticks += diff;
last_rdtsc = curr_rdtsc;
rmb();
diff = ((curr_rdtsc - last_rdtsc) * (uint64_t)TIMER_FREQ) / (1000000ULL*(uint64_t)get_cpu_frequency());
if (diff > 0) {
timer_ticks += diff;
last_rdtsc = curr_rdtsc;
rmb();
}
} else {
uint64_t curr_rdtsc = rdtsc();
uint64_t diff;
rmb();
diff = ((curr_rdtsc - last_rdtsc) * (uint64_t)TIMER_FREQ) / (1000000ULL*(uint64_t)get_cpu_frequency());
if (diff > 0) {
timer_ticks += diff;
last_rdtsc = curr_rdtsc;
rmb();
}
}
}
}
@ -93,16 +109,19 @@ uint64_t get_clock_tick(void)
*/
static void timer_handler(struct state *s)
{
	/*
	 * Increment our 'tick counter', but only on the boot processor and
	 * only once per interrupt (presumably because the handler runs on
	 * every core while timer_ticks is shared - confirm).
	 */
	if (CORE_ID == boot_processor)
		timer_ticks++;

#if 0
	/*
	 * Every TIMER_FREQ clocks (approximately 1 second), we will
	 * display a message on the screen
	 */
	if (timer_ticks % TIMER_FREQ == 0) {
		kprintf("One second has passed %d\n", CORE_ID);
	}
#endif
}
int timer_wait(unsigned int ticks)

View file

@ -34,9 +34,9 @@
#include <hermit/tasks.h>
#include <hermit/syscall.h>
#include <hermit/memory.h>
#include <hermit/spinlock.h>
#include <hermit/fs.h>
#include <asm/irq.h>
#include <asm/atomic.h>
#include <asm/page.h>
/*
@ -58,6 +58,7 @@ extern atomic_int64_t total_allocated_pages;
extern atomic_int64_t total_available_pages;
extern atomic_int32_t cpu_online;
extern atomic_int32_t possible_cpus;
static int foo(void* arg)
{
@ -94,18 +95,31 @@ static int hermit_init(void)
return 0;
}
/*
 * Report the calling core together with the contents of CR0 and CR4.
 *
 * The message is printed while holding a lock that is shared by all
 * callers of this function.
 */
static void print_status(void)
{
	static spinlock_t print_lock = SPINLOCK_INIT;

	spinlock_lock(&print_lock);
	kprintf("%d CPU is now online (CR0 0x%zx, CR4 0x%zx)\n", CORE_ID, read_cr0(), read_cr4());
	spinlock_unlock(&print_lock);
}
#if MAX_CORES > 1
int smp_main(void)
{
int32_t cpu = atomic_int32_inc(&cpu_online);
kprintf("%d CPUs are now online\n", cpu);
#ifdef CONFIG_TICKLESS
disable_timer_irq();
#endif
create_kernel_task(NULL, foo, "foo2", NORMAL_PRIO);
/* wait for the other cpus */
while(atomic_int32_read(&cpu_online) < atomic_int32_read(&possible_cpus))
PAUSE;
print_status();
//create_kernel_task(NULL, foo, "foo2", NORMAL_PRIO);
while(1) {
check_workqueues();
@ -160,6 +174,12 @@ int main(void)
disable_timer_irq();
#endif
/* wait for the other cpus */
while(atomic_int32_read(&cpu_online) < atomic_int32_read(&possible_cpus))
PAUSE;
print_status();
create_kernel_task(NULL, initd, NULL, NORMAL_PRIO);
while(1) {

View file

@ -44,7 +44,7 @@ extern unsigned int get_cpufreq();
/*
 * Read the time stamp counter with rdtscp.
 *
 * The counter is read exactly once. The lfence behind the read and the
 * "memory" clobber keep following instructions and memory accesses from
 * being moved in front of it. ECX is overwritten by the instruction and
 * therefore listed as clobbered.
 *
 * @return The 64 bit time stamp value
 */
inline static unsigned long long rdtscp(void)
{
	unsigned long long lo, hi;

	__asm__ __volatile__ ("rdtscp; lfence" : "=a"(lo), "=d"(hi) :: "%rcx", "memory");

	return (hi << 32) | lo;
}

View file

@ -421,7 +421,7 @@ extern unsigned int get_cpufreq();
/*
 * Read the time stamp counter with rdtscp.
 *
 * The counter is read exactly once. The lfence behind the read and the
 * "memory" clobber keep following instructions and memory accesses from
 * being moved in front of it. ECX is overwritten by the instruction and
 * therefore listed as clobbered.
 *
 * @return The 64 bit time stamp value
 */
inline static unsigned long long rdtscp(void)
{
	unsigned long long lo, hi;

	__asm__ __volatile__ ("rdtscp; lfence" : "=a"(lo), "=d"(hi) :: "%rcx", "memory");

	return (hi << 32) | lo;
}

@ -1 +1 @@
Subproject commit 598908f955e6a146b14fbda1813874f113965313
Subproject commit 2eea5f979ee40e23741efcfaf6d863dd594a822f