mirror of
https://github.com/hermitcore/libhermit.git
synced 2025-03-09 00:00:03 +01:00
318 lines
8.5 KiB
C
318 lines
8.5 KiB
C
/*
|
|
* =====================================================================================
|
|
*
|
|
* Filename: rdtsc.c
|
|
*
|
|
* Description:
|
|
*
|
|
* Version: 1.0
|
|
* Created: 31.01.2011 10:56:58
|
|
* Revision: none
|
|
* Compiler: gcc
|
|
*
|
|
* Author: Georg Wassen (gw) (),
|
|
* Company:
|
|
*
|
|
* =====================================================================================
|
|
*/
|
|
|
|
#define _SVID_SOURCE
|
|
#define _XOPEN_SOURCE 500
|
|
#include <sys/time.h>
|
|
#include <stdlib.h>
|
|
#include <unistd.h>
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <errno.h>
|
|
#include <ctype.h>
|
|
#include "rdtsc.h"
|
|
|
|
#ifdef __hermit__
|
|
/*
 * Declared here, defined outside this file. rdtsc_ticks_per_sec() multiplies
 * the returned value by 10^6 to obtain ticks per second, so it is presumably
 * the CPU frequency in MHz -- TODO confirm against the HermitCore sources.
 */
extern unsigned int get_cpufreq();
|
|
#endif
|
|
|
|
/*
 * Execute the CPUID instruction for leaf `func` and store the resulting
 * EAX/EBX/ECX/EDX register values through the four output pointers.
 *
 * ECX is explicitly loaded with 0 before the instruction: several leaves
 * (e.g. 4, 7, 0xB, 0xD) select a sub-leaf via ECX, and leaving the register
 * unspecified would make their result depend on whatever value happened to
 * be in ECX. Callers therefore always get sub-leaf 0.
 */
static inline void cpuid(unsigned func, unsigned *eax, unsigned *ebx, unsigned *ecx, unsigned *edx) {
	__asm__ volatile ("cpuid"
			  : "=a"(*eax), "=b"(*ebx), "=c"(*ecx), "=d"(*edx)
			  : "a"(func), "c"(0u));
}
|
|
|
|
/*
 * Run CPUID for leaf `code` and hand back only the EDX result;
 * the other three registers are read and discarded.
 */
static inline uint32_t cpuid_edx(uint32_t code) {
	uint32_t regs[4];	/* [0]=eax, [1]=ebx, [2]=ecx, [3]=edx */

	cpuid(code, &regs[0], &regs[1], &regs[2], &regs[3]);

	return regs[3];
}
|
|
|
|
/*
 * TSC ticks per second as used by rdtsc_loop_sec().
 * 0 is treated there as "not determined yet".
 */
static uint64_t tps = 0;
|
|
|
|
#if 0
|
|
/*
 * ===  FUNCTION  ======================================================================
 *         Name:  second()
 *  Description:  current gettimeofday() time as a double: whole seconds plus
 *                the microsecond part scaled by 1e-6
 * =====================================================================================
 */
static inline double second()
{
	struct timeval now;

	gettimeofday(&now, 0);

	return now.tv_sec + 1e-6*now.tv_usec;
}
|
|
|
|
/*
 * ===  FUNCTION  ======================================================================
 *         Name:  selectsleep(us)
 *  Description:  sleeps for the given number of microseconds by calling
 *                select() with no descriptor sets and only a timeout
 * =====================================================================================
 */
static inline void selectsleep(unsigned us)
{
	/* portable way to sleep with subsecond precision */
	struct timeval timeout = { .tv_sec = 0, .tv_usec = us };

	select(0, 0, 0, 0, &timeout);
}
|
|
|
|
/*
|
|
* === FUNCTION ======================================================================
|
|
* Name: rdtsc_ticks_per_sec()
|
|
* Description: uses least squares regressen to measure the frequency of the TSC
|
|
* =====================================================================================
|
|
*/
|
|
uint64_t rdtsc_ticks_per_sec(void)
|
|
{
|
|
double sumx = 0, sumy = 0;
|
|
double sumxx = 0, sumxy = 0;
|
|
double slope;
|
|
// least squared linear regression taken from mcert/misc/realfeed/realfeel.c
|
|
|
|
const unsigned n = 30;
|
|
unsigned i;
|
|
|
|
for (i=0; i<n; i++) {
|
|
double breal, real, ticks;
|
|
uint64_t bticks = 0, aticks = 0;
|
|
rdtsc(&bticks);
|
|
breal = second();
|
|
|
|
selectsleep((unsigned)(10000 + drand48() * 20000));
|
|
|
|
rdtsc(&aticks);
|
|
real = second() -breal;
|
|
ticks = (double)(aticks - bticks);
|
|
|
|
sumx += real;
|
|
sumxx += real * real;
|
|
sumxy += real * ticks;
|
|
sumy += ticks;
|
|
}
|
|
slope = ((sumxy - (sumx*sumy) / n) / (sumxx - (sumx*sumx) / n));
|
|
|
|
tps = (uint64_t)slope;
|
|
return tps;
|
|
}
|
|
#endif
|
|
/*
 * Determine the TSC frequency in ticks per second.
 *
 * On HermitCore the value of get_cpufreq() is scaled by 10^6.
 *
 * Elsewhere the TSC is sampled around two gettimeofday() calls that are
 * separated by a 0.5 s usleep(); the frequency is the TSC difference divided
 * by the elapsed wall-clock time. Each wall-clock sample is bracketed by two
 * TSC reads whose midpoint is taken as the TSC value belonging to it.
 *
 * Returns the measured ticks per second, or 0 if the wall clock did not
 * advance between the two samples (e.g. it was stepped backwards), since no
 * frequency can be derived in that case.
 */
uint64_t rdtsc_ticks_per_sec(void)
{
#ifdef __hermit__
	return (uint64_t) get_cpufreq() * 1000000ULL;
#else
	uint64_t t1, t2, t3, t4;
	struct timeval tv1, tv2;
	uint64_t tsc_first, tsc_second, diff_tsc;
	int64_t diff_usec;

	rdtsc(&t1);
	gettimeofday(&tv1, 0);
	rdtsc(&t2);

	usleep(500000);		// 0.5 sec

	rdtsc(&t3);
	gettimeofday(&tv2, 0);
	rdtsc(&t4);

	/* midpoints, written so that the intermediate sum cannot overflow */
	tsc_first = t1 + (t2 - t1) / 2;
	tsc_second = t3 + (t4 - t3) / 2;

	diff_tsc = tsc_second - tsc_first;

	/*
	 * Signed arithmetic: the microsecond part of the later sample may be
	 * smaller than that of the earlier one, in which case the (negative)
	 * difference must borrow from the seconds part instead of being added.
	 */
	diff_usec = (int64_t)(tv2.tv_sec - tv1.tv_sec) * 1000000
		+ ((int64_t)tv2.tv_usec - (int64_t)tv1.tv_usec);

	if (diff_usec <= 0) {
		/* avoids division by zero / a nonsensical negative interval */
		return 0;
	}

	return (diff_tsc * 1000000) / (uint64_t)diff_usec;
#endif
}
|
|
|
|
/*
 * ===  FUNCTION  ======================================================================
 *         Name:  rdtsc_max_freq(id)
 *  Description:  reads the maximum frequency of given CPU-ID from /sys/.../cpuN/cpufreq
 *                (value multiplied by 1000; the kernel documents that file in kHz).
 *                If that file cannot be opened, falls back to the "cpu MHz"
 *                entry of the matching processor in /proc/cpuinfo (truncated
 *                to whole MHz, multiplied by 10^6).
 *       Return:  the frequency, or (uint64_t)-1 on any error; a message is
 *                printed to stderr in that case.
 *      Changes:  WASSEN, 24.5.2011: taken from hourglass via irqlab
 * =====================================================================================
 */
uint64_t rdtsc_max_freq(int id)
{
	enum { CPUINFO_BUF_SIZE = 1024 * 512 };

	uint64_t hz = (uint64_t)-1;	/* error value until a frequency was parsed */
	char fname[BUFSIZ];
	char processor[BUFSIZ];
	char *buffer = NULL;
	const char *loc;
	FILE *fp;
	double tmhz;
	size_t nread;

	snprintf(fname, sizeof fname,
		 "/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq", id);

	fp = fopen(fname, "r");
	if (fp != NULL) {
		unsigned long long value = 0;

		/* anything but exactly one converted item (including EOF) is an error */
		if (fscanf(fp, "%llu", &value) != 1) {
			fprintf(stderr, "fscanf() returned 0: %s\n", strerror(errno));
			goto out;
		}
		hz = (uint64_t)value * 1000;
		goto out;
	}

	fprintf(stderr, "Can't open <%s>.\n", fname);
	fprintf(stderr, "Trying </proc/cpuinfo> (not as accurate)\n");

	fp = fopen("/proc/cpuinfo", "r");
	if (fp == NULL) {
		fprintf(stderr, "Can't open </proc/cpuinfo>.\n");
		goto out;
	}

	buffer = malloc(CPUINFO_BUF_SIZE);
	if (buffer == NULL) {
		fprintf(stderr, "malloc() failed: %s\n", strerror(errno));
		goto out;
	}

	/* keep one byte free so the text can be NUL-terminated for strstr() */
	nread = fread(buffer, sizeof(char), CPUINFO_BUF_SIZE - 1, fp);
	if (nread == 0) {
		fprintf(stderr, "fread() returned 0: %s\n", strerror(errno));
		goto out;
	}
	buffer[nread] = '\0';

	/* locate this processor's section, then its "cpu MHz" line */
	snprintf(processor, sizeof processor, "processor\t: %d", id);
	loc = strstr(buffer, processor);
	if (loc != NULL) {
		loc = strstr(loc, "cpu MHz");
	}
	if (loc == NULL) {
		fprintf(stderr, "Unable to parse /proc/cpuinfo\n");
		goto out;
	}

	/* skip separator characters up to the number, but never leave the line */
	loc += strlen("cpu MHz");
	while (*loc != '\0' && *loc != '\n' && !isdigit((unsigned char)*loc)) {
		loc++;
	}
	if (sscanf(loc, "%lf", &tmhz) != 1) {
		fprintf(stderr, "Unable to parse /proc/cpuinfo\n");
		goto out;
	}

	hz = (uint64_t)(tmhz);
	hz *= 1000;
	hz *= 1000;

out:
	free(buffer);
	if (fp != NULL) {
		fclose(fp);
	}
	return hz;
}
|
|
|
|
/*
 * ===  FUNCTION  ======================================================================
 *         Name:  rdtsc_loop(ticks)
 *  Description:  busy-waits, re-reading the TSC, until it has reached the value
 *                read on entry plus the given number of ticks
 * =====================================================================================
 */
void rdtsc_loop(uint64_t ticks)
{
	uint64_t current = 0;
	uint64_t deadline;

	rdtsc(&current);
	deadline = current + ticks;

	for (;;) {
		if (current >= deadline) {
			break;
		}
		rdtsc(&current);
	}
}
|
|
|
|
/*
|
|
* === FUNCTION ======================================================================
|
|
* Name: rdtsc_loop_sec(ticks)
|
|
* Description: waits actively for given seconds
|
|
* ATTN: needs rdtsc_ticks_per_sec() unless that function was previously
|
|
* called (this function MIGHT use syscalls!)
|
|
* =====================================================================================
|
|
*/
|
|
void rdtsc_loop_sec(unsigned seconds)
|
|
{
|
|
uint64_t t_now = 0, t_end;
|
|
rdtsc(&t_now);
|
|
if (tps == 0) rdtsc_ticks_per_sec();
|
|
t_end = t_now + (uint64_t)seconds * tps;
|
|
while (t_now < t_end) {
|
|
rdtsc(&t_now);
|
|
}
|
|
}
|
|
|
|
/*
 * test if TSC is invariant: returns 1 when bit 8 of EDX reported by
 * CPUID leaf 0x80000007 is set, 0 otherwise
 */
int rdtsc_is_invariant(void) {
	const uint32_t edx = cpuid_edx(0x80000007);

	return (edx & (1 << 8)) ? 1 : 0;
}
|
|
|
|
/*
 * measure overhead of *not* serialized rdtsc() (SHL, MOV, OR)
 *
 * Takes two back-to-back rdtsc() readings `iterations` times, each pair
 * preceded by an LFENCE (or a locked add when __MIC__ is set), and returns
 * the summed differences divided by `iterations` (integer division).
 * Returns 0 when `iterations` is 0.
 */
uint64_t rdtsc_get_overhead(const uint64_t iterations) {
	uint64_t total = 0;
	uint64_t before;
	uint64_t after;
	uint64_t round;

	if (!iterations) {
		return 0;
	}

	round = 0;
	while (round < iterations) {
#if ! __MIC__
		__asm__ volatile("lfence");
#else
		__asm__ volatile("lock; add $0, 0(%%rsp)" ::: "memory");
#endif
		rdtsc(&before);
		rdtsc(&after);
		total += after - before;
		round++;
	}

	return total / iterations;
}
|
|
|
|
/*
 * measure overhead of serialized rdtsc_serialized() (LFENCE, SHL, MOV, OR, LFENCE)
 *
 * Takes two back-to-back rdtsc_serialized() readings `iterations` times and
 * returns the summed differences divided by `iterations` (integer division).
 * Returns 0 when `iterations` is 0.
 */
uint64_t rdtsc_get_overhead_serialized(const uint64_t iterations) {
	uint64_t total = 0;
	uint64_t before;
	uint64_t after;
	uint64_t round;

	if (!iterations) {
		return 0;
	}

	round = 0;
	while (round < iterations) {
		rdtsc_serialized(&before);
		rdtsc_serialized(&after);
		total += after - before;
		round++;
	}

	return total / iterations;
}
|