diff --git a/include/villas/utils.h b/include/villas/utils.h index 6664b2232..82b7cabb7 100644 --- a/include/villas/utils.h +++ b/include/villas/utils.h @@ -204,11 +204,11 @@ void printb(void *mem, size_t len); void printdw(void *mem, size_t len); /** Get CPU timestep counter */ -__attribute__((always_inline)) static inline uint64_t rdtscp() +__attribute__((always_inline)) static inline uint64_t rdtsc() { uint64_t tsc; - __asm__ ("rdtscp;" + __asm__ ("rdtsc;" "shl $32, %%rdx;" "or %%rdx,%%rax" : "=a" (tsc) diff --git a/lib/utils.c b/lib/utils.c index 66512c3bd..eff72a954 100644 --- a/lib/utils.c +++ b/lib/utils.c @@ -370,12 +370,13 @@ void rdtsc_sleep(uint64_t nanosecs, uint64_t start) { uint64_t cycles; + /** @todo Replace the hard coded CPU clock frequency */ cycles = (double) nanosecs / (1e9 / 3392389000); if (start == 0) - start = rdtscp(); + start = rdtsc(); do { __asm__("nop"); - } while (rdtscp() - start < cycles); + } while (rdtsc() - start < cycles); } \ No newline at end of file diff --git a/src/fpga-bench-overruns.c b/src/fpga-bench-overruns.c index f161c7696..73346b5e9 100644 --- a/src/fpga-bench-overruns.c +++ b/src/fpga-bench-overruns.c @@ -115,9 +115,9 @@ int fpga_benchmark_overruns(struct fpga *f) for (int i = 0; i < runs + BENCH_WARMUP; i++) { dma_read(dm, mem.base_phys, 0x200); - start = rdtscp(); + start = rdtsc(); lapack_workload(p, A); - stop = rdtscp(); + stop = rdtsc(); dma_read_complete(dm, NULL, NULL); diff --git a/src/fpga-bench.c b/src/fpga-bench.c index 2ab2523a0..1de87f1f3 100644 --- a/src/fpga-bench.c +++ b/src/fpga-bench.c @@ -108,7 +108,7 @@ int fpga_benchmark_jitter(struct fpga *f) XTmrCtr_SetResetValue(xtmr, 0, period * FPGA_AXI_HZ); XTmrCtr_Start(xtmr, 0); - uint64_t end, start = rdtscp(); + uint64_t end, start = rdtsc(); for (int i = 0; i < runs; i++) { uint64_t cnt = intc_wait(f->intc, tmr->irq); if (cnt != 1) @@ -117,7 +117,7 @@ int fpga_benchmark_jitter(struct fpga *f) /* Ackowledge IRQ */ XTmrCtr_WriteReg((uintptr_t) f->map + tmr->baseaddr, 0, XTC_TCSR_OFFSET, XTmrCtr_ReadReg((uintptr_t) f->map + tmr->baseaddr, 0, XTC_TCSR_OFFSET)); - end = rdtscp(); + end = rdtsc(); hist[i] = end - start; start = end; } @@ -157,11 +157,11 @@ int fpga_benchmark_latency(struct fpga *f) error("Failed to enable interrupts"); for (int i = 0; i < runs; i++) { - start = rdtscp(); + start = rdtsc(); XIntc_Out32((uintptr_t) f->map + f->intc->baseaddr + XIN_ISR_OFFSET, 0x100); intc_wait(f->intc, 8); - end = rdtscp(); + end = rdtsc(); hist[i] = end - start; } @@ -239,7 +239,7 @@ int fpga_benchmark_datamover(struct fpga *f) uint64_t runs = BENCH_RUNS >> exp; for (int i = 0; i < runs + BENCH_WARMUP; i++) { - start = rdtscp(); + start = rdtsc(); #if BENCH_DM == 1 ssize_t ret; @@ -255,7 +255,7 @@ int fpga_benchmark_datamover(struct fpga *f) if (ret) error("DMA ping pong failed"); #endif - stop = rdtscp(); + stop = rdtsc(); if (memcmp(src.base_virt, dst.base_virt, len)) warn("Compare failed"); @@ -304,13 +304,13 @@ int fpga_benchmark_memcpy(struct fpga *f) uint64_t runs = (BENCH_RUNS << 2) >> exp; for (int i = 0; i < runs + BENCH_WARMUP; i++) { - start = rdtscp(); + start = rdtsc(); for (int j = 0; j < len / 4; j++) // mapi[j] = j; // write dummy += mapi[j]; // read - end = rdtscp(); + end = rdtsc(); if (i > BENCH_WARMUP) total += end - start;