1
0
Fork 0
mirror of https://git.rwth-aachen.de/acs/public/villas/node/ synced 2025-03-09 00:00:00 +01:00

add missing benchmarks

This commit is contained in:
Steffen Vogel 2017-11-21 22:30:21 +01:00
parent 8e0e7a4098
commit a7c15e618c
10 changed files with 667 additions and 0 deletions

View file

@ -6,3 +6,4 @@ set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR}/cmake)
add_subdirectory(lib)
add_subdirectory(tests)
add_subdirectory(src)

28
fpga/src/CMakeLists.txt Normal file
View file

@ -0,0 +1,28 @@
# Command line utility `fpga` which bundles the VILLASfpga benchmarks.
add_executable(fpga
    bench-datamovers.c
    bench-jitter.c
    bench-latency.c
    bench-memcpy.c
    bench.c
    fpga.c
)

# An executable has no consumers, so every usage requirement is PRIVATE.
# Both directories are needed: some sources include <villas/...>,
# bench-overruns.c includes <fpga/...> and <utils.h> directly.
target_include_directories(fpga PRIVATE
    "${CMAKE_CURRENT_SOURCE_DIR}/../include/villas"
    "${CMAKE_CURRENT_SOURCE_DIR}/../include"
)

target_link_libraries(fpga PRIVATE
    villas-fpga
)

# The overrun benchmark uses LAPACK/BLAS and is only built when it is found.
# FindLAPACK provides LAPACK_LIBRARIES but no include directories.
find_package(LAPACK)
if(LAPACK_FOUND)
    target_sources(fpga PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/bench-overruns.c")
    target_link_libraries(fpga PRIVATE ${LAPACK_LIBRARIES})
    target_compile_definitions(fpga PRIVATE WITH_LAPACK)
endif()

119
fpga/src/bench-datamovers.c Normal file
View file

@ -0,0 +1,119 @@
#include <villas/utils.h>
#include <villas/log.h>
#include <villas/fpga/card.h>
#include <villas/fpga/ip.h>
#include <villas/fpga/ips/dma.h>
#include <villas/fpga/ips/switch.h>
#include <villas/fpga/ips/intc.h>
#include "bench.h"
/** Benchmark the round-trip time of the datamover selected by BENCH_DM.
 *
 * The datamover is connected to itself through the card's switch. For every
 * transfer length 2^exp bytes (BENCH_DM_EXP_MIN <= exp <= BENCH_DM_EXP_MAX)
 * the source buffer is filled by read_random(), transferred and received
 * again while the elapsed rdtsc() ticks are accumulated. The average per
 * length is logged and written to a file below results/.
 *
 * @param c The FPGA card which hosts the datamover.
 * @retval 0 The benchmark completed.
 * @retval -1 The DMA memory could not be split into a source and destination half.
 */
int fpga_benchmark_datamover(struct fpga_card *c)
{
    int ret;

    struct fpga_ip *dm;
    struct dma_mem mem, src, dst;

#if BENCH_DM == 1
    char *dm_name = "fifo_mm_s_0";
#elif BENCH_DM == 2
    char *dm_name = "dma_0";
#elif BENCH_DM == 3
    char *dm_name = "dma_1";
#else
  #error "Invalid DM selected"
#endif

    dm = list_lookup(&c->ips, dm_name);
    if (!dm)
        error("Unknown datamover");

    /* Loop the datamover back onto itself */
    ret = switch_connect(c->sw, dm, dm);
    if (ret)
        error("Failed to configure switch");

    /* The datamover uses two consecutive interrupt lines */
    ret = intc_enable(c->intc, (1 << dm->irq) | (1 << (dm->irq + 1)), intc_flags);
    if (ret)
        error("Failed to enable interrupt");

    /* Allocate DMA memory: one half for sending, one half for receiving */
    ret = dma_alloc(dm, &mem, 2 * (1 << BENCH_DM_EXP_MAX), 0);
    if (ret)
        error("Failed to allocate DMA memory");

    ret = dma_mem_split(&mem, &src, &dst);
    if (ret)
        return -1;

    /* Open file for results */
    char fn[256];
    snprintf(fn, sizeof(fn), "results/datamover_%s_%s_%s.dat", dm_name, intc_flags & INTC_POLLING ? "polling" : "irq", uts.release);
    FILE *g = fopen(fn, "w");
    if (!g)
        error("Failed to open file for results: %s", fn);

    for (int exp = BENCH_DM_EXP_MIN; exp <= BENCH_DM_EXP_MAX; exp++) {
        uint64_t start, stop, total = 0, len = 1 << exp;

#if BENCH_DM == 1
        if (exp > 11)
            break; /* FIFO and Simple DMA are limited to transfers below 4 KiB */
#elif BENCH_DM == 3
        if (exp >= 12)
            break; /* FIFO and Simple DMA are limited to transfers below 4 KiB */
#endif

        read_random(src.base_virt, len);
        memset(dst.base_virt, 0, len);

        info("Start DM bench: len=%#jx", (uintmax_t) len);

        /* Larger transfers are repeated less often */
        uint64_t runs = BENCH_RUNS >> exp;
        for (uint64_t i = 0; i < runs + BENCH_WARMUP; i++) {
            start = rdtsc();

#if BENCH_DM == 1
            ssize_t bytes;

            bytes = fifo_write(dm, src.base_virt, len);
            if (bytes < 0)
                error("Failed write to FIFO with len = %ju", (uintmax_t) len);

            bytes = fifo_read(dm, dst.base_virt, dst.len);
            if (bytes < 0)
                error("Failed read from FIFO with len = %ju", (uintmax_t) len);
#else
            ret = dma_ping_pong(dm, src.base_phys, dst.base_phys, len);
            if (ret)
                error("DMA ping pong failed");
#endif

            stop = rdtsc();

            if (memcmp(src.base_virt, dst.base_virt, len))
                warn("Compare failed");

            /* Skip the first BENCH_WARMUP iterations so that exactly
             * 'runs' samples contribute to the average below. */
            if (i >= BENCH_WARMUP)
                total += stop - start;
        }

        info("exp %u avg %ju", exp, (uintmax_t) (total / runs));
        fprintf(g, "%ju %ju\n", (uintmax_t) len, (uintmax_t) (total / runs));
    }

    fclose(g);

    ret = switch_disconnect(c->sw, dm, dm);
    if (ret)
        error("Failed to configure switch");

    ret = dma_free(dm, &mem);
    if (ret)
        error("Failed to release DMA memory");

    ret = intc_disable(c->intc, (1 << dm->irq) | (1 << (dm->irq + 1)));
    if (ret)
        error("Failed to disable interrupt");

    return 0;
}

65
fpga/src/bench-jitter.c Normal file
View file

@ -0,0 +1,65 @@
#include <villas/utils.h>
#include <villas/fpga/card.h>
#include <villas/fpga/ip.h>
#include <villas/fpga/ips/timer.h>
#include "bench.h"
/** Measure the jitter of a periodic timer interrupt.
 *
 * The IP "timer_0" is programmed to fire periodically. The rdtsc() ticks
 * between two consecutive interrupts are recorded for every run and finally
 * written, one value per line, to a file below results/.
 *
 * @param c The FPGA card which hosts the timer and interrupt controller.
 * @retval 0 The benchmark completed.
 * @retval -1 The card has no "timer_0" IP or no interrupt controller.
 */
int fpga_benchmark_jitter(struct fpga_card *c)
{
    int ret;

    struct fpga_ip *ip = list_lookup(&c->ips, "timer_0");
    if (!ip || !c->intc)
        return -1;

    struct timer *tmr = (struct timer *) ip->_vd;

    XTmrCtr *xtmr = &tmr->inst;

    ret = intc_enable(c->intc, (1 << ip->irq), intc_flags);
    if (ret)
        error("Failed to enable interrupt");

    float period = 50e-6;
    int runs = 300.0 / period;

    /* One sample per run. A fixed 8 MiB buffer would be too small for the
     * number of runs computed above, so size it from 'runs'. */
    int *hist = alloc((size_t) runs * sizeof(*hist));

    XTmrCtr_SetOptions(xtmr, 0, XTC_INT_MODE_OPTION | XTC_EXT_COMPARE_OPTION | XTC_DOWN_COUNT_OPTION | XTC_AUTO_RELOAD_OPTION);
    XTmrCtr_SetResetValue(xtmr, 0, period * FPGA_AXI_HZ);
    XTmrCtr_Start(xtmr, 0);

    uint64_t end, start = rdtsc();
    for (int i = 0; i < runs; i++) {
        uint64_t cnt = intc_wait(c->intc, ip->irq);
        if (cnt != 1)
            warn("fail");

        /* Acknowledge IRQ by writing the status register back to itself */
        XTmrCtr_WriteReg((uintptr_t) c->map + ip->baseaddr, 0, XTC_TCSR_OFFSET, XTmrCtr_ReadReg((uintptr_t) c->map + ip->baseaddr, 0, XTC_TCSR_OFFSET));

        end = rdtsc();
        hist[i] = end - start;
        start = end;
    }

    XTmrCtr_Stop(xtmr, 0);

    /* Dump results */
    char fn[256];
    snprintf(fn, sizeof(fn), "results/jitter_%s_%s.dat", intc_flags & INTC_POLLING ? "polling" : "irq", uts.release);
    FILE *g = fopen(fn, "w");
    if (!g)
        error("Failed to open file for results: %s", fn);

    for (int i = 0; i < runs; i++)
        fprintf(g, "%u\n", hist[i]);

    fclose(g);
    free(hist);

    ret = intc_disable(c->intc, (1 << ip->irq));
    if (ret)
        error("Failed to disable interrupt");

    return 0;
}

49
fpga/src/bench-latency.c Normal file
View file

@ -0,0 +1,49 @@
#include <stdio.h>
#include <villas/log.h>
#include <villas/utils.h>
#include <villas/fpga/card.h>
#include <villas/fpga/ip.h>
#include "bench.h"
/** Measure the latency of a software-triggered interrupt.
 *
 * For every run, bit 8 of the interrupt controller's ISR register is written
 * and the rdtsc() ticks until intc_wait() returns for interrupt 8 are
 * recorded. All samples are written, one per line, to a file below results/.
 *
 * @param c The FPGA card which hosts the interrupt controller.
 * @retval 0 The benchmark completed.
 * @retval -1 The card has no interrupt controller.
 */
int fpga_benchmark_latency(struct fpga_card *c)
{
    int ret;

    uint64_t start, end;

    if (!c->intc)
        return -1;

    int runs = 1000000;

    /* Keep the samples on the heap: one million ints (about 4 MB) as a
     * variable-length array risks exhausting the stack.
     * alloc() is the helper from villas/utils.h. */
    int *hist = alloc((size_t) runs * sizeof(*hist));

    /* 0x100 == 1 << 8: the interrupt line which is triggered below */
    ret = intc_enable(c->intc, 0x100, intc_flags);
    if (ret)
        error("Failed to enable interrupts");

    for (int i = 0; i < runs; i++) {
        start = rdtsc();
        XIntc_Out32((uintptr_t) c->map + c->intc->baseaddr + XIN_ISR_OFFSET, 0x100);

        intc_wait(c->intc, 8);
        end = rdtsc();

        hist[i] = end - start;
    }

    /* Dump results */
    char fn[256];
    snprintf(fn, sizeof(fn), "results/latency_%s_%s.dat", intc_flags & INTC_POLLING ? "polling" : "irq", uts.release);
    FILE *g = fopen(fn, "w");
    if (!g)
        error("Failed to open file for results: %s", fn);

    for (int i = 0; i < runs; i++)
        fprintf(g, "%u\n", hist[i]);

    fclose(g);
    free(hist);

    ret = intc_disable(c->intc, 0x100);
    if (ret)
        error("Failed to disable interrupt");

    return 0;
}

46
fpga/src/bench-memcpy.c Normal file
View file

@ -0,0 +1,46 @@
#include <villas/utils.h>
#include <villas/fpga/card.h>
#include <villas/fpga/ips/intc.h>
#include "bench.h"
int fpga_benchmark_memcpy(struct fpga_card *c)
{
char *map = c->map + 0x200000;
uint32_t *mapi = (uint32_t *) map;
char fn[256];
snprintf(fn, sizeof(fn), "results/bar0_%s_%s.dat", intc_flags & INTC_POLLING ? "polling" : "irq", uts.release);
FILE *g = fopen(fn, "w");
fprintf(g, "# bytes cycles\n");
uint32_t dummy = 0;
for (int exp = BENCH_DM_EXP_MIN; exp <= BENCH_DM_EXP_MAX; exp++) {
uint64_t len = 1 << exp;
uint64_t start, end, total = 0;
uint64_t runs = (BENCH_RUNS << 2) >> exp;
for (int i = 0; i < runs + BENCH_WARMUP; i++) {
start = rdtsc();
for (int j = 0; j < len / 4; j++)
// mapi[j] = j; // write
dummy += mapi[j]; // read
end = rdtsc();
if (i > BENCH_WARMUP)
total += end - start;
}
info("exp = %u\truns = %ju\ttotal = %ju\tavg = %ju\tavgw = %ju", exp, runs, total, total / runs, total / (runs * len));
fprintf(g, "%zu %lu %ju\n", len, total / runs, runs);
}
fclose(g);
return 0;
}

152
fpga/src/bench-overruns.c Normal file
View file

@ -0,0 +1,152 @@
/** Benchmarks for VILLASfpga: LAPACK & BLAS
*
* @author Steffen Vogel <stvogel@eonerc.rwth-aachen.de>
* @copyright 2017, Steffen Vogel
**********************************************************************************/
#include <stdio.h>
#include <sys/utsname.h>
#include <fpga/card.h>
#include <fpga/ip.h>
#include <fpga/ips/switch.h>
#include <fpga/ips/intc.h>
#include <utils.h>
#include <villas/log.h>
#include "bench.h"
/* Some hard-coded configuration for the FPGA benchmarks.
 * NOTE(review): bench.h, which is included above, defines BENCH_WARMUP with the same value. */
#define BENCH_WARMUP 100
/* Defined in bench.c (bench.h carries the same extern declarations) */
extern int intc_flags;
extern struct utsname uts;
/* LAPACK & BLAS Fortran prototypes: every argument is passed by reference.
 * dgemm_ is used to generate the workload matrix, dgetrf_ and dgetri_ form the timed workload. */
extern int dgemm_(char *transa, char *transb, int *m, int *n, int *k, double *alpha, double *a, int *lda, double *b, int *ldb, double *beta, double *c, int *ldc);
extern int dgetrf_(int *m, int *n, double *a, int *lda, int *ipiv, int *info);
extern int dgetri_(int *n, double *a, int *lda, int *ipiv, double *work, int *lwork, int *info);
/** Fill C with an N x N matrix computed as A' * A from a random matrix A.
 *
 * @param N Number of rows and columns.
 * @param C Caller-provided buffer for N * N doubles which receives the result.
 * @retval 0 Always.
 */
static int lapack_generate_workload(int N, double *C)
{
    double *A = alloc(N * N * sizeof(double));

    srand(time(NULL));

    /* Random entries in the range [1, 101] */
    for (int i = 0; i < N * N; i++)
        A[i] = 100 * (double) rand() / RAND_MAX + 1;

    char transA = 'T';
    char transB = 'N';
    double alpha = 1;

    /* beta = 0: the result must not depend on the previous contents of C.
     * With beta = 1, dgemm would compute A' * A + C instead. */
    double beta = 0;

    /* C = A' * A, to get an invertible matrix */
    dgemm_(&transA, &transB, &N, &N, &N, &alpha, A, &N, A, &N, &beta, C, &N);

    free(A);

    return 0;
}
/** The timed workload: LU factorization followed by an in-place inversion.
 *
 * @param N Number of rows and columns of A.
 * @param A The N x N matrix; it is overwritten by the LAPACK routines.
 * @retval 0 Success. Failures are reported through error().
 */
static int lapack_workload(int N, double *A)
{
    int info = 0;
    int lworkspace = N;
    int ipiv[N];
    double workspace[N];

    /* LAPACK reports illegal arguments with info < 0 and numerical
     * failures (singular matrix) with info > 0; both are errors. */
    dgetrf_(&N, &N, A, &N, ipiv, &info);
    if (info != 0)
        error("Failed to LU factorize matrix");

    dgetri_(&N, A, &N, ipiv, workspace, &lworkspace, &info);
    if (info != 0)
        error("Failed to invert matrix");

    return 0;
}
int fpga_benchmark_overruns(struct fpga_card *c)
{
struct fpga_ip *rtds, *dm;
dm = list_lookup(&c->ips, "dma_1");
rtds = list_lookup(&c->ips, "rtds_axis_0");
if (!rtds || !c->intc)
return -1;
int ret;
float period = 50e-6;
int runs = 1.0 / period;
int overruns;
info("runs = %u", runs);
switch_connect(c->sw, dm, rtds);
switch_connect(c->sw, rtds, dm);
intc_enable(c->intc, (1 << (dm->irq + 1 )), intc_flags);
/* Dump results */
char fn[256];
snprintf(fn, sizeof(fn), "results/overruns_lu_rtds_axis_%s_%s.dat", intc_flags & INTC_POLLING ? "polling" : "irq", uts.release);
FILE *g = fopen(fn, "w");
fprintf(g, "# period = %f\n", period);
fprintf(g, "# runs = %u\n", runs);
struct dma_mem mem;
ret = dma_alloc(dm, &mem, 0x1000, 0);
if (ret)
error("Failed to allocate DMA memory");
uint32_t *data_rx = (uint32_t *) mem.base_virt;
uint32_t *data_tx = (uint32_t *) mem.base_virt + 0x200;
uint64_t total, start, stop;
for (int p = 3; p < 45; p++) {
double *A = alloc(p*p*sizeof(double));
lapack_generate_workload(p, A);
overruns = 0;
total = 0;
for (int i = 0; i < 2000; i++) {
dma_read(dm, mem.base_phys, 0x200);
dma_read_complete(dm, NULL, NULL);
}
for (int i = 0; i < runs + BENCH_WARMUP; i++) {
dma_read(dm, mem.base_phys, 0x200);
start = rdtsc();
lapack_workload(p, A);
stop = rdtsc();
dma_read_complete(dm, NULL, NULL);
/* Send data to rtds */
data_tx[0] = i;
dma_write(dm, mem.base_phys + 0x200, 64 * sizeof(data_tx[0]));
if (i < BENCH_WARMUP)
continue;
if (i - data_rx[0] > 2)
overruns++;
total += stop - start;
}
free(A);
info("iter = %u clks = %ju overruns = %u", p, total / runs, overruns);
fprintf(g, "%u %ju %u\n", p, total / runs, overruns);
if (overruns >= runs)
break;
}
fclose(g);
return 0;
}

74
fpga/src/bench.c Normal file
View file

@ -0,0 +1,74 @@
/** Benchmarks for VILLASfpga
*
* @author Steffen Vogel <stvogel@eonerc.rwth-aachen.de>
* @copyright 2017, Steffen Vogel
**********************************************************************************/
#include <stdio.h>
#include <string.h>
#include <villas/utils.h>
#include <villas/log.h>
#include <villas/fpga/ip.h>
#include <villas/fpga/card.h>
#include <villas/fpga/ips/intc.h>
#include <villas/fpga/ips/timer.h>
#include "bench.h"
#ifdef WITH_LAPACK
/* Only built when LAPACK is available, therefore not declared in bench.h */
int fpga_benchmark_overruns(struct fpga_card *c);
#endif

/* Flags passed to intc_enable() by the benchmarks; 0 selects interrupts, INTC_POLLING polling */
int intc_flags = 0;

/* System information; the kernel release becomes part of the result file names */
struct utsname uts;

/** Look up a benchmark by name and run it.
 *
 * The benchmark is executed with the current intc_flags. If those were 0,
 * it is executed a second time with INTC_POLLING set after a key press.
 *
 * @param argc Number of entries in argv.
 * @param argv argv[1] is the name of the benchmark.
 * @param c The FPGA card to benchmark.
 * @retval 0 The benchmark completed.
 * @retval -1 uname() failed.
 */
int fpga_benchmarks(int argc, char *argv[], struct fpga_card *c)
{
    int ret;
    struct bench {
        const char *name;
        int (*func)(struct fpga_card *c);
    } benchmarks[] = {
        { "datamover",	fpga_benchmark_datamover },
        { "jitter",	fpga_benchmark_jitter },
        { "memcpy",	fpga_benchmark_memcpy },
#ifdef WITH_LAPACK
        { "overruns",	fpga_benchmark_overruns },
#endif
        { "latency",	fpga_benchmark_latency }
    };

    if (argc < 2)
        error("Usage: fpga benchmark (bench)");

    struct bench *bench = NULL;
    for (int i = 0; i < ARRAY_LEN(benchmarks); i++) {
        if (strcmp(benchmarks[i].name, argv[1]) == 0) {
            bench = &benchmarks[i];
            break;
        }
    }

    if (bench == NULL)
        error("There is no benchmark named: %s", argv[1]);

    ret = uname(&uts);
    if (ret)
        return -1;

    for (;;) {
        ret = bench->func(c);
        if (ret)
            error("Benchmark %s failed", bench->name);

        if (intc_flags != 0)
            break;

        /* Rerun test with polling; wait for a key press in between */
        intc_flags |= INTC_POLLING;
        getchar();
    }

    /* Reaching this point means every pass succeeded */
    return 0;
}

22
fpga/src/bench.h Normal file
View file

@ -0,0 +1,22 @@
/* Shared configuration and declarations of the VILLASfpga benchmarks */
#pragma once

#include <sys/utsname.h>
#include "config.h"

/* Forward declaration, so that the prototypes below do not depend on the
 * include order of the file which includes this header. */
struct fpga_card;

/* Some hard-coded configuration for the FPGA benchmarks */

/* Datamover used by the datamover benchmark:
 *   1 FIFO
 *   2 DMA SG
 *   3 DMA Simple
 */
#define BENCH_DM 3

/* Base number of repetitions; the benchmarks scale it down for larger transfers */
#define BENCH_RUNS 3000000

/* Number of initial iterations which are excluded from the measurement */
#define BENCH_WARMUP 100

/* Transfer sizes range from 2^BENCH_DM_EXP_MIN to 2^BENCH_DM_EXP_MAX bytes */
#define BENCH_DM_EXP_MIN 0
#define BENCH_DM_EXP_MAX 20

int fpga_benchmark_datamover(struct fpga_card *c);
int fpga_benchmark_jitter(struct fpga_card *c);
int fpga_benchmark_memcpy(struct fpga_card *c);
int fpga_benchmark_latency(struct fpga_card *c);

/* Defined in bench.c */
extern int intc_flags;
extern struct utsname uts;

111
fpga/src/fpga.c Normal file
View file

@ -0,0 +1,111 @@
/** VILLASfpga utility for tests and benchmarks
*
* @author Steffen Vogel <stvogel@eonerc.rwth-aachen.de>
* @copyright 2017, Steffen Vogel
**********************************************************************************/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <getopt.h>
#include <villas/log.h>
#include <villas/utils.h>
#include <villas/kernel/pci.h>
#include <villas/kernel/kernel.h>
#include <villas/fpga/card.h>
/* Declarations */
int fpga_benchmarks(int argc, char *argv[], struct fpga_card *c);
/** Print the command line help. */
void usage(void)
{
    printf("Usage: villas-fpga [OPTIONS] CONFIG CARD BENCHMARK\n\n");
    printf(" CONFIG path to a configuration file\n");
    printf(" CARD name of the FPGA card\n");
    printf(" BENCHMARK name of the benchmark to run\n");
    printf(" OPTIONS is one or more of the following options:\n");
    printf(" -h show this help\n");
    printf(" -V show the version of the tool\n");
    printf("\n");
    print_copyright();
}

/** Parse the command line, load the card configuration and run a benchmark.
 *
 * @retval 0 The benchmark was started.
 * @retval -1 Initialization or loading the configuration failed.
 */
int main(int argc, char *argv[])
{
    int ret;

    struct list cards;
    struct vfio_container vc;
    struct pci pci;
    struct fpga_card *card;

    /* Parse arguments.
     * getopt() returns an int: storing it in a plain char never compares
     * equal to -1 on platforms where char is unsigned. */
    int c;
    while ((c = getopt(argc, argv, "Vh")) != -1) {
        switch (c) {
            case 'V':
                print_version();
                exit(EXIT_SUCCESS);

            case 'h':
                usage();
                exit(EXIT_SUCCESS);

            case '?':
            default:
                /* Unknown option */
                usage();
                exit(EXIT_FAILURE);
        }
    }

    /* fpga_benchmarks() receives CARD as argv[0] and expects the benchmark
     * name as argv[1], so three positional arguments are required. */
    if (argc != optind + 3) {
        usage();
        exit(EXIT_FAILURE);
    }

    char *configfile = argv[optind];
    char *cardname = argv[optind+1];

    FILE *f;
    json_error_t err;
    json_t *json;

    ret = pci_init(&pci);
    if (ret)
        return -1;

    ret = vfio_init(&vc);
    if (ret)
        return -1;

    /* Parse FPGA configuration */
    f = fopen(configfile, "r");
    if (!f)
        return -1;

    json = json_loadf(f, 0, &err);

    /* The file is no longer needed, whether or not parsing succeeded */
    fclose(f);

    if (!json)
        return -1;

    list_init(&cards);
    ret = fpga_card_parse_list(&cards, json);

    json_decref(json);

    if (ret)
        return -1;

    card = list_lookup(&cards, cardname);
    if (!card)
        return -1;

    fpga_card_dump(card);

    /* Run benchmarks; the return value is not propagated */
    fpga_benchmarks(argc-optind-1, argv+optind+1, card);

    return 0;
}