diff --git a/fpga/CMakeLists.txt b/fpga/CMakeLists.txt
index 3bb0552ad..4d355ed0e 100644
--- a/fpga/CMakeLists.txt
+++ b/fpga/CMakeLists.txt
@@ -6,3 +6,4 @@ set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR}/cmake)
 
 add_subdirectory(lib)
 add_subdirectory(tests)
+add_subdirectory(src)
diff --git a/fpga/src/CMakeLists.txt b/fpga/src/CMakeLists.txt
new file mode 100644
index 000000000..f38667024
--- /dev/null
+++ b/fpga/src/CMakeLists.txt
@@ -0,0 +1,28 @@
+set(SOURCES
+	bench-datamovers.c
+	bench-jitter.c
+	bench-latency.c
+	bench-memcpy.c
+	bench.c
+	fpga.c
+)
+
+add_executable(fpga ${SOURCES})
+
+target_include_directories(fpga PUBLIC
+	../include/villas
+	../include
+)
+
+target_link_libraries(fpga PUBLIC
+	villas-fpga
+)
+
+find_package(LAPACK)
+
+if(LAPACK_FOUND)
+	target_sources(fpga PUBLIC bench-overruns.c)
+	target_link_libraries(fpga PUBLIC ${LAPACK_LIBRARIES})
+	target_include_directories(fpga PUBLIC ${LAPACK_INCLUDE_DIRS})
+	target_compile_definitions(fpga PUBLIC WITH_LAPACK)
+endif()
diff --git a/fpga/src/bench-datamovers.c b/fpga/src/bench-datamovers.c
new file mode 100644
index 000000000..e98dc2507
--- /dev/null
+++ b/fpga/src/bench-datamovers.c
@@ -0,0 +1,134 @@
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+#include
+
+#include "bench.h"
+
+/** Benchmark the round-trip time of one datamover for a range of transfer sizes.
+ *
+ * The datamover selected at compile time by BENCH_DM is connected to itself
+ * through the switch of the card. For every power-of-two length from
+ * 2^BENCH_DM_EXP_MIN to 2^BENCH_DM_EXP_MAX bytes a buffer filled by read_random()
+ * is sent and received again, timed with rdtsc() and compared against the source.
+ * The average tick count per transfer is logged and written to
+ * results/datamover_{name}_{irq|polling}_{kernel release}.dat.
+ *
+ * @param c The FPGA card which provides the datamover, switch and interrupt controller.
+ * @retval 0 The benchmark ran to completion.
+ * @retval -1 The DMA memory could not be split into a source and a destination region.
+ */
+int fpga_benchmark_datamover(struct fpga_card *c)
+{
+	int ret;
+
+	struct fpga_ip *dm;
+	struct dma_mem mem, src, dst;
+
+#if BENCH_DM == 1
+	char *dm_name = "fifo_mm_s_0";
+#elif BENCH_DM == 2
+	char *dm_name = "dma_0";
+#elif BENCH_DM == 3
+	char *dm_name = "dma_1";
+#else
+	#error "Invalid DM selected"
+#endif
+
+	dm = list_lookup(&c->ips, dm_name);
+	if (!dm)
+		error("Unknown datamover");
+
+	ret = switch_connect(c->sw, dm, dm);
+	if (ret)
+		error("Failed to configure switch");
+
+	ret = intc_enable(c->intc, (1 << dm->irq) | (1 << (dm->irq + 1)), intc_flags);
+	if (ret)
+		error("Failed to enable interrupt");
+
+	/* Allocate DMA memory */
+	ret = dma_alloc(dm, &mem, 2 * (1 << BENCH_DM_EXP_MAX), 0);
+	if (ret)
+		error("Failed to allocate DMA memory");
+
+	ret = dma_mem_split(&mem, &src, &dst);
+	if (ret)
+		return -1;
+
+	/* Open file for results */
+	char fn[256];
+	snprintf(fn, sizeof(fn), "results/datamover_%s_%s_%s.dat", dm_name, intc_flags & INTC_POLLING ? "polling" : "irq", uts.release);
+	FILE *g = fopen(fn, "w");
+	if (!g)
+		error("Failed to open file for results: %s", fn);
+
+	for (int exp = BENCH_DM_EXP_MIN; exp <= BENCH_DM_EXP_MAX; exp++) {
+		uint64_t start, stop, total = 0, len = 1 << exp;
+
+#if BENCH_DM == 1
+		if (exp > 11)
+			break; /* FIFO and Simple DMA are limited to 4kb */
+#elif BENCH_DM == 3
+		if (exp >= 12)
+			break; /* FIFO and Simple DMA are limited to 4kb */
+#endif
+
+		read_random(src.base_virt, len);
+		memset(dst.base_virt, 0, len);
+
+		info("Start DM bench: len=%#jx", len);
+
+		uint64_t runs = BENCH_RUNS >> exp;
+		for (uint64_t i = 0; i < runs + BENCH_WARMUP; i++) {
+			start = rdtsc();
+#if BENCH_DM == 1
+			ssize_t ret;
+
+			ret = fifo_write(dm, src.base_virt, len);
+			if (ret < 0)
+				error("Failed write to FIFO with len = %zu", len);
+
+			ret = fifo_read(dm, dst.base_virt, dst.len);
+			if (ret < 0)
+				error("Failed read from FIFO with len = %zu", len);
+#else
+			ret = dma_ping_pong(dm, src.base_phys, dst.base_phys, len);
+			if (ret)
+				error("DMA ping pong failed");
+#endif
+			stop = rdtsc();
+
+			if (memcmp(src.base_virt, dst.base_virt, len))
+				warn("Compare failed");
+
+			/* Skip the first BENCH_WARMUP transfers so that exactly 'runs' samples are averaged */
+			if (i >= BENCH_WARMUP)
+				total += stop - start;
+		}
+
+		info("exp %u avg %lu", exp, total / runs);
+		fprintf(g, "%lu %lu\n", len, total / runs);
+	}
+
+	fclose(g);
+
+	ret = switch_disconnect(c->sw, dm, dm);
+	if (ret)
+		error("Failed to configure switch");
+
+	ret = dma_free(dm, &mem);
+	if (ret)
+		error("Failed to release DMA memory");
+
+	ret = intc_disable(c->intc, (1 << dm->irq) | (1 << (dm->irq + 1)));
+	if (ret)
+		error("Failed to disable interrupt");
+
+	return 0;
+}
diff --git a/fpga/src/bench-jitter.c b/fpga/src/bench-jitter.c
new file mode 100644
index 000000000..fe4309755
--- /dev/null
+++ b/fpga/src/bench-jitter.c
@@ -0,0 +1,65 @@
+#include
+
+#include
+#include
+
+#include
+
+#include "bench.h"
+
+/** Dump the rdtsc() ticks between consecutive "timer_0" interrupts to results/jitter_*.dat.
+ *  Returns -1 if the card has no "timer_0" IP or no interrupt controller, 0 otherwise. */
+int fpga_benchmark_jitter(struct fpga_card *c)
+{
+	int ret;
+	struct fpga_ip *ip = list_lookup(&c->ips, "timer_0");
+	if (!ip || !c->intc)
+		return -1;
+
+	struct timer *tmr = (struct timer *) ip->_vd;
+	XTmrCtr *xtmr = &tmr->inst;
+
+	ret = intc_enable(c->intc, (1 << ip->irq), intc_flags);
+	if (ret)
+		error("Failed to enable interrupt");
+
+	float period = 50e-6;
+	int runs = 300.0 / period;
+	/* Sized by the run count: 300 s / 50 us = 6e6 samples do not fit into a fixed 8 MiB buffer */
+	int *hist = alloc(runs * sizeof(*hist));
+
+	XTmrCtr_SetOptions(xtmr, 0, XTC_INT_MODE_OPTION | XTC_EXT_COMPARE_OPTION | XTC_DOWN_COUNT_OPTION | XTC_AUTO_RELOAD_OPTION);
+	XTmrCtr_SetResetValue(xtmr, 0, period * FPGA_AXI_HZ);
+	XTmrCtr_Start(xtmr, 0);
+
+	uint64_t end, start = rdtsc();
+	for (int i = 0; i < runs; i++) {
+		uint64_t cnt = intc_wait(c->intc, ip->irq);
+		if (cnt != 1)
+			warn("fail");
+
+		/* Acknowledge IRQ */
+		XTmrCtr_WriteReg((uintptr_t) c->map + ip->baseaddr, 0, XTC_TCSR_OFFSET, XTmrCtr_ReadReg((uintptr_t) c->map + ip->baseaddr, 0, XTC_TCSR_OFFSET));
+
+		end = rdtsc();
+		hist[i] = end - start;
+		start = end;
+	}
+	XTmrCtr_Stop(xtmr, 0);
+
+	char fn[256];
+	snprintf(fn, sizeof(fn), "results/jitter_%s_%s.dat", intc_flags & INTC_POLLING ? "polling" : "irq", uts.release);
+	FILE *g = fopen(fn, "w");
+	if (!g)
+		error("Failed to open file for results: %s", fn);
+	for (int i = 0; i < runs; i++)
+		fprintf(g, "%u\n", hist[i]);
+	fclose(g);
+	free(hist);
+
+	ret = intc_disable(c->intc, (1 << ip->irq));
+	if (ret)
+		error("Failed to disable interrupt");
+
+	return 0;
+}
diff --git a/fpga/src/bench-latency.c b/fpga/src/bench-latency.c
new file mode 100644
index 000000000..f0affc4e0
--- /dev/null
+++ b/fpga/src/bench-latency.c
@@ -0,0 +1,68 @@
+#include
+
+#include
+#include
+
+#include
+#include
+
+#include "bench.h"
+
+/* Number of samples taken by fpga_benchmark_latency() */
+#define BENCH_LATENCY_RUNS 1000000
+
+/** Measure the delay between raising an interrupt by software and being notified of it.
+ *
+ * For every sample 0x100 is written to the register at XIN_ISR_OFFSET of the
+ * interrupt controller and the rdtsc() ticks until intc_wait() returns for
+ * interrupt 8 are recorded. All samples are dumped to
+ * results/latency_{irq|polling}_{kernel release}.dat, one value per line.
+ *
+ * @param c The FPGA card whose interrupt controller is benchmarked.
+ * @retval 0 Success.
+ * @retval -1 The card has no interrupt controller.
+ */
+int fpga_benchmark_latency(struct fpga_card *c)
+{
+	int ret;
+
+	uint64_t start, end;
+
+	if (!c->intc)
+		return -1;
+
+	int runs = BENCH_LATENCY_RUNS;
+
+	/* Static storage: a million samples are too large to be placed safely on the stack */
+	static int hist[BENCH_LATENCY_RUNS];
+
+	ret = intc_enable(c->intc, 0x100, intc_flags);
+	if (ret)
+		error("Failed to enable interrupts");
+
+	for (int i = 0; i < runs; i++) {
+		start = rdtsc();
+		XIntc_Out32((uintptr_t) c->map + c->intc->baseaddr + XIN_ISR_OFFSET, 0x100);
+
+		intc_wait(c->intc, 8);
+		end = rdtsc();
+
+		hist[i] = end - start;
+	}
+
+	char fn[256];
+	snprintf(fn, sizeof(fn), "results/latency_%s_%s.dat", intc_flags & INTC_POLLING ? "polling" : "irq", uts.release);
+	FILE *g = fopen(fn, "w");
+	if (!g)
+		error("Failed to open file for results: %s", fn);
+
+	for (int i = 0; i < runs; i++)
+		fprintf(g, "%u\n", hist[i]);
+	fclose(g);
+
+	ret = intc_disable(c->intc, 0x100);
+	if (ret)
+		error("Failed to disable interrupt");
+
+	return 0;
+}
diff --git a/fpga/src/bench-memcpy.c b/fpga/src/bench-memcpy.c
new file mode 100644
index 000000000..b34788d17
--- /dev/null
+++ b/fpga/src/bench-memcpy.c
@@ -0,0 +1,63 @@
+#include
+
+#include
+
+#include
+
+#include "bench.h"
+
+/** Benchmark 32 bit reads from the memory mapped at offset 0x200000 of the card.
+ *
+ * For every power-of-two length from 2^BENCH_DM_EXP_MIN to
+ * 2^BENCH_DM_EXP_MAX bytes the region is read word by word and the elapsed
+ * rdtsc() ticks are averaged over all timed runs. The results are written to
+ * results/bar0_{irq|polling}_{kernel release}.dat.
+ *
+ * @param c The FPGA card whose mapping is read.
+ * @retval 0 Success.
+ * @retval -1 The results file could not be opened.
+ */
+int fpga_benchmark_memcpy(struct fpga_card *c)
+{
+	char *map = c->map + 0x200000;
+
+	/* volatile forces every read to be performed although the sum is never used */
+	volatile uint32_t *mapi = (volatile uint32_t *) map;
+
+	char fn[256];
+	snprintf(fn, sizeof(fn), "results/bar0_%s_%s.dat", intc_flags & INTC_POLLING ? "polling" : "irq", uts.release);
+	FILE *g = fopen(fn, "w");
+	if (!g)
+		return -1;
+
+	fprintf(g, "# bytes cycles\n");
+
+	uint32_t dummy = 0;
+
+	for (int exp = BENCH_DM_EXP_MIN; exp <= BENCH_DM_EXP_MAX; exp++) {
+		uint64_t len = 1 << exp;
+		uint64_t start, end, total = 0;
+		uint64_t runs = (BENCH_RUNS << 2) >> exp;
+
+		for (uint64_t i = 0; i < runs + BENCH_WARMUP; i++) {
+			start = rdtsc();
+
+			/* Read benchmark; assign 'mapi[j] = j' instead to benchmark writes */
+			for (uint64_t j = 0; j < len / 4; j++)
+				dummy += mapi[j];
+
+			end = rdtsc();
+
+			/* Skip the warmup runs so that exactly 'runs' samples are averaged */
+			if (i >= BENCH_WARMUP)
+				total += end - start;
+		}
+
+		info("exp = %u\truns = %ju\ttotal = %ju\tavg = %ju\tavgw = %ju", exp, runs, total, total / runs, total / (runs * len));
+		fprintf(g, "%zu %lu %ju\n", len, total / runs, runs);
+	}
+
+	fclose(g);
+
+	return 0;
+}
diff --git a/fpga/src/bench-overruns.c b/fpga/src/bench-overruns.c
new file mode 100644
index 000000000..28e82a5db
--- /dev/null
+++ b/fpga/src/bench-overruns.c
@@ -0,0 +1,204 @@
+/** Benchmarks for VILLASfpga: LAPACK & BLAS
+ *
+ * @author Steffen Vogel
+ * @copyright 2017, Steffen Vogel
+ **********************************************************************************/
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "bench.h"
+
+/* Some hard-coded configuration for the FPGA benchmarks */
+#define BENCH_WARMUP 100
+
+/* Defined in bench.c */
+extern int intc_flags;
+extern struct utsname uts;
+
+/* LAPACK & BLAS Fortran prototypes */
+extern int dgemm_(char *transa, char *transb, int *m, int *n, int *k, double *alpha, double *a, int *lda, double *b, int *ldb, double *beta, double *c, int *ldc);
+extern int dgetrf_(int *m, int *n, double *a, int *lda, int *ipiv, int *info);
+extern int dgetri_(int *n, double *a, int *lda, int *ipiv, double *work, int *lwork, int *info);
+
+/** Fill C with the N x N matrix A' * A, where A holds values derived from rand().
+ *
+ * @param N Number of rows and columns.
+ * @param C Output matrix with room for N * N doubles.
+ * @return Always 0.
+ */
+static int lapack_generate_workload(int N, double *C)
+{
+	double *A = alloc(N * N * sizeof(double));
+
+	srand(time(NULL));
+
+	for (int i = 0; i < N * N; i++)
+		A[i] = 100 * (double) rand() / RAND_MAX + 1;
+
+	char transA = 'T';
+	char transB = 'N';
+	double alpha = 1;
+	double beta = 0; /* C is overwritten: its previous content must not be added to the product */
+
+	/* C = A' * A, to get an invertible matrix */
+	dgemm_(&transA, &transB, &N, &N, &N, &alpha, A, &N, A, &N, &beta, C, &N);
+
+	free(A);
+
+	return 0;
+}
+
+/** Invert the N x N matrix A in place: LU factorization (dgetrf) followed by inversion (dgetri).
+ *
+ * @return Always 0.
+ */
+static int lapack_workload(int N, double *A)
+{
+	int info = 0;
+	int lworkspace = N;
+	int ipiv[N];
+	double workspace[N];
+
+	dgetrf_(&N, &N, A, &N, ipiv, &info);
+	if (info > 0)
+		error("Failed to LU factorize matrix");
+
+	dgetri_(&N, A, &N, ipiv, workspace, &lworkspace, &info);
+	if (info > 0)
+		error("Failed to invert matrix");
+
+	return 0;
+}
+
+/** Count overruns of the RTDS interface while a matrix inversion of growing size runs per step.
+ *
+ * "dma_1" and "rtds_axis_0" are connected in both directions through the switch.
+ * For matrix sizes p = 3..44 the time of lapack_workload() is measured between
+ * starting a DMA read and waiting for its completion; afterwards the loop counter
+ * is sent back. A run counts as overrun if the first received word lags more than
+ * two behind the loop counter. Results go to results/overruns_lu_rtds_axis_*.dat.
+ *
+ * @param c The FPGA card which provides the DMA, the RTDS interface and the interrupt controller.
+ * @retval 0 Success.
+ * @retval -1 "dma_1", "rtds_axis_0" or the interrupt controller is missing.
+ */
+int fpga_benchmark_overruns(struct fpga_card *c)
+{
+	struct fpga_ip *rtds, *dm;
+
+	dm = list_lookup(&c->ips, "dma_1");
+	rtds = list_lookup(&c->ips, "rtds_axis_0");
+	if (!dm || !rtds || !c->intc)
+		return -1;
+
+	int ret;
+	float period = 50e-6;
+	int runs = 1.0 / period;
+	int overruns;
+
+	info("runs = %u", runs);
+
+	ret = switch_connect(c->sw, dm, rtds);
+	if (ret)
+		error("Failed to configure switch");
+
+	ret = switch_connect(c->sw, rtds, dm);
+	if (ret)
+		error("Failed to configure switch");
+
+	ret = intc_enable(c->intc, (1 << (dm->irq + 1 )), intc_flags);
+	if (ret)
+		error("Failed to enable interrupt");
+
+	/* Dump results */
+	char fn[256];
+	snprintf(fn, sizeof(fn), "results/overruns_lu_rtds_axis_%s_%s.dat", intc_flags & INTC_POLLING ? "polling" : "irq", uts.release);
+	FILE *g = fopen(fn, "w");
+	if (!g)
+		error("Failed to open file for results: %s", fn);
+
+	fprintf(g, "# period = %f\n", period);
+	fprintf(g, "# runs = %u\n", runs);
+
+	struct dma_mem mem;
+	ret = dma_alloc(dm, &mem, 0x1000, 0);
+	if (ret)
+		error("Failed to allocate DMA memory");
+
+	/* The transmit buffer starts 0x200 bytes into the region, like the address given to dma_write().
+	 * NOTE(review): assumes mem.base_phys + 0x200 advances by bytes - confirm against struct dma_mem. */
+	uint32_t *data_rx = (uint32_t *) mem.base_virt;
+	uint32_t *data_tx = (uint32_t *) ((char *) mem.base_virt + 0x200);
+	uint64_t total, start, stop;
+	for (int p = 3; p < 45; p++) {
+		double *A = alloc(p*p*sizeof(double));
+
+		lapack_generate_workload(p, A);
+
+		overruns = 0;
+		total = 0;
+
+		for (int i = 0; i < 2000; i++) {
+			dma_read(dm, mem.base_phys, 0x200);
+			dma_read_complete(dm, NULL, NULL);
+		}
+
+		for (int i = 0; i < runs + BENCH_WARMUP; i++) {
+			dma_read(dm, mem.base_phys, 0x200);
+
+			start = rdtsc();
+			lapack_workload(p, A);
+			stop = rdtsc();
+
+			dma_read_complete(dm, NULL, NULL);
+
+			/* Send data to rtds */
+			data_tx[0] = i;
+			dma_write(dm, mem.base_phys + 0x200, 64 * sizeof(data_tx[0]));
+
+			if (i < BENCH_WARMUP)
+				continue;
+
+			if (i - data_rx[0] > 2)
+				overruns++;
+			total += stop - start;
+		}
+
+		free(A);
+
+		info("iter = %u clks = %ju overruns = %u", p, total / runs, overruns);
+		fprintf(g, "%u %ju %u\n", p, total / runs, overruns);
+
+		if (overruns >= runs)
+			break;
+	}
+
+	fclose(g);
+
+	/* Release the resources acquired above so that the benchmark can be run again */
+	ret = dma_free(dm, &mem);
+	if (ret)
+		error("Failed to release DMA memory");
+
+	ret = intc_disable(c->intc, (1 << (dm->irq + 1)));
+	if (ret)
+		error("Failed to disable interrupt");
+
+	ret = switch_disconnect(c->sw, dm, rtds);
+	if (ret)
+		error("Failed to configure switch");
+
+	ret = switch_disconnect(c->sw, rtds, dm);
+	if (ret)
+		error("Failed to configure switch");
+
+	return 0;
+}
diff --git a/fpga/src/bench.c b/fpga/src/bench.c
new file mode 100644
index 000000000..afe97b0cb
--- /dev/null
+++ b/fpga/src/bench.c
@@ -0,0 +1,85 @@
+/** Benchmarks for VILLASfpga
+ *
+ * @author Steffen Vogel
+ * @copyright 2017, Steffen Vogel
+ **********************************************************************************/
+
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#include "bench.h"
+
+#ifdef WITH_LAPACK
+int fpga_benchmark_overruns(struct fpga_card *c);
+#endif
+
+int intc_flags = 0;
+struct utsname uts;
+
+/** Run the benchmark named by argv[1]; if intc_flags is 0, run it a second time with INTC_POLLING set.
+ *
+ * Between the two passes the function waits for a character on stdin.
+ *
+ * @param argc Number of entries in argv; must be at least 2.
+ * @param argv argv[1] holds the name of the benchmark.
+ * @param c The FPGA card to run the benchmark on.
+ * @retval 0 All passes finished.
+ * @retval -1 uname() failed.
+ */
+int fpga_benchmarks(int argc, char *argv[], struct fpga_card *c)
+{
+	int ret;
+	struct bench {
+		const char *name;
+		int (*func)(struct fpga_card *c);
+	} benchmarks[] = {
+		{ "datamover",	fpga_benchmark_datamover },
+		{ "jitter",	fpga_benchmark_jitter },
+		{ "memcpy",	fpga_benchmark_memcpy },
+#ifdef WITH_LAPACK
+		{ "overruns",	fpga_benchmark_overruns },
+#endif
+		{ "latency",	fpga_benchmark_latency }
+	};
+
+	if (argc < 2)
+		error("Usage: fpga benchmark (bench)");
+
+	struct bench *bench = NULL;
+	for (int i = 0; i < ARRAY_LEN(benchmarks); i++) {
+		if (strcmp(benchmarks[i].name, argv[1]) == 0) {
+			bench = &benchmarks[i];
+			break;
+		}
+	}
+
+	if (bench == NULL)
+		error("There is no benchmark named: %s", argv[1]);
+
+	ret = uname(&uts);
+	if (ret)
+		return -1;
+
+	for (;;) {
+		ret = bench->func(c);
+		if (ret)
+			error("Benchmark %s failed", bench->name);
+
+		if (intc_flags != 0)
+			break;
+
+		/* Rerun test with polling */
+		intc_flags |= INTC_POLLING;
+		getchar();
+	}
+
+	return 0;
+}
diff --git a/fpga/src/bench.h b/fpga/src/bench.h
new file mode 100644
index 000000000..a056230aa
--- /dev/null
+++ b/fpga/src/bench.h
@@ -0,0 +1,31 @@
+#ifndef FPGA_BENCH_H
+#define FPGA_BENCH_H
+
+#include
+
+#include "config.h"
+
+struct fpga_card;
+
+/* Some hard-coded configuration for the FPGA benchmarks */
+#define BENCH_DM		3
+// 1 FIFO
+// 2 DMA SG
+// 3 DMA Simple
+
+#define BENCH_RUNS		3000000
+#define BENCH_WARMUP		100
+#define BENCH_DM_EXP_MIN	0
+#define BENCH_DM_EXP_MAX	20
+
+/* The individual benchmarks; each one returns 0 on success */
+int fpga_benchmark_datamover(struct fpga_card *c);
+int fpga_benchmark_jitter(struct fpga_card *c);
+int fpga_benchmark_memcpy(struct fpga_card *c);
+int fpga_benchmark_latency(struct fpga_card *c);
+
+/* Defined in bench.c */
+extern int intc_flags;
+extern struct utsname uts;
+
+#endif /* FPGA_BENCH_H */
diff --git a/fpga/src/fpga.c b/fpga/src/fpga.c
new file mode 100644
index 000000000..602e392ff
--- /dev/null
+++ b/fpga/src/fpga.c
@@ -0,0 +1,111 @@
+/** VILLASfpga utility for tests and benchmarks
+ *
+ * @author Steffen Vogel
+ * @copyright 2017, Steffen Vogel
+ **********************************************************************************/
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+
+#include
+
+/* Declarations */
+int fpga_benchmarks(int argc, char *argv[], struct fpga_card *c);
+
+/** Print the command line help followed by the copyright notice. */
+void usage()
+{
+	printf("Usage: villas-fpga [OPTIONS] CONFIG CARD [BENCHMARK]\n\n");
+	printf(" CONFIG path to a configuration file\n");
+	printf(" CARD name of the FPGA card\n");
+	printf(" BENCHMARK name of the benchmark to run\n");
+	printf(" OPTIONS is one or more of the following options:\n");
+	printf(" -h show this help\n");
+	printf(" -V show the version of the tool\n");
+	printf("\n");
+	print_copyright();
+}
+
+/** Load the configuration, look up the requested card, dump it and run a benchmark on it. */
+int main(int argc, char *argv[])
+{
+	int ret;
+
+	struct list cards;
+	struct vfio_container vc;
+	struct pci pci;
+	struct fpga_card *card;
+
+	/* Parse arguments; getopt() returns an int, its -1 does not fit into an unsigned char */
+	int c;
+	while ((c = getopt(argc, argv, "Vh")) != -1) {
+		switch (c) {
+			case 'V':
+				print_version();
+				exit(EXIT_SUCCESS);
+
+			case 'h':
+			case '?':
+			default:
+				usage();
+				exit(EXIT_SUCCESS);
+		}
+	}
+
+	/* Arguments after CARD are handed on; fpga_benchmarks() complains if the benchmark name is missing */
+	if (argc < optind + 2) {
+		usage();
+		exit(EXIT_FAILURE);
+	}
+
+	char *configfile = argv[optind];
+	char *cardname = argv[optind+1];
+
+	FILE *f;
+	json_error_t err;
+	json_t *json;
+
+	ret = pci_init(&pci);
+	if (ret)
+		return -1;
+
+	ret = vfio_init(&vc);
+	if (ret)
+		return -1;
+
+	/* Parse FPGA configuration */
+	f = fopen(configfile, "r");
+	if (!f)
+		return -1;
+
+	json = json_loadf(f, 0, &err);
+	fclose(f); /* The stream is no longer needed, whether parsing succeeded or not */
+	if (!json)
+		return -1;
+
+	list_init(&cards);
+	ret = fpga_card_parse_list(&cards, json);
+	if (ret)
+		return -1;
+
+	json_decref(json);
+
+	card = list_lookup(&cards, cardname);
+	if (!card)
+		return -1;
+
+	fpga_card_dump(card);
+
+	/* Run benchmarks: the callee sees the card name as argv[0] and the benchmark name as argv[1] */
+	fpga_benchmarks(argc-optind-1, argv+optind+1, card);
+
+	return 0;
+}