diff --git a/apps/membench.c b/apps/membench.c
index 8c7e03f8..bcae5fa2 100644
--- a/apps/membench.c
+++ b/apps/membench.c
@@ -16,8 +16,10 @@
  *
  * This file is part of MetalSVM.
  */
 
-/*
- * Based on:
+/**
+ * @author Steffen Vogel
+ *
+ * Based on:
  *	www.cs.berkeley.edu/~richie/bebop/notes/matmul/files/membench/
  */
@@ -28,67 +30,173 @@
 #include
 #include
 
-#define SAMPLES		40
-#define CLKS		1e9
-#define RANGE_MIN	1024			/* 1 KiB */
-#define RANGE_MAX	(16*1024*1024)		/* 16 MiB */
+#define SAMPLES		1000
+#define CLKS		1000000000L
+
+#define STRIDE_MIN	1
+
+#define RANGE_MIN	(32)			/* 32 Byte */
+#define RANGE_MAX	(32UL*1024*1024)	/* 32 MiB; unsigned long to match %#lx */
+
+#define FLUSH		1
+
+/**
+ * @brief Prepare caches and TLB for the next measurement
+ *
+ * With FLUSH defined the TLB and the caches are flushed. Otherwise every
+ * byte of the test buffer is written once instead.
+ *
+ * @param memory Test buffer of RANGE_MAX bytes (only touched without FLUSH)
+ */
+static inline void warmup(char* memory)
+{
+#ifdef FLUSH
+	tlb_flush();
+	flush_cache();
+#else
+	unsigned long index;
+	for (index = 0; index < RANGE_MAX; index++) {
+		memory[index] = 0;
+	}
+#endif
+}
 
+/**
+ * @brief Strided memory read benchmark instrumented with PMCs
+ *
+ * For every range/stride combination and every event pair in tests[], the
+ * read loop is repeated until fixed function counter 1 reaches CLKS. The
+ * same loop without the memory access is measured afterwards and its
+ * counter values are subtracted as loop overhead.
+ *
+ * @param arg Unused
+ * @return 0 on success, -1 if the test memory could not be allocated
+ */
 int membench(void* arg)
 {
 	kprintf("======= Starting membench\n");
 
+	/* Init PMCs */
+	struct pmc_caps* cap = pmc_init();
+
+	kprintf("PMC architecural version: %u\n", cap->version);
+	kprintf("There are %u general purpose PMCs (%u bit wide) available\n", cap->gp_count, cap->gp_width);
+	kprintf("There are %u fixed function PMCs (%u bit wide) available\n", cap->ff_count, cap->ff_width);
+
+	int i;
+
+	/* Event pairs programmed into general purpose PMC 0 and PMC 1 */
+	uint16_t tests[][2] = {
+#if 0
+		{PMC_EVT_MEM_LOAD_RETIRED_DTLB_MISS, 0},
+		{PMC_EVT_MEM_LOAD_RETIRED_L1D_MISS, 0},
+		{PMC_EVT_MEM_LOAD_RETIRED_L2_MISS, 0},
+#elif 0
+		{PMC_EVT_PAGE_WALK_CLKS, PMC_EVT_PAGE_WALK_COUNT}
+#else
+		{PMC_EVT_DTLB_MISS_ANY, PMC_EVT_DTLB_MISS_LD},
+		{PMC_EVT_DTLB_MISS_ANY, PMC_EVT_DTLB_MISS_L0_LD},
+		{PMC_EVT_DTLB_MISS_ANY, PMC_EVT_DTLB_MISS_ST},
+#endif
+	};
+
+
 	char *memory = palloc(RANGE_MAX, 0);
 	kprintf("Allocated test memory: %#lx bytes at %p\n", RANGE_MAX, memory);
 
-	kprintf("%12s%12s%12s%12s\n", "reads", "range", "stride", "clks");
+	/* NOTE(review): assumes palloc() returns NULL on failure - confirm */
+	if (!memory) {
+		kprintf("membench: failed to allocate test memory\n");
+		return -1;
+	}
+
+	kprintf("#%12s%12s%12s%12s%12s%12s\n", "range", "stride", "steps", "runs", "reads", "results");
+
+	irq_disable();
+
+	/* Setup PMCs */
+	pmc_stop_all();
+	pmc_ff_config(1, PMC_FIXED_OS); // CPU_CLK_UNHALTED.CORE
+
+	/* Variables for PMC values */
+	uint64_t gp[2], ff[3];
+	uint64_t gp_ovh[2], ff_ovh[3];
+	int64_t gp_real[2], ff_real[3];
+	uint64_t run, steps, tsteps, index, runs, reads;
+	uint64_t range, stride;
+	volatile char temp;
 
 	/* Run the timing experiments */
-	register uint64_t range, stride;
 	for (range = RANGE_MIN; range <= RANGE_MAX; range *= 2) {
-		for (stride = 1; stride < range/2; stride *= 2) {
-			register uint64_t t0, t1, t2;
-			register uint64_t run, steps, tsteps, temp, index;
+		for (stride = STRIDE_MIN; stride < range; stride *= 2) {
+			runs = SAMPLES * stride / range + 1;
 
-			/* Time the loop with strided access + loop overhead */
-			steps = 0;
-			tsteps = 0;
+			for (i = 0; i < (int) (sizeof(tests) / sizeof(tests[0])); i++) {
+				pmc_gp_config(0, tests[i][0], PMC_EVTSEL_OS | PMC_EVTSEL_EN, 0, 0);
+				pmc_gp_config(1, tests[i][1], PMC_EVTSEL_OS | PMC_EVTSEL_EN, 0, 0);
 
-			// TODO: setup and start PMCs
+				steps = 0;
+				tsteps = 0;
 
-			t0 = rdtsc();
-			do {
-				for (run = 0; run < SAMPLES; run++) {
-					for (index = 0; index < range; index += stride) {
-						memory[index]++;
+				/* Time the loop with strided access + loop overhead */
+				warmup(memory);
+				pmc_reset_all();
+				pmc_start_all();
+				do {
+					for (run = 0; run < runs; run++) {
+						for (index = 0; index < range; index += stride) {
+							temp = memory[index];
+						}
 					}
-				}
-				steps++;
-				t1 = rdtsc() - t0;
-			} while (t1 < CLKS);
+					steps++;
+				} while (pmc_ff_read(1) < CLKS);
 
-			// TODO: read and stop PMCs
+				pmc_stop_all();
 
-			/* Try to time just the overheads */
-			t0 = rdtsc();
-			do {
-				for (run = 0; run < SAMPLES; run++) {
-					for (index = 0; index < range; index += stride) {
-						temp++;
+				gp[0] = pmc_gp_read(0);
+				gp[1] = pmc_gp_read(1);
+				ff[1] = pmc_ff_read(1);
+
+				/* Try to time just the overheads */
+				warmup(memory);
+				pmc_reset_all();
+				pmc_start_all();
+				do {
+					for (run = 0; run < runs; run++) {
+						for (index = 0; index < range; index += stride) {
+							temp++;
+						}
 					}
-				}
-				tsteps++;
-				t2 = rdtsc() - t0;
-			} while (tsteps < steps);
+					tsteps++;
+				} while (tsteps < steps);
+				pmc_stop_all();
 
-			/* Report on the average time per read/write */
-			int64_t clks = t1 - t2;
-			uint64_t reads = SAMPLES * steps * range / stride;
+				gp_ovh[0] = pmc_gp_read(0);
+				gp_ovh[1] = pmc_gp_read(1);
+				ff_ovh[1] = pmc_ff_read(1);
 
-			kprintf("%12d%12d%12d%12llu\n", reads, range, stride, 1000 * clks / reads);
+				gp_real[0] = gp[0] - gp_ovh[0];
+				gp_real[1] = gp[1] - gp_ovh[1];
+				ff_real[1] = ff[1] - ff_ovh[1];
+
+				reads = runs * steps * range / stride;
+
+				/* Corrected values are signed: the overhead run may count more events */
+				if (i == 0)
+					kprintf("%12llu%12llu%12llu%12llu%12llu%12lld", range, stride, steps, runs, reads, (long long) ff_real[1]);
+
+				kprintf("%12lld", (long long) gp_real[0]);
+				kprintf("%12lld", (long long) gp_real[1]);
+				kprintf("\t");
+			}
+			kprintf("\n");
 		}
+		kprintf("\n");
 	}
 
+	irq_enable();
+
 	return 0;
 }
diff --git a/apps/memory.c b/apps/memory.c
index ec2a7639..2107c3a8 100644
--- a/apps/memory.c
+++ b/apps/memory.c
@@ -17,6 +17,10 @@
  * This file is part of MetalSVM.
  */
 
+/**
+ * @author Steffen Vogel
+ */
+
 #include
 #include
 #include
diff --git a/arch/x86/kernel/uart.c b/arch/x86/kernel/uart.c
index 57ccc47d..c208eaa2 100644
--- a/arch/x86/kernel/uart.c
+++ b/arch/x86/kernel/uart.c
@@ -20,6 +20,10 @@
 #include
 #include
 
+/**
+ * @author Steffen Vogel
+ */
+
 static inline int is_transmit_empty()
 {
 	return inportb(UART_PORT + 5) & 0x20;
diff --git a/arch/x86/mm/page64.c b/arch/x86/mm/page64.c
index f183147e..5fce1aac 100644
--- a/arch/x86/mm/page64.c
+++ b/arch/x86/mm/page64.c
@@ -33,6 +33,10 @@
 #include
 #include
 
+/**
+ * @author Steffen Vogel
+ */
+
 /*
  * Virtual Memory Layout of the standard configuration
  * (1 GB kernel space)
diff --git a/include/metalsvm/malloc.h b/include/metalsvm/malloc.h
index 40f122c8..024e442d 100644
--- a/include/metalsvm/malloc.h
+++ b/include/metalsvm/malloc.h
@@ -17,6 +17,10 @@
  * This file is part of MetalSVM.
  */
 
+/**
+ * @author Steffen Vogel
+ */
+
 #ifndef __MALLOC_H__
 #define __MALLOC_H__
 
diff --git a/include/metalsvm/vma.h b/include/metalsvm/vma.h
index 71c6ba7d..53818c34 100644
--- a/include/metalsvm/vma.h
+++ b/include/metalsvm/vma.h
@@ -19,6 +19,7 @@
 
 /**
  * @author Stefan Lankes
+ * @author Steffen Vogel
  * @file include/metalsvm/vma.h
  * @brief VMA related sructure and functions
  */