/* * Copyright 2014 Steffen Vogel, Chair for Operating Systems, * RWTH Aachen University * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * This file is part of MetalSVM. */ /** * @author Steffen Vogel * * Based on: * www.cs.berkeley.edu/~richie/bebop/notes/matmul/files/membench/ */ #include #include #include #include #include #define SAMPLES 1000 #define CLKS 1000000000L #define STRIDE_MIN 1 #define RANGE_MIN (32) /* 32 Byte */ #define RANGE_MAX (32*1024*1024) /* 32 MiB */ #define FLUSH 1 static inline void warmup(char* memory) { #ifdef FLUSH tlb_flush(); flush_cache(); #else int index; for (index = 0; index < RANGE_MAX; index++) { memory[index] = 0; } #endif } int membench(void* arg) { kprintf("======= Starting membench\n"); /* Init PMCs */ struct pmc_caps* cap = pmc_init(); kprintf("PMC architecural version: %u\n", cap->version); kprintf("There are %u general purpose PMCs (%u bit wide) available\n", cap->gp_count, cap->gp_width); kprintf("There are %u fixed function PMCs (%u bit wide) available\n", cap->ff_count, cap->ff_width); int i; uint16_t tests[][2] = { #if 0 {PMC_EVT_MEM_LOAD_RETIRED_DTLB_MISS, 0} {PMC_EVT_MEM_LOAD_RETIRED_L1D_MISS, 0} {PMC_EVT_MEM_LOAD_RETIRED_L2_MISS, 0} #elif 0 {PMC_EVT_PAGE_WALK_CLKS, PMC_EVT_PAGE_WALK_COUNT} #else {PMC_EVT_DTLB_MISS_ANY, PMC_EVT_DTLB_MISS_LD}, {PMC_EVT_DTLB_MISS_ANY, PMC_EVT_DTLB_MISS_L0_LD}, {PMC_EVT_DTLB_MISS_ANY, PMC_EVT_DTLB_MISS_ST}, #endif }; char *memory = palloc(RANGE_MAX, 0); kprintf("Allocated test memory: %#lx bytes at %p\n", RANGE_MAX, memory); kprintf("#%12s%12s%12s%12s%12s%12s\n", "range", "stride", "steps", "runs", "reads", "results"); irq_disable(); /* Setup PMCs */ pmc_stop_all(); pmc_ff_config(1, PMC_FIXED_OS); // CPU_CLK_UNHALTED.CORE /* Variables for PMC values */ uint64_t gp[2], ff[3]; uint64_t gp_ovh[2], ff_ovh[3]; int64_t gp_real[2], ff_real[3]; uint64_t run, steps, tsteps, index, runs, reads; uint64_t range, stride; volatile char temp; /* Run the timing experiments */ for (range = RANGE_MIN; range <= RANGE_MAX; range *= 2) { for (stride = STRIDE_MIN; stride < range; stride *= 2) { runs = SAMPLES * stride / range + 1; for (i = 0; i < sizeof(tests) / 4; i++) { pmc_gp_config(0, tests[i][0], PMC_EVTSEL_OS | PMC_EVTSEL_EN, 0, 0); pmc_gp_config(1, tests[i][1], PMC_EVTSEL_OS | PMC_EVTSEL_EN, 0, 0); steps = 0; tsteps = 0; /* Time the loop with strided access + loop overhead */ warmup(memory); pmc_reset_all(); pmc_start_all(); do { for (run = 0; run < runs; run++) { for (index = 0; index < range; index += stride) { temp = memory[index]; } } steps++; } while (pmc_ff_read(1) < CLKS); pmc_stop_all(); gp[0] = pmc_gp_read(0); gp[1] = pmc_gp_read(1); ff[1] = pmc_ff_read(1); /* Try to time just the overheads */ warmup(memory); pmc_reset_all(); pmc_start_all(); do { for (run = 0; run < runs; run++) { for (index = 0; index < range; index += stride) { temp++; } } tsteps++; } while (tsteps < steps); pmc_stop_all(); gp_ovh[0] = pmc_gp_read(0); gp_ovh[1] = pmc_gp_read(1); ff_ovh[1] = pmc_ff_read(1); gp_real[0] = gp[0] - gp_ovh[0]; gp_real[1] = gp[1] - gp_ovh[1]; ff_real[1] = ff[1] - ff_ovh[1]; reads = runs * steps * range / stride; if (i == 0) kprintf("%12llu%12llu%12llu%12llu%12llu%12llu", range, stride, steps, runs, reads, ff_real[1]); kprintf("%12llu", gp_real[0]); kprintf("%12llu", gp_real[1]); kprintf("\t"); } kprintf("\n"); } kprintf("\n"); } irq_enable(); return 0; }