From 6e0a11373daa1056ce0d32886b3e02f4b0a00794 Mon Sep 17 00:00:00 2001 From: daniel-k Date: Wed, 18 May 2016 01:19:36 +0200 Subject: [PATCH] openmpbench: implement profiling for syncbench and also change linker order --- hermit/usr/openmpbench/Makefile | 15 ++++---- hermit/usr/openmpbench/common.c | 5 +++ hermit/usr/openmpbench/syncbench.c | 55 ++++++++++++++++++++++++++++-- 3 files changed, 66 insertions(+), 9 deletions(-) diff --git a/hermit/usr/openmpbench/Makefile b/hermit/usr/openmpbench/Makefile index f3c16d8b7..7ba028b0a 100644 --- a/hermit/usr/openmpbench/Makefile +++ b/hermit/usr/openmpbench/Makefile @@ -18,8 +18,11 @@ all: syncbench schedbench taskbench prog: arraybench_$(IDA) +# We need to generate a linker map file so that Xray can resolve function names +LDFLAGS += -Wl,-Map=$@.map + syncbench: $(SYNCOBJS) - $(CC) -o syncbench $(LDFLAGS) $(SYNCOBJS) $(CLOCKOBJS) $(LIBS) -lm + $(CC) -o syncbench $(SYNCOBJS) $(LDFLAGS) $(CLOCKOBJS) $(LIBS) -lm # Rule to ensure the lower optimisation level is picked up for common.c # with the Cray compiler @@ -32,7 +35,7 @@ common_sched.o: ${CC} ${CFLAGS_CRAY} $(SCHEDFLAGS) $(OMPFLAG) -o common_sched.o -c common.c schedbench: $(SCHEDOBJS) - $(CC) -o schedbench $(LDFLAGS) $(SCHEDOBJS) $(CLOCKOBJS) $(LIBS) -lm + $(CC) -o schedbench $(SCHEDOBJS) $(LDFLAGS) $(CLOCKOBJS) $(LIBS) -lm # Multiple header files due to multiple array sizes, makes header file arraybench_*.h arraybench_$(IDA).h: arraybench.h @@ -44,13 +47,13 @@ arraybench_$(IDA).o: arraybench_$(IDA).h arraybench.c # Multiple executables due to multiple array sizes, makes exe file arraybench_* arraybench_$(IDA): $(ARRAYOBJS) $(CLOCKOBJS) arraybench.c - $(CC) $(LDFLAGS) $(ARRAYOBJS) $(CLOCKOBJS) $(LIBS) -lm -o $@ + $(CC) -o $@ $(LDFLAGS) $(ARRAYOBJS) $(CLOCKOBJS) $(LIBS) -lm taskbench: $(TASKOBJS) - $(CC) -o taskbench $(LDFLAGS) $(OMPFLAG) $(TASKOBJS) $(CLOCKOBJS) $(LIBS) -lm + $(CC) -o taskbench $(TASKOBJS) $(LDFLAGS) $(OMPFLAG) $(CLOCKOBJS) $(LIBS) -lm -clean: - -rm -rf *.o syncbench schedbench arraybench_* taskbench +clean: + -rm -rf *.o *.xray *.map syncbench schedbench arraybench_* taskbench veryclean: clean -rm -rf OpenMPBench.* *.all diff --git a/hermit/usr/openmpbench/common.c b/hermit/usr/openmpbench/common.c index 9267729d1..816795c56 100644 --- a/hermit/usr/openmpbench/common.c +++ b/hermit/usr/openmpbench/common.c @@ -35,6 +35,7 @@ #include #include #include +#include #include "common.h" @@ -276,6 +277,8 @@ void reference(char *name, void (*refer)(void)) { int k; double start; + XRayAnnotate("name='%s'", name); + // Calculate the required number of innerreps innerreps = getinnerreps(refer); @@ -319,6 +322,8 @@ void benchmark(char *name, void (*test)(void)) intitest(name); + XRayAnnotate("name='%s'", name); + for (k=0; k<=outerreps; k++) { start = getclock(); test(); diff --git a/hermit/usr/openmpbench/syncbench.c b/hermit/usr/openmpbench/syncbench.c index b8f0d9ae6..6fa88de7f 100644 --- a/hermit/usr/openmpbench/syncbench.c +++ b/hermit/usr/openmpbench/syncbench.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "common.h" #include "syncbench.h" @@ -42,6 +43,12 @@ omp_lock_t lock; int main(int argc, char **argv) { + struct XRayTraceCapture* trace = XRayInit( + 20, // max. call depth + 32 * 1000 * 1000, // memory for report + 13, // frame count + "/hermit/usr/openmpbench/syncbench.map"); + // Start Paraver tracing #ifdef PARAVERTRACE Extrae_init(); @@ -52,48 +59,80 @@ int main(int argc, char **argv) { omp_init_lock(&lock); /* GENERATE REFERENCE TIME */ - reference("reference time 1", &refer); + XRayStartFrame(trace); + reference("reference time 1", &refer); + XRayEndFrame(trace); /* TEST PARALLEL REGION */ + XRayStartFrame(trace); benchmark("PARALLEL", &testpr); + XRayEndFrame(trace); /* TEST FOR */ - benchmark("FOR", &testfor); + XRayStartFrame(trace); + benchmark("FOR", &testfor); + XRayEndFrame(trace); /* TEST PARALLEL FOR */ - benchmark("PARALLEL FOR", &testpfor); + XRayStartFrame(trace); + benchmark("PARALLEL FOR", &testpfor); + XRayEndFrame(trace); /* TEST BARRIER */ + XRayStartFrame(trace); benchmark("BARRIER", &testbar); + XRayEndFrame(trace); /* TEST SINGLE */ + XRayStartFrame(trace); benchmark("SINGLE", &testsing); + XRayEndFrame(trace); /* TEST CRITICAL*/ + XRayStartFrame(trace); benchmark("CRITICAL", &testcrit); + XRayEndFrame(trace); /* TEST LOCK/UNLOCK */ + XRayStartFrame(trace); benchmark("LOCK/UNLOCK", &testlock); + XRayEndFrame(trace); /* TEST ORDERED SECTION */ + XRayStartFrame(trace); benchmark("ORDERED", &testorder); + XRayEndFrame(trace); /* GENERATE NEW REFERENCE TIME */ + XRayStartFrame(trace); reference("reference time 2", &referatom); + XRayEndFrame(trace); /* TEST ATOMIC */ + XRayStartFrame(trace); benchmark("ATOMIC", &testatom); + XRayEndFrame(trace); /* GENERATE NEW REFERENCE TIME */ + XRayStartFrame(trace); reference("reference time 3", &referred); + XRayEndFrame(trace); /* TEST REDUCTION (1 var) */ + XRayStartFrame(trace); benchmark("REDUCTION", &testred); + XRayEndFrame(trace); #ifdef PARAVERTRACE Extrae_fini(); #endif + XRaySaveReport(trace, + "/hermit/usr/openmpbench/syncbench.xray", // report file + 0.05f, // Only output funcs that have higher runtime [%] + 1000); // Only output funcs that have higher runtime [cycles] + XRayShutdown(trace); + finalise(); return EXIT_SUCCESS; @@ -132,6 +171,11 @@ void referred() { void testpr() { int j; +#ifdef XRAY + static int n = 1; + XRayAnnotate("n = %i", n); + n++; +#endif for (j = 0; j < innerreps; j++) { #pragma omp parallel { @@ -155,6 +199,11 @@ void testfor() { void testpfor() { int i, j; +#ifdef XRAY + static int n = 1; + XRayAnnotate("n = %i", n); + n++; +#endif for (j = 0; j < innerreps; j++) { #pragma omp parallel for for (i = 0; i < nthreads; i++) {