mirror of
https://github.com/hermitcore/libhermit.git
synced 2025-03-09 00:00:03 +01:00
openmpbench: implement profiling for syncbench and also change linker order
This commit is contained in:
parent
20f6597ef9
commit
6e0a11373d
3 changed files with 66 additions and 9 deletions
|
@ -18,8 +18,11 @@ all: syncbench schedbench taskbench
|
|||
|
||||
prog: arraybench_$(IDA)
|
||||
|
||||
# We need to generate a linker map file so that Xray can resolve function names
|
||||
LDFLAGS += -Wl,-Map=$@.map
|
||||
|
||||
syncbench: $(SYNCOBJS)
|
||||
$(CC) -o syncbench $(LDFLAGS) $(SYNCOBJS) $(CLOCKOBJS) $(LIBS) -lm
|
||||
$(CC) -o syncbench $(SYNCOBJS) $(LDFLAGS) $(CLOCKOBJS) $(LIBS) -lm
|
||||
|
||||
# Rule to ensure the lower optimisation level is picked up for common.c
|
||||
# with the Cray compiler
|
||||
|
@ -32,7 +35,7 @@ common_sched.o:
|
|||
${CC} ${CFLAGS_CRAY} $(SCHEDFLAGS) $(OMPFLAG) -o common_sched.o -c common.c
|
||||
|
||||
schedbench: $(SCHEDOBJS)
|
||||
$(CC) -o schedbench $(LDFLAGS) $(SCHEDOBJS) $(CLOCKOBJS) $(LIBS) -lm
|
||||
$(CC) -o schedbench $(SCHEDOBJS) $(LDFLAGS) $(CLOCKOBJS) $(LIBS) -lm
|
||||
|
||||
# One header file is generated per array size: arraybench_$(IDA).h is derived from arraybench.h
|
||||
arraybench_$(IDA).h: arraybench.h
|
||||
|
@ -44,13 +47,13 @@ arraybench_$(IDA).o: arraybench_$(IDA).h arraybench.c
|
|||
|
||||
# One executable is built per array size: arraybench_$(IDA)
|
||||
arraybench_$(IDA): $(ARRAYOBJS) $(CLOCKOBJS) arraybench.c
|
||||
$(CC) $(LDFLAGS) $(ARRAYOBJS) $(CLOCKOBJS) $(LIBS) -lm -o $@
|
||||
$(CC) -o $@ $(LDFLAGS) $(ARRAYOBJS) $(CLOCKOBJS) $(LIBS) -lm
|
||||
|
||||
taskbench: $(TASKOBJS)
|
||||
$(CC) -o taskbench $(LDFLAGS) $(OMPFLAG) $(TASKOBJS) $(CLOCKOBJS) $(LIBS) -lm
|
||||
$(CC) -o taskbench $(TASKOBJS) $(LDFLAGS) $(OMPFLAG) $(CLOCKOBJS) $(LIBS) -lm
|
||||
|
||||
clean:
|
||||
-rm -rf *.o syncbench schedbench arraybench_* taskbench
|
||||
clean:
|
||||
-rm -rf *.o *.xray *.map syncbench schedbench arraybench_* taskbench
|
||||
|
||||
veryclean: clean
|
||||
-rm -rf OpenMPBench.* *.all
|
||||
|
|
|
@ -35,6 +35,7 @@
|
|||
#include <string.h>
|
||||
#include <math.h>
|
||||
#include <omp.h>
|
||||
#include <xray.h>
|
||||
|
||||
#include "common.h"
|
||||
|
||||
|
@ -276,6 +277,8 @@ void reference(char *name, void (*refer)(void)) {
|
|||
int k;
|
||||
double start;
|
||||
|
||||
XRayAnnotate("name='%s'", name);
|
||||
|
||||
// Calculate the required number of innerreps
|
||||
innerreps = getinnerreps(refer);
|
||||
|
||||
|
@ -319,6 +322,8 @@ void benchmark(char *name, void (*test)(void))
|
|||
|
||||
intitest(name);
|
||||
|
||||
XRayAnnotate("name='%s'", name);
|
||||
|
||||
for (k=0; k<=outerreps; k++) {
|
||||
start = getclock();
|
||||
test();
|
||||
|
|
|
@ -34,6 +34,7 @@
|
|||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <omp.h>
|
||||
#include <xray.h>
|
||||
|
||||
#include "common.h"
|
||||
#include "syncbench.h"
|
||||
|
@ -42,6 +43,12 @@ omp_lock_t lock;
|
|||
|
||||
int main(int argc, char **argv) {
|
||||
|
||||
struct XRayTraceCapture* trace = XRayInit(
|
||||
20, // max. call depth
|
||||
32 * 1000 * 1000, // memory for report
|
||||
13, // frame count
|
||||
"/hermit/usr/openmpbench/syncbench.map");
|
||||
|
||||
// Start Paraver tracing
|
||||
#ifdef PARAVERTRACE
|
||||
Extrae_init();
|
||||
|
@ -52,48 +59,80 @@ int main(int argc, char **argv) {
|
|||
omp_init_lock(&lock);
|
||||
|
||||
/* GENERATE REFERENCE TIME */
|
||||
reference("reference time 1", &refer);
|
||||
XRayStartFrame(trace);
|
||||
reference("reference time 1", &refer);
|
||||
XRayEndFrame(trace);
|
||||
|
||||
/* TEST PARALLEL REGION */
|
||||
XRayStartFrame(trace);
|
||||
benchmark("PARALLEL", &testpr);
|
||||
XRayEndFrame(trace);
|
||||
|
||||
/* TEST FOR */
|
||||
benchmark("FOR", &testfor);
|
||||
XRayStartFrame(trace);
|
||||
benchmark("FOR", &testfor);
|
||||
XRayEndFrame(trace);
|
||||
|
||||
/* TEST PARALLEL FOR */
|
||||
benchmark("PARALLEL FOR", &testpfor);
|
||||
XRayStartFrame(trace);
|
||||
benchmark("PARALLEL FOR", &testpfor);
|
||||
XRayEndFrame(trace);
|
||||
|
||||
/* TEST BARRIER */
|
||||
XRayStartFrame(trace);
|
||||
benchmark("BARRIER", &testbar);
|
||||
XRayEndFrame(trace);
|
||||
|
||||
/* TEST SINGLE */
|
||||
XRayStartFrame(trace);
|
||||
benchmark("SINGLE", &testsing);
|
||||
XRayEndFrame(trace);
|
||||
|
||||
/* TEST CRITICAL*/
|
||||
XRayStartFrame(trace);
|
||||
benchmark("CRITICAL", &testcrit);
|
||||
XRayEndFrame(trace);
|
||||
|
||||
/* TEST LOCK/UNLOCK */
|
||||
XRayStartFrame(trace);
|
||||
benchmark("LOCK/UNLOCK", &testlock);
|
||||
XRayEndFrame(trace);
|
||||
|
||||
/* TEST ORDERED SECTION */
|
||||
XRayStartFrame(trace);
|
||||
benchmark("ORDERED", &testorder);
|
||||
XRayEndFrame(trace);
|
||||
|
||||
/* GENERATE NEW REFERENCE TIME */
|
||||
XRayStartFrame(trace);
|
||||
reference("reference time 2", &referatom);
|
||||
XRayEndFrame(trace);
|
||||
|
||||
/* TEST ATOMIC */
|
||||
XRayStartFrame(trace);
|
||||
benchmark("ATOMIC", &testatom);
|
||||
XRayEndFrame(trace);
|
||||
|
||||
/* GENERATE NEW REFERENCE TIME */
|
||||
XRayStartFrame(trace);
|
||||
reference("reference time 3", &referred);
|
||||
XRayEndFrame(trace);
|
||||
|
||||
/* TEST REDUCTION (1 var) */
|
||||
XRayStartFrame(trace);
|
||||
benchmark("REDUCTION", &testred);
|
||||
XRayEndFrame(trace);
|
||||
|
||||
#ifdef PARAVERTRACE
|
||||
Extrae_fini();
|
||||
#endif
|
||||
|
||||
XRaySaveReport(trace,
|
||||
"/hermit/usr/openmpbench/syncbench.xray", // report file
|
||||
0.05f, // Only output funcs that have higher runtime [%]
|
||||
1000); // Only output funcs that have higher runtime [cycles]
|
||||
XRayShutdown(trace);
|
||||
|
||||
finalise();
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
|
@ -132,6 +171,11 @@ void referred() {
|
|||
|
||||
void testpr() {
|
||||
int j;
|
||||
#ifdef XRAY
|
||||
static int n = 1;
|
||||
XRayAnnotate("n = %i", n);
|
||||
n++;
|
||||
#endif
|
||||
for (j = 0; j < innerreps; j++) {
|
||||
#pragma omp parallel
|
||||
{
|
||||
|
@ -155,6 +199,11 @@ void testfor() {
|
|||
|
||||
void testpfor() {
|
||||
int i, j;
|
||||
#ifdef XRAY
|
||||
static int n = 1;
|
||||
XRayAnnotate("n = %i", n);
|
||||
n++;
|
||||
#endif
|
||||
for (j = 0; j < innerreps; j++) {
|
||||
#pragma omp parallel for
|
||||
for (i = 0; i < nthreads; i++) {
|
||||
|
|
Loading…
Add table
Reference in a new issue