1
0
Fork 0
mirror of https://github.com/hermitcore/libhermit.git synced 2025-03-09 00:00:03 +01:00
libhermit/usr/openmpbench/common.c

380 lines
11 KiB
C
Raw Permalink Normal View History

/****************************************************************************
* *
* OpenMP MicroBenchmark Suite - Version 3.1 *
* *
* produced by *
* *
* Mark Bull, Fiona Reid and Nix Mc Donnell *
* *
* at *
* *
* Edinburgh Parallel Computing Centre *
* *
* email: markb@epcc.ed.ac.uk or fiona@epcc.ed.ac.uk *
* *
* *
* This version copyright (c) The University of Edinburgh, 2015. *
* *
* *
* Licensed under the Apache License, Version 2.0 (the "License"); *
* you may not use this file except in compliance with the License. *
* You may obtain a copy of the License at *
* *
* http://www.apache.org/licenses/LICENSE-2.0 *
* *
* Unless required by applicable law or agreed to in writing, software *
* distributed under the License is distributed on an "AS IS" BASIS, *
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
* See the License for the specific language governing permissions and *
* limitations under the License. *
* *
****************************************************************************/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <omp.h>
#include <xray.h>
#include "common.h"
#define CONF95 1.96
int nthreads = -1; // Number of OpenMP threads
int delaylength = -1; // The number of iterations to delay for
int outerreps = -1; // Outer repetitions
double delaytime = -1.0; // Length of time to delay for in microseconds
double targettesttime = 0.0; // The length of time in microseconds that the test
// should run for.
unsigned long innerreps; // Inner repetitions
double *times; // Array of doubles storing the benchmark times in microseconds
double referencetime; // The average reference time in microseconds to perform
// outerreps runs
double referencesd; // The standard deviation in the reference time in
// microseconds for outerreps runs.
double testtime; // The average test time in microseconds for
// outerreps runs
double testsd; // The standard deviation in the test time in
// microseconds for outerreps runs.
void usage(char *argv[]) {
printf("Usage: %s.x \n"
"\t--outer-repetitions <outer-repetitions> (default %d)\n"
"\t--test-time <target-test-time> (default %0.2f microseconds)\n"
"\t--delay-time <delay-time> (default %0.4f microseconds)\n"
"\t--delay-length <delay-length> "
"(default auto-generated based on processor speed)\n",
argv[0],
DEFAULT_OUTER_REPS, DEFAULT_TEST_TARGET_TIME, DEFAULT_DELAY_TIME);
}
void parse_args(int argc, char *argv[]) {
// Parse the parameters
int arg;
for (arg = 1; arg < argc; arg++) {
if (strcmp(argv[arg], "--delay-time") == 0.0) {
delaytime = atof(argv[++arg]);
if (delaytime == 0.0) {
printf("Invalid float:--delay-time: %s\n", argv[arg]);
usage(argv);
exit(EXIT_FAILURE);
}
} else if (strcmp(argv[arg], "--outer-repetitions") == 0) {
outerreps = atoi(argv[++arg]);
if (outerreps == 0) {
printf("Invalid integer:--outer-repetitions: %s\n", argv[arg]);
usage(argv);
exit(EXIT_FAILURE);
}
} else if (strcmp(argv[arg], "--test-time") == 0) {
targettesttime = atof(argv[++arg]);
if (targettesttime == 0) {
printf("Invalid integer:--test-time: %s\n", argv[arg]);
usage(argv);
exit(EXIT_FAILURE);
}
} else if (strcmp(argv[arg], "-h") == 0) {
usage(argv);
exit(EXIT_SUCCESS);
} else {
printf("Invalid parameters: %s\n", argv[arg]);
usage(argv);
exit(EXIT_FAILURE);
}
}
}
int getdelaylengthfromtime(double delaytime) {
int i, reps;
double lapsedtime, starttime; // seconds
reps = 1000;
lapsedtime = 0.0;
delaytime = delaytime/1.0E6; // convert from microseconds to seconds
// Note: delaytime is local to this function and thus the conversion
// does not propagate to the main code.
// Here we want to use the delaytime in microseconds to find the
// delaylength in iterations. We start with delaylength=0 and
// increase until we get a large enough delaytime, return delaylength
// in iterations.
delaylength = 0;
delay(delaylength);
while (lapsedtime < delaytime) {
delaylength = delaylength * 1.1 + 1;
starttime = getclock();
for (i = 0; i < reps; i++) {
delay(delaylength);
}
lapsedtime = (getclock() - starttime) / (double) reps;
}
return delaylength;
}
unsigned long getinnerreps(void (*test)(void)) {
innerreps = 10L; // some initial value
double time = 0.0;
while (time < targettesttime) {
double start = getclock();
test();
time = (getclock() - start) * 1.0e6;
innerreps *=2;
// Test to stop code if compiler is optimising reference time expressions away
if (innerreps > (targettesttime*1.0e15)) {
printf("Compiler has optimised reference loop away, STOP! \n");
printf("Try recompiling with lower optimisation level \n");
exit(1);
}
}
return innerreps;
}
void printheader(char *name) {
printf("\n");
printf("--------------------------------------------------------\n");
printf("Computing %s time using %lu reps\n", name, innerreps);
}
void stats(double *mtp, double *sdp) {
double meantime, totaltime, sumsq, mintime, maxtime, sd, cutoff;
int i, nr;
mintime = 1.0e10;
maxtime = 0.;
totaltime = 0.;
for (i = 1; i <= outerreps; i++) {
mintime = (mintime < times[i]) ? mintime : times[i];
maxtime = (maxtime > times[i]) ? maxtime : times[i];
totaltime += times[i];
}
meantime = totaltime / outerreps;
sumsq = 0;
for (i = 1; i <= outerreps; i++) {
sumsq += (times[i] - meantime) * (times[i] - meantime);
}
sd = sqrt(sumsq / (outerreps - 1));
cutoff = 3.0 * sd;
nr = 0;
for (i = 1; i <= outerreps; i++) {
if (fabs(times[i] - meantime) > cutoff)
nr++;
}
printf("\n");
printf("Sample_size Average Min Max S.D. Outliers\n");
printf(" %d %f %f %f %f %d\n",
outerreps, meantime, mintime, maxtime, sd, nr);
printf("\n");
*mtp = meantime;
*sdp = sd;
}
void printfooter(char *name, double testtime, double testsd,
double referencetime, double refsd) {
printf("%s time = %f microseconds +/- %f\n",
name, testtime, CONF95*testsd);
printf("%s overhead = %f microseconds +/- %f\n",
name, testtime-referencetime, CONF95*(testsd+referencesd));
}
void printreferencefooter(char *name, double referencetime, double referencesd) {
printf("%s time = %f microseconds +/- %f\n",
name, referencetime, CONF95 * referencesd);
}
void init(int argc, char **argv)
{
#pragma omp parallel
{
#pragma omp master
{
nthreads = omp_get_num_threads();
}
}
parse_args(argc, argv);
if (outerreps == -1) {
outerreps = DEFAULT_OUTER_REPS;
}
if (targettesttime == 0.0) {
targettesttime = DEFAULT_TEST_TARGET_TIME;
}
if (delaytime == -1.0) {
delaytime = DEFAULT_DELAY_TIME;
}
delaylength = getdelaylengthfromtime(delaytime); // Always need to compute delaylength in iterations
times = malloc((outerreps+1) * sizeof(double));
printf("Running OpenMP benchmark version 3.0\n"
"\t%d thread(s)\n"
"\t%d outer repetitions\n"
"\t%0.2f test time (microseconds)\n"
"\t%d delay length (iterations) \n"
"\t%f delay time (microseconds)\n",
nthreads,
outerreps, targettesttime,
delaylength, delaytime);
}
void finalise(void) {
free(times);
}
void initreference(char *name) {
printheader(name);
}
/* Calculate the reference time. */
void reference(char *name, void (*refer)(void)) {
int k;
double start;
2016-05-18 11:55:37 +02:00
XRayLabelFrame(name);
// Calculate the required number of innerreps
innerreps = getinnerreps(refer);
initreference(name);
for (k = 0; k <= outerreps; k++) {
start = getclock();
refer();
times[k] = (getclock() - start) * 1.0e6 / (double) innerreps;
}
finalisereference(name);
}
void finalisereference(char *name) {
stats(&referencetime, &referencesd);
printreferencefooter(name, referencetime, referencesd);
}
void intitest(char *name) {
printheader(name);
}
void finalisetest(char *name) {
stats(&testtime, &testsd);
printfooter(name, testtime, testsd, referencetime, referencesd);
}
/* Function to run a microbenchmark test*/
void benchmark(char *name, void (*test)(void))
{
int k;
double start;
// Calculate the required number of innerreps
innerreps = getinnerreps(test);
intitest(name);
2016-05-18 11:55:37 +02:00
XRayLabelFrame(name);
for (k=0; k<=outerreps; k++) {
start = getclock();
test();
times[k] = (getclock() - start) * 1.0e6 / (double) innerreps;
}
finalisetest(name);
}
// For the Cray compiler on HECToR we need to turn off optimisation
// for the delay and array_delay functions. Other compilers should
// not be afffected.
#pragma _CRI noopt
void delay(int delaylength) {
int i;
float a = 0.;
for (i = 0; i < delaylength; i++)
a += i;
if (a < 0)
printf("%f \n", a);
}
void array_delay(int delaylength, double a[1]) {
int i;
a[0] = 1.0;
for (i = 0; i < delaylength; i++)
a[0] += i;
if (a[0] < 0)
printf("%f \n", a[0]);
}
// Re-enable optimisation for remainder of source.
#pragma _CRI opt
double getclock() {
double time;
// Returns a value in seconds of the time elapsed from some arbitrary,
// but consistent point.
double omp_get_wtime(void);
time = omp_get_wtime();
return time;
}
int returnfalse() {
return 0;
}