/**************************************************************************** * * * OpenMP MicroBenchmark Suite - Version 3.1 * * * * produced by * * * * Mark Bull, Fiona Reid and Nix Mc Donnell * * * * at * * * * Edinburgh Parallel Computing Centre * * * * email: markb@epcc.ed.ac.uk or fiona@epcc.ed.ac.uk * * * * * * This version copyright (c) The University of Edinburgh, 2015. * * * * * * Licensed under the Apache License, Version 2.0 (the "License"); * * you may not use this file except in compliance with the License. * * You may obtain a copy of the License at * * * * http://www.apache.org/licenses/LICENSE-2.0 * * * * Unless required by applicable law or agreed to in writing, software * * distributed under the License is distributed on an "AS IS" BASIS, * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * * See the License for the specific language governing permissions and * * limitations under the License. * * * ****************************************************************************/ #include #include #include #include #include #include #include "common.h" #define CONF95 1.96 int nthreads = -1; // Number of OpenMP threads int delaylength = -1; // The number of iterations to delay for int outerreps = -1; // Outer repetitions double delaytime = -1.0; // Length of time to delay for in microseconds double targettesttime = 0.0; // The length of time in microseconds that the test // should run for. unsigned long innerreps; // Inner repetitions double *times; // Array of doubles storing the benchmark times in microseconds double referencetime; // The average reference time in microseconds to perform // outerreps runs double referencesd; // The standard deviation in the reference time in // microseconds for outerreps runs. double testtime; // The average test time in microseconds for // outerreps runs double testsd; // The standard deviation in the test time in // microseconds for outerreps runs. void usage(char *argv[]) { printf("Usage: %s.x \n" "\t--outer-repetitions (default %d)\n" "\t--test-time (default %0.2f microseconds)\n" "\t--delay-time (default %0.4f microseconds)\n" "\t--delay-length " "(default auto-generated based on processor speed)\n", argv[0], DEFAULT_OUTER_REPS, DEFAULT_TEST_TARGET_TIME, DEFAULT_DELAY_TIME); } void parse_args(int argc, char *argv[]) { // Parse the parameters int arg; for (arg = 1; arg < argc; arg++) { if (strcmp(argv[arg], "--delay-time") == 0.0) { delaytime = atof(argv[++arg]); if (delaytime == 0.0) { printf("Invalid float:--delay-time: %s\n", argv[arg]); usage(argv); exit(EXIT_FAILURE); } } else if (strcmp(argv[arg], "--outer-repetitions") == 0) { outerreps = atoi(argv[++arg]); if (outerreps == 0) { printf("Invalid integer:--outer-repetitions: %s\n", argv[arg]); usage(argv); exit(EXIT_FAILURE); } } else if (strcmp(argv[arg], "--test-time") == 0) { targettesttime = atof(argv[++arg]); if (targettesttime == 0) { printf("Invalid integer:--test-time: %s\n", argv[arg]); usage(argv); exit(EXIT_FAILURE); } } else if (strcmp(argv[arg], "-h") == 0) { usage(argv); exit(EXIT_SUCCESS); } else { printf("Invalid parameters: %s\n", argv[arg]); usage(argv); exit(EXIT_FAILURE); } } } int getdelaylengthfromtime(double delaytime) { int i, reps; double lapsedtime, starttime; // seconds reps = 1000; lapsedtime = 0.0; delaytime = delaytime/1.0E6; // convert from microseconds to seconds // Note: delaytime is local to this function and thus the conversion // does not propagate to the main code. // Here we want to use the delaytime in microseconds to find the // delaylength in iterations. We start with delaylength=0 and // increase until we get a large enough delaytime, return delaylength // in iterations. delaylength = 0; delay(delaylength); while (lapsedtime < delaytime) { delaylength = delaylength * 1.1 + 1; starttime = getclock(); for (i = 0; i < reps; i++) { delay(delaylength); } lapsedtime = (getclock() - starttime) / (double) reps; } return delaylength; } unsigned long getinnerreps(void (*test)(void)) { innerreps = 10L; // some initial value double time = 0.0; while (time < targettesttime) { double start = getclock(); test(); time = (getclock() - start) * 1.0e6; innerreps *=2; // Test to stop code if compiler is optimising reference time expressions away if (innerreps > (targettesttime*1.0e15)) { printf("Compiler has optimised reference loop away, STOP! \n"); printf("Try recompiling with lower optimisation level \n"); exit(1); } } return innerreps; } void printheader(char *name) { printf("\n"); printf("--------------------------------------------------------\n"); printf("Computing %s time using %lu reps\n", name, innerreps); } void stats(double *mtp, double *sdp) { double meantime, totaltime, sumsq, mintime, maxtime, sd, cutoff; int i, nr; mintime = 1.0e10; maxtime = 0.; totaltime = 0.; for (i = 1; i <= outerreps; i++) { mintime = (mintime < times[i]) ? mintime : times[i]; maxtime = (maxtime > times[i]) ? maxtime : times[i]; totaltime += times[i]; } meantime = totaltime / outerreps; sumsq = 0; for (i = 1; i <= outerreps; i++) { sumsq += (times[i] - meantime) * (times[i] - meantime); } sd = sqrt(sumsq / (outerreps - 1)); cutoff = 3.0 * sd; nr = 0; for (i = 1; i <= outerreps; i++) { if (fabs(times[i] - meantime) > cutoff) nr++; } printf("\n"); printf("Sample_size Average Min Max S.D. Outliers\n"); printf(" %d %f %f %f %f %d\n", outerreps, meantime, mintime, maxtime, sd, nr); printf("\n"); *mtp = meantime; *sdp = sd; } void printfooter(char *name, double testtime, double testsd, double referencetime, double refsd) { printf("%s time = %f microseconds +/- %f\n", name, testtime, CONF95*testsd); printf("%s overhead = %f microseconds +/- %f\n", name, testtime-referencetime, CONF95*(testsd+referencesd)); } void printreferencefooter(char *name, double referencetime, double referencesd) { printf("%s time = %f microseconds +/- %f\n", name, referencetime, CONF95 * referencesd); } void init(int argc, char **argv) { #pragma omp parallel { #pragma omp master { nthreads = omp_get_num_threads(); } } parse_args(argc, argv); if (outerreps == -1) { outerreps = DEFAULT_OUTER_REPS; } if (targettesttime == 0.0) { targettesttime = DEFAULT_TEST_TARGET_TIME; } if (delaytime == -1.0) { delaytime = DEFAULT_DELAY_TIME; } delaylength = getdelaylengthfromtime(delaytime); // Always need to compute delaylength in iterations times = malloc((outerreps+1) * sizeof(double)); printf("Running OpenMP benchmark version 3.0\n" "\t%d thread(s)\n" "\t%d outer repetitions\n" "\t%0.2f test time (microseconds)\n" "\t%d delay length (iterations) \n" "\t%f delay time (microseconds)\n", nthreads, outerreps, targettesttime, delaylength, delaytime); } void finalise(void) { free(times); } void initreference(char *name) { printheader(name); } /* Calculate the reference time. */ void reference(char *name, void (*refer)(void)) { int k; double start; XRayLabelFrame(name); // Calculate the required number of innerreps innerreps = getinnerreps(refer); initreference(name); for (k = 0; k <= outerreps; k++) { start = getclock(); refer(); times[k] = (getclock() - start) * 1.0e6 / (double) innerreps; } finalisereference(name); } void finalisereference(char *name) { stats(&referencetime, &referencesd); printreferencefooter(name, referencetime, referencesd); } void intitest(char *name) { printheader(name); } void finalisetest(char *name) { stats(&testtime, &testsd); printfooter(name, testtime, testsd, referencetime, referencesd); } /* Function to run a microbenchmark test*/ void benchmark(char *name, void (*test)(void)) { int k; double start; // Calculate the required number of innerreps innerreps = getinnerreps(test); intitest(name); XRayLabelFrame(name); for (k=0; k<=outerreps; k++) { start = getclock(); test(); times[k] = (getclock() - start) * 1.0e6 / (double) innerreps; } finalisetest(name); } // For the Cray compiler on HECToR we need to turn off optimisation // for the delay and array_delay functions. Other compilers should // not be afffected. #pragma _CRI noopt void delay(int delaylength) { int i; float a = 0.; for (i = 0; i < delaylength; i++) a += i; if (a < 0) printf("%f \n", a); } void array_delay(int delaylength, double a[1]) { int i; a[0] = 1.0; for (i = 0; i < delaylength; i++) a[0] += i; if (a[0] < 0) printf("%f \n", a[0]); } // Re-enable optimisation for remainder of source. #pragma _CRI opt double getclock() { double time; // Returns a value in seconds of the time elapsed from some arbitrary, // but consistent point. double omp_get_wtime(void); time = omp_get_wtime(); return time; } int returnfalse() { return 0; }