mirror of
https://github.com/hermitcore/libhermit.git
synced 2025-03-09 00:00:03 +01:00
302 lines
7 KiB
C
302 lines
7 KiB
C
/****************************************************************************
|
|
* *
|
|
* OpenMP MicroBenchmark Suite - Version 3.1 *
|
|
* *
|
|
* produced by *
|
|
* *
|
|
* Mark Bull, Fiona Reid and Nix Mc Donnell *
|
|
* *
|
|
* at *
|
|
* *
|
|
* Edinburgh Parallel Computing Centre *
|
|
* *
|
|
* email: markb@epcc.ed.ac.uk or fiona@epcc.ed.ac.uk *
|
|
* *
|
|
* *
|
|
* This version copyright (c) The University of Edinburgh, 2015. *
|
|
* *
|
|
* *
|
|
* Licensed under the Apache License, Version 2.0 (the "License"); *
|
|
* you may not use this file except in compliance with the License. *
|
|
* You may obtain a copy of the License at *
|
|
* *
|
|
* http://www.apache.org/licenses/LICENSE-2.0 *
|
|
* *
|
|
* Unless required by applicable law or agreed to in writing, software *
|
|
* distributed under the License is distributed on an "AS IS" BASIS, *
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
|
|
* See the License for the specific language governing permissions and *
|
|
* limitations under the License. *
|
|
* *
|
|
****************************************************************************/
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <math.h>
|
|
#include <omp.h>
|
|
#include <xray.h>
|
|
|
|
#include "common.h"
|
|
#include "syncbench.h"
|
|
|
|
omp_lock_t lock;
|
|
|
|
int main(int argc, char **argv) {
|
|
|
|
struct XRayTraceCapture* trace = XRayInit(
|
|
20, // max. call depth
|
|
16 * 1000 * 1000, // memory for report
|
|
13, // frame count
|
|
"syncbench.map");
|
|
|
|
// Start Paraver tracing
|
|
#ifdef PARAVERTRACE
|
|
Extrae_init();
|
|
#endif
|
|
|
|
init(argc, argv);
|
|
|
|
omp_init_lock(&lock);
|
|
|
|
/* GENERATE REFERENCE TIME */
|
|
XRayStartFrame(trace);
|
|
reference("reference time 1", &refer);
|
|
XRayEndFrame(trace);
|
|
|
|
/* TEST PARALLEL REGION */
|
|
XRayStartFrame(trace);
|
|
benchmark("PARALLEL", &testpr);
|
|
XRayEndFrame(trace);
|
|
|
|
/* TEST FOR */
|
|
XRayStartFrame(trace);
|
|
benchmark("FOR", &testfor);
|
|
XRayEndFrame(trace);
|
|
|
|
/* TEST PARALLEL FOR */
|
|
XRayStartFrame(trace);
|
|
benchmark("PARALLEL FOR", &testpfor);
|
|
XRayEndFrame(trace);
|
|
|
|
/* TEST BARRIER */
|
|
XRayStartFrame(trace);
|
|
benchmark("BARRIER", &testbar);
|
|
XRayEndFrame(trace);
|
|
|
|
/* TEST SINGLE */
|
|
XRayStartFrame(trace);
|
|
benchmark("SINGLE", &testsing);
|
|
XRayEndFrame(trace);
|
|
|
|
/* TEST CRITICAL*/
|
|
XRayStartFrame(trace);
|
|
benchmark("CRITICAL", &testcrit);
|
|
XRayEndFrame(trace);
|
|
|
|
/* TEST LOCK/UNLOCK */
|
|
XRayStartFrame(trace);
|
|
benchmark("LOCK/UNLOCK", &testlock);
|
|
XRayEndFrame(trace);
|
|
|
|
/* TEST ORDERED SECTION */
|
|
XRayStartFrame(trace);
|
|
benchmark("ORDERED", &testorder);
|
|
XRayEndFrame(trace);
|
|
|
|
/* GENERATE NEW REFERENCE TIME */
|
|
XRayStartFrame(trace);
|
|
reference("reference time 2", &referatom);
|
|
XRayEndFrame(trace);
|
|
|
|
/* TEST ATOMIC */
|
|
XRayStartFrame(trace);
|
|
benchmark("ATOMIC", &testatom);
|
|
XRayEndFrame(trace);
|
|
|
|
/* GENERATE NEW REFERENCE TIME */
|
|
XRayStartFrame(trace);
|
|
reference("reference time 3", &referred);
|
|
XRayEndFrame(trace);
|
|
|
|
/* TEST REDUCTION (1 var) */
|
|
XRayStartFrame(trace);
|
|
benchmark("REDUCTION", &testred);
|
|
XRayEndFrame(trace);
|
|
|
|
#ifdef PARAVERTRACE
|
|
Extrae_fini();
|
|
#endif
|
|
|
|
XRaySaveReport(trace,
|
|
"syncbench.xray", // report file
|
|
0.05f, // Only output funcs that have higher runtime [%]
|
|
1000); // Only output funcs that have higher runtime [cycles]
|
|
XRayShutdown(trace);
|
|
|
|
finalise();
|
|
|
|
return EXIT_SUCCESS;
|
|
}
|
|
|
|
void refer() {
|
|
int j;
|
|
for (j = 0; j < innerreps; j++) {
|
|
delay(delaylength);
|
|
}
|
|
}
|
|
|
|
void referatom(){
|
|
int j;
|
|
double aaaa = 0.0;
|
|
double epsilon = 1.0e-15;
|
|
double b, c;
|
|
b = 1.0;
|
|
c = (1.0 + epsilon);
|
|
for (j = 0; j < innerreps; j++) {
|
|
aaaa += b;
|
|
b *= c;
|
|
}
|
|
if (aaaa < 0.0)
|
|
printf("%f\n", aaaa);
|
|
}
|
|
|
|
void referred() {
|
|
int j;
|
|
int aaaa = 0;
|
|
for (j = 0; j < innerreps; j++) {
|
|
delay(delaylength);
|
|
aaaa += 1;
|
|
}
|
|
}
|
|
|
|
void testpr() {
|
|
int j;
|
|
#ifdef XRAY
|
|
static int n = 1;
|
|
XRayAnnotate("n = %i", n);
|
|
n++;
|
|
#endif
|
|
for (j = 0; j < innerreps; j++) {
|
|
#pragma omp parallel
|
|
{
|
|
delay(delaylength);
|
|
}
|
|
}
|
|
}
|
|
|
|
void testfor() {
|
|
int i, j;
|
|
#pragma omp parallel private(j)
|
|
{
|
|
for (j = 0; j < innerreps; j++) {
|
|
#pragma omp for
|
|
for (i = 0; i < nthreads; i++) {
|
|
delay(delaylength);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void testpfor() {
|
|
int i, j;
|
|
#ifdef XRAY
|
|
static int n = 1;
|
|
XRayAnnotate("n = %i", n);
|
|
n++;
|
|
#endif
|
|
for (j = 0; j < innerreps; j++) {
|
|
#pragma omp parallel for
|
|
for (i = 0; i < nthreads; i++) {
|
|
delay(delaylength);
|
|
}
|
|
}
|
|
}
|
|
|
|
void testbar() {
|
|
int j;
|
|
#pragma omp parallel private(j)
|
|
{
|
|
for (j = 0; j < innerreps; j++) {
|
|
delay(delaylength);
|
|
#pragma omp barrier
|
|
}
|
|
}
|
|
}
|
|
|
|
void testsing() {
|
|
int j;
|
|
#pragma omp parallel private(j)
|
|
{
|
|
for (j = 0; j < innerreps; j++) {
|
|
#pragma omp single
|
|
delay(delaylength);
|
|
}
|
|
}
|
|
}
|
|
|
|
void testcrit() {
|
|
int j;
|
|
#pragma omp parallel private(j)
|
|
{
|
|
for (j = 0; j < innerreps / nthreads; j++) {
|
|
#pragma omp critical
|
|
{
|
|
delay(delaylength);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void testlock() {
|
|
int j;
|
|
|
|
#pragma omp parallel private(j)
|
|
{
|
|
for (j = 0; j < innerreps / nthreads; j++) {
|
|
omp_set_lock(&lock);
|
|
delay(delaylength);
|
|
omp_unset_lock(&lock);
|
|
}
|
|
}
|
|
}
|
|
|
|
void testorder() {
|
|
int j;
|
|
#pragma omp parallel for ordered schedule (static,1)
|
|
for (j = 0; j < (int)innerreps; j++) {
|
|
#pragma omp ordered
|
|
delay(delaylength);
|
|
}
|
|
}
|
|
|
|
void testatom() {
|
|
int j;
|
|
double aaaa = 0.0;
|
|
double epsilon = 1.0e-15;
|
|
double b,c;
|
|
b = 1.0;
|
|
c = (1.0 + epsilon);
|
|
#pragma omp parallel private(j) firstprivate(b)
|
|
{
|
|
for (j = 0; j < innerreps / nthreads; j++) {
|
|
#pragma omp atomic
|
|
aaaa += b;
|
|
b *= c;
|
|
}
|
|
}
|
|
if (aaaa < 0.0)
|
|
printf("%f\n", aaaa);
|
|
}
|
|
|
|
void testred() {
|
|
int j;
|
|
int aaaa = 0;
|
|
for (j = 0; j < innerreps; j++) {
|
|
#pragma omp parallel reduction(+:aaaa)
|
|
{
|
|
delay(delaylength);
|
|
aaaa += 1;
|
|
}
|
|
}
|
|
}
|
|
|