1
0
Fork 0
mirror of https://github.com/hermitcore/libhermit.git synced 2025-03-09 00:00:03 +01:00
libhermit/usr/ircce/RCCE.h

384 lines
14 KiB
C
Raw Permalink Normal View History

//
// Copyright 2010 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// [2010-10-25] added support for non-blocking send/recv operations
// - RCCE_isend(), ..._test(), ..._wait(), ..._push()
// - RCCE_irecv(), ..._test(), ..._wait(), ..._push()
// by Carsten Clauss, Chair for Operating Systems,
// RWTH Aachen University
//
// [2012-09-10] added support for "tagged" flags
// - RCCE_send_tagged(), RCCE_recv_tagged(), RCCE_recv_probe_tagged()
// by Carsten Clauss, Chair for Operating Systems,
// RWTH Aachen University
//
// [2015-10-18] port (i)RCCE to "HermitCore"
// by Stefan Lankes, Institute for Automation of Complex Power Systems
// RWTH Aachen University
#ifndef RCCE_H
#define RCCE_H
#include <stdlib.h>
#include <stdio.h>
// HermitCore build configuration: when __hermit__ is defined, the SCC,
// COPPERRIDGE, USE_REMOTE_PUT_LOCAL_GET and USE_PROBE_FLAGS switches are
// turned on and SHMADD is forced off.
#ifdef __hermit__
#define SCC
#define COPPERRIDGE
#define USE_REMOTE_PUT_LOCAL_GET
#define USE_PROBE_FLAGS
#undef SHMADD
#endif
// Version string of this RCCE release.
#define _RCCE "1.0.13 release"
// Optional build switches, disabled by default (they are tested by the
// RCCE_FLAG / RCCE_FLAG_STATUS typedefs in this header):
// #define USE_BYTE_FLAGS
// #define USE_FLAG_EXPERIMENTAL
// Absolute value of x. Every use of the argument is parenthesized so that
// compound arguments such as ABS(a - b) expand correctly. The argument is
// still evaluated twice, so it must not have side effects.
#define ABS(x) (((x) > 0) ? (x) : -(x))
// little trick to allow the application to be called "RCCE_APP" under
// OpenMP, and "main" otherwise
#if !defined(_OPENMP) || defined(__hermit__)
#define RCCE_APP main
#endif
// modify next line for Intel BareMetal, which supports stdout, but not stderr
// (STDERR is currently an alias for stdout)
#define STDERR stdout
// RCCE_LINE_SIZE is 2^LOG2_LINE_SIZE: 64 when building for HermitCore,
// 32 otherwise.
#if !defined(__hermit__)
#  define LOG2_LINE_SIZE 5
#else
#  define LOG2_LINE_SIZE 6
#endif
#define RCCE_LINE_SIZE (1 << LOG2_LINE_SIZE)
// RCCE_BUFF_SIZE_MAX is space per UE, which is half of the space per tile
// (65536 when building for HermitCore, 8192 otherwise)
#ifdef __hermit__
#define RCCE_BUFF_SIZE_MAX (64*1024)
#else
#define RCCE_BUFF_SIZE_MAX (1<<13)
#endif
// RCCE_SHM_SIZE_MAX: upper bound for the shared-memory size, selected by the
// SHMADD and SCC_COUPLED_SYSTEMS build switches.
#if defined(SHMADD)
// With SHMADD the limit is picked by hand; the alternatives are kept for
// reference:
//   64MB:  #define RCCE_SHM_SIZE_MAX 0x4000000
//   128MB: #define RCCE_SHM_SIZE_MAX 0x8000000
//   256MB: #define RCCE_SHM_SIZE_MAX 0x10000000
//   960MB: #define RCCE_SHM_SIZE_MAX 0x3C000000
// 512MB
#  define RCCE_SHM_SIZE_MAX 0x20000000
#elif !defined(SCC_COUPLED_SYSTEMS)
// 64MB
#  define RCCE_SHM_SIZE_MAX (1<<26)
#else
// In Coupled Mode only 4MB
#  define RCCE_SHM_SIZE_MAX (1<<22)
#endif
// Limits on the number of boards and of UEs per board; RCCE_MAXNP is their
// product and sizes the per-UE arrays declared in this header.
#if !defined(__hermit__)
#  define RCCE_MAX_BOARDS 2 /* allow up to 2 SCC boards for now */
#  define RCCE_MAXNP_PER_BOARD 48
#else
#  define RCCE_MAX_BOARDS 1
#  define RCCE_MAXNP_PER_BOARD 8
#endif
#define RCCE_MAXNP (RCCE_MAX_BOARDS * RCCE_MAXNP_PER_BOARD)
// Non-error status codes: RCCE_SUCCESS is zero, the others are small negative
// values. The negative replacement lists are parenthesized so that each macro
// always expands to a single primary expression.
#define RCCE_SUCCESS 0
#define RCCE_PENDING (-1)
#define RCCE_RESERVED (-2)
#define RCCE_REJECTED (-3)
// Error codes: consecutive values counted upwards from RCCE_ERROR_BASE
// (RCCE_ERROR_BASE + 1 .. RCCE_ERROR_BASE + 28).
#define RCCE_ERROR_BASE 1234321
#define RCCE_ERROR_TARGET (RCCE_ERROR_BASE + 1)
#define RCCE_ERROR_SOURCE (RCCE_ERROR_BASE + 2)
#define RCCE_ERROR_ID (RCCE_ERROR_BASE + 3)
#define RCCE_ERROR_MESSAGE_LENGTH (RCCE_ERROR_BASE + 4)
#define RCCE_ERROR_FLAG_UNDEFINED (RCCE_ERROR_BASE + 5)
#define RCCE_ERROR_NUM_UES (RCCE_ERROR_BASE + 6)
#define RCCE_ERROR_DATA_OVERLAP (RCCE_ERROR_BASE + 7)
#define RCCE_ERROR_ALIGNMENT (RCCE_ERROR_BASE + 8)
#define RCCE_ERROR_DEBUG_FLAG (RCCE_ERROR_BASE + 9)
#define RCCE_ERROR_FLAG_NOT_IN_COMM_BUFFER (RCCE_ERROR_BASE + 10)
#define RCCE_ERROR_FLAG_STATUS_UNDEFINED (RCCE_ERROR_BASE + 11)
#define RCCE_ERROR_FLAG_NOT_ALLOCATED (RCCE_ERROR_BASE + 12)
#define RCCE_ERROR_VAL_UNDEFINED (RCCE_ERROR_BASE + 13)
#define RCCE_ERROR_INVALID_ERROR_CODE (RCCE_ERROR_BASE + 14)
#define RCCE_ERROR_RPC_NOT_ALLOCATED (RCCE_ERROR_BASE + 15)
#define RCCE_ERROR_RPC_INTERNAL (RCCE_ERROR_BASE + 16)
#define RCCE_ERROR_MULTIPLE_RPC_REQUESTS (RCCE_ERROR_BASE + 17)
#define RCCE_ERROR_FDIVIDER (RCCE_ERROR_BASE + 18)
#define RCCE_ERROR_FREQUENCY_EXCEEDED (RCCE_ERROR_BASE + 19)
#define RCCE_ERROR_NO_ACTIVE_RPC_REQUEST (RCCE_ERROR_BASE + 20)
#define RCCE_ERROR_STALE_RPC_REQUEST (RCCE_ERROR_BASE + 21)
#define RCCE_ERROR_COMM_UNDEFINED (RCCE_ERROR_BASE + 22)
#define RCCE_ERROR_ILLEGAL_OP (RCCE_ERROR_BASE + 23)
#define RCCE_ERROR_ILLEGAL_TYPE (RCCE_ERROR_BASE + 24)
#define RCCE_ERROR_MALLOC (RCCE_ERROR_BASE + 25)
#define RCCE_ERROR_COMM_INITIALIZED (RCCE_ERROR_BASE + 26)
#define RCCE_ERROR_CORE_NOT_IN_HOSTFILE (RCCE_ERROR_BASE + 27)
#define RCCE_ERROR_NO_MULTICAST_SUPPORT (RCCE_ERROR_BASE + 28)
// NOTE(review): presumably the maximum length of a message produced by
// RCCE_error_string() -- confirm against the implementation.
#define RCCE_MAX_ERROR_STRING 45
// Debug category selectors.
// NOTE(review): presumably the values accepted by RCCE_debug_set() and
// RCCE_debug_unset() -- confirm against the implementation.
#define RCCE_DEBUG_ALL 111111
#define RCCE_DEBUG_SYNCH 111444
#define RCCE_DEBUG_COMM 111555
#define RCCE_DEBUG_RPC 111666
#define RCCE_DEBUG_DEBUG 111888
// The two flag values.
// NOTE(review): presumably the states passed as RCCE_FLAG_STATUS to the
// RCCE_flag_* calls -- confirm against the implementation.
#define RCCE_FLAG_SET 1
#define RCCE_FLAG_UNSET 0
// Operation identifiers: RCCE_NUM_OPS (4) consecutive values counted from
// RCCE_OP_BASE.
// NOTE(review): presumably the "op" argument of RCCE_reduce() and
// RCCE_allreduce() -- confirm against the implementation.
#define RCCE_NUM_OPS 4
#define RCCE_OP_BASE 23232323
#define RCCE_SUM (RCCE_OP_BASE)
#define RCCE_MIN (RCCE_OP_BASE+1)
#define RCCE_MAX (RCCE_OP_BASE+2)
#define RCCE_PROD (RCCE_OP_BASE+3)
// Element type identifiers: consecutive values counted from RCCE_TYPE_BASE.
// NOTE(review): presumably the "type" argument of the same reduction calls
// -- confirm against the implementation.
#define RCCE_TYPE_BASE 63636363
#define RCCE_INT (RCCE_TYPE_BASE)
#define RCCE_LONG (RCCE_TYPE_BASE+1)
#define RCCE_FLOAT (RCCE_TYPE_BASE+2)
#define RCCE_DOUBLE (RCCE_TYPE_BASE+3)
// MPB pointer type: pointer to volatile unsigned bytes
typedef unsigned char volatile *t_vcharp;
// RCCE_FLAG: representation of a flag, chosen at build time.
//  - USE_FLAG_EXPERIMENTAL:           pointer to a volatile byte
//  - SINGLEBITFLAGS / USE_BYTE_FLAGS: descriptor locating the flag in a line
//  - otherwise:                       pointer to a volatile ssize_t
// NOTE(review): ssize_t is a POSIX type and this header only includes
// <stdlib.h> and <stdio.h> -- confirm it is visible on every target.
#if defined(USE_FLAG_EXPERIMENTAL)
typedef volatile unsigned char *RCCE_FLAG;
#elif defined(SINGLEBITFLAGS) || defined(USE_BYTE_FLAGS)
typedef struct {
int location; /* location of bit within line (0-255) */
t_vcharp flag_addr; /* address of byte containing flag inside cache line */
t_vcharp line_address; /* start of cache line containing flag */
} RCCE_FLAG;
#else
typedef volatile ssize_t *RCCE_FLAG;
#endif
// RCCE_FLAG_STATUS: value type of a flag; a single byte in the experimental
// flag implementation, ssize_t otherwise.
#if !defined(USE_FLAG_EXPERIMENTAL)
typedef ssize_t RCCE_FLAG_STATUS;
#else
typedef unsigned char RCCE_FLAG_STATUS;
#endif
// Communicator descriptor, passed to the RCCE_comm_* and barrier functions.
// NOTE(review): the field notes marked "presumably" are inferred from the
// names only -- confirm against the implementation.
typedef struct {
int size; // presumably the number of UEs in the communicator
int my_rank; // presumably the calling UE's rank inside the communicator
int initialized;
int member[RCCE_MAXNP]; // one slot per possible UE (RCCE_MAXNP entries)
#ifdef USE_FAT_BARRIER
RCCE_FLAG gather[RCCE_MAXNP]; // USE_FAT_BARRIER: one gather flag per possible UE
#else
RCCE_FLAG gather; // default: a single gather flag
#endif
RCCE_FLAG release;
volatile int cycle;
volatile int count;
int step;
int label;
} RCCE_COMM;
// Request record of a non-blocking send; this is the type taken by
// RCCE_isend(), RCCE_isend_test() and RCCE_isend_wait().
typedef struct _RCCE_SEND_REQUEST {
char *privbuf; // source buffer in local private memory (send buffer)
t_vcharp combuf; // intermediate buffer in MPB
size_t chunk; // size of MPB available for this message (bytes)
RCCE_FLAG *ready; // flag indicating whether receiver is ready
RCCE_FLAG *sent; // flag indicating whether message has been sent by source
size_t size; // size of message (bytes)
int dest; // UE that will receive the message
int copy; // set to 0 for synchronization only (no copying/sending)
void* tag; // additional tag?
int len; // length of additional tag
RCCE_FLAG *probe; // flag for probing for incoming messages
size_t wsize; // offset within send buffer when putting in "chunk" bytes
size_t remainder; // bytes remaining to be sent
size_t nbytes; // number of bytes to be sent in single RCCE_put call
char *bufptr; // running pointer inside privbuf for current location
int label; // jump/goto label for the reentrance of the respective poll function
int finished; // flag that indicates whether the request has already been finished
struct _RCCE_SEND_REQUEST *next; // link to another send request (presumably the next one in a pending list)
} RCCE_SEND_REQUEST;
// Request record of a non-blocking receive; this is the type taken by
// RCCE_irecv(), RCCE_irecv_test() and RCCE_irecv_wait().
// NOTE(review): several field comments were copied verbatim from
// RCCE_SEND_REQUEST and spoke of "send buffer"/"sent"; they are reworded for
// the receive side below -- confirm against the implementation.
typedef struct _RCCE_RECV_REQUEST {
char *privbuf; // buffer in local private memory (presumably the receive destination)
t_vcharp combuf; // intermediate buffer in MPB
size_t chunk; // size of MPB available for this message (bytes)
RCCE_FLAG *ready; // flag indicating whether receiver is ready
RCCE_FLAG *sent; // flag indicating whether message has been sent by source
size_t size; // size of message (bytes)
int source; // UE that will send the message
int copy; // set to 0 for cancel function
void* tag; // additional tag?
int len; // length of additional tag
RCCE_FLAG *probe; // flag for probing for incoming messages
size_t wsize; // offset within the private buffer when transferring "chunk" bytes
size_t remainder; // bytes remaining to be transferred
size_t nbytes; // number of bytes to be transferred in a single call (presumably RCCE_get)
char *bufptr; // running pointer inside privbuf for current location
int label; // jump/goto label for the reentrance of the respective poll function
int finished; // flag that indicates whether the request has already been finished
struct _RCCE_RECV_REQUEST *next; // link to another receive request (presumably the next one in a pending list)
} RCCE_RECV_REQUEST;
// Per-UE node of a tree over the UEs; this is the type taken by
// RCCE_tree_init() and RCCE_tree_barrier().
typedef struct tree_s {
int parent; // UE of parent
int num_children; // presumably the number of valid entries in child[]
int child[RCCE_MAXNP]; // UEs of children
} tree_t;
// Power-management API; only declared when RC_POWER_MANAGEMENT is defined.
#ifdef RC_POWER_MANAGEMENT
// Request record used by RCCE_iset_power() and RCCE_wait_power().
// NOTE(review): field meanings are inferred from the names -- confirm
// against the implementation.
typedef struct{
int release;
int old_voltage_level;
int new_voltage_level;
int old_frequency_divider;
int new_frequency_divider;
long long start_cycle;
} RCCE_REQUEST;
int RCCE_power_domain(void);
int RCCE_iset_power(int, RCCE_REQUEST *, int *, int *);
int RCCE_wait_power(RCCE_REQUEST *);
int RCCE_set_frequency_divider(int, int *);
int RCCE_power_domain_master(void);
int RCCE_power_domain_size(void);
#endif
// Core library entry points.
// RCCE_init() takes pointers to an int and to a char** (presumably the
// addresses of main()'s argc/argv -- confirm against the implementation).
int RCCE_init(int *, char***);
int RCCE_finalize(void);
// Returns a double (presumably a wall-clock time stamp -- confirm).
double RCCE_wtime(void);
// NOTE(review): by their names, the calling UE's id and the total number of
// UEs -- confirm against the implementation.
int RCCE_ue(void);
int RCCE_num_ues(void);
// Device queries; only declared when SCC_COUPLED_SYSTEMS is defined.
// NOTE(review): presumably "dev" refers to one of the coupled SCC boards
// -- confirm against the implementation.
#ifdef SCC_COUPLED_SYSTEMS
int RCCE_dev(void);
int RCCE_dev_ue(void);
int RCCE_num_dev(void);
int RCCE_num_ues_dev(int);
int RCCE_ue_to_dev(int);
#endif
// Communication and flag API. Two mutually exclusive variants are declared:
//  - GORY defined: RCCE_send/RCCE_recv/RCCE_recv_test take the MPB buffer
//    (t_vcharp) and the RCCE_FLAG pointers explicitly, and the allocation,
//    put/get and flag primitives are declared as well.
//  - GORY not defined: the short (buffer, size, UE) signatures are declared,
//    together with the _pipe, _mcast, _tagged, probe/cancel, reduce/bcast and
//    isend/irecv families.
// RCCE_malloc, RCCE_flag_write and RCCE_flag_read are declared with the same
// signature in both variants.
#ifdef GORY
t_vcharp RCCE_malloc(size_t);
t_vcharp RCCE_malloc_request(size_t, size_t *);
t_vcharp RCCE_palloc(size_t,int);
void RCCE_free(t_vcharp);
int RCCE_put(t_vcharp, t_vcharp, int, int);
int RCCE_get(t_vcharp, t_vcharp, int, int);
int RCCE_wait_until(RCCE_FLAG, RCCE_FLAG_STATUS);
int RCCE_test_flag(RCCE_FLAG, RCCE_FLAG_STATUS, int *);
int RCCE_flag_alloc(RCCE_FLAG *);
int RCCE_flag_free(RCCE_FLAG *);
int RCCE_flag_write(RCCE_FLAG *, RCCE_FLAG_STATUS, int);
int RCCE_flag_read(RCCE_FLAG, RCCE_FLAG_STATUS *, int);
int RCCE_flag_write_tagged(RCCE_FLAG *, RCCE_FLAG_STATUS, int, char*, int);
int RCCE_flag_read_tagged(RCCE_FLAG, RCCE_FLAG_STATUS *, int, char*, int);
int RCCE_send(char *, t_vcharp, size_t, RCCE_FLAG *, RCCE_FLAG *, size_t, int);
int RCCE_recv(char *, t_vcharp, size_t, RCCE_FLAG *, RCCE_FLAG *, size_t, int, RCCE_FLAG *);
int RCCE_recv_test(char *, t_vcharp, size_t, RCCE_FLAG *, RCCE_FLAG *, size_t, int, int *, RCCE_FLAG *);
// flag-specific put/get, only with the experimental flag implementation
#ifdef USE_FLAG_EXPERIMENTAL
int RCCE_put_flag(t_vcharp, t_vcharp, int, int);
int RCCE_get_flag(t_vcharp, t_vcharp, int, int);
#endif
#else
// standard non-gory functions:
t_vcharp RCCE_malloc(size_t);
int RCCE_flag_write(RCCE_FLAG *, RCCE_FLAG_STATUS, int);
int RCCE_flag_read(RCCE_FLAG, RCCE_FLAG_STATUS *, int);
int RCCE_send(char *, size_t, int);
int RCCE_recv(char *, size_t, int);
int RCCE_recv_test(char *, size_t, int, int *);
int RCCE_send_pipe(char *, size_t, int);
int RCCE_recv_pipe(char *, size_t, int);
int RCCE_send_mcast(char *, size_t);
int RCCE_recv_mcast(char *, size_t, int);
// "tagged" variants: the trailing (void *, int) pair is presumably the tag
// and its length, matching the tag/len fields of the request structures
int RCCE_send_tagged(char *, size_t, int, void *, int);
int RCCE_recv_tagged(char *, size_t, int, void *, int);
int RCCE_recv_probe_tagged(int, int *, t_vcharp *, void *, int);
// collectives; the communicator is passed by value
int RCCE_allreduce(char *, char *, int, int, int, RCCE_COMM);
int RCCE_reduce(char *, char *, int, int, int, int, RCCE_COMM);
int RCCE_bcast(char *, size_t, int, RCCE_COMM);
int RCCE_recv_probe(int, int *, t_vcharp *);
int RCCE_recv_cancel(size_t, int);
// non-blocking send/recv: start (isend/irecv), then _test, _wait or _push
int RCCE_isend(char *, size_t, int, RCCE_SEND_REQUEST *);
int RCCE_isend_test(RCCE_SEND_REQUEST *, int *);
int RCCE_isend_wait(RCCE_SEND_REQUEST *);
int RCCE_isend_push(int);
int RCCE_irecv(char *, size_t, int, RCCE_RECV_REQUEST *);
int RCCE_irecv_test(RCCE_RECV_REQUEST *, int *);
int RCCE_irecv_wait(RCCE_RECV_REQUEST *);
int RCCE_irecv_push(int);
#endif
// RCCE_sh* routines: allocate, free, flush and reallocate regions addressed
// through t_vcharp (presumably the off-chip shared memory bounded by
// RCCE_SHM_SIZE_MAX -- confirm against the implementation).
t_vcharp RCCE_shmalloc(size_t);
void RCCE_shfree(t_vcharp);
void RCCE_shflush(void);
t_vcharp RCCE_shrealloc(t_vcharp, size_t);
// LfBS-customized functions:
// memcpy-shaped copy routines (destination, source, byte count) in a "get"
// and a "put" flavour
void* RCCE_memcpy_get(void *, const void *, size_t);
void* RCCE_memcpy_put(void *, const void *, size_t);
// RCCE_memcpy is an alias for the "put" flavour
#define RCCE_memcpy(a,b,c) RCCE_memcpy_put(a,b,c)
// Communicator management. RCCE_comm_split() takes a callback of type
// int (*)(int, void *), an opaque void * and the communicator to fill in
// (presumably the callback maps a UE to its group -- confirm against the
// implementation).
int RCCE_comm_split(int (*)(int, void *), void *, RCCE_COMM *);
int RCCE_comm_free(RCCE_COMM *);
// size/rank take the communicator by value and report through the int *
int RCCE_comm_size(RCCE_COMM, int *);
int RCCE_comm_rank(RCCE_COMM, int *);
// Synchronization: fence plus the barrier variants, all operating on a
// communicator passed by pointer
void RCCE_fence(void);
int RCCE_barrier(RCCE_COMM *);
int RCCE_tree_init(RCCE_COMM *, tree_t *, int);
int RCCE_tree_barrier(RCCE_COMM *, tree_t *);
int RCCE_tournament_barrier(RCCE_COMM *);
int RCCE_tournament_fixed_barrier(RCCE_COMM *);
int RCCE_dissemination_barrier(RCCE_COMM *);
int RCCE_TNS_barrier(RCCE_COMM *);
int RCCE_AIR_barrier(RCCE_COMM *);
int RCCE_AIR_barrier2(RCCE_COMM *);
// "nb" variants (presumably non-blocking -- confirm)
int RCCE_nb_barrier(RCCE_COMM *);
int RCCE_nb_TNS_barrier(RCCE_COMM *);
int RCCE_nb_AIR_barrier(RCCE_COMM *);
// Error reporting and debug control
// (presumably: error code in, message buffer and its length out -- confirm)
int RCCE_error_string(int, char *, int *);
int RCCE_debug_set(int);
int RCCE_debug_unset(int);
// Global communicator objects; they are defined elsewhere and only declared
// here.
extern RCCE_COMM RCCE_COMM_WORLD;
#ifdef RC_POWER_MANAGEMENT
extern RCCE_COMM RCCE_P_COMM;
// Parenthesized so the negative value always expands to a single primary
// expression.
#define RCCE_POWER_DEFAULT (-99999)
#endif
// Under OpenMP (except on HermitCore) every thread gets its own copy of the
// communicators.
#if defined(_OPENMP) && !defined(__hermit__)
#pragma omp threadprivate (RCCE_COMM_WORLD)
#ifdef RC_POWER_MANAGEMENT
#pragma omp threadprivate (RCCE_P_COMM)
#endif
#endif
#endif