From e339311d05371bc91cc5b94869dfe2788ada4860 Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Sat, 17 Oct 2015 11:56:19 +0200 Subject: [PATCH] add iRCCE as communication library between isles - iRCCE was designed for the SCC - see http://www.lfbs.rwth-aachen.de/publications/files/iRCCE.pdf - HermitCreate creates virtual message passing buffers to emulate the SCC --- hermit/usr/Makefile | 5 + hermit/usr/ircce/Makefile | 43 + hermit/usr/ircce/RCCE.h | 374 ++++++++ hermit/usr/ircce/RCCE_admin.c | 1354 +++++++++++++++++++++++++++++ hermit/usr/ircce/RCCE_bcast.c | 73 ++ hermit/usr/ircce/RCCE_comm.c | 159 ++++ hermit/usr/ircce/RCCE_debug.c | 163 ++++ hermit/usr/ircce/RCCE_debug.h | 26 + hermit/usr/ircce/RCCE_flags.c | 488 +++++++++++ hermit/usr/ircce/RCCE_get.c | 169 ++++ hermit/usr/ircce/RCCE_lib.h | 382 ++++++++ hermit/usr/ircce/RCCE_malloc.c | 255 ++++++ hermit/usr/ircce/RCCE_put.c | 165 ++++ hermit/usr/ircce/RCCE_qsort.c | 131 +++ hermit/usr/ircce/RCCE_recv.c | 1350 ++++++++++++++++++++++++++++ hermit/usr/ircce/RCCE_reduce.c | 179 ++++ hermit/usr/ircce/RCCE_send.c | 992 +++++++++++++++++++++ hermit/usr/ircce/RCCE_synch.c | 613 +++++++++++++ hermit/usr/ircce/iRCCE.h | 290 ++++++ hermit/usr/ircce/iRCCE_admin.c | 195 +++++ hermit/usr/ircce/iRCCE_atomic.c | 195 +++++ hermit/usr/ircce/iRCCE_get.c | 78 ++ hermit/usr/ircce/iRCCE_irecv.c | 709 +++++++++++++++ hermit/usr/ircce/iRCCE_isend.c | 411 +++++++++ hermit/usr/ircce/iRCCE_lib.h | 62 ++ hermit/usr/ircce/iRCCE_mcast.c | 289 ++++++ hermit/usr/ircce/iRCCE_put.c | 82 ++ hermit/usr/ircce/iRCCE_srecv.c | 497 +++++++++++ hermit/usr/ircce/iRCCE_ssend.c | 282 ++++++ hermit/usr/ircce/iRCCE_synch.c | 279 ++++++ hermit/usr/ircce/iRCCE_waitlist.c | 324 +++++++ hermit/usr/ircce/syscall.h | 110 +++ 32 files changed, 10724 insertions(+) create mode 100644 hermit/usr/ircce/Makefile create mode 100644 hermit/usr/ircce/RCCE.h create mode 100644 hermit/usr/ircce/RCCE_admin.c create mode 100644 hermit/usr/ircce/RCCE_bcast.c create mode 
100644 hermit/usr/ircce/RCCE_comm.c create mode 100644 hermit/usr/ircce/RCCE_debug.c create mode 100644 hermit/usr/ircce/RCCE_debug.h create mode 100644 hermit/usr/ircce/RCCE_flags.c create mode 100644 hermit/usr/ircce/RCCE_get.c create mode 100644 hermit/usr/ircce/RCCE_lib.h create mode 100644 hermit/usr/ircce/RCCE_malloc.c create mode 100644 hermit/usr/ircce/RCCE_put.c create mode 100644 hermit/usr/ircce/RCCE_qsort.c create mode 100644 hermit/usr/ircce/RCCE_recv.c create mode 100644 hermit/usr/ircce/RCCE_reduce.c create mode 100644 hermit/usr/ircce/RCCE_send.c create mode 100644 hermit/usr/ircce/RCCE_synch.c create mode 100644 hermit/usr/ircce/iRCCE.h create mode 100644 hermit/usr/ircce/iRCCE_admin.c create mode 100644 hermit/usr/ircce/iRCCE_atomic.c create mode 100644 hermit/usr/ircce/iRCCE_get.c create mode 100644 hermit/usr/ircce/iRCCE_irecv.c create mode 100644 hermit/usr/ircce/iRCCE_isend.c create mode 100644 hermit/usr/ircce/iRCCE_lib.h create mode 100644 hermit/usr/ircce/iRCCE_mcast.c create mode 100644 hermit/usr/ircce/iRCCE_put.c create mode 100644 hermit/usr/ircce/iRCCE_srecv.c create mode 100644 hermit/usr/ircce/iRCCE_ssend.c create mode 100644 hermit/usr/ircce/iRCCE_synch.c create mode 100644 hermit/usr/ircce/iRCCE_waitlist.c create mode 100644 hermit/usr/ircce/syscall.h diff --git a/hermit/usr/Makefile b/hermit/usr/Makefile index 7f7c9906f..c76804f17 100644 --- a/hermit/usr/Makefile +++ b/hermit/usr/Makefile @@ -55,6 +55,9 @@ libs: $Q$(MAKE) TARGET=$(TARGET) CC_FOR_TARGET=$(CC_FOR_TARGET) AR_FOR_TARGET=$(AR_FOR_TARGET) CFLAGS_FOR_TARGET+="-I. -Iplatform/hermit -Iplatform/helper -Wall" -C pte $Q$(MAKE) TARGET=$(TARGET) CC_FOR_TARGET=$(CC_FOR_TARGET) AR_FOR_TARGET=$(AR_FOR_TARGET) CFLAGS_FOR_TARGET+="-I. -Wall -pthread" -C libgomp depend $Q$(MAKE) TARGET=$(TARGET) CC_FOR_TARGET=$(CC_FOR_TARGET) AR_FOR_TARGET=$(AR_FOR_TARGET) CFLAGS_FOR_TARGET+="-I. 
-Wall -pthread" -C libgomp + $Q$(MAKE) TARGET=$(TARGET) CC_FOR_TARGET=$(CC_FOR_TARGET) AR_FOR_TARGET=$(AR_FOR_TARGET) CFLAGS_FOR_TARGET+="-I. -Wall" -C ircce depend + $Q$(MAKE) TARGET=$(TARGET) CC_FOR_TARGET=$(CC_FOR_TARGET) AR_FOR_TARGET=$(AR_FOR_TARGET) CFLAGS_FOR_TARGET+="-I. -Wall" -C ircce + $(TMP)/gcc: @echo Build final gcc @@ -73,6 +76,8 @@ veryclean: @echo Propper cleaning of the toolchain $Q$(MAKE) -C pte veryclean $Q$(MAKE) -C libgomp veryclean + $Q$(MAKE) -C ircce veryclean $Q$(MAKE) -C tests veryclean + $Q$(MAKE) -C benchmarks veryclean $Q$(RM) $(TOPDIR)/$(ARCH) $Q$(RM) $(TMP) diff --git a/hermit/usr/ircce/Makefile b/hermit/usr/ircce/Makefile new file mode 100644 index 000000000..8c5352c45 --- /dev/null +++ b/hermit/usr/ircce/Makefile @@ -0,0 +1,43 @@ +NEWLIB = ../x86/x86_64-hermit +MAKE = make +ARFLAGS_FOR_TARGET = rsv +CP = cp +C_source = $(wildcard *.c) +NAME = libircce.a +OBJS = $(C_source:.c=.o) + +# +# Prettify output +V = 0 +ifeq ($V,0) + Q = @ + P = > /dev/null +endif + +# other implicit rules +%.o : %.c + @echo [CC] $@ + $Q$(CC_FOR_TARGET) -c $(CFLAGS_FOR_TARGET) -o $@ $< + +default: all + +all: $(NAME) + +$(NAME): $(OBJS) + $Q$(AR_FOR_TARGET) $(ARFLAGS_FOR_TARGET) $@ $(OBJS) + $Q$(CP) $@ $(NEWLIB)/lib + $Q$(CP) *.h $(NEWLIB)/include + +clean: + @echo Cleaning examples + $Q$(RM) $(NAME) *.o *~ + +veryclean: + @echo Propper cleaning examples + $Q$(RM) $(NAME) *.o *~ + +depend: + $Q$(CC_FOR_TARGET) -MM $(CFLAGS_FOR_TARGET) *.c > Makefile.dep + +-include Makefile.dep +# DO NOT DELETE diff --git a/hermit/usr/ircce/RCCE.h b/hermit/usr/ircce/RCCE.h new file mode 100644 index 000000000..b04479394 --- /dev/null +++ b/hermit/usr/ircce/RCCE.h @@ -0,0 +1,374 @@ +// +// Copyright 2010 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// [2010-10-25] added support for non-blocking send/recv operations +// - RCCE_isend(), ..._test(), ..._wait(), ..._push() +// - RCCE_irecv(), ..._test(), ..._wait(), ..._push() +// by Carsten Clauss, Chair for Operating Systems, +// RWTH Aachen University +// +// [2012-09-10] added support for "tagged" flags +// - RCCE_send_tagged(), RCCE_recv_tagged(), RCCE_recv_probe_tagged() +// by Carsten Clauss, Chair for Operating Systems, +// RWTH Aachen University +// +#ifndef RCCE_H +#define RCCE_H + +#include +#include + +#ifdef __hermit__ +#define SCC +#define COPPERRIDGE +#define USE_REMOTE_PUT_LOCAL_GET +#undef SHMADD +#endif + +#define _RCCE "1.0.13 release" +// #define USE_BYTE_FLAGS +// #define USE_FLAG_EXPERIMENTAL +// little trick to allow the application to be called "RCCE_APP" under +// OpenMP, and "main" otherwise + +#define ABS(x) ((x > 0)?x:-x) + +#ifndef _OPENMP + #define RCCE_APP main +#endif + +// modify next line for Intel BareMetal, which supports stdout, but not stdferr +#define STDERR stdout + +#ifdef __hermit__ +#define LOG2_LINE_SIZE 6 +#else +#define LOG2_LINE_SIZE 5 +#endif +#define RCCE_LINE_SIZE (1< + #include + #include + #include +#ifndef __hermit__ + #include + #include "SCC_API.h" +#else + #define RCCE_SESSION_ID 42 + #include "syscall.h" +#endif +#endif + #include + #include + #include + +// En-/ or disable debug prints... 
#define DEBUG 1
#define LOCKDEBUG 1

#undef SHMDBG

#ifdef __hermit__
// Atomically exchange the lock byte with 1 and return the byte's previous
// content. Callers in this file treat a non-zero result of Test_and_Set() as
// "lock obtained" and release a lock by storing 0 to the same address.
static inline int tas(t_vcharp lock)
{
  register unsigned char _res = 1;

  // xchgb both reads and writes *lock, so the memory operand must be "+m";
  // the "memory" clobber keeps the compiler from moving other memory
  // accesses across the lock operation
  asm volatile(
    "lock; xchgb %0,%1"
    : "=q"(_res), "+m"(*lock)
    : "0"(_res)
    : "memory");
  return (int) _res;
}
#define Test_and_Set(a) tas(virtual_lockaddress[a])
#elif defined(SCC)
// Test and Set method
#define Test_and_Set(a) ((*(virtual_lockaddress[a])) & 0x01)
#endif
// lower/upper bound of the randomized back-off window (loop iterations
// handed to RC_wait)
#define BACKOFF_MIN 8
#define BACKOFF_MAX 256

#ifdef __hermit__
// address at which the islelock_t shared by all isles is accessed
#define RCCE_BASE 0x401fe000ULL

// ticket lock: "queue" hands out tickets, "dequeue" holds the ticket that is
// currently allowed to enter
typedef struct islelock {
  // Internal queue
  int32_t queue;
  // Internal dequeue
  int32_t dequeue;
} islelock_t;

/*
 * Use an own implementation of "atomic_add_return" to guarantee
 * that the lock prefix is used.
 *
 * Atomically adds i to *d and returns the new value of *d.
 */
static inline int _hermit_atomic_add(int32_t *d, int i)
{
  int res = i;

  // xaddl leaves the old value of *d in the register operand and stores the
  // sum in memory; both operands are read and written
  asm volatile("lock; xaddl %0, %1" : "+r"(i), "+m"(*d) : : "memory", "cc");
  return res+i;
}

// Take a ticket and spin until it is served. Always returns 0.
static inline int islelock_lock(void)
{
  islelock_t* s = (islelock_t*) RCCE_BASE;
  int ticket;

  ticket = _hermit_atomic_add(&s->queue, 1);
  // "dequeue" is advanced by whoever releases the lock, so it has to be
  // re-read from memory on every iteration: use a volatile access and a
  // compiler barrier on the pause
  while(*(volatile int32_t*) &s->dequeue != ticket) {
    asm volatile ("pause" : : : "memory");
  }

  return 0;
}

// Serve the next ticket. Always returns 0.
static inline int islelock_unlock(void)
{
  islelock_t* s = (islelock_t*) RCCE_BASE;

  _hermit_atomic_add(&s->dequeue, 1);

  return 0;
}
#endif

//......................................................................................
// GLOBAL VARIABLES USED BY THE LIBRARY
//......................................................................................
+unsigned int next; +int RCCE_NP; // number of participating cores +int RCCE_DEVICE_NR; // device number of the scc board +int RCCE_NUM_DEVICES; // total number of scc boards involved +int RCCE_NUM_UES_DEVICE[RCCE_MAX_BOARDS]; // number of participating cores per board +int RCCE_UE_TO_DEVICE[RCCE_MAXNP]; // device id of each core +int RCCE_DEVICE_LOCAL_UE; // device-local core id +double RC_REFCLOCKGHZ; // baseline CPU frequency (GHz) +int RC_MY_COREID; // physical ID of calling core +int RC_COREID[RCCE_MAXNP]; // array of physical core IDs for all participating + // cores, sorted by rank +int RCCE_IAM=-1; // rank of calling core (invalid by default) +RCCE_COMM RCCE_COMM_WORLD; // predefined global communicator +int RCCE_BUFF_SIZE; // available MPB size +t_vcharp RCCE_comm_buffer[RCCE_MAXNP]; // starts of MPB, sorted by rank +#ifndef __hermit__ +//#ifdef USE_FLAG_EXPERIMENTAL +t_vcharp RCCE_flag_buffer[RCCE_MAXNP]; +//#endif +#endif +#ifndef GORY + // ......................... non-GORY communication mode ............................. 
+ // synchronization flags are predefined and maintained by the library + RCCE_FLAG RCCE_sent_flag[RCCE_MAXNP], RCCE_ready_flag[RCCE_MAXNP]; +#ifdef USE_PIPELINE_FLAGS + RCCE_FLAG RCCE_sent_flag_pipe[RCCE_MAXNP], RCCE_ready_flag_pipe[RCCE_MAXNP]; +#endif +#ifdef USE_PROBE_FLAGS + RCCE_FLAG RCCE_probe_flag[RCCE_MAXNP]; +#endif + RCCE_FLAG RCCE_barrier_flag[RCCE_MAXNP]; + RCCE_FLAG RCCE_barrier_release_flag; + // payload part of the MPBs starts at a specific address, not malloced space + t_vcharp RCCE_buff_ptr; + // maximum chunk size of message payload is also specified + size_t RCCE_chunk; + // synchronization flags will be allocated at this address + t_vcharp RCCE_flags_start; + +#ifndef USE_REMOTE_PUT_LOCAL_GET + // send request queue + RCCE_SEND_REQUEST* RCCE_send_queue; + // recv request queue + RCCE_RECV_REQUEST* RCCE_recv_queue[RCCE_MAXNP]; +#else + // send request queue + RCCE_SEND_REQUEST* RCCE_send_queue[RCCE_MAXNP]; + // recv request queue + RCCE_RECV_REQUEST* RCCE_recv_queue; +#endif + +#endif // !GORY + +#ifndef __hermit__ +t_vcharp RCCE_fool_write_combine_buffer; +#endif +// int air_counter = 0; + +#ifdef SCC + // virtual addresses of test&set registers + t_vcharp virtual_lockaddress[RCCE_MAXNP]; +#endif +//...................................................................................... +// END GLOBAL VARIABLES USED BY THE LIBRARY +//...................................................................................... + +#ifdef SCC +#ifndef __INTEL_COMPILER + inline volatile long long _rdtsc() { + register long long TSC __asm__("eax"); + __asm__ volatile (".byte 15, 49" : : : "eax", "edx"); + return TSC; + } +#endif +#endif + +//-------------------------------------------------------------------------------------- +// FUNCTION: RC_cache_invalidate +//-------------------------------------------------------------------------------------- +// invalidate (not flush!) 
lines in L1 that map to MPB lines +//-------------------------------------------------------------------------------------- +#ifndef __hermit__ +void RC_cache_invalidate() { +#ifdef SCC + __asm__ volatile ( ".byte 0x0f; .byte 0x0a;\n" ); // CL1FLUSHMB +#endif + return; +} +#endif + +static inline void RC_wait(int wait) { +#ifdef __hermit__ + asm volatile( "movq %%rax, %%rcx\n\t" + "L1: nop\n\t" + "loop L1" + : /* no output registers */ + : "a" (wait) + : "%rcx" ); +#else + asm volatile( "movl %%eax,%%ecx\n\t" + "L1: nop\n\t" + "loop L1" + : /* no output registers */ + : "a" (wait) + : "%ecx" ); + return; +#endif +} + +//-------------------------------------------------------------------------------------- +// FUNCTION: RC_COMM_BUFFER_SIZE +//-------------------------------------------------------------------------------------- +// return total available MPB size on chip +//-------------------------------------------------------------------------------------- +int RC_COMM_BUFFER_SIZE() { + return RCCE_BUFF_SIZE_MAX*RCCE_MAXNP; +} + +//-------------------------------------------------------------------------------------- +// FUNCTION: RC_COMM_BUFFER_START +//-------------------------------------------------------------------------------------- +// return (virtual) start address of MPB for UE with rank ue +//-------------------------------------------------------------------------------------- +t_vcharp RC_COMM_BUFFER_START(int ue){ +#ifdef __hermit__ + return (t_vcharp) SYSCALL2(__NR_rcce_malloc, RCCE_SESSION_ID, ue); +#elif defined(SCC) + // "Allocate" MPB, using memory mapping of physical addresses + t_vcharp retval; +#ifndef SCC_COUPLED_SYSTEMS + MPBalloc(&retval, X_PID(RC_COREID[ue]), Y_PID(RC_COREID[ue]), Z_PID(RC_COREID[ue]), + (X_PID(RC_COREID[ue]) == X_PID(RC_COREID[RCCE_IAM])) && + (Y_PID(RC_COREID[ue]) == Y_PID(RC_COREID[RCCE_IAM])) + ); +#else + MPBalloc(&retval, X_PID(RC_COREID[ue]), Y_PID(RC_COREID[ue]), Z_PID(RC_COREID[ue]), RC_COREID[ue] / 
RCCE_MAXNP_PER_BOARD, RCCE_DEVICE_NR, + (X_PID(RC_COREID[ue]) == X_PID(RC_COREID[RCCE_IAM])) && + (Y_PID(RC_COREID[ue]) == Y_PID(RC_COREID[RCCE_IAM])) + ); +#endif + return retval; +#else + // even in functional emulation mode we leave gaps in the global MPB + return RC_comm_buffer + RC_COREID[ue]*RC_COMM_BUFFER_SIZE()/RCCE_MAXNP; +#endif +} + +#ifndef __hermit__ +//#ifdef USE_FLAG_EXPERIMENTAL +t_vcharp RC_FLAG_BUFFER_START(int ue){ + // "Allocate" MPB, using memory mapping of physical addresses + t_vcharp retval; +#if SCC_COUPLED_SYSTEMS + FLAGalloc(&retval, X_PID(RC_COREID[ue]), Y_PID(RC_COREID[ue]), Z_PID(RC_COREID[ue]), RC_COREID[ue] / RCCE_MAXNP_PER_BOARD, RCCE_DEVICE_NR, (X_PID(RC_COREID[ue]) == X_PID(RC_COREID[RCCE_IAM])) && + (Y_PID(RC_COREID[ue]) == Y_PID(RC_COREID[RCCE_IAM])) + ); +#else + FLAGalloc(&retval, X_PID(RC_COREID[ue]), Y_PID(RC_COREID[ue]), Z_PID(RC_COREID[ue]),(X_PID(RC_COREID[ue]) == X_PID(RC_COREID[RCCE_IAM])) && + (Y_PID(RC_COREID[ue]) == Y_PID(RC_COREID[RCCE_IAM])) + ); +#endif + return retval; +} +//#endif +#endif + +//-------------------------------------------------------------------------------------- +// FUNCTION: RC_SHM_BUFFER_START +//-------------------------------------------------------------------------------------- +// return (virtual) start address of off-chip shared memory +//-------------------------------------------------------------------------------------- +#ifndef __hermit__ +#ifndef SCC_COUPLED_SYSTEMS +t_vcharp RC_SHM_BUFFER_START(){ +#ifdef SCC + t_vcharp retval; + SHMalloc(&retval); //SHMalloc() is in SCC_API.c + return retval; +#else + return RC_shm_buffer; +#endif +} +#else +t_vcharp RC_SHM_BUFFER_START(int device){ + t_vcharp retval; + if (device == RCCE_DEVICE_NR) + SHMalloc(&retval); + else + RMalloc(&retval, device); + + return retval; +} +#endif +#endif + +extern int isle_id(void); + +//-------------------------------------------------------------------------------------- +// FUNCTION: MYCOREID 
+//-------------------------------------------------------------------------------------- +// return physical core ID of calling core +//-------------------------------------------------------------------------------------- +int MYCOREID() { +#ifdef __hermit__ + return isle_id(); +#elif defined(SCC) + int tmp, x, y, z; + tmp=ReadConfigReg(CRB_OWN+MYTILEID); + x=(tmp>>3) & 0x0f; // bits 06:03 + y=(tmp>>7) & 0x0f; // bits 10:07 + z=(tmp ) & 0x07; // bits 02:00 +#ifndef SCC_COUPLED_SYSTEMS + return ( ( x + ( 6 * y ) ) * 2 ) + z; // True Processor ID! +#else + return ( ( x + ( 6 * y ) ) * 2 ) + z + RCCE_MAXNP_PER_BOARD * RCCE_DEVICE_NR; // True Processor ID! +#endif +#else + // the COREIDs are read into the main program in potentially random order. + // Each core can access its own Core ID. We simulate that by selecting + // the value in the list of coreids that corresponds to the sequence + // number of the OpenMP thread number + return RC_COREID[omp_get_thread_num()]; +#endif // SCC +} + +#if defined(SCC) +//-------------------------------------------------------------------------------------- +// FUNCTIONS: Locksuite for test-purpose +//-------------------------------------------------------------------------------------- +// acquire lock corresponding to core with rank ID +//-------------------------------------------------------------------------------------- +int RCCE_try_lock(int ID) { + if (Test_and_Set(ID)) + return(RCCE_SUCCESS); + return(RCCE_PENDING); +} + +int RCCE_TNS_barrier(RCCE_COMM* comm) { + +// two roundtrips to realize a barrier using a T&S Register for each core. + +// 1. search first free T&S Register to spin +// 2. last waiter wakes up first waiter and continues local wait +// 3. first waiter wakes up second waiter by releasing its lock ... +// At least every used T&S Register is 0 and no UE can overtake a barrier. 
+ + int num = comm->size; + int step = 0; + //fprintf(stderr,"%d:\t enter barrier \n",id); + + while( !Test_and_Set(step) ) ++step; + // only one UE runs until T&S # num-1 + + //fprintf(stderr,"%d:\t step %d\n",id,step); + + if(step == num-1) { + //fprintf(stderr,"%d:\t I am the last one\n",id); + *(virtual_lockaddress[0]) = 0x0; + while(!Test_and_Set(step)); + *(virtual_lockaddress[step]) = 0x0; + } else { + while(!Test_and_Set(step)); + *(virtual_lockaddress[step]) = 0x0; + *(virtual_lockaddress[step+1]) = 0x0; + } + //fprintf(stderr,"released barrier! step: %d\n", step); + return RCCE_SUCCESS; +} + +int RCCE_nb_TNS_barrier(RCCE_COMM* comm) { + +// two roundtrips to realize a barrier using a T&S Register for each core. + +// 1. search first free T&S Register to spin +// 2. last waiter wakes up first waiter and continues local wait +// 3. first waiter wakes up second waiter by releasing its lock ... +// At least every used T&S Register is 0 and no UE can overtake a barrier. + + int num = comm->size; + int step = 0; + //fprintf(stderr,"%d:\t enter barrier \n",id); + + if(comm->label == 1) goto label1; + if(comm->label == 2) goto label2; + + while( !Test_and_Set(step) ) ++step; + // only one UE runs until T&S # num-1 + + //fprintf(stderr,"%d:\t step %d\n",id,step); + + if(step == num-1) { + //fprintf(stderr,"%d:\t I am the last one\n",id); + *(virtual_lockaddress[0]) = 0x0; + comm->step = step; + label1: + step = comm->step; + if(!Test_and_Set(step)) + { + comm->label = 1; + return RCCE_PENDING; + } + *(virtual_lockaddress[step]) = 0x0; + } else { + comm->step = step; + label2: + step = comm->step; + if(!Test_and_Set(step)) + { + comm->label = 2; + return RCCE_PENDING; + } + *(virtual_lockaddress[step]) = 0x0; + *(virtual_lockaddress[step+1]) = 0x0; + } + //fprintf(stderr,"released barrier! 
step: %d\n", step); + comm->label = 0; + return RCCE_SUCCESS; +} + +#ifdef AIR +RCCE_AIR RCCE_atomic_inc_regs[2*RCCE_MAXNP]; + +int RCCE_AIR_barrier2(RCCE_COMM *comm) +{ + static int idx = 0; + unsigned long long time, time1, time2; + float ran = 0; + int id, val = 0, val2 = 0; + int window = comm->size; + int ue = RCCE_ue(); + int x = X_PID(ue), y = Y_PID(ue); + int win = 1000000; + + // ++air_counter; + if (comm == &RCCE_COMM_WORLD) { + time = RCCE_wtime(); + if ((id = *RCCE_atomic_inc_regs[idx].counter) < (comm->size-1)) + { + if(window > 16) { + val = id; + val2 = val; + time1 = RCCE_wtime();; + + if(window > 26) + { + ran = ((y+x)%8)*window*window/24000000.0; + window = (RCCE_wtime() - time)*win;//(RCCE_wtime() - time)*1000000.0; + } + else + window = 1; + ran = ran+(rand()%(window))/(win*100.0); + do + { + time = RCCE_wtime() - time; + time2 = RCCE_wtime()-time1-time/2; + time1 = RCCE_wtime(); + while(RCCE_wtime()-time1 < (((0.424+ran)*(comm->size-val)*(time2)/(val-val2+1)-time/2))) + { + if(RCCE_wtime()-time1>0.0050) + break; + } + val2 = val; + time = RCCE_wtime(); + // ++air_counter; + } while ((val = *RCCE_atomic_inc_regs[idx].init) > 0 && (val < comm->size)); + } + else + { + do + { + // ++air_counter; + } + while ((val = *RCCE_atomic_inc_regs[idx].init) > 0 && (val < comm->size)); + } + + } + else + { + *RCCE_atomic_inc_regs[idx].init = 0; + } + idx = !idx; + return(RCCE_SUCCESS); + } + else + { + return RCCE_barrier(comm); + } +} + +#ifndef GORY +int RCCE_dissemination_barrier(RCCE_COMM *comm) +{ + int k, max_rounds; + int ue, num_ues, ue_signal; + ue = RCCE_ue(); + num_ues = RCCE_num_ues(); + max_rounds = num_ues*(1+(num_ues%2)?1:0); + + for(k = 1; k < max_rounds; k = k*2 ) + { + /* signalize process */ + ue_signal = (ue+k)%num_ues; + RCCE_flag_write(&RCCE_barrier_flag[RCCE_IAM], RCCE_FLAG_SET, ue_signal); + /* wait for process */ + ue_signal = (ue-k+num_ues+num_ues)%num_ues; + RCCE_wait_until(RCCE_barrier_flag[ue_signal], RCCE_FLAG_SET); + 
RCCE_flag_write(&RCCE_barrier_flag[ue_signal], RCCE_FLAG_UNSET, RCCE_IAM); + } + + return(RCCE_SUCCESS); +} +#endif + +int RCCE_tree_init(RCCE_COMM *comm, tree_t *tree, int num_children) { + int ue, num_ues; + int i, j, k; + tree_t nodes[RCCE_MAXNP]; + if(comm != &RCCE_COMM_WORLD) + return(!RCCE_SUCCESS); + ue = RCCE_ue(); + num_ues = RCCE_num_ues(); + + nodes[0].parent = -1; + k = 1; + + for(i = 0; i < num_ues; ++i) + { + nodes[i].num_children = 0; + for(j = 0; j < num_children && k < num_ues; ++j, ++k) + { + nodes[i].child[j] = k; + nodes[k].parent = i; + ++(nodes[i].num_children); + } + } + memcpy(tree, &nodes[RCCE_IAM], sizeof(tree_t)); + + // printf("%d: child0:%d child1:%d parent:%d\n", ue, tree->child[0], tree->child[1], tree->parent);fflush(0); + + return(RCCE_SUCCESS); +} + +#ifndef GORY +int RCCE_tree_barrier(RCCE_COMM *comm, tree_t *tree) +{ + int i; + /* Gather */ + for(i = 0; i < tree->num_children; ++i) + { + RCCE_wait_until(RCCE_barrier_flag[tree->child[i]], RCCE_FLAG_SET); + RCCE_flag_write(&RCCE_barrier_flag[tree->child[i]], RCCE_FLAG_UNSET, RCCE_IAM); + } + + if(tree->parent != -1) + { + RCCE_flag_write(&RCCE_barrier_flag[RCCE_IAM], RCCE_FLAG_SET, tree->parent); + + /* Release */ + RCCE_wait_until(RCCE_barrier_release_flag, RCCE_FLAG_SET); + RCCE_flag_write(&RCCE_barrier_release_flag, RCCE_FLAG_UNSET, RCCE_IAM); + } + + /* Release */ + for(i = 0; i < tree->num_children; ++i) + { + RCCE_flag_write(&RCCE_barrier_release_flag, RCCE_FLAG_SET, tree->child[i]); + } + + return(RCCE_SUCCESS); +} +#endif + +int RCCE_tournament_barrier(RCCE_COMM *comm) +{ + return(RCCE_SUCCESS); +} + +int RCCE_tournament_fixed_barrier(RCCE_COMM *comm) +{ + return(RCCE_SUCCESS); +} + +int RCCE_AIR_barrier(RCCE_COMM *comm) +{ + static int idx = 0; + static unsigned int rand = 0; + int backoff = BACKOFF_MIN, wait, i = 0; + + if (comm == &RCCE_COMM_WORLD) { + if (*RCCE_atomic_inc_regs[idx].counter < (comm->size-1)) + { + while (*RCCE_atomic_inc_regs[idx].init > 0) + { + rand = 
rand * 1103515245u + 12345u; + wait = BACKOFF_MIN + (rand % (backoff << i)); + RC_wait(wait); + if (wait < BACKOFF_MAX) i++; + } + } + else + { + *RCCE_atomic_inc_regs[idx].init = 0; + } + idx = !idx; + return(RCCE_SUCCESS); + } + else + { + return RCCE_barrier(comm); + } +} + +int RCCE_nb_AIR_barrier(RCCE_COMM *comm) +{ + static int idx = 0; + static unsigned int rand = 0; + int backoff = BACKOFF_MIN, wait, i = 0; + + if(comm->label == 1) goto label1; + + if (comm == &RCCE_COMM_WORLD) { + if (*RCCE_atomic_inc_regs[idx].counter < (comm->size-1)) + { +#if 0 // NO BACKOFF in Non-Blocking case ??? + while (*RCCE_atomic_inc_regs[idx].init > 0) + { + rand = rand * 1103515245u + 12345u; + wait = BACKOFF_MIN + (rand % (backoff << i)); + RC_wait(wait); + if (wait < BACKOFF_MAX) i++; + } +#else + label1: + if(*RCCE_atomic_inc_regs[idx].init > 0) + { + comm->label = 1; + return RCCE_PENDING; + } +#endif + } + else + { + *RCCE_atomic_inc_regs[idx].init = 0; + } + idx = !idx; + comm->label = 0; + return(RCCE_SUCCESS); + } + else + { + return RCCE_barrier(comm); + } +} +#endif + +int RCCE_acquire_treelock(RCCE_COMM* comm) { + int i = 1; // concurrency factor + int step; + int group = (1 << i); + int me = comm->my_rank; + + //fprintf(stdout,"%d\tstart treelock:\n", me); + while (1){ + + //group <<= 1; + //if(group > num) break; + + // first rank within group + mid of group (leftmost) + step = ( me - ( me % group) ) + ( ( group - 1 ) >> 1 ) ; + + //fprintf(stdout,"%d\t%d\n", me, step); + //fflush(stdout); + while(!Test_and_Set(comm->member[step])); + + if(group >= comm->size) break; + + group <<= i; + }// while ( group <= comm->size); + // group is next 2^x + + //fprintf(stdout,"\n"); + //fflush(stderr); + return(RCCE_SUCCESS); +} + +int RCCE_release_treelock(RCCE_COMM* comm) {//int myID, int num) { + int step; + int group; + int v = comm->size; + int me = comm->my_rank; + + // round up to the next highest power of 2 + v--; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 
8; + v |= v >> 16; + v++; + // + group = v; + + //printf(stderr,"%d\trelease treelock: [%d] ",myID,group); + + while(1) { + step = ( me - ( me % group) ) + ( ( group - 1 ) >> 1 ); + //fprintf(stderr," %d",step); + *(virtual_lockaddress[(comm->member[step])]) = 0x0; + group >>= 1; + if(group < 2) break; + } + //fprintf(stderr,"\n"); + //fflush(stderr); + return(RCCE_SUCCESS); +} + +int RCCE_backoff_lock(int ID) { + //static int next = RC_MY_COREID; + // try lock with backoff + + int i = 0; + + int backoff = BACKOFF_MIN, wait = 0, tmp = 0; + unsigned int overflow = 0; + + + while (1) { + if (Test_and_Set(ID)) + break; + + // Kongruenzgenerator + next = ( next * 1103515245 + 12345 ) % ( INT_MAX ); + + wait = BACKOFF_MIN + ( next % ( backoff << i ) ); + + overflow += wait; + if( overflow > INT_MAX ) overflow = INT_MAX; + + RC_wait(wait); + if ( (backoff<=0; board--) + RCCE_shmalloc_init(RC_SHM_BUFFER_START(board),RCCE_SHM_SIZE_MAX/RCCE_MAX_BOARDS); +#endif +#endif +#endif + + // create global communicator (equivalent of MPI_COMM_WORLD); this will also allocate + // the two synchronization flags associated with the global barrier + RCCE_comm_split(RCCE_global_color, nothing, &RCCE_COMM_WORLD); + + // if power management is enabled, initialize more stuff; this includes two more + // communicators (for voltage and frequency domains), plus two synchronization flags + // associated with the barrier for each communicator +#ifdef RC_POWER_MANAGEMENT + int error; + if (error=RCCE_init_RPC(RC_COREID, RCCE_IAM, RCCE_NP)) + return(RCCE_error_return(RCCE_debug_RPC,error)); +#endif + +#ifndef GORY + // if we use the simplified API, we need to define more flags upfront + for (ue=0; ue 1) { + if(RCCE_IAM != RCCE_NP-1) { + RCCE_send((char*)&RCCE_DEVICE_NR, sizeof(int), RCCE_IAM+1); + } + if(RCCE_IAM != 0) { + RCCE_recv((char*)&tmp, sizeof(int), RCCE_IAM-1); + if(tmp != RCCE_DEVICE_NR) tmp = RCCE_IAM; + else tmp = -1; + RCCE_send((char*)&tmp, sizeof(int), 0); + } + else + { + 
RCCE_NUM_DEVICES = 0; + for(ue=1; ue 1) ) + { + printf("### %s: Remaining MPB space for communication: %zd Bytes per core\n", executable_name, RCCE_chunk); fflush(stdout); + } +#endif + + RCCE_barrier(&RCCE_COMM_WORLD); + + return (RCCE_SUCCESS); +} + +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_finalize +//-------------------------------------------------------------------------------------- +// clean up at end of library usage (memory unmapping) and resetting of memory and +// registers +//-------------------------------------------------------------------------------------- +int RCCE_finalize(void){ + +#ifdef SCC +#ifndef __hermit__ + int ue, iword; +#endif + + RCCE_barrier(&RCCE_COMM_WORLD); + + // each UE clears its own MPB and test&set register + //ERROR: THIS IS NOT THE START OF THE COMM BUFFER, BUT OF THE PAYLOAD AREA!! +// for (iword=0; iword<(RCCE_BUFF_SIZE_MAX)/sizeof(int); iword++) +// ((int *)(RCCE_comm_buffer[ue]))[iword] = 0; +// MPBunalloc(&(RCCE_comm_buffer[ue])); +#ifndef __hermit__ + RCCE_release_lock(RCCE_IAM); + // each core needs to unmap all special memory locations + for (ue=0; ue +#include + +//-------------------------------------------------------------------------------------- +// RCCE_bcast +//-------------------------------------------------------------------------------------- +// function that sends data from UE root to all other UEs in the communicator +//-------------------------------------------------------------------------------------- +int RCCE_bcast( + char *buf, // private memory, used for sending (root) and receiving (other UEs) + size_t num, // number of bytes to be sent + int root, // source within "comm" of broadcast data + RCCE_COMM comm // communication domain + ) { + + int ue, ierr; +#ifdef GORY + printf("Collectives only implemented for simplified API\n"); + return(1); +#else + // check to make sure root is member of the communicator + if (root<0 || root 
>= comm.size) + return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ID)); + + if (RCCE_IAM == comm.member[root]) { + for (ue=0; uesize = 0; + for (i=0; imy_rank = comm->size; + comm->member[comm->size++] = i; + } + } + + // note: we only need to allocate new synch flags if the communicator has not yet been + // initialized. It is legal to overwrite an initialized communcator, in which case the + // membership may change, but the same synchronization flags can be used + if (comm->initialized == RCCE_COMM_INITIALIZED) return(RCCE_SUCCESS); + +#ifndef USE_FAT_BARRIER + if((error=RCCE_flag_alloc(&(comm->gather)))) + return(RCCE_error_return(RCCE_debug_comm,error)); +#else + for (i=0; igather[i])))) + return(RCCE_error_return(RCCE_debug_comm,error)); + } +#endif + + if(error=RCCE_flag_alloc(&(comm->release))) + return(RCCE_error_return(RCCE_debug_comm,error)); + + comm->label = 0; + + comm->initialized = RCCE_COMM_INITIALIZED; + + return(RCCE_SUCCESS); +} + +// DO NOT USE THIS FUNCTION IN NON-GORY MODE UNTIL MALLOC_FREE HAS BEEN IMPLEMENTED +int RCCE_comm_free(RCCE_COMM *comm) { + printf("DO NOT USE IN NON-GORY MODE UNTIL MALLOC_FREE HAS BEEN IMPLEMENTED\n"); + if (comm->initialized != RCCE_COMM_INITIALIZED) + return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_COMM_INITIALIZED)); + +#ifndef USE_FAT_BARRIER + RCCE_flag_free(&(comm->gather)); +#else + { int i; + for (i=0; igather[i])); + } +#endif + + RCCE_flag_free(&(comm->release)); + comm->initialized = RCCE_COMM_NOT_INITIALIZED; + + return(RCCE_SUCCESS); +} + +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_comm_size +// returns the number of UEs inside the communicator +//-------------------------------------------------------------------------------------- +int RCCE_comm_size( + RCCE_COMM comm, // communicator + int *size // return value (size) + ) { + + if (comm.initialized == RCCE_COMM_INITIALIZED) { + *size = comm.size; + return(RCCE_SUCCESS); + } + 
else return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_COMM_INITIALIZED)); +} + +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_comm_rank +// returns the rank of the calling UE inside the communicator +//-------------------------------------------------------------------------------------- +int RCCE_comm_rank( + RCCE_COMM comm, // communicator + int *rank // return value (rank) + ) { + + if (comm.initialized == RCCE_COMM_INITIALIZED) { + *rank = comm.my_rank; + return(RCCE_SUCCESS); + } + else return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_COMM_INITIALIZED)); +} + +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_global_color +// use this trivial color function to define global communicator +//-------------------------------------------------------------------------------------- +int RCCE_global_color(int rank, void *nothing) {return(1);} diff --git a/hermit/usr/ircce/RCCE_debug.c b/hermit/usr/ircce/RCCE_debug.c new file mode 100644 index 000000000..49f077e06 --- /dev/null +++ b/hermit/usr/ircce/RCCE_debug.c @@ -0,0 +1,163 @@ +//*************************************************************************************** +// Diagnostic routines. +//*************************************************************************************** +// +// Author: Rob F. Van der Wijngaart +// Intel Corporation +// Date: 008/30/2010 +// +//*************************************************************************************** +// +// Copyright 2010 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "RCCE_lib.h" +#include "RCCE_debug.h" + +// highest valid index into RCCE_estrings[]; must be raised whenever a message is +// appended to the table below, otherwise RCCE_error_string rejects the new code +#define MAX_ERROR_NUMBER 27 + +//...................................................................................... +// GLOBAL VARIABLES USED BY THE LIBRARY +//...................................................................................... +// error messages; RCCE_error_string indexes this table with the error code minus +// RCCE_ERROR_BASE +const char *RCCE_estrings[] = { +/* 0 */ "Success", +/* 1 */ "Invalid target buffer", +/* 2 */ "Invalid source buffer", +/* 3 */ "Invalid UE ID", +/* 4 */ "Invalid message length", +/* 5 */ "Flag variable undefined", +/* 6 */ "Emulated NUEs do not match requested NUEs", +/* 7 */ "Message buffers overlap in comm buffer", +/* 8 */ "Data buffer misalignment", +/* 9 */ "Debug flag not defined", +/* 10 */ "RCCE_flag variable not inside comm buffer", +/* 11 */ "Flag status not defined", +/* 12 */ "Flag not allocated", +/* 13 */ "Value not defined", +/* 14 */ "Invalid error code", +/* 15 */ "RPC data structure not allocated", +/* 16 */ "RPC internal error", +/* 17 */ "Multiple outstanding RPC requests", +/* 18 */ "Invalid power step", +/* 19 */ "Maximum allowable frequency exceeded", +/* 20 */ "No active RPC request", +/* 21 */ "Stale RPC request", +/* 22 */ "Communicator undefined", +/* 23 */ "Illegal reduction operator", +/* 24 */ "Illegal data type", +/* 25 */ "Memory allocation error", +/* 26 */ "Communicator initialization error", +/* 27 */ "Multicast is not supported in remote-put/local-get mode" +}; +// END GLOBAL VARIABLES USED BY THE LIBRARY + +//-------------------------------------------------------------------------------------- +// FUNCTION: 
RCCE_error_string +//-------------------------------------------------------------------------------------- +// RCCE_error_string returns a descriptive error string. err_no is RCCE_SUCCESS or an +// error code offset by RCCE_ERROR_BASE; codes outside 1..MAX_ERROR_NUMBER (after the +// base is removed) yield an empty string, length 0 and RCCE_ERROR_INVALID_ERROR_CODE. +// The message is copied with strcpy, so error_string must be large enough to hold it. +//-------------------------------------------------------------------------------------- +int RCCE_error_string( + int err_no, // number of error to be described + char *error_string, // copy of error string + int *string_length // length of error string + ) { + + if (err_no != RCCE_SUCCESS) { + err_no -= RCCE_ERROR_BASE; + if (err_no < 1 || err_no > MAX_ERROR_NUMBER) { + strcpy(error_string,""); + *string_length=0; + return(RCCE_error_return(RCCE_debug_debug,RCCE_ERROR_INVALID_ERROR_CODE)); + } + } + strcpy(error_string,RCCE_estrings[err_no]); + *string_length = strlen(RCCE_estrings[err_no]); + return(RCCE_SUCCESS); +} + +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_error_return +//-------------------------------------------------------------------------------------- +// prints diagnostic error string, governed by input flag, also returns the error code. +// Nothing is printed unless both debug_flag and err_no are non-zero; err_no is always +// returned unchanged so the call can be used directly in a return statement. +//-------------------------------------------------------------------------------------- +int RCCE_error_return( + int debug_flag, // flag that controls diagnostic printing + int err_no // number of error to be printed + ) { + char error_string[RCCE_MAX_ERROR_STRING]; + int string_length; + + if (debug_flag && err_no) { + RCCE_error_string(err_no, error_string, &string_length); + fprintf(STDERR,"Error on UE %d: %s\n", RCCE_IAM, error_string); fflush(NULL); + } + return(err_no); +} + + +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_debug_set +//-------------------------------------------------------------------------------------- +// turns on debugging of a certain library feature; RCCE_DEBUG_ALL turns on all four +// categories, an unknown selector returns RCCE_ERROR_DEBUG_FLAG +//-------------------------------------------------------------------------------------- +int RCCE_debug_set( + int flag // flag that controls which 
library feaure is instrumented + ){ + + switch(flag) { + case(RCCE_DEBUG_ALL): RCCE_debug_synch=1; + RCCE_debug_comm=1; + RCCE_debug_debug=1; + RCCE_debug_RPC=1; + return(RCCE_SUCCESS); + case(RCCE_DEBUG_SYNCH): RCCE_debug_synch=1; + return(RCCE_SUCCESS); + case(RCCE_DEBUG_COMM): RCCE_debug_comm=1; + return(RCCE_SUCCESS); + case(RCCE_DEBUG_DEBUG): RCCE_debug_debug=1; + return(RCCE_SUCCESS); + case(RCCE_DEBUG_RPC): RCCE_debug_RPC=1; + return(RCCE_SUCCESS); + default: return(RCCE_error_return(RCCE_debug_debug, + RCCE_ERROR_DEBUG_FLAG)); + } +} + +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_debug_unset +//-------------------------------------------------------------------------------------- +// turns off debugging of a certain library feature; RCCE_DEBUG_ALL turns off all four +// categories, an unknown selector returns RCCE_ERROR_DEBUG_FLAG +//-------------------------------------------------------------------------------------- +int RCCE_debug_unset( + int flag // flag that controls which library feature is uninstrumented + ){ + + switch(flag) { + case(RCCE_DEBUG_ALL): RCCE_debug_synch=0; + RCCE_debug_comm=0; + RCCE_debug_debug=0; + RCCE_debug_RPC=0; + return(RCCE_SUCCESS); + case(RCCE_DEBUG_SYNCH): RCCE_debug_synch=0; + return(RCCE_SUCCESS); + case(RCCE_DEBUG_COMM): RCCE_debug_comm=0; + return(RCCE_SUCCESS); + case(RCCE_DEBUG_DEBUG): RCCE_debug_debug=0; + return(RCCE_SUCCESS); + case(RCCE_DEBUG_RPC): RCCE_debug_RPC=0; + return(RCCE_SUCCESS); + default: return(RCCE_error_return(RCCE_debug_debug, + RCCE_ERROR_DEBUG_FLAG)); + } +} diff --git a/hermit/usr/ircce/RCCE_debug.h b/hermit/usr/ircce/RCCE_debug.h new file mode 100644 index 000000000..1218d7f77 --- /dev/null +++ b/hermit/usr/ircce/RCCE_debug.h @@ -0,0 +1,26 @@ +/************************************************************** + * Change the RCCE_debug_xxx values to get debug info. * + * Change RCCE_comm_init_val to 1 to see what happens if * + * the comm buffers are not properly initialized at startup . 
* + **************************************************************/ + +int RCCE_debug_synch=0; +int RCCE_debug_comm=0; +int RCCE_debug_debug=0; +int RCCE_debug_RPC=0; +int RCCE_comm_init_val=0; +// +// Copyright 2010 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// diff --git a/hermit/usr/ircce/RCCE_flags.c b/hermit/usr/ircce/RCCE_flags.c new file mode 100644 index 000000000..d95d76040 --- /dev/null +++ b/hermit/usr/ircce/RCCE_flags.c @@ -0,0 +1,488 @@ +//************************************************************************************** +// Flag manipulation and access functions. +// Single-bit and whole-cache-line flags are sufficiently different that we provide +// separate implementations of all the flag routines for each case +//************************************************************************************** +// +// Author: Rob F. Van der Wijngaart +// Intel Corporation +// Date: 008/30/2010 +// +//************************************************************************************** +// +// Copyright 2010 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// [2012-09-07] added support for "tagged" flags +// by Carsten Clauss, Chair for Operating Systems, +// RWTH Aachen University +// +#include "RCCE_lib.h" +#if defined(COPPERRIDGE) && !defined(__hermit__) +#include "scc_memcpy.h" +#else +#define memcpy_scc memcpy +#endif + +#ifdef USE_BYTE_FLAGS +#include "RCCE_byte_flags.c" +#else + +#ifdef SINGLEBITFLAGS + +////////////////////////////////////////////////////////////////// +// LOCKING SYNCHRONIZATION USING ONE BIT PER FLAG +////////////////////////////////////////////////////////////////// + + +//...................................................................................... +// GLOBAL VARIABLES USED BY THE LIBRARY +//...................................................................................... +// single bit flags are accessed with the granularity of integers. Compute the +// number of flags per integer +int WORDSIZE = sizeof(int)*8; +int LEFTMOSTBIT = sizeof(int)*8-1; +//...................................................................................... +// END GLOBAL VARIABLES USED BY THE LIBRARY +//...................................................................................... + +RCCE_FLAG_LINE RCCE_flags = +// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + {{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, + NULL, 0, NULL}; + +// next three utility functions are only used by the library, not the user. We assume +// there will never be errors, so we do not return any error code. 
"location" of a +// flag bit inside a cache line is reckoned from the most significant (leftmost) +// bit. Within a word, flag zero is also in the leftmost bit + +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_bit_value +//-------------------------------------------------------------------------------------- +// return status of single bit flag at a specific location within cache line +//-------------------------------------------------------------------------------------- +#if 0 +// BUGGY VERSION (by Intel): +RCCE_FLAG_STATUS RCCE_bit_value(t_vcharp line_address, int location) { + t_vintp character = (t_vintp) (line_address + location/WORDSIZE); + int bit_position = (LEFTMOSTBIT-(location%WORDSIZE)); + unsigned int mask = 1<<bit_position; + return((mask & (*character))>>bit_position); +} +#else +// FIXED VERSION (by LfBS): +// works on bytes: location/8 selects the byte, 7-location%8 the bit inside it +RCCE_FLAG_STATUS RCCE_bit_value(t_vcharp line_address, int location) { + t_vcharp character = (t_vcharp) (line_address + location/8); + int bit_position = 7 - location%8; + unsigned char mask = 1<<bit_position; + return((mask & (*character))>>bit_position); +} +#endif + +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_flip_bit_value +//-------------------------------------------------------------------------------------- +// flip single bit in cache line and return value of changed bit. The location is that +// of the bit inside the line. To find the byte it is in, divide by 8. 
+//-------------------------------------------------------------------------------------- +#if 0 +// BUGGY VERSION (by Intel): +RCCE_FLAG_STATUS RCCE_flip_bit_value(t_vcharp line_address, int location) { + t_vintp character = (t_vintp) (line_address + location/WORDSIZE); + int bit_position = (LEFTMOSTBIT-(location%WORDSIZE)); + unsigned int mask = 1<<bit_position; + (*character) ^= mask; + return((mask & (*character))>>bit_position); +} +#else +// FIXED VERSION (by LfBS): +// works on bytes: location/8 selects the byte, 7-location%8 the bit inside it; the +// value returned is the state of the bit after it has been flipped +RCCE_FLAG_STATUS RCCE_flip_bit_value(t_vcharp line_address, int location) { + t_vcharp character = (t_vcharp) (line_address + location/8); + int bit_position = 7 - location%8; + unsigned char mask = 1<<bit_position; + (*character) ^= mask; + return((mask & (*character))>>bit_position); +} +#endif + +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_write_bit_value +//-------------------------------------------------------------------------------------- +// write single bit in cache line and return value of changed bit. The location is that +// of the bit inside the line. To find the word it is in, divide by WORDSIZE. 
+//-------------------------------------------------------------------------------------- +#if 0 +// BUGGY VERSION (by Intel): +int RCCE_write_bit_value(t_vcharp line_address, int location, RCCE_FLAG_STATUS val) { + t_vintp character = (t_vintp)(line_address + location/WORDSIZE); + int bit_position = (LEFTMOSTBIT-(location%WORDSIZE)); + unsigned int mask; + switch (val) { + case RCCE_FLAG_UNSET: mask = ~(1<members == 256 && flagp->next) { + flagp = flagp->next; + } + + // if this is a new flag line, need to allocate MPB for it + if (!flagp->line_address) flagp->line_address = RCCE_malloc(RCCE_LINE_SIZE); + if (!flagp->line_address) return(RCCE_error_return(RCCE_debug_synch, + RCCE_ERROR_FLAG_NOT_ALLOCATED)); + + if (flagp->members < 256) { + // there is space in this line for a new flag; find first open slot + for (loc=0; locflag),loc)) { + RCCE_flip_bit_value((t_vcharp)(flagp->flag),loc); + flagp->members++; + flag->location = loc; + flag->line_address = flagp->line_address; + return(RCCE_SUCCESS); + } + } + else { + // must create new flag line if last one was full + flagp->next = (RCCE_FLAG_LINE *) malloc(sizeof(RCCE_FLAG_LINE)); + if (!(flagp->next)) return(RCCE_error_return(RCCE_debug_synch, + RCCE_ERROR_FLAG_NOT_ALLOCATED)); + flagp = flagp->next; + flagp->line_address = RCCE_malloc(RCCE_LINE_SIZE); + if (!(flagp->line_address)) return(RCCE_error_return(RCCE_debug_synch, + RCCE_ERROR_FLAG_NOT_ALLOCATED)); + // initialize the flag line + flagp->members=1; + flagp->next = NULL; + for (c=0; cflag[c] &= (unsigned int) 0; + + // flip the very first bit field to indicate that flag is not in use + RCCE_flip_bit_value((t_vcharp)(flagp->flag),0); + flag->location = 0; + flag->line_address = flagp->line_address; + } + return(RCCE_SUCCESS); +} + +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_flag_free +//-------------------------------------------------------------------------------------- +// free space for 
single bit flag. Since 256 fit on a single cache line, we only +// need to free claimed MPB space when a flag line has been completely emptied. The +// first line of the list (the global RCCE_flags) is never released. +//-------------------------------------------------------------------------------------- +int RCCE_flag_free(RCCE_FLAG *flag) { + + RCCE_FLAG_LINE *flagp, *flagpminus1 = NULL; + + // check whether flag exists, and whether the location field is valid + if (!flag || flag->location < 0) + return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_UNDEFINED)); + // find flag line in globally maintained structure; flagpminus1 trails one node + // behind and stays NULL while flagp is still the list head + flagp = &RCCE_flags; + while (flagp->next && flag->line_address != flagp->line_address) { + flagpminus1 = flagp; + flagp = flagp->next; + } + if (flag->line_address != flagp->line_address) + return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_UNDEFINED)); + + // error checking is done + flagp->members--; + // flipping the bookkeeping bit marks the slot as unused again + RCCE_flip_bit_value((t_vcharp)(flagp->flag),flag->location); + // something special happens if we've emptied an entire line + if (flagp->members==0) { + if (flagpminus1) { + // there is a predecessor; splice out current flag line from linked list + RCCE_free(flagp->line_address); + flagpminus1->next = flagp->next; + free(flagp); + } + // if there is a successor but no predecessor, do nothing + } + // invalidate location field to make sure we won't free again by mistake + flag->location = -1; + flag->line_address = NULL; + + return(RCCE_SUCCESS); +} + +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_flag_write +//-------------------------------------------------------------------------------------- +// This is the core flag manipulation routine. It requires locking to guarantee atomic +// access while updating one of a line of flags. 
+//-------------------------------------------------------------------------------------- +int RCCE_flag_write(RCCE_FLAG *flag, RCCE_FLAG_STATUS val, int ID) { + t_vchar val_array[RCCE_LINE_SIZE]; + int error; + +#ifdef GORY + // check input parameters + if (!flag || flag->location < 0 || flag->location > 255) + return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_UNDEFINED)); + if (val != RCCE_FLAG_UNSET && val != RCCE_FLAG_SET) + return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_STATUS_UNDEFINED)); +#endif + + // acquire lock to make sure nobody else fiddles with the flags on the target core + RCCE_acquire_lock(ID); + // copy entire MPB cache line containing flag to local space + error = RCCE_get(val_array, flag->line_address, RCCE_LINE_SIZE, ID); + if (!error) { + // overwrite single bit within local copy of cache line + RCCE_write_bit_value(val_array, flag->location, val); + // write copy back to the MPB + error = RCCE_put(flag->line_address, val_array, RCCE_LINE_SIZE, ID); + } + + // release write lock for the flags on the target core; this must also happen when + // RCCE_get failed, otherwise the lock taken above would never be released + RCCE_release_lock(ID); + return(RCCE_error_return(RCCE_debug_synch,error)); +} + +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_flag_read +//-------------------------------------------------------------------------------------- +// This routine is rarely needed. We typically only read a flag when we're waiting for +// it to change value (function RCCE_wait_until). Reading does not require locking. 
The +// moment the target flag we're trying to read changes value, it is OK to read and +// return that value +//-------------------------------------------------------------------------------------- +int RCCE_flag_read(RCCE_FLAG flag, RCCE_FLAG_STATUS *val, int ID) { + volatile unsigned char val_array[RCCE_LINE_SIZE]; + int error; + +#ifdef GORY + if (flag.location < 0 || flag.location > 255) + return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_UNDEFINED)); + if (!val) return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_VAL_UNDEFINED)); +#endif + +// Should be able to use same technique as in RCCE_wait_until, i.e., should not need +// to copy out of MPB first. However, this function is not time critical + if(error=RCCE_get(val_array, flag.line_address, RCCE_LINE_SIZE, ID)) + return(RCCE_error_return(RCCE_debug_synch,error)); + // NOTE: val is dereferenced unconditionally; a NULL val is only rejected in GORY builds + *val = RCCE_bit_value(val_array, flag.location); + return(RCCE_SUCCESS); +} + +#else + +////////////////////////////////////////////////////////////////// +// LOCKLESS SYNCHRONIZATION USING ONE WHOLE CACHE LINE PER FLAG // +////////////////////////////////////////////////////////////////// + +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_flag_alloc +//-------------------------------------------------------------------------------------- +// there is no internal structure to whole-cache-line flags; a new flag simply means a +// newly allocated line in the MPB +// NOTE(review): a failed allocation is reported as RCCE_ERROR_FLAG_UNDEFINED here, while +// the single-bit RCCE_flag_alloc reports RCCE_ERROR_FLAG_NOT_ALLOCATED -- confirm which +// code callers are meant to see +//-------------------------------------------------------------------------------------- +int RCCE_flag_alloc(RCCE_FLAG *flag) { + *flag = (RCCE_FLAG) RCCE_malloc(RCCE_LINE_SIZE); + if (!(*flag)) return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_UNDEFINED)); + else return(RCCE_SUCCESS); +} + +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_flag_free +//-------------------------------------------------------------------------------------- +// there is 
no internal structure to whole-cache-line flags; deleting a flag simply +// means deallocating line in the MPB +//-------------------------------------------------------------------------------------- +int RCCE_flag_free(RCCE_FLAG *flag) { + if (!flag) return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_UNDEFINED)); + else RCCE_free((t_vcharp)(*flag)); + return(RCCE_SUCCESS); +} + +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_flag_write +//-------------------------------------------------------------------------------------- +// This is the core flag manipulation routine. No locking required. We simply write the +// flag value into the first word of a local (private) buffer of the size of a cache +// line and copy it to the corresponding location in the MPB. With USE_REVERTED_FLAGS +// the value is stored in the last int of the line instead of the first. +//-------------------------------------------------------------------------------------- +int RCCE_flag_write(RCCE_FLAG *flag, RCCE_FLAG_STATUS val, int ID) { + int error; +#ifndef USE_FLAG_EXPERIMENTAL + volatile unsigned char val_array[RCCE_LINE_SIZE] = + // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; + +#ifdef GORY + // check input parameters + if (!flag || !(*flag)) return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_UNDEFINED)); + if (val != RCCE_FLAG_UNSET && val != RCCE_FLAG_SET) + return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_STATUS_UNDEFINED)); +#endif + +#ifndef USE_REVERTED_FLAGS + *(int *) val_array = val; +#else + *(int *) &val_array[RCCE_LINE_SIZE-sizeof(int)] = val; +#endif + + error = RCCE_put((t_vcharp)(*flag), val_array, RCCE_LINE_SIZE, ID); + +#else + //*flag = val; + volatile unsigned char value = val; + + error = RCCE_put_flag(*flag, &value, 1, ID); +#endif + + return(RCCE_error_return(RCCE_debug_synch,error)); +} + +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_flag_write_tagged +//-------------------------------------------------------------------------------------- +// Like RCCE_flag_write, but additionally stores up to RCCE_LINE_SIZE-sizeof(int) bytes +// of "tag" payload in the part of the cache line not occupied by the flag value. A +// NULL tag or a non-positive len writes the flag without payload. +//-------------------------------------------------------------------------------------- +#ifdef USE_TAGGED_FLAGS +int 
RCCE_flag_write_tagged(RCCE_FLAG *flag, RCCE_FLAG_STATUS val, int ID, void* tag, int len) { + + unsigned char val_array[RCCE_LINE_SIZE] = + // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; + + int error; + +#ifndef USE_REVERTED_FLAGS + *(int *) val_array = val; +#else + *(int *) &val_array[RCCE_LINE_SIZE-sizeof(int)] = val; +#endif + + // compare as int: a negative len must not be promoted to a huge unsigned value + if(tag && len > 0) + { + if( len > (int)( RCCE_LINE_SIZE - sizeof(int) ) ) len = (int)( RCCE_LINE_SIZE - sizeof(int) ); +#ifndef USE_REVERTED_FLAGS + memcpy_scc(&val_array[sizeof(int)], tag, len); +#else + memcpy_scc(&val_array[0], tag, len); +#endif + } + + error = RCCE_put((t_vcharp)(*flag), val_array, RCCE_LINE_SIZE, ID); + + return(RCCE_error_return(RCCE_debug_synch,error)); +} +#endif + +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_flag_read +//-------------------------------------------------------------------------------------- +// This routine is rarely needed. We typically only read a flag when we're waiting for +// it to change value (function RCCE_wait_until). Reading requires copying the whole +// MPB cache line containing the flag to a private buffer and returning the first int. 
+//-------------------------------------------------------------------------------------- +int RCCE_flag_read(RCCE_FLAG flag, RCCE_FLAG_STATUS *val, int ID) { + int error; +#ifndef USE_FLAG_EXPERIMENTAL + volatile unsigned char val_array[RCCE_LINE_SIZE]; +#ifdef GORY + if (!flag) return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_UNDEFINED)); + if (!val) return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_VAL_UNDEFINED)); +#endif + + if(error=RCCE_get(val_array, (t_vcharp)flag, RCCE_LINE_SIZE, ID)) + return(RCCE_error_return(RCCE_debug_synch,error)); + +#ifndef USE_REVERTED_FLAGS + if(val) *val = *(int *)val_array; +#else + if(val) *val = *(int *)&val_array[RCCE_LINE_SIZE-sizeof(int)]; +#endif + +#else + volatile unsigned char value; + + if(error=RCCE_get_flag(&value, (t_vcharp)flag, 1, ID)) + return(RCCE_error_return(RCCE_debug_synch,error)); + + if(val) *val = value; + +#endif + + return(RCCE_SUCCESS); +} + +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_flag_read_tagged +//-------------------------------------------------------------------------------------- +// Like RCCE_flag_read, but additionally copies up to RCCE_LINE_SIZE-sizeof(int) bytes +// of payload stored next to the flag into "tag". The payload is only copied when val +// is non-NULL, the value read is non-zero, tag is non-NULL and len is positive. +//-------------------------------------------------------------------------------------- +#ifdef USE_TAGGED_FLAGS +int RCCE_flag_read_tagged(RCCE_FLAG flag, RCCE_FLAG_STATUS *val, int ID, void *tag, int len) { + + unsigned char val_array[RCCE_LINE_SIZE]; + int error; + + if(error=RCCE_get(val_array, (t_vcharp)flag, RCCE_LINE_SIZE, ID)) + return(RCCE_error_return(RCCE_debug_synch,error)); + +#ifndef USE_REVERTED_FLAGS + if(val) *val = *(int *)val_array; +#else + if(val) *val = *(int *)&val_array[RCCE_LINE_SIZE-sizeof(int)]; +#endif + + // compare as int: a negative len must not be promoted to a huge unsigned value and + // then clamped, which would write a full payload into the caller's tag buffer + if( (val) && (*val) && (tag) && (len > 0) ) { + if( len > (int)( RCCE_LINE_SIZE - sizeof(int) ) ) len = (int)( RCCE_LINE_SIZE - sizeof(int) ); +#ifndef USE_REVERTED_FLAGS + memcpy_scc(tag, &val_array[sizeof(int)], len); +#else + memcpy_scc(tag, &val_array[0], len); +#endif + } + + return(RCCE_SUCCESS); +} +#endif +#endif + +#endif diff --git a/hermit/usr/ircce/RCCE_get.c b/hermit/usr/ircce/RCCE_get.c new file mode 100644 index 000000000..160e12512 --- /dev/null +++ b/hermit/usr/ircce/RCCE_get.c @@ -0,0 +1,169 @@ +//*************************************************************************************** +// Get data from 
communication buffer. +//*************************************************************************************** +// +// Author: Rob F. Van der Wijngaart +// Intel Corporation +// Date: 008/30/2010 +// +//*************************************************************************************** +// +// Copyright 2010 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// [2010-11-03] switched to SCC-optimized memcpy() functions in scc_memcpy.h: +// - memcpy_to_mpb() +// - memcpy_from_mpb() +// by Stefan Lankes, Carsten Clauss, Chair for Operating Systems, +// RWTH Aachen University +// +#include "RCCE_lib.h" + +#ifdef COPPERRIDGE +#ifdef __hermit__ +#define memcpy_from_mpb memcpy +#else +#include "scc_memcpy.h" +#endif +#endif + +void *RCCE_memcpy_get(void *dest, const void *src, size_t count) +{ // function wrapper for external usage of improved memcpy()... 
+#ifdef COPPERRIDGE + return memcpy_from_mpb(dest, src, count); +#else + return memcpy(dest, src, count); +#endif +} + +// inside this file the wrapper is bypassed: from here on the name expands directly to +// the underlying copy routine +#ifdef COPPERRIDGE +#define RCCE_memcpy_get(a,b,c) memcpy_from_mpb(a,b,c) +#else +#define RCCE_memcpy_get(a,b,c) memcpy(a,b,c) +#endif + +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_get +//-------------------------------------------------------------------------------------- +// copy data from address "source" in the remote MPB to address "target" in either the +// local MPB, or in the calling UE's private memory. We do not test to see if a move +// into the calling UE's private memory stays within allocated memory +//-------------------------------------------------------------------------------------- +int RCCE_get( + t_vcharp target, // target buffer, MPB or private memory + t_vcharp source, // source buffer, MPB + int num_bytes, // number of bytes to copy (must be multiple of cache line size) + int ID // rank of source UE + ) { + +// printf("UE %d at top of RCCE_get\n", RCCE_IAM); fflush(NULL); + +#ifdef GORY + // we only need to do tests in GORY mode; in non-GORY mode this function is never + // called by the user, but only by the library + int copy_mode; + + // check validity of parameters + if (!target) return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_TARGET)); + if (!source) return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_SOURCE)); + + if (ID<0 || ID>=RCCE_NP) return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ID)); + + if (num_bytes <0 || num_bytes%RCCE_LINE_SIZE!=0) + return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_MESSAGE_LENGTH)); + + // determine if source data is in MPB; check using local buffer boundaries + if (source - RCCE_comm_buffer[RCCE_IAM] >=0 && + source+num_bytes - (RCCE_comm_buffer[RCCE_IAM] + RCCE_BUFF_SIZE)<=0) + // shift source address to point to remote MPB + source = RCCE_comm_buffer[ID]+(source-RCCE_comm_buffer[RCCE_IAM]); + else 
return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_SOURCE)); + + // target can be either local MPB or private memory + if (target -RCCE_comm_buffer[RCCE_IAM] >= 0 && + target+num_bytes - (RCCE_comm_buffer[RCCE_IAM] + RCCE_BUFF_SIZE)<=0) + copy_mode = BOTH_IN_COMM_BUFFER; + else + copy_mode = TARGET_IN_PRIVATE_MEMORY; + + // make sure that if the copy is between locations within the same MPB + // there is no overlap between source and target address ranges; the ranges + // [source,source+num_bytes) and [target,target+num_bytes) intersect exactly when + // each one starts before the other one ends + if ( copy_mode == BOTH_IN_COMM_BUFFER) { + if ((source-target) < num_bytes && + (target-source) < num_bytes) { + return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_DATA_OVERLAP)); + } + } + + // ascertain that the start of the buffer is cache line aligned + int start_index = source-RCCE_comm_buffer[ID]; + if (start_index%RCCE_LINE_SIZE!=0) + return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ALIGNMENT)); + + // only verify alignment of the target if it is in the MPB; the target lives in + // the local MPB, so its offset is measured from the local buffer base + if (copy_mode == BOTH_IN_COMM_BUFFER) { + start_index = target-RCCE_comm_buffer[RCCE_IAM]; + if (start_index%RCCE_LINE_SIZE!=0) + return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ALIGNMENT)); + } +#else + // in non-GORY mode we only need to retain the MPB source shift; we + // already know the source is in the MPB, not private memory + source = RCCE_comm_buffer[ID]+(source-RCCE_comm_buffer[RCCE_IAM]); +#endif + +// printf("UE %d; target = %x, source = %x, nbytes= %d\n", RCCE_IAM, target, source, num_bytes); + fflush(NULL); + + // do the actual copy, making sure we copy fresh data +#ifdef _OPENMP + #pragma omp flush +#endif + RC_cache_invalidate(); + + RCCE_memcpy_get((void *)target, (void *)source, num_bytes); + + // a pointer must be printed with %p; %X expects an unsigned int + if (RCCE_debug_synch) + fprintf(STDERR,"UE %d get data: %d from address %p \n", RCCE_IAM,*target,(void *)source); + +// printf("UE %d finished the memcopy\n", RCCE_IAM); + + // flush data to make sure it is visible to all threads; cannot use a flush list + // because it concerns malloced space +#ifdef 
_OPENMP + #pragma omp flush +#endif + return(RCCE_SUCCESS); +} + +#ifdef USE_FLAG_EXPERIMENTAL +// reads a single flag byte: the MPB-relative offset of "source" is applied to the flag +// buffer of UE "ID". num_bytes is accepted for symmetry with RCCE_get but is not used; +// exactly one byte is copied. +int RCCE_get_flag( + t_vcharp target, // target buffer, private memory + t_vcharp source, // source buffer, MPB ncm mapped + int num_bytes, // number of bytes to copy (currently ignored, see above) + int ID // rank of source UE + ) { + + source = RCCE_flag_buffer[ID]+(source-RCCE_comm_buffer[RCCE_IAM]); + + //memcpy((void*)target, (void*)source, num_bytes); + + *target = *source; + + if (RCCE_debug_synch) + fprintf(STDERR,"UE %d get flag: %x from address %p \n", RCCE_IAM,*target,(void *)source); + + return(RCCE_SUCCESS); +} +#endif diff --git a/hermit/usr/ircce/RCCE_lib.h b/hermit/usr/ircce/RCCE_lib.h new file mode 100644 index 000000000..84d329762 --- /dev/null +++ b/hermit/usr/ircce/RCCE_lib.h @@ -0,0 +1,382 @@ +// +// Copyright 2010 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef RCCE_LIB_H +#define RCCE_LIB_H +#include "RCCE.h" +#ifdef _OPENMP + #include <omp.h> +#endif +#include <string.h> + +//#define AIR + +#undef USE_FLAG_EXPERIMENTAL +#undef USE_RCCE_COMM +#undef USE_FAT_BARRIER +#undef USE_PIPELINE_FLAGS +#undef USE_PROBE_FLAGS +#undef USE_TAGGED_FLAGS +#undef USE_TAGGED_FOR_SHORT +#undef USE_REVERTED_FLAGS +#undef USE_REMOTE_PUT_LOCAL_GET +#undef USE_PROBE_FLAGS_SHORTCUT +#define USE_SYNCH_FOR_ZERO_BYTE + +// override certain settings for SCC-MPICH: +//#include "scc-mpich-defs.h" + +// adjust settings automatically? 
+#undef AUTO_ADJUST_SETTINGS + +//////////////////////////////////////////////////////////////////////////////////////////////// +#ifdef AUTO_ADJUST_SETTINGS + +#ifdef SINGLEBITFLAGS +#ifdef USE_TAGGED_FLAGS +#warning TAGGED FLAGS CANNOT BE USED WITH SINGLEBITFLAGS! (#undef USE_TAGGED_FLAGS) +#undef USE_TAGGED_FLAGS +#undef USE_TAGGED_FOR_SHORT +#undef USE_PROBE_FLAGS_SHORTCUT +#endif +#ifdef USE_FAT_BARRIER +#warning FAT BARRIER CANNOT BE USED WITH SINGLEBITFLAGS! (#undef USE_FAT_BARRIER) +#undef USE_FAT_BARRIER +#endif +#endif + +#ifdef USE_PROBE_FLAGS_SHORTCUT +#ifndef USE_PROBE_FLAGS +#warning THE PROBE FLAGS SHORTCUT REQUIRES PROBE FLAGS! (#define USE_PROBE_FLAGS) +#define USE_PROBE_FLAGS +#endif +#ifndef USE_TAGGED_FOR_SHORT +#warning THE PROBE FLAGS SHORTCUT REQUIRES TAGGED FLAGS! (#define USE_TAGGED_FLAGS) +#define USE_TAGGED_FLAGS +#endif +#endif + +#ifdef USE_TAGGED_FOR_SHORT +#ifndef USE_TAGGED_FLAGS +#warning TAGGED SHORT MESSAGES REQUIRE TAGGED FLAGS! (#define USE_TAGGED_FLAGS) +#define USE_TAGGED_FLAGS +#endif +#endif + +#ifdef USE_REMOTE_PUT_LOCAL_GET +#ifndef USE_PROBE_FLAGS +#warning PROBING FOR MESSAGES IN REMOTE-PUT/LOCAL-GET NEEDS ADDITIONAL PROBE FLAGS! (#define USE_PROBE_FLAGS) +#define USE_PROBE_FLAGS +#endif +#endif + +#ifdef SCC_COUPLED_SYSTEMS +#ifndef USE_REVERTED_FLAGS +#ifdef USE_TAGGED_FLAGS +#warning COUPLED SYSTEMS REQUIRE REVERTED FLAGS WHEN USING TAGGED FLAGS! (#define USE_REVERTED_FLAGS) +#define USE_REVERTED_FLAGS +#endif +#endif +#ifndef USE_REMOTE_PUT_LOCAL_GET +#warning COUPLED SYSTEMS SHOULD USE REMOTE-PUT/LOCAL-GET! (#define USE_REMOTE_PUT_LOCAL_GET) +#define USE_REMOTE_PUT_LOCAL_GET +#endif +#else +#ifdef USE_PROBE_FLAGS +#warning NON-COUPLED SYSTEMS SHOULD NOT USE ADDITIONAL PROBE FLAGS! (#undef USE_PROBE_FLAGS) +#undef USE_PROBE_FLAGS +#endif +#endif + +#ifdef USE_PROBE_FLAGS +#ifdef USE_FAT_BARRIER +#warning PROBABLY TOO LITTLE MPB SPACE FOR USING FAT BARRIER WITH PROBE FLAGS ENABLED! 
(#undef USE_FAT_BARRIER) +#undef USE_FAT_BARRIER +#endif +#endif + +//////////////////////////////////////////////////////////////////////////////////////////////// +#else // !AUTO_ADJUST_SETTINGS + +#ifdef SINGLEBITFLAGS +#ifdef USE_TAGGED_FLAGS +#error TAGGED FLAGS CANNOT BE USED WITH SINGLEBITFLAGS! (#undef USE_TAGGED_FLAGS) +#endif +#undef USE_TAGGED_FLAGS +#undef USE_TAGGED_FOR_SHORT +#undef USE_PROBE_FLAGS_SHORTCUT +#ifdef USE_FAT_BARRIER +#error FAT BARRIER CANNOT BE USED WITH SINGLEBITFLAGS! (#undef USE_FAT_BARRIER) +#endif +#endif + +#ifdef USE_PROBE_FLAGS_SHORTCUT +#ifndef USE_PROBE_FLAGS +#error THE PROBE FLAGS SHORTCUT REQUIRES PROBE FLAGS! (#define USE_PROBE_FLAGS) +#endif +#ifndef USE_TAGGED_FOR_SHORT +#error THE PROBE FLAGS SHORTCUT REQUIRES TAGGED FLAGS! (#define USE_TAGGED_FLAGS) +#endif +#endif + +#ifdef USE_TAGGED_FOR_SHORT +#ifndef USE_TAGGED_FLAGS +#error TAGGED SHORT MESSAGES REQUIRE TAGGED FLAGS! (#define USE_TAGGED_FLAGS) +#endif +#endif + +#ifdef USE_REMOTE_PUT_LOCAL_GET +#ifndef USE_PROBE_FLAGS +#warning PROBING FOR MESSAGES IN REMOTE-PUT/LOCAL-GET NEEDS ADDITIONAL PROBE FLAGS! (#define USE_PROBE_FLAGS) +#endif +#endif + +#ifdef SCC_COUPLED_SYSTEMS +#ifdef USE_TAGGED_FLAGS +#ifndef USE_REVERTED_FLAGS +#error COUPLED SYSTEMS REQUIRE REVERTED FLAGS WHEN USING TAGGED FLAGS! (#define USE_REVERTED_FLAGS) +#endif +#endif +#ifndef USE_REMOTE_PUT_LOCAL_GET +#warning COUPLED SYSTEMS SHOULD USE REMOTE-PUT/LOCAL-GET! (#define USE_REMOTE_PUT_LOCAL_GET) +#endif +#else +#ifdef USE_PROBE_FLAGS +#warning NON-COUPLED SYSTEMS SHOULD NOT USE ADDITIONAL PROBE FLAGS! (#undef USE_PROBE_FLAGS) +#endif +#endif + +#ifdef USE_PROBE_FLAGS +#ifdef USE_FAT_BARRIER +#warning PROBABLY TOO LITTLE MPB SPACE FOR USING FAT BARRIER WITH PROBE FLAGS ENABLED! 
(#undef USE_FAT_BARRIER) +#endif +#endif + + +#endif // !AUTO_ADJUST_SETTINGS +//////////////////////////////////////////////////////////////////////////////////////////////// + + +/* PAD32byte is used to compute a cacheline padded length of n (input) bytes */ +#define PAD32byte(n) ((n)%32==0 ? (n) : (n) + 32 - (n)%32) + +//#define BITSPERCHAR 8 + +#define BOTH_IN_COMM_BUFFER 12 +#define SOURCE_IN_PRIVATE_MEMORY 34 +#define TARGET_IN_PRIVATE_MEMORY 56 + +#ifdef SINGLEBITFLAGS +#define RCCE_FLAGS_PER_BYTE 8 +#else +#define RCCE_FLAGS_PER_BYTE 1 +#endif +#define RCCE_FLAGS_PER_LINE (RCCE_LINE_SIZE*RCCE_FLAGS_PER_BYTE) + +#define RCCE_SUM_INT (RCCE_SUM+(RCCE_NUM_OPS)*(RCCE_INT)) +#define RCCE_SUM_LONG (RCCE_SUM+(RCCE_NUM_OPS)*(RCCE_LONG)) +#define RCCE_SUM_FLOAT (RCCE_SUM+(RCCE_NUM_OPS)*(RCCE_FLOAT)) +#define RCCE_SUM_DOUBLE (RCCE_SUM+(RCCE_NUM_OPS)*(RCCE_DOUBLE)) +#define RCCE_MAX_INT (RCCE_MAX+(RCCE_NUM_OPS)*(RCCE_INT)) +#define RCCE_MAX_LONG (RCCE_MAX+(RCCE_NUM_OPS)*(RCCE_LONG)) +#define RCCE_MAX_FLOAT (RCCE_MAX+(RCCE_NUM_OPS)*(RCCE_FLOAT)) +#define RCCE_MAX_DOUBLE (RCCE_MAX+(RCCE_NUM_OPS)*(RCCE_DOUBLE)) +#define RCCE_MIN_INT (RCCE_MIN+(RCCE_NUM_OPS)*(RCCE_INT)) +#define RCCE_MIN_LONG (RCCE_MIN+(RCCE_NUM_OPS)*(RCCE_LONG)) +#define RCCE_MIN_FLOAT (RCCE_MIN+(RCCE_NUM_OPS)*(RCCE_FLOAT)) +#define RCCE_MIN_DOUBLE (RCCE_MIN+(RCCE_NUM_OPS)*(RCCE_DOUBLE)) +#define RCCE_PROD_INT (RCCE_PROD+(RCCE_NUM_OPS)*(RCCE_INT)) +#define RCCE_PROD_LONG (RCCE_PROD+(RCCE_NUM_OPS)*(RCCE_LONG)) +#define RCCE_PROD_FLOAT (RCCE_PROD+(RCCE_NUM_OPS)*(RCCE_FLOAT)) +#define RCCE_PROD_DOUBLE (RCCE_PROD+(RCCE_NUM_OPS)*(RCCE_DOUBLE)) + +#define RCCE_COMM_INITIALIZED 45328976 +#define RCCE_COMM_NOT_INITIALIZED -45328976 + +// auxiliary MPB pointer type +typedef volatile unsigned int* t_vintp; +// Also need dereferenced types +typedef volatile unsigned char t_vchar; +typedef volatile unsigned int t_vint; + +typedef struct rcce_block { + t_vcharp space; // pointer to space for data in block + size_t 
free_size; // actual free space in block (0 or whole block) + size_t size; // size of an allocated block + struct rcce_block *next; // pointer to next block in circular linked list +} RCCE_BLOCK; + +#if defined(SINGLEBITFLAGS) || defined(USE_BYTE_FLAGS) +typedef struct rcce_flag_line { + char flag[RCCE_FLAGS_PER_LINE]; + t_vcharp line_address; + int members; + struct rcce_flag_line *next; +} RCCE_FLAG_LINE; +#endif + + +typedef struct { + RCCE_BLOCK *tail; // "last" block in linked list of blocks +} RCCE_BLOCK_S; + +#ifdef AIR +#define FPGA_BASE 0xf9000000 +#define BACKOFF_MIN 8 +#define BACKOFF_MAX 256 +typedef volatile struct _RCCE_AIR { + int * counter; + int * init; +} RCCE_AIR; +#endif + +#ifndef GORY + extern RCCE_FLAG RCCE_sent_flag[RCCE_MAXNP]; + extern RCCE_FLAG RCCE_ready_flag[RCCE_MAXNP]; +#ifdef USE_PIPELINE_FLAGS + extern RCCE_FLAG RCCE_sent_flag_pipe[RCCE_MAXNP]; + extern RCCE_FLAG RCCE_ready_flag_pipe[RCCE_MAXNP]; +#endif +#ifdef USE_PROBE_FLAGS + extern RCCE_FLAG RCCE_probe_flag[RCCE_MAXNP]; +#endif + extern t_vcharp RCCE_buff_ptr; + extern size_t RCCE_chunk; + extern t_vcharp RCCE_flags_start; +#ifndef USE_REMOTE_PUT_LOCAL_GET + extern RCCE_SEND_REQUEST* RCCE_send_queue; + extern RCCE_RECV_REQUEST* RCCE_recv_queue[RCCE_MAXNP]; +#else + extern RCCE_SEND_REQUEST* RCCE_send_queue[RCCE_MAXNP]; + extern RCCE_RECV_REQUEST* RCCE_recv_queue; +#endif +#endif + +//#ifdef USE_FLAG_EXPERIMENTAL +extern t_vcharp RCCE_flag_buffer[RCCE_MAXNP]; +//#endif + +#ifndef __hermit__ +extern t_vcharp RCCE_fool_write_combine_buffer; +#endif +extern t_vcharp RCCE_comm_buffer[RCCE_MAXNP]; +extern int RCCE_NP; +extern int RCCE_BUFF_SIZE; +#ifndef COPPERRIDGE + extern omp_lock_t RCCE_corelock[RCCE_MAXNP]; + extern t_vchar RC_comm_buffer[RCCE_MAXNP*RCCE_BUFF_SIZE_MAX]; + extern t_vchar RC_shm_buffer[RCCE_SHM_SIZE_MAX]; +#endif +extern int RC_MY_COREID; +extern int RC_COREID[RCCE_MAXNP]; +extern double RC_REFCLOCKGHZ; +extern int RCCE_IAM; +extern int RCCE_debug_synch; +extern 
int RCCE_debug_comm; +extern int RCCE_debug_debug; +extern int RCCE_debug_RPC; +#ifdef SINGLEBITFLAGS + extern RCCE_FLAG_LINE RCCE_flags; + extern int WORDSIZE; + extern int LEFTMOSTBIT; + RCCE_FLAG_STATUS RCCE_bit_value(t_vcharp, int); + RCCE_FLAG_STATUS RCCE_flip_bit_value(t_vcharp, int); + int RCCE_write_bit_value(t_vcharp, int, RCCE_FLAG_STATUS); +#endif + +extern int RCCE_comm_init_val; + +void RCCE_malloc_init(t_vcharp, size_t); +void RCCE_shmalloc_init(t_vcharp, size_t); +int RCCE_qsort(char *, size_t, size_t, int (*)(const void*, const void*)); +int id_compare(const void *, const void *); +#if 0 +int RCCE_probe(RCCE_FLAG); +#endif +int RCCE_error_return(int, int); +#ifdef __hermit__ +#define RC_cache_invalidate() {} +#else +void RC_cache_invalidate(void); +#endif +int RCCE_acquire_treelock(RCCE_COMM*); +int RCCE_release_treelock(RCCE_COMM*); +int RCCE_TNS_barrier(RCCE_COMM*); +int RCCE_acquire_lock(int); +int RCCE_try_lock(int); +int RCCE_backoff_lock(int); +int RCCE_release_lock(int); +int RCCE_global_color(int, void *); +t_vcharp RC_COMM_BUFFER_START(int); +//#ifdef USE_FLAG_EXPERIMENTAL +t_vcharp RC_FLAG_BUFFER_START(int); +//#endif + +#ifndef GORY + t_vcharp RCCE_malloc(size_t); + t_vcharp RCCE_malloc_request(size_t, size_t *); + t_vcharp RCCE_palloc(size_t, int); + void RCCE_free(t_vcharp); + int RCCE_put(t_vcharp, t_vcharp, int, int); + int RCCE_get(t_vcharp, t_vcharp, int, int); + int RCCE_wait_until(RCCE_FLAG, RCCE_FLAG_STATUS); + int RCCE_test_flag(RCCE_FLAG, RCCE_FLAG_STATUS, int *); + int RCCE_flag_alloc(RCCE_FLAG *); + int RCCE_flag_free(RCCE_FLAG *); + int RCCE_flag_write(RCCE_FLAG *, RCCE_FLAG_STATUS, int); + int RCCE_flag_read(RCCE_FLAG, RCCE_FLAG_STATUS *, int); +#ifdef USE_FLAG_EXPERIMENTAL + int RCCE_put_flag(t_vcharp, t_vcharp, int, int); + int RCCE_get_flag(t_vcharp, t_vcharp, int, int); +#endif +#ifdef USE_TAGGED_FLAGS + int RCCE_flag_write_tagged(RCCE_FLAG *, RCCE_FLAG_STATUS, int, void*, int); + int RCCE_flag_read_tagged(RCCE_FLAG, 
RCCE_FLAG_STATUS *, int, void*, int); + int RCCE_wait_tagged(RCCE_FLAG, RCCE_FLAG_STATUS, void *, int); + int RCCE_test_tagged(RCCE_FLAG, RCCE_FLAG_STATUS, int *, void *, int); +#endif +#endif + +#ifdef _OPENMP + #pragma omp threadprivate (RC_COREID, RC_MY_COREID, RC_REFCLOCKGHZ) + #pragma omp threadprivate (RCCE_comm_buffer) + #pragma omp threadprivate (RCCE_BUFF_SIZE) + #pragma omp threadprivate (RCCE_IAM, RCCE_NP) + #pragma omp threadprivate (RCCE_debug_synch, RCCE_debug_comm, RCCE_debug_debug) + #ifdef SINGLEBITFLAGS + #pragma omp threadprivate (RCCE_flags, WORDSIZE, LEFTMOSTBIT) + #endif + #ifndef GORY + #pragma omp threadprivate (RCCE_send_queue, RCCE_recv_queue) + #pragma omp threadprivate (RCCE_sent_flag, RCCE_ready_flag) +#ifdef USE_PROBE_FLAGS + #pragma omp threadprivate (RCCE_probe_flag) +#endif +#ifdef USE_PIPELINE_FLAGS + #pragma omp threadprivate (RCCE_sent_flag_pipe, RCCE_ready_flag_pipe) +#endif + #pragma omp threadprivate (RCCE_buff_ptr, RCCE_chunk) + #pragma omp threadprivate (RCCE_flags_start) + #endif +#endif + +#ifdef SHMADD +unsigned int getCOREID(); +unsigned int readTILEID(); +unsigned int readLUT(unsigned int); +void writeLUT(unsigned int, unsigned int); +#endif + +#endif diff --git a/hermit/usr/ircce/RCCE_malloc.c b/hermit/usr/ircce/RCCE_malloc.c new file mode 100644 index 000000000..52f40fcdb --- /dev/null +++ b/hermit/usr/ircce/RCCE_malloc.c @@ -0,0 +1,255 @@ +//*************************************************************************************** +// MPB memory allocation routines. +//*************************************************************************************** +// +// Author: Rob F. Van der Wijngaart +// Intel Corporation +// Date: 008/30/2010 +// +//*************************************************************************************** +// +// Copyright 2010 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "RCCE_lib.h"

//......................................................................................
// GLOBAL VARIABLES USED BY THE LIBRARY
//......................................................................................
// Both objects are file-local. RCCE_spacep is only assigned in the GORY branch of
// RCCE_malloc_init(); until then it keeps its zero (NULL) initial value.
static RCCE_BLOCK_S RCCE_space;   // data structure used for tracking MPB memory blocks
static RCCE_BLOCK_S *RCCE_spacep; // pointer to RCCE_space
#ifdef _OPENMP
// under OpenMP each thread gets its own copy of the allocator state
#pragma omp threadprivate (RCCE_space, RCCE_spacep)
#endif

// END GLOBAL VARIABLES USED BY THE LIBRARY
//......................................................................................
+ +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_malloc_init +//-------------------------------------------------------------------------------------- +// initialize memory allocator +//-------------------------------------------------------------------------------------- +void RCCE_malloc_init( + t_vcharp mem, // pointer to MPB space that is to be managed by allocator + size_t size // size (bytes) of managed space +) { + +#ifndef GORY + + // in the simplified API MPB memory allocation merely uses running pointers + RCCE_flags_start = mem; + RCCE_chunk = size; + RCCE_buff_ptr = mem; + +#else + + // create one block containing all memory for truly dynamic memory allocator + RCCE_spacep = &RCCE_space; + RCCE_spacep->tail = (RCCE_BLOCK *) malloc(sizeof(RCCE_BLOCK)); + RCCE_spacep->tail->free_size = size; + RCCE_spacep->tail->space = mem; + /* make a circular list by connecting tail to itself */ + RCCE_spacep->tail->next = RCCE_spacep->tail; + +#endif +} + +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_malloc +//-------------------------------------------------------------------------------------- +// Allocate memory inside MPB. In restricted mode we only use it to allocate new +// flags prompted by the creation of new communicators. Since communicators are never +// deleted, we do not need to deallocate MPB memory, so we can simply keep running +// pointers of where the next flag will be stored, and where payload data can go. In +// GORY mode we need to support fully dynamic memory allocation and deallocation. 
+//-------------------------------------------------------------------------------------- +t_vcharp RCCE_malloc( + size_t size // requested space +) { + + t_vcharp result; + +#ifndef GORY + + // new flag takes exactly one cache line, whether using single bit flags are not + if (size != RCCE_LINE_SIZE) { + fprintf(stderr, "ERROR in RCCE_malloc(): size != RCCE_LINE_SIZE!\n"); + exit(-1); + return(0); + } + + // if chunk size becomes zero, we have allocated too many flags + if (!(RCCE_chunk-RCCE_LINE_SIZE)) { + fprintf(stderr, "ERROR in RCCE_malloc(): No more MPB space left!\n"); + exit(-1); + return(0); + } + + result = RCCE_flags_start; + + // reduce maximum size of message payload chunk + RCCE_chunk -= RCCE_LINE_SIZE; + + // move running pointer to next available flags line + RCCE_flags_start += RCCE_LINE_SIZE; + + // move running pointer to new start of payload data area + RCCE_buff_ptr += RCCE_LINE_SIZE; + return(result); + +#else + + // simple memory allocator, loosely based on public domain code developed by + // Michael B. Allen and published on "The Scripts--IT /Developers Network". + // Approach: + // - maintain linked list of pointers to memory. A block is either completely + // malloced (free_size = 0), or completely free (free_size > 0). + // The space field always points to the beginning of the block + // - malloc: traverse linked list for first block that has enough space + // - free: Check if pointer exists. If yes, check if the new block should be + // merged with neighbors. Could be one or two neighbors. + + RCCE_BLOCK *b1, *b2, *b3; // running pointers for blocks + + if (size==0 || size%RCCE_LINE_SIZE!=0) return 0; + + // always first check if the tail block has enough space, because that + // is the most likely. If it does and it is exactly enough, we still + // create a new block that will be the new tail, whose free space is + // zero. 
This acts as a marker of where free space of predecessor ends + b1 = RCCE_spacep->tail; + if (b1->free_size >= size) { + // need to insert new block; new order is: b1->b2 (= new tail) + b2 = (RCCE_BLOCK *) malloc(sizeof(RCCE_BLOCK)); + b2->next = b1->next; + b1->next = b2; + b2->free_size = b1->free_size-size; + b2->space = b1->space + size; + b1->free_size = 0; + // need to update the tail + RCCE_spacep->tail = b2; + return(b1->space); + } + + // tail didn't have enough space; loop over whole list from beginning + while (b1->next->free_size < size) { + if (b1->next == RCCE_spacep->tail) { + return NULL; // we came full circle + } + b1 = b1->next; + } + + b2 = b1->next; + if (b2->free_size > size) { // split block; new block order: b1->b2->b3 + b3 = (RCCE_BLOCK *) malloc(sizeof(RCCE_BLOCK)); + b3->next = b2->next; // reconnect pointers to add block b3 + b2->next = b3; // " " " " " " + b3->free_size = b2->free_size - size; // b3 gets remainder free space + b3->space = b2->space + size; // need to shift space pointer + } + b2->free_size = 0; // block b2 is completely used + return (b2->space); +#endif +} + + +t_vcharp RCCE_palloc( + size_t size, // requested space + int CoreID // location +) { + + t_vcharp result = RCCE_malloc(size); + + if (result) + result = RCCE_comm_buffer[CoreID]+(result-RCCE_comm_buffer[RCCE_IAM]); + + return result; +} + +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_free +//-------------------------------------------------------------------------------------- +// Deallocate memory in MPB; only used in GORY mode +//-------------------------------------------------------------------------------------- +void RCCE_free( + t_vcharp ptr // pointer to data to be freed + ) { + + RCCE_BLOCK *b1, *b2, *b3; // running block pointers + int j1, j2; // booleans determining merging of blocks + + // loop over whole list from the beginning until we locate space ptr + b1 = RCCE_spacep->tail; + while 
(b1->next->space != ptr && b1->next != RCCE_spacep->tail) { + b1 = b1->next; + } + + // b2 is target block whose space must be freed + b2 = b1->next; + // tail either has zero free space, or hasn't been malloc'ed + if (b2 == RCCE_spacep->tail) return; + + // reset free space for target block (entire block) + b3 = b2->next; + b2->free_size = b3->space - b2->space; + + // determine with what non-empty blocks the target block can be merged + j1 = (b1->free_size>0 && b1!=RCCE_spacep->tail); // predecessor block + j2 = (b3->free_size>0 || b3==RCCE_spacep->tail); // successor block + + if (j1) { + if (j2) { // splice all three blocks together: (b1,b2,b3) into b1 + b1->next = b3->next; + b1->free_size += b3->free_size + b2->free_size; + if (b3==RCCE_spacep->tail) RCCE_spacep->tail = b1; + free(b3); + } + else { // only merge (b1,b2) into b1 + b1->free_size += b2->free_size; + b1->next = b3; + } + free(b2); + } + else { + if (j2) { // only merge (b2,b3) into b2 + b2->next = b3->next; + b2->free_size += b3->free_size; + if (b3==RCCE_spacep->tail) RCCE_spacep->tail = b2; + free(b3); + } + } +} + +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_malloc_request +//-------------------------------------------------------------------------------------- +// this function tries to return a (padded) amount of space in the MPB of size +// "size" bytes. 
If not available, the function keeps halving space until it fits +//-------------------------------------------------------------------------------------- +t_vcharp RCCE_malloc_request( + size_t size, // requested number of bytes + size_t *chunk // number of bytes of space returned + ) { + + t_vcharp combuf; + + combuf = 0; + *chunk = PAD32byte(size); + while (!combuf && *chunk >= RCCE_LINE_SIZE) { + combuf = RCCE_malloc(*chunk); + if (!combuf) *chunk = PAD32byte(*chunk/2); + } + return (combuf); +} diff --git a/hermit/usr/ircce/RCCE_put.c b/hermit/usr/ircce/RCCE_put.c new file mode 100644 index 000000000..fdc97c999 --- /dev/null +++ b/hermit/usr/ircce/RCCE_put.c @@ -0,0 +1,165 @@ +//*************************************************************************************** +// Put data into communication buffer. +//*************************************************************************************** +// +// Author: Rob F. Van der Wijngaart +// Intel Corporation +// Date: 008/30/2010 +// +//*************************************************************************************** +// +// Copyright 2010 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// [2010-11-03] switched to SCC-optimized memcpy() functions in scc_memcpy.h: +// - memcpy_to_mpb() +// - memcpy_from_mpb() +// by Stefan Lankes, Carsten Clauss, Chair for Operating Systems, +// RWTH Aachen University +// +#include "RCCE_lib.h" + +#if defined(COPPERRIDGE) && !defined(__hermit__) +#include "scc_memcpy.h" +#endif + +void *RCCE_memcpy_put(void *dest, const void *src, size_t count) +{ // function wrapper for external usage of improved memcpy()... +#if defined(COPPERRIDGE) && !defined(__hermit__) + return memcpy_to_mpb(dest, src, count); +#else + return memcpy(dest, src, count); +#endif +} + +#if defined(COPPERRIDGE) && !defined(__hermit__) +#define RCCE_memcpy_put(a,b,c) memcpy_to_mpb(a, b, c) +#else +#define RCCE_memcpy_put(a,b,c) memcpy(a, b, c) +#endif + +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_put +//-------------------------------------------------------------------------------------- +// copy data from address "source" in the local MPB or the calling UE's private memory +// to address "target" in the remote MPB. 
We do not test to see if a move from the +// calling UE's private memory stays within allocated memory +//-------------------------------------------------------------------------------------- +int RCCE_put( + t_vcharp target, // target buffer, MPB + t_vcharp source, // source buffer, MPB or private memory + int num_bytes, + int ID + ) { + +#ifdef GORY + // we only need to do tests in GORY mode; in non-GORY mode ths function is never + // called by the user, but only be the library + int copy_mode; + + // check validity of parameters + if (!target) return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_TARGET)); + if (!source) return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_SOURCE)); + if (ID<0 || + ID>=RCCE_NP) return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ID)); + if (num_bytes < 0 || num_bytes%RCCE_LINE_SIZE!=0) + return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_MESSAGE_LENGTH)); + // determine if target data is in MPB; check using local buffer boundaries + if (target - RCCE_comm_buffer[RCCE_IAM]>=0 && + target+num_bytes - (RCCE_comm_buffer[RCCE_IAM] + RCCE_BUFF_SIZE)<=0) + // shift target address to point to remote MPB + target = RCCE_comm_buffer[ID]+(target-RCCE_comm_buffer[RCCE_IAM]); + else return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_TARGET)); + + // source can be either local MPB or private memory + if (source - RCCE_comm_buffer[RCCE_IAM] >= 0 && + source+num_bytes - (RCCE_comm_buffer[RCCE_IAM] + RCCE_BUFF_SIZE)<=0) + copy_mode = BOTH_IN_COMM_BUFFER; + else + copy_mode = SOURCE_IN_PRIVATE_MEMORY; + + // make sure that if the copy is between locations within the same MPB + // there is no overlap between source and target address ranges + if ( copy_mode == BOTH_IN_COMM_BUFFER) { + if (((source-target)>0 && (source+num_bytes-target)<0) || + ((target-source)>0 && (target+num_bytes-source)<0)) { + return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_DATA_OVERLAP)); + } + } + + // ascertain that the start of the buffer is cache line aligned + int 
start_index = target-RCCE_comm_buffer[ID]; + if (start_index%RCCE_LINE_SIZE!=0) + return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ALIGNMENT)); + + // only verify alignment of the target if it is in the MPB + if (copy_mode == BOTH_IN_COMM_BUFFER) { + start_index = source-RCCE_comm_buffer[ID]; + if (start_index%RCCE_LINE_SIZE!=0) + return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ALIGNMENT)); + } +#else + // in non-GORY mode we only need to retain the MPB target shift; we + // already know the target is in the MPB, not private memory + target = RCCE_comm_buffer[ID]+(target-RCCE_comm_buffer[RCCE_IAM]); +#endif + + // make sure that any data that has been put in our MPB by another UE is visible +#ifdef _OPENMP + #pragma omp flush +#endif + + // do the actual copy + RC_cache_invalidate(); + + RCCE_memcpy_put((void *)target, (void *)source, num_bytes); + + // flush data to make it visible to all threads; cannot use flush list because it + // concerns malloced space +#ifdef _OPENMP + #pragma omp flush +#endif + +#ifdef USE_FLAG_EXPERIMENTAL + if(RCCE_debug_synch) + fprintf(STDERR,"UE %d put data: %d address %X \n", RCCE_IAM,*source,target); +#endif + + return(RCCE_SUCCESS); +} + +#ifdef USE_FLAG_EXPERIMENTAL +int RCCE_put_flag( + t_vcharp target, // target buffer, MPB + t_vcharp source, // source buffer, MPB or private memory + int num_bytes, + int ID + ) { + + target = RCCE_flag_buffer[ID]+(target-RCCE_comm_buffer[RCCE_IAM]); + + if (RCCE_debug_synch) + fprintf(STDERR,"UE %d put flag: %x address %X \n", RCCE_IAM,*source,target); + + //if( num_bytes == 1 ) { + *target = *source; + return(RCCE_SUCCESS); + //} + + //RCCE_memcpy_put((void *)target, (void *)source, 1); + + //*RCCE_fool_write_combine_buffer = 1; + //return(RCCE_ERROR_DEBUG_FLAG); +} +#endif diff --git a/hermit/usr/ircce/RCCE_qsort.c b/hermit/usr/ircce/RCCE_qsort.c new file mode 100644 index 000000000..f436fccf4 --- /dev/null +++ b/hermit/usr/ircce/RCCE_qsort.c @@ -0,0 +1,131 @@ 
//***************************************************************************************
// Sorting-related routines
//***************************************************************************************
//
// Author: Rob F. Van der Wijngaart
//         Intel Corporation
// Date:   008/30/2010
//
//***************************************************************************************
//
// Copyright 2010 Intel Corporation
//
//    Licensed under the Apache License, Version 2.0 (the "License");
//    you may not use this file except in compliance with the License.
//    You may obtain a copy of the License at
//
//        http://www.apache.org/licenses/LICENSE-2.0
//
//    Unless required by applicable law or agreed to in writing, software
//    distributed under the License is distributed on an "AS IS" BASIS,
//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//    See the License for the specific language governing permissions and
//    limitations under the License.
//
//--------------------------------------------------------------------------------------
// FUNCTION: id_compare
//--------------------------------------------------------------------------------------
// comparison function used in routine to sort core IDs
//
// Both arguments point to int values. Returns -1, 0 or 1 when the first value is
// smaller than, equal to or larger than the second.
//--------------------------------------------------------------------------------------
int id_compare(
  const void *e1, // first element to be compared
  const void *e2  // second element to be compared
  ) {
  int v1 = *(const int *)e1;
  int v2 = *(const int *)e2;
  return (v1<v2) ? -1 : (v1>v2) ? 1 : 0;
}

// qsort -- qsort interface implemented by faster quicksort.
// J. L. Bentley and M. D. McIlroy, SPE 23 (1993) 1249-1265.
// Copyright 1993, John Wiley.

#include <stddef.h>
#include <stdint.h>

// swaptype selects how elements are exchanged:
//   0 - element is exactly one aligned long
//   1 - element is several aligned longs
//   2 - anything else, swapped byte by byte
// The address is converted with uintptr_t instead of subtracting a null pointer.
    /*assume sizeof(long) is a power of 2 */
#define SWAPINIT(a, es) swaptype =                                     \
    ((uintptr_t)(a) | (es)) % sizeof(long) ? 2 : (es) > sizeof(long);
// exchange n bytes between parmi and parmj in units of TYPE (n is modified)
#define swapcode(TYPE, parmi, parmj, n) {  \
    register TYPE *pi = (TYPE *) (parmi);  \
    register TYPE *pj = (TYPE *) (parmj);  \
    do {                                   \
        register TYPE t = *pi;             \
        *pi++ = *pj;                       \
        *pj++ = t;                         \
    } while ((n -= sizeof(TYPE)) > 0);     \
}

// exchange n bytes between a and b, by longs if swaptype allows it
static void swapfunc(char *a, char *b, size_t n, int swaptype)
{   if (swaptype <= 1) swapcode(long, a, b, n)
    else swapcode(char, a, b, n)
}
// exchange one element; relies on locals t, es and swaptype of the caller
#define swap(a, b)                         \
    if (swaptype == 0) {                   \
        t = *(long*)(a);                   \
        *(long*)(a) = *(long*)(b);         \
        *(long*)(b) = t;                   \
    } else                                 \
        swapfunc(a, b, es, swaptype)

// make pv point at the partition value: either moved to the front of the array
// or, for single-long elements, copied into the local v
#define PVINIT(pv, pm)                                \
    if (swaptype != 0) { pv = a; swap(pv, pm); }      \
    else { pv = (char*)&v; *(long*)pv = *(long*)pm; }

#define vecswap(a, b, n) if (n > 0) swapfunc(a, b, n, swaptype)

#define min(x, y) ((x)<=(y) ? (x) : (y))

// return whichever of a, b, c holds the median value according to cmp
static char *med3(char *a, char *b, char *c, int (*cmp)(const void*, const void*))
{   return cmp(a, b) < 0 ?
          (cmp(b, c) < 0 ? b : cmp(a, c) < 0 ? c : a)
        : (cmp(b, c) > 0 ? b : cmp(a, c) > 0 ? c : a);
}

// Sort the n elements of es bytes each that start at a, in place, using cmp
// (same contract as the comparison function of the C library qsort).
// NOTE(review): RCCE_lib.h declares this function with return type int while it
// is defined void here -- the two should be reconciled.
void RCCE_qsort(char *a, size_t n, size_t es, int (*cmp)(const void*, const void*))
{
    char *pa, *pb, *pc, *pd, *pl, *pm, *pn, *pv;
    int r, swaptype;
    long t, v;
    size_t s;

    SWAPINIT(a, es);
    if (n < 7) {     /* Insertion sort on smallest arrays */
        for (pm = a + es; pm < a + n*es; pm += es)
            for (pl = pm; pl > a && cmp(pl-es, pl) > 0; pl -= es)
                swap(pl, pl-es);
        return;
    }
    pm = a + (n/2)*es;    /* Small arrays, middle element */
    if (n > 7) {
        pl = a;
        pn = a + (n-1)*es;
        if (n > 40) {     /* Big arrays, pseudomedian of 9 */
            s = (n/8)*es;
            pl = med3(pl, pl+s, pl+2*s, cmp);
            pm = med3(pm-s, pm, pm+s, cmp);
            pn = med3(pn-2*s, pn-s, pn, cmp);
        }
        pm = med3(pl, pm, pn, cmp);  /* Mid-size, med of 3 */
    }
    PVINIT(pv, pm);       /* pv points to partition value */
    pa = pb = a;
    pc = pd = a + (n-1)*es;
    /* elements equal to the partition value are parked at both ends */
    for (;;) {
        while (pb <= pc && (r = cmp(pb, pv)) <= 0) {
            if (r == 0) { swap(pa, pb); pa += es; }
            pb += es;
        }
        while (pb <= pc && (r = cmp(pc, pv)) >= 0) {
            if (r == 0) { swap(pc, pd); pd -= es; }
            pc -= es;
        }
        if (pb > pc) break;
        swap(pb, pc);
        pb += es;
        pc -= es;
    }
    /* move the parked equal elements into the middle, then sort both sides */
    pn = a + n*es;
    s = min(pa-a,  pb-pa   ); vecswap(a,  pb-s, s);
    s = min(pd-pc, pn-pd-es); vecswap(pb, pn-s, s);
    if ((s = pb-pa) > es) RCCE_qsort(a,    s/es, es, cmp);
    if ((s = pd-pc) > es) RCCE_qsort(pn-s, s/es, es, cmp);
}

// ---- patch boundary: diff header of the next file, kept verbatim ----
// diff --git a/hermit/usr/ircce/RCCE_recv.c b/hermit/usr/ircce/RCCE_recv.c new file mode 100644 index 000000000..32fc3d41c --- /dev/null +++ b/hermit/usr/ircce/RCCE_recv.c @@ -0,0 +1,1350 @@
//***************************************************************************************
// Synchronized receive routines.
//***************************************************************************************
//
// Author: Rob F.
Van der Wijngaart +// Intel Corporation +// Date: 008/30/2010 +// +//*************************************************************************************** +// +// Copyright 2010 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// [2010-10-25] added support for non-blocking send/recv operations +// - RCCE_isend(), ..._test(), ..._wait(), ..._push() +// - RCCE_irecv(), ..._test(), ..._wait(), ..._push() +// by Carsten Clauss, Chair for Operating Systems, +// RWTH Aachen University +// +// [2012-09-10] added support for "tagged" flags +// by Carsten Clauss, Chair for Operating Systems, +// RWTH Aachen University +// +#include "RCCE_lib.h" +#if defined(COPPERRIDGE) && !defined(__hermit__) +#include "scc_memcpy.h" +#else +#define memcpy_scc memcpy +#endif + +#include +#include + +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_recv_general +//-------------------------------------------------------------------------------------- +// Synchronized receive function (gory and non-gory mode) +//-------------------------------------------------------------------------------------- +static int RCCE_recv_general( + char *privbuf, // destination buffer in local private memory (receive buffer) + t_vcharp combuf, // intermediate buffer in MPB + size_t chunk, // size of MPB available for this message (bytes) + RCCE_FLAG *ready, // flag indicating whether receiver is ready + RCCE_FLAG *sent, 
// flag indicating whether message has been sent by source + size_t size, // size of message (bytes) + int source, // UE that sent the message + int *test, // if 1 upon entry, do nonblocking receive; if message available + // set to 1, otherwise to 0 + int copy, // set to 0 for cancel function + int pipe, // use pipelining? + int mcast, // multicast? + void* tag, // additional tag? + int len, // length of additional tag + RCCE_FLAG *probe // flag for probing for incoming messages + ) { + + char padline[RCCE_LINE_SIZE]; // copy buffer, used if message not multiple of line size + size_t wsize, // offset within receive buffer when pulling in "chunk" bytes + remainder, // bytes remaining to be received + nbytes; // number of bytes to be received in single RCCE_get call + int first_test; // only use first chunk to determine if message has been received yet + char *bufptr; // running pointer inside privbuf for current location + RCCE_FLAG_STATUS flag; + + first_test = 1; + +#ifdef USE_REMOTE_PUT_LOCAL_GET + if(mcast) return(RCCE_error_return(1, RCCE_ERROR_NO_MULTICAST_SUPPORT)); +#endif + + if(probe) { +#ifdef USE_TAGGED_FLAGS + RCCE_wait_tagged(*probe, RCCE_FLAG_SET, tag, len); +#else + RCCE_wait_until(*probe, RCCE_FLAG_SET); +#endif + RCCE_flag_write(probe, RCCE_FLAG_UNSET, RCCE_IAM); + } + +#ifdef USE_SYNCH_FOR_ZERO_BYTE + // synchronize even in case of zero byte messages: + if(size == 0) { +#ifdef USE_REMOTE_PUT_LOCAL_GET + RCCE_flag_write(ready, RCCE_FLAG_SET, source); +#ifdef USE_TAGGED_FLAGS + if(!probe) + RCCE_wait_tagged(*sent, RCCE_FLAG_SET, tag, len); + else +#endif + RCCE_wait_until(*sent, RCCE_FLAG_SET); + RCCE_flag_write(sent, RCCE_FLAG_UNSET, RCCE_IAM); +#else // LOCAL PUT / REMOTE GET: (standard) +#ifdef USE_TAGGED_FLAGS + if(!probe) + RCCE_wait_tagged(*sent, RCCE_FLAG_SET, tag, len); + else +#endif + RCCE_wait_until(*sent, RCCE_FLAG_SET); + RCCE_flag_write(ready, RCCE_FLAG_SET, source); +#endif // !USE_REMOTE_PUT_LOCAL_GET + return(RCCE_SUCCESS); + } 
+#endif // USE_SYNCH_FOR_ZERO_BYTE + +#ifdef USE_REMOTE_PUT_LOCAL_GET + + first_test = 0; /* force blocking function, does not work for now */ + *test = 1; + + // tell the source I am ready to receive + RCCE_flag_write(ready, RCCE_FLAG_SET, source); + + if(!pipe) { + // receive data in units of available chunk size of MPB + for (wsize=0; wsize< (size/chunk)*chunk; wsize+=chunk) { + bufptr = privbuf + wsize; + nbytes = chunk; + // if function is called in test mode, check if first chunk has been sent already. + // If so, proceed as usual. If not, exit immediately + if (*test && first_test) { + first_test = 0; + RCCE_test_flag(*sent, RCCE_FLAG_SET, test); + if (!(*test)) return(RCCE_SUCCESS); + } + + if (wsize != 0) + RCCE_flag_write(ready, RCCE_FLAG_SET, source); + +#ifdef USE_TAGGED_FLAGS + if( (wsize == 0) && (!probe) ) + RCCE_wait_tagged(*sent, RCCE_FLAG_SET, tag, len); + else +#endif + RCCE_wait_until(*sent, RCCE_FLAG_SET); + + RCCE_flag_write(sent, RCCE_FLAG_UNSET, RCCE_IAM); + + // copy data from local MPB space to private memory + if(copy) RCCE_get((t_vcharp)bufptr, combuf, nbytes, RCCE_IAM); + } + } + +#else // LOCAL PUT / REMOTE GET: (standard) + + if(!pipe) { + // receive data in units of available chunk size of MPB + for (wsize=0; wsize< (size/chunk)*chunk; wsize+=chunk) { + bufptr = privbuf + wsize; + nbytes = chunk; + // if function is called in test mode, check if first chunk has been sent already. + // If so, proceed as usual. 
If not, exit immediately + if (*test && first_test) { + first_test = 0; + RCCE_test_flag(*sent, RCCE_FLAG_SET, test); + if (!(*test)) return(RCCE_SUCCESS); + } + if(!mcast) + { +#ifdef USE_TAGGED_FLAGS + if( (wsize == 0) && (!probe) ) + RCCE_wait_tagged(*sent, RCCE_FLAG_SET, tag, len); + else +#endif + RCCE_wait_until(*sent, RCCE_FLAG_SET); + + RCCE_flag_write(sent, RCCE_FLAG_UNSET, RCCE_IAM); + } + else { + RCCE_TNS_barrier(&RCCE_COMM_WORLD); + } + // copy data from remote MPB space to private memory + if(copy) RCCE_get((t_vcharp)bufptr, combuf, nbytes, source); + + if(!mcast) { + // tell the source I have moved data out of its comm buffer + RCCE_flag_write(ready, RCCE_FLAG_SET, source); + } + else { + RCCE_TNS_barrier(&RCCE_COMM_WORLD); + } + } + } +#endif // !USE_REMOTE_PUT_LOCAL_GET + +#ifdef USE_REMOTE_PUT_LOCAL_GET + + else // if(!pipe) -> if(pipe) + { + // pipelined version of send/recv: + + size_t subchunk1, subchunk2; + + for (wsize=0; wsize < (size/chunk)*chunk; wsize+=chunk) { + + if (*test && first_test) { + first_test = 0; + RCCE_test_flag(*sent, RCCE_FLAG_SET, test); + if (!(*test)) return(RCCE_SUCCESS); + } + + if(wsize == 0) { + // allign sub-chunks to cache line granularity: + subchunk1 = ( (chunk / 2) / RCCE_LINE_SIZE ) * RCCE_LINE_SIZE; + subchunk2 = chunk - subchunk1; + } + + bufptr = privbuf + wsize; + nbytes = subchunk1; + +#ifdef USE_TAGGED_FLAGS + if( (wsize == 0) && (!probe) ) + RCCE_wait_tagged(*sent, RCCE_FLAG_SET, tag, len); + else +#endif + RCCE_wait_until(*sent, RCCE_FLAG_SET); + + RCCE_flag_write(sent, RCCE_FLAG_UNSET, RCCE_IAM); + + RCCE_flag_write(ready, RCCE_FLAG_SET, source); + + // copy data chunk 1 from local MPB space to private memory + if(copy) RCCE_get((t_vcharp)bufptr, combuf, nbytes, RCCE_IAM); + + bufptr = privbuf + wsize + subchunk1; + nbytes = subchunk2; + + RCCE_wait_until(*sent, RCCE_FLAG_SET); + RCCE_flag_write(sent, RCCE_FLAG_UNSET, RCCE_IAM); + + if (wsize + chunk < (size/chunk)*chunk) + RCCE_flag_write(ready, 
RCCE_FLAG_SET, source); + + // copy data chunk 2 from local MPB space to private memory + if(copy) RCCE_get((t_vcharp)bufptr, combuf + subchunk1, nbytes, RCCE_IAM); + } + + } // if(pipe) + +#else // LOCAL PUT / REMOTE GET: (standard) + + else // if(!pipe) -> if(pipe) + { + // pipelined version of send/recv: + + size_t subchunk1, subchunk2; + + for (wsize=0; wsize < (size/chunk)*chunk; wsize+=chunk) { + + if (*test && first_test) { + first_test = 0; + RCCE_test_flag(*sent, RCCE_FLAG_SET, test); + if (!(*test)) return(RCCE_SUCCESS); + } + + if(wsize == 0) { + // allign sub-chunks to cache line granularity: + subchunk1 = ( (chunk / 2) / RCCE_LINE_SIZE ) * RCCE_LINE_SIZE; + subchunk2 = chunk - subchunk1; + } + + bufptr = privbuf + wsize; + nbytes = subchunk1; + +#ifdef USE_TAGGED_FLAGS + if( (wsize == 0) && (!probe) ) + RCCE_wait_tagged(*sent, RCCE_FLAG_SET, tag, len); + else +#endif + RCCE_wait_until(*sent, RCCE_FLAG_SET); + + RCCE_flag_write(sent, RCCE_FLAG_UNSET, RCCE_IAM); + RCCE_flag_write(ready, RCCE_FLAG_SET, source); + + // copy data chunk 1 from remote MPB space to private memory + if(copy) RCCE_get((t_vcharp)bufptr, combuf, nbytes, source); + + bufptr = privbuf + wsize + subchunk1; + nbytes = subchunk2; + + RCCE_wait_until(*sent, RCCE_FLAG_SET); + RCCE_flag_write(sent, RCCE_FLAG_UNSET, RCCE_IAM); + RCCE_flag_write(ready, RCCE_FLAG_SET, source); + + // copy data chunk 2 from remote MPB space to private memory + if(copy) RCCE_get((t_vcharp)bufptr, combuf + subchunk1, nbytes, source); + } + + } // if(pipe) + +#endif // !USE_REMOTE_PUT_LOCAL_GET + + remainder = size%chunk; + // if nothing is left over, we are done + if (!remainder) return(RCCE_SUCCESS); + + // receive remainder of data--whole cache lines + bufptr = privbuf + (size/chunk)*chunk; + nbytes = remainder - remainder%RCCE_LINE_SIZE; + + if (nbytes) { + + // if function is called in test mode, check if first chunk has been sent already. + // If so, proceed as usual. 
If not, exit immediately + if (*test && first_test) { + first_test = 0; + RCCE_test_flag(*sent, RCCE_FLAG_SET, test); + if (!(*test)) return(RCCE_SUCCESS); + } + +#ifdef USE_REMOTE_PUT_LOCAL_GET + + if (wsize != 0) + RCCE_flag_write(ready, RCCE_FLAG_SET, source); + +#ifdef USE_TAGGED_FLAGS + if( (wsize == 0) && (!probe) ) + RCCE_wait_tagged(*sent, RCCE_FLAG_SET, tag, len); + else +#endif + RCCE_wait_until(*sent, RCCE_FLAG_SET); + + RCCE_flag_write(sent, RCCE_FLAG_UNSET, RCCE_IAM); + + // copy data from local MPB space to private memory + if(copy) RCCE_get((t_vcharp)bufptr, combuf, nbytes, RCCE_IAM); + wsize += nbytes; + +#else // LOCAL PUT / REMOTE GET: (standard) + + if(!mcast) { +#ifdef USE_TAGGED_FLAGS + if( (wsize == 0) && (!probe) ) + RCCE_wait_tagged(*sent, RCCE_FLAG_SET, tag, len); + else +#endif + RCCE_wait_until(*sent, RCCE_FLAG_SET); + + RCCE_flag_write(sent, RCCE_FLAG_UNSET, RCCE_IAM); + } + else { + RCCE_TNS_barrier(&RCCE_COMM_WORLD); + } + + // copy data from remote MPB space to private memory + if(copy) RCCE_get((t_vcharp)bufptr, combuf, nbytes, source); + + if(!mcast) { + // tell the source I have moved data out of its comm buffer + RCCE_flag_write(ready, RCCE_FLAG_SET, source); + } + else { + RCCE_TNS_barrier(&RCCE_COMM_WORLD); + } +#endif // !USE_REMOTE_PUT_LOCAL_GET + + } // if (nbytes) + + remainder = remainder%RCCE_LINE_SIZE; + if (!remainder) return(RCCE_SUCCESS); + + // remainder is less than cache line. This must be copied into appropriately sized + // intermediate space before exact number of bytes get copied to the final destination + bufptr = privbuf + (size/chunk)*chunk + nbytes; + nbytes = RCCE_LINE_SIZE; + + // if function is called in test mode, check if first chunk has been sent already. + // If so, proceed as usual. 
If not, exit immediately + if (*test && first_test) { + first_test = 0; + RCCE_test_flag(*sent, RCCE_FLAG_SET, test); + if (!(*test)) return(RCCE_SUCCESS); + } + +#ifdef USE_REMOTE_PUT_LOCAL_GET + + if (wsize != 0) + RCCE_flag_write(ready, RCCE_FLAG_SET, source); + +#ifdef USE_TAGGED_FLAGS + if( (wsize == 0) && (!probe) ) + RCCE_wait_tagged(*sent, RCCE_FLAG_SET, tag, len); + else +#endif + RCCE_wait_until(*sent, RCCE_FLAG_SET); + + RCCE_flag_write(sent, RCCE_FLAG_UNSET, RCCE_IAM); + + // copy data from local MPB space to private memory + if(copy) { + RCCE_get((t_vcharp)padline, combuf, nbytes, RCCE_IAM); +#ifdef COPPERRIDGE + memcpy_scc(bufptr,padline,remainder); +#else + memcpy(bufptr,padline,remainder); +#endif + } + +#else // LOCAL PUT / REMOTE GET: (standard) + + if(!mcast) { +#ifdef USE_TAGGED_FLAGS + if( (wsize == 0) && (!probe) ) + RCCE_wait_tagged(*sent, RCCE_FLAG_SET, tag, len); + else +#endif + RCCE_wait_until(*sent, RCCE_FLAG_SET); + + RCCE_flag_write(sent, RCCE_FLAG_UNSET, RCCE_IAM); + } + else { + RCCE_TNS_barrier(&RCCE_COMM_WORLD); + } + + // copy data from remote MPB space to private memory + if(copy) { + RCCE_get((t_vcharp)padline, combuf, nbytes, source); +#ifdef COPPERRIDGE + memcpy_scc(bufptr,padline,remainder); +#else + memcpy(bufptr,padline,remainder); +#endif + } + + if(!mcast) { + // tell the source I have moved data out of its comm buffer + RCCE_flag_write(ready, RCCE_FLAG_SET, source); + } + else { + RCCE_TNS_barrier(&RCCE_COMM_WORLD); + } + +#endif // !USE_REMOTE_PUT_LOCAL_GET + + return(RCCE_SUCCESS); +} + + +static int RCCE_push_recv_request(RCCE_RECV_REQUEST *request) { + + char padline[RCCE_LINE_SIZE]; // copy buffer, used if message not multiple of line size + int test; // flag for calling RCCE_test_flag() + + if(request->finished) return(RCCE_SUCCESS); + + if(request->label == 1) goto label1; + if(request->label == 2) goto label2; + if(request->label == 3) goto label3; + if(request->label == 4) goto label4; + + if(request->probe) { 
+#ifdef USE_TAGGED_FLAGS + RCCE_test_tagged(*(request->probe), RCCE_FLAG_SET, &test, request->tag, request->len); +#else + RCCE_test_flag(*(request->probe), RCCE_FLAG_SET, &test); +#endif + if(!test) { + request->label = 0; + return(RCCE_PENDING); + } + RCCE_flag_write(request->probe, RCCE_FLAG_UNSET, RCCE_IAM); + } + +#ifdef USE_SYNCH_FOR_ZERO_BYTE + // synchronize even in case of zero byte messages: + if(request->size == 0) { +#ifdef USE_REMOTE_PUT_LOCAL_GET + RCCE_flag_write(request->ready, RCCE_FLAG_SET, request->source); + label1: +#ifdef USE_TAGGED_FLAGS + if(!request->probe) + RCCE_test_tagged(*(request->sent), RCCE_FLAG_SET, &test, request->tag, request->len); + else +#endif + RCCE_test_flag(*(request->sent), RCCE_FLAG_SET, &test); + if(!test) { + request->label = 1; + return(RCCE_PENDING); + } + RCCE_flag_write(request->sent, RCCE_FLAG_UNSET, RCCE_IAM); +#else // LOCAL PUT / REMOTE GET: (standard) + label1: +#ifdef USE_TAGGED_FLAGS + if(!request->probe) + RCCE_test_tagged(*(request->sent), RCCE_FLAG_SET, &test, request->tag, request->len); + else +#endif + RCCE_test_flag(*(request->sent), RCCE_FLAG_SET, &test); + if(!test) { + request->label = 1; + return(RCCE_PENDING); + } + RCCE_flag_write(request->sent, RCCE_FLAG_UNSET, RCCE_IAM); + RCCE_flag_write(request->ready, RCCE_FLAG_SET, request->source); +#endif // !USE_REMOTE_PUT_LOCAL_GET + request->finished = 1; + return(RCCE_SUCCESS); + } +#endif // USE_SYNCH_FOR_ZERO_BYTE + + + // receive data in units of available chunk size of MPB + for (; request->wsize < (request->size / request->chunk) * request->chunk; request->wsize += request->chunk) { + request->bufptr = request->privbuf + request->wsize; + request->nbytes = request->chunk; + +#ifdef USE_REMOTE_PUT_LOCAL_GET + + // tell the source I am ready to receive + RCCE_flag_write(request->ready, RCCE_FLAG_SET, request->source); + + label2: +#ifdef USE_TAGGED_FLAGS + if( (request->wsize == 0) && (!request->probe) ) + RCCE_test_tagged(*(request->sent), 
RCCE_FLAG_SET, &test, request->tag, request->len); + else +#endif + RCCE_test_flag(*(request->sent), RCCE_FLAG_SET, &test); + + if(!test) { + request->label = 2; + return(RCCE_PENDING); + } + RCCE_flag_write(request->sent, RCCE_FLAG_UNSET, RCCE_IAM); + + // copy data from local MPB space to private memory + if(request->copy) RCCE_get((t_vcharp)request->bufptr, request->combuf, request->nbytes, RCCE_IAM); + +#else // LOCAL PUT / REMOTE GET: (standard) + + label2: +#ifdef USE_TAGGED_FLAGS + if( (request->wsize == 0) && (!request->probe) ) + RCCE_test_tagged(*(request->sent), RCCE_FLAG_SET, &test, request->tag, request->len); + else +#endif + RCCE_test_flag(*(request->sent), RCCE_FLAG_SET, &test); + + if(!test) { + request->label = 2; + return(RCCE_PENDING); + } + RCCE_flag_write(request->sent, RCCE_FLAG_UNSET, RCCE_IAM); + + // copy data from remote MPB space to private memory + if(request->copy) RCCE_get((t_vcharp)request->bufptr, request->combuf, request->nbytes, request->source); + + // tell the source I have moved data out of its comm buffer + RCCE_flag_write(request->ready, RCCE_FLAG_SET, request->source); + +#endif // !USE_REMOTE_PUT_LOCAL_GET + + } // for + + request->remainder = request->size % request->chunk; + // if nothing is left over, we are done + if (!request->remainder) { + request->finished = 1; + return(RCCE_SUCCESS); + } + + // receive remainder of data--whole cache lines + request->bufptr = request->privbuf + (request->size / request->chunk) * request->chunk; + request->nbytes = request->remainder - request->remainder % RCCE_LINE_SIZE; + + if (request->nbytes) { + +#ifdef USE_REMOTE_PUT_LOCAL_GET + + // tell the source I am ready to receive + RCCE_flag_write(request->ready, RCCE_FLAG_SET, request->source); + + label3: +#ifdef USE_TAGGED_FLAGS + if( (request->wsize == 0) && (!request->probe) ) + RCCE_test_tagged(*(request->sent), RCCE_FLAG_SET, &test, request->tag, request->len); + else +#endif + RCCE_test_flag(*(request->sent), RCCE_FLAG_SET, 
&test); + + if(!test) { + request->label = 3; + return(RCCE_PENDING); + } + RCCE_flag_write(request->sent, RCCE_FLAG_UNSET, RCCE_IAM); + + // copy data from local MPB space to private memory + if(request->copy) RCCE_get((t_vcharp)request->bufptr, request->combuf, request->nbytes, RCCE_IAM); + +#else // LOCAL PUT / REMOTE GET: (standard) + + label3: +#ifdef USE_TAGGED_FLAGS + if( (request->wsize == 0) && (!request->probe) ) + RCCE_test_tagged(*(request->sent), RCCE_FLAG_SET, &test, request->tag, request->len); + else +#endif + RCCE_test_flag(*(request->sent), RCCE_FLAG_SET, &test); + + if(!test) { + request->label = 3; + return(RCCE_PENDING); + } + + RCCE_flag_write(request->sent, RCCE_FLAG_UNSET, RCCE_IAM); + + // copy data from remote MPB space to private memory + if(request->copy) RCCE_get((t_vcharp)request->bufptr, request->combuf, request->nbytes, request->source); + + // tell the source I have moved data out of its comm buffer + RCCE_flag_write(request->ready, RCCE_FLAG_SET, request->source); + +#endif // !USE_REMOTE_PUT_LOCAL_GET + + } // if(request->nbytes) + + request->remainder = request->size % request->chunk; + request->remainder = request->remainder % RCCE_LINE_SIZE; + + if (!request->remainder) { + request->finished = 1; + return(RCCE_SUCCESS); + } + + // remainder is less than cache line. 
This must be copied into appropriately sized + // intermediate space before exact number of bytes get copied to the final destination + request->bufptr = request->privbuf + (request->size / request->chunk) * request->chunk + request->nbytes; + request->nbytes = RCCE_LINE_SIZE; + +#ifdef USE_REMOTE_PUT_LOCAL_GET + + // tell the source I am ready to receive + RCCE_flag_write(request->ready, RCCE_FLAG_SET, request->source); + +label4: +#ifdef USE_TAGGED_FLAGS +#ifdef USE_PROBE_FLAGS_SHORTCUT + if(request->privbuf == NULL) + { + request->finished = 1; + return(RCCE_SUCCESS); + } +#endif + if( (request->wsize == 0) && (!request->probe) ) + RCCE_test_tagged(*(request->sent), RCCE_FLAG_SET, &test, request->tag, request->len); + else +#endif + RCCE_test_flag(*(request->sent), RCCE_FLAG_SET, &test); + + if(!test) { + request->label = 4; + return(RCCE_PENDING); + } + RCCE_flag_write(request->sent, RCCE_FLAG_UNSET, RCCE_IAM); + + // copy data from local MPB space to private memory + if(request->copy) { + RCCE_get((t_vcharp)padline, request->combuf, request->nbytes, RCCE_IAM); +#ifdef COPPERRIDGE + memcpy_scc(request->bufptr,padline,request->remainder); +#else + memcpy(request->bufptr,padline,request->remainder); +#endif + } + +#else // LOCAL PUT / REMOTE GET: (standard) + + label4: +#ifdef USE_TAGGED_FLAGS + if( (request->wsize == 0) && (!request->probe) ) + RCCE_test_tagged(*(request->sent), RCCE_FLAG_SET, &test, request->tag, request->len); + else +#endif + RCCE_test_flag(*(request->sent), RCCE_FLAG_SET, &test); + + if(!test) { + request->label = 4; + return(RCCE_PENDING); + } + RCCE_flag_write(request->sent, RCCE_FLAG_UNSET, RCCE_IAM); + + // copy data from remote MPB space to private memory + if(request->copy) { + RCCE_get((t_vcharp)padline, request->combuf, request->nbytes, request->source); +#ifdef COPPERRIDGE + memcpy_scc(request->bufptr,padline,request->remainder); +#else + memcpy(request->bufptr,padline,request->remainder); +#endif + } + + // tell the source I have 
moved data out of its comm buffer + RCCE_flag_write(request->ready, RCCE_FLAG_SET, request->source); + +#endif // !USE_REMOTE_PUT_LOCAL_GET + + request->finished = 1; + return(RCCE_SUCCESS); +} +// RCCE_init_recv_request: store the call parameters in *request and reset its progress fields (wsize, remainder, nbytes, bufptr, label, finished, next) +static void RCCE_init_recv_request( + char *privbuf, // destination buffer in local private memory (receive buffer) + t_vcharp combuf, // intermediate buffer in MPB + size_t chunk, // size of MPB available for this message (bytes) + RCCE_FLAG *ready, // flag indicating whether receiver is ready + RCCE_FLAG *sent, // flag indicating whether message has been sent by source + size_t size, // size of message (bytes) + int source, // UE that will send the message + int copy, // set to 0 for cancel function + void* tag, // additional tag? + int len, // length of additional tag + RCCE_FLAG *probe, // flag for probing for incoming messages + RCCE_RECV_REQUEST *request // request record that is filled in + ) { + + request->privbuf = privbuf; + request->combuf = combuf; + request->chunk = chunk; + request->ready = ready; + request->sent = sent; + request->size = size; + request->source = source; + + request->copy = copy; + request->tag = tag; + request->len = len; + request->probe = probe; + + request->wsize = 0; + request->remainder = 0; + request->nbytes = 0; + request->bufptr = NULL; + + request->label = 0; // resume point for RCCE_push_recv_request; 0 starts from the beginning + request->finished = 0; + + request->next = NULL; + + return; +} + +#ifndef GORY +// this is the LfBS-customized message passing API + +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_recv +//-------------------------------------------------------------------------------------- +// recv function for simplified API; use library-maintained variables for synchronization +// and set the test variable to 0 (ignore) +//-------------------------------------------------------------------------------------- +int RCCE_recv(char *privbuf, size_t size, int source) { + int ignore; + +#ifdef USE_PROBE_FLAGS + RCCE_FLAG* probe = &RCCE_probe_flag[source]; +#else + RCCE_FLAG*
probe = 0; +#endif + +#ifndef USE_REMOTE_PUT_LOCAL_GET + if(RCCE_recv_queue[source] != NULL) +#else + if(RCCE_recv_queue != NULL) +#endif + return(RCCE_REJECTED); + + ignore = 0; +#ifdef USE_TAGGED_FOR_SHORT + if(size <= (RCCE_LINE_SIZE - sizeof(int))) + { +#ifdef USE_PROBE_FLAGS + RCCE_wait_tagged(*probe, RCCE_FLAG_SET, privbuf, size); + RCCE_flag_write(probe, RCCE_FLAG_UNSET, RCCE_IAM); +#endif + +#ifdef USE_REMOTE_PUT_LOCAL_GET + + RCCE_flag_write(&RCCE_ready_flag[RCCE_IAM], RCCE_FLAG_SET, source); + +#ifndef USE_PROBE_FLAGS_SHORTCUT +#ifdef USE_PROBE_FLAGS + RCCE_wait_until(RCCE_sent_flag[source], RCCE_FLAG_SET); +#else + RCCE_wait_tagged(RCCE_sent_flag[source], RCCE_FLAG_SET, privbuf, size); +#endif + RCCE_flag_write(&RCCE_sent_flag[source], RCCE_FLAG_UNSET, RCCE_IAM); +#endif + +#else // LOCAL PUT / REMOTE GET: (standard) + +#ifdef USE_PROBE_FLAGS + RCCE_wait_until(RCCE_sent_flag[source], RCCE_FLAG_SET); +#else + RCCE_wait_tagged(RCCE_sent_flag[source], RCCE_FLAG_SET, privbuf, size); +#endif + RCCE_flag_write(&RCCE_sent_flag[source], RCCE_FLAG_UNSET, RCCE_IAM); + + RCCE_flag_write(&RCCE_ready_flag[RCCE_IAM], RCCE_FLAG_SET, source); + +#endif // !USE_REMOTE_PUT_LOCAL_GET + + return(RCCE_SUCCESS); + } + else +#endif + return(RCCE_recv_general(privbuf, RCCE_buff_ptr, RCCE_chunk, + &RCCE_ready_flag[RCCE_IAM], &RCCE_sent_flag[source], + size, source, &ignore, + 1, 0, 0, // copy, pipe, mcast + NULL, 0, probe)); // tag, len, probe +} + +int RCCE_recv_tagged(char *privbuf, size_t size, int source, void* tag, int len) { + int ignore; + +#ifdef USE_PROBE_FLAGS + RCCE_FLAG* probe = &RCCE_probe_flag[source]; +#else + RCCE_FLAG* probe = 0; +#endif + +#ifndef USE_REMOTE_PUT_LOCAL_GET + if(RCCE_recv_queue[source] != NULL) +#else + if(RCCE_recv_queue != NULL) +#endif + return(RCCE_REJECTED); + + ignore = 0; +#ifdef USE_TAGGED_FLAGS + return(RCCE_recv_general(privbuf, RCCE_buff_ptr, RCCE_chunk, + &RCCE_ready_flag[RCCE_IAM], &RCCE_sent_flag[source], + size, source, &ignore, + 
1, 0, 0, // copy, pipe, mcast + tag, len, probe)); // tag, len, probe +#else + RCCE_recv_general(tag, RCCE_buff_ptr, RCCE_chunk, + &RCCE_ready_flag[RCCE_IAM], &RCCE_sent_flag[source], + len, source, &ignore, + 1, 0, 0, // copy, pipe, mcast + NULL, 0, probe); // tag, len, probe + + return(RCCE_recv_general(privbuf, RCCE_buff_ptr, RCCE_chunk, + &RCCE_ready_flag[RCCE_IAM], &RCCE_sent_flag[source], + size, source, &ignore, + 1, 0, 0, // copy, pipe, mcast + NULL, 0, probe)); // tag, len, probe +#endif +} + + +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_recv_pipe +//-------------------------------------------------------------------------------------- +// recv function for simplified API; use library-maintained variables for synchronization +// and set the test variable to 0 (ignore) +//-------------------------------------------------------------------------------------- +int RCCE_recv_pipe(char *privbuf, size_t size, int source) { + int ignore; + +#ifdef USE_PROBE_FLAGS + RCCE_FLAG* probe = &RCCE_probe_flag[source]; +#else + RCCE_FLAG* probe = 0; +#endif + +#ifndef USE_REMOTE_PUT_LOCAL_GET + if(RCCE_recv_queue[source] != NULL) +#else + if(RCCE_recv_queue != NULL) +#endif + return(RCCE_REJECTED); + + ignore = 0; + +#ifdef USE_PIPELINE_FLAGS + return(RCCE_recv_general(privbuf, RCCE_buff_ptr, RCCE_chunk, + &RCCE_ready_flag_pipe[RCCE_IAM], &RCCE_sent_flag_pipe[source], + size, source, &ignore, + 1, 1, 0, // copy, pipe, mcast + NULL, 0, probe)); // tag, len, probe +#else + return(RCCE_recv_general(privbuf, RCCE_buff_ptr, RCCE_chunk, + &RCCE_ready_flag[RCCE_IAM], &RCCE_sent_flag[source], + size, source, &ignore, + 1, 1, 0, // copy, pipe, mcast + NULL, 0, probe)); // tag, len, probe +#endif +} + +int RCCE_recv_mcast(char *privbuf, size_t size, int source) { + int ignore; + +#ifdef USE_PROBE_FLAGS + RCCE_FLAG* probe = &RCCE_probe_flag[source]; +#else + RCCE_FLAG* probe = 0; +#endif + +#ifndef 
USE_REMOTE_PUT_LOCAL_GET + if(RCCE_recv_queue[source] != NULL) +#else + if(RCCE_recv_queue != NULL) +#endif + return(RCCE_REJECTED); + + ignore = 0; + return(RCCE_recv_general(privbuf, RCCE_buff_ptr, RCCE_chunk, + NULL, NULL, + size, source, &ignore, + 1, 0, 1, // copy, pipe, mcast + NULL, 0, probe)); // tag, len, probe +} + +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_recv_cancel +//-------------------------------------------------------------------------------------- +// recv function without copying the message into the recv buffer +//-------------------------------------------------------------------------------------- +int RCCE_recv_cancel(size_t size, int source) { + int ignore; + +#ifdef USE_PROBE_FLAGS + RCCE_FLAG* probe = &RCCE_probe_flag[source]; +#else + RCCE_FLAG* probe = 0; +#endif + +#ifndef USE_REMOTE_PUT_LOCAL_GET + if(RCCE_recv_queue[source] != NULL) +#else + if(RCCE_recv_queue != NULL) +#endif + return(RCCE_REJECTED); + + ignore = 0; +#ifdef USE_TAGGED_FOR_SHORT + if(size <= (RCCE_LINE_SIZE - sizeof(int))) + { +#ifdef USE_PROBE_FLAGS + RCCE_wait_until(*probe, RCCE_FLAG_SET); + RCCE_flag_write(probe, RCCE_FLAG_UNSET, RCCE_IAM); +#endif + +#ifdef USE_REMOTE_PUT_LOCAL_GET + + RCCE_flag_write(&RCCE_ready_flag[RCCE_IAM], RCCE_FLAG_SET, source); +#ifndef USE_PROBE_FLAGS_SHORTCUT + RCCE_wait_until(RCCE_sent_flag[source], RCCE_FLAG_SET); + RCCE_flag_write(&RCCE_sent_flag[source], RCCE_FLAG_UNSET, RCCE_IAM); +#endif + +#else // LOCAL PUT / REMOTE GET: (standard) + + RCCE_wait_until(RCCE_sent_flag[source], RCCE_FLAG_SET); + RCCE_flag_write(&RCCE_sent_flag[source], RCCE_FLAG_UNSET, RCCE_IAM); + RCCE_flag_write(&RCCE_ready_flag[RCCE_IAM], RCCE_FLAG_SET, source); + +#endif // !USE_REMOTE_PUT_LOCAL_GET + + return(RCCE_SUCCESS); + } + else +#endif + return(RCCE_recv_general(NULL, RCCE_buff_ptr, RCCE_chunk, + &RCCE_ready_flag[RCCE_IAM], &RCCE_sent_flag[source], + size, source, &ignore, + 0, 0, 0, // 
copy, pipe, mcast + NULL, 0, probe)); // tag, len, probe +} + +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_recv_test +//-------------------------------------------------------------------------------------- +// recv_test function for simplified API; use library-maintained variables for +// synchronization and set the test variable to 1 (do test) +//-------------------------------------------------------------------------------------- +int RCCE_recv_test(char *privbuf, size_t size, int source, int *test) { + +#ifdef USE_PROBE_FLAGS + RCCE_FLAG* probe = &RCCE_probe_flag[source]; +#else + RCCE_FLAG* probe = 0; +#endif + +#ifndef USE_REMOTE_PUT_LOCAL_GET + if(RCCE_recv_queue[source] != NULL) { +#else + if(RCCE_recv_queue != NULL) { +#endif + (*test) = 0; + return(RCCE_REJECTED); + } + + + /* make sure the test flag is set, regardless of input value */ + *test = 1; + return(RCCE_recv_general(privbuf, RCCE_buff_ptr, RCCE_chunk, + &RCCE_ready_flag[RCCE_IAM], &RCCE_sent_flag[source], + size, source, test, + 1, 0, 0, // copy, pipe, mcast + NULL, 0, probe)); // tag, len, probe +} + + +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_recv_probe +//-------------------------------------------------------------------------------------- +// probe for a message; just like RCCE_recv_test, but without any receiving. test may be NULL (the call then blocks until the flag is set) and combuf may be NULL (no buffer address is reported); neither is dereferenced when NULL. Returns RCCE_REJECTED while a receive request is queued, else RCCE_SUCCESS +//-------------------------------------------------------------------------------------- +int RCCE_recv_probe(int source, int *test, t_vcharp *combuf) { + +#ifdef USE_PROBE_FLAGS + RCCE_FLAG* flag = &RCCE_probe_flag[source]; +#else + RCCE_FLAG* flag = &RCCE_sent_flag[source]; +#endif + +#ifndef USE_REMOTE_PUT_LOCAL_GET + if(RCCE_recv_queue[source] != NULL) { +#else + if(RCCE_recv_queue != NULL) { +#endif + if(test) (*test) = 0; // test is optional: NULL selects the blocking path further down + if(combuf) (*combuf) = NULL; // combuf is optional as well + return(RCCE_REJECTED); + } + + if(test) { + RCCE_test_flag((*flag), RCCE_FLAG_SET, test); +#ifdef
USE_REMOTE_PUT_LOCAL_GET + if(combuf && (*test)) (*combuf) = RCCE_buff_ptr; +#else + if(combuf && (*test)) (*combuf) = RCCE_comm_buffer[source]+(RCCE_buff_ptr-RCCE_comm_buffer[RCCE_IAM]); +#endif + } + else { + RCCE_wait_until((*flag), RCCE_FLAG_SET); +#ifdef USE_REMOTE_PUT_LOCAL_GET + if(combuf) (*combuf) = RCCE_buff_ptr; +#else + if(combuf) (*combuf) = RCCE_comm_buffer[source]+(RCCE_buff_ptr-RCCE_comm_buffer[RCCE_IAM]); +#endif + } + +#ifdef USE_PROBE_FLAGS + if(combuf) (*combuf) = NULL; // optional out-parameter: do not write through NULL +#endif + + return(RCCE_SUCCESS); +} +// RCCE_recv_probe_tagged: like RCCE_recv_probe, but with USE_TAGGED_FLAGS the flag is tested/waited for together with tag/len; otherwise the tag is fetched with RCCE_recv(tag, len, source) once the flag is set. test and combuf may be NULL +int RCCE_recv_probe_tagged(int source, int *test, t_vcharp *combuf, void* tag, int len) { + +#ifdef USE_PROBE_FLAGS + RCCE_FLAG* flag = &RCCE_probe_flag[source]; +#else + RCCE_FLAG* flag = &RCCE_sent_flag[source]; +#endif + +#ifndef USE_REMOTE_PUT_LOCAL_GET + if(RCCE_recv_queue[source] != NULL) { +#else + if(RCCE_recv_queue != NULL) { +#endif + if(test) (*test) = 0; // test is optional: NULL selects the blocking path further down + if(combuf) (*combuf) = NULL; // combuf is optional as well + return(RCCE_REJECTED); + } + +#ifdef USE_TAGGED_FLAGS + if(test) { + RCCE_test_tagged((*flag), RCCE_FLAG_SET, test, tag, len); +#ifdef USE_REMOTE_PUT_LOCAL_GET + if(combuf && (*test)) (*combuf) = RCCE_buff_ptr; +#else + if(combuf && (*test)) (*combuf) = RCCE_comm_buffer[source]+(RCCE_buff_ptr-RCCE_comm_buffer[RCCE_IAM]); +#endif + } + else { + RCCE_wait_tagged((*flag), RCCE_FLAG_SET, tag, len); +#ifdef USE_REMOTE_PUT_LOCAL_GET + if(combuf) (*combuf) = RCCE_buff_ptr; +#else + if(combuf) (*combuf) = RCCE_comm_buffer[source]+(RCCE_buff_ptr-RCCE_comm_buffer[RCCE_IAM]); +#endif + } +#else + if(test) { + RCCE_test_flag((*flag), RCCE_FLAG_SET, test); + } + else { + RCCE_wait_until((*flag), RCCE_FLAG_SET); + } + + if(!test || (test && (*test))) + { + RCCE_recv(tag, len, source); + RCCE_wait_until((*flag), RCCE_FLAG_SET); +#ifdef USE_REMOTE_PUT_LOCAL_GET + if(combuf) (*combuf) = RCCE_buff_ptr; +#else + if(combuf) (*combuf) = RCCE_comm_buffer[source]+(RCCE_buff_ptr-RCCE_comm_buffer[RCCE_IAM]); +#endif + } +#endif + +#ifdef USE_PROBE_FLAGS + if(combuf) (*combuf) = NULL; // optional out-parameter: do not write through NULL +#endif + +
return(RCCE_SUCCESS); +} + +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_irecv +//-------------------------------------------------------------------------------------- +// non-blocking recv function; fills in a handle of type RCCE_RECV_REQUEST. Returns RCCE_SUCCESS if the receive completed at once, RCCE_PENDING if the request became the head of the receive queue, RCCE_RESERVED if it was appended behind requests already queued +//-------------------------------------------------------------------------------------- +int RCCE_irecv(char *privbuf, size_t size, int source, RCCE_RECV_REQUEST *request) { + +#ifdef USE_PROBE_FLAGS + RCCE_FLAG* probe = &RCCE_probe_flag[source]; +#else + RCCE_FLAG* probe = 0; +#endif + + if(request == NULL){ // no handle given: receive through a local request and wait for it + RCCE_RECV_REQUEST dummy_request; + RCCE_irecv(privbuf, size, source, &dummy_request); + RCCE_irecv_wait(&dummy_request); + return(RCCE_SUCCESS); + } + +#ifdef USE_TAGGED_FOR_SHORT + if(size <= (RCCE_LINE_SIZE - sizeof(int))) + RCCE_init_recv_request(NULL, RCCE_buff_ptr, RCCE_chunk, + &RCCE_ready_flag[RCCE_IAM], &RCCE_sent_flag[source], + size, source, 0, privbuf, size, probe, request); // short message: privbuf/size are passed as tag/len, copy is 0 + else +#endif + RCCE_init_recv_request(privbuf, RCCE_buff_ptr, RCCE_chunk, + &RCCE_ready_flag[RCCE_IAM], &RCCE_sent_flag[source], + size, source, 1, NULL, 0, probe, request); + +#ifndef USE_REMOTE_PUT_LOCAL_GET + if(RCCE_recv_queue[source] == NULL) { +#else + if(RCCE_recv_queue == NULL) { +#endif + + if(RCCE_push_recv_request(request) == RCCE_SUCCESS) { + return(RCCE_SUCCESS); + } + else { +#ifndef USE_REMOTE_PUT_LOCAL_GET + RCCE_recv_queue[source] = request; +#else + RCCE_recv_queue = request; +#endif + return(RCCE_PENDING); + } + } + else { +#ifndef USE_REMOTE_PUT_LOCAL_GET + if(RCCE_recv_queue[source]->next == NULL) { + RCCE_recv_queue[source]->next = request; + } +#else + if(RCCE_recv_queue->next == NULL) { + RCCE_recv_queue->next = request; + } +#endif + else { +#ifndef USE_REMOTE_PUT_LOCAL_GET + RCCE_RECV_REQUEST *run = RCCE_recv_queue[source]; +#else + RCCE_RECV_REQUEST *run = RCCE_recv_queue; +#endif + while(run->next != NULL) run = run->next; // advance to the last queued request + run->next =
request; + } + return(RCCE_RESERVED); + } +} + +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_irecv_test +//-------------------------------------------------------------------------------------- +// test function for completion of the requested non-blocking receive operation; *test becomes 1 once the request is finished, else 0. A request that is not at the head of its queue is not progressed (RCCE_RESERVED) +//-------------------------------------------------------------------------------------- +int RCCE_irecv_test(RCCE_RECV_REQUEST *request, int *test) { + + int source = request->source; + + if(request->finished) { + (*test) = 1; + return(RCCE_SUCCESS); + } + +#ifndef USE_REMOTE_PUT_LOCAL_GET + if(RCCE_recv_queue[source] != request) { +#else + if(RCCE_recv_queue != request) { +#endif + (*test) = 0; + return(RCCE_RESERVED); + } + + RCCE_push_recv_request(request); // try to make progress on the head request + + if(request->finished) { +#ifndef USE_REMOTE_PUT_LOCAL_GET + RCCE_recv_queue[source] = request->next; +#else + RCCE_recv_queue = request->next; +#endif + + (*test) = 1; + return(RCCE_SUCCESS); + } + + (*test) = 0; + return(RCCE_PENDING); +} + + +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_irecv_push +//-------------------------------------------------------------------------------------- +// progress function for pending requests in the irecv queue; only the request at the head is pushed, and it is removed from the queue once finished. Returns RCCE_PENDING while that request is unfinished +//-------------------------------------------------------------------------------------- +int RCCE_irecv_push(int source) { + +#ifndef USE_REMOTE_PUT_LOCAL_GET + RCCE_RECV_REQUEST *request = RCCE_recv_queue[source]; +#else + RCCE_RECV_REQUEST *request = RCCE_recv_queue; +#endif + + if(request == NULL) { + return(RCCE_SUCCESS); + } + + if(request->finished) { + return(RCCE_SUCCESS); + } + + RCCE_push_recv_request(request); + + if(request->finished) { +#ifndef USE_REMOTE_PUT_LOCAL_GET + RCCE_recv_queue[source] = request->next; +#else + RCCE_recv_queue = request->next; +#endif + return(RCCE_SUCCESS); + } + + return(RCCE_PENDING); +} + +
+//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_irecv_wait +//-------------------------------------------------------------------------------------- +// just wait for completion of the requestes non-blocking send operation +//-------------------------------------------------------------------------------------- +int RCCE_irecv_wait(RCCE_RECV_REQUEST *request) { + + int ue; + +#ifndef USE_REMOTE_PUT_LOCAL_GET + while(!request->finished) { + + RCCE_irecv_push(request->source); + + if(!request->finished) { + + RCCE_isend_push(-1); + + for(ue=0; uefinished) { + + RCCE_irecv_push(-1); + + if(!request->finished) { + + for(ue=0; ue (y) ? (x) : (y) ) + +#include +#include + +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_reduce_general +//-------------------------------------------------------------------------------------- +// function used to implement both reduce and allreduce +//-------------------------------------------------------------------------------------- +static int RCCE_reduce_general( + char *inbuf, // source buffer for reduction datan + char *outbuf, // target buffer for reduction data + int num, // number of data elements to be reduced + int type, // type of data elements + int op, // reduction operation + int root, // root of reduction tree, used for all reductions + int all, // if 1, use allreduce, if 0, use reduce + RCCE_COMM comm // communication domain within which to reduce + ) { + + int ue, i, type_size, ierr; + int *iin, *iout; + long *lin, *lout; + float *fin, *fout; + double *din, *dout; + // create aliases for source and target buffers to simplify arithmetic operations + iin = (int *) inbuf; iout = (int *) outbuf; + lin = (long *) inbuf; lout = (long *) outbuf; + fin = (float *) inbuf; fout = (float *) outbuf; + din = (double *) inbuf; dout = (double *) outbuf; + +#ifdef GORY + printf("Reduction only implemented for non-gory 
API\n"); + return(1); +#else + switch (op) { + case RCCE_SUM: + case RCCE_MAX: + case RCCE_MIN: + case RCCE_PROD: break; + default: return(RCCE_ERROR_ILLEGAL_OP); + } + + switch (type) { + case RCCE_INT: type_size = sizeof(int); + break; + case RCCE_LONG: type_size = sizeof(long); + break; + case RCCE_FLOAT: type_size = sizeof(float); + break; + case RCCE_DOUBLE: type_size = sizeof(double); + break; + default: return(RCCE_ERROR_ILLEGAL_TYPE); + } + + if (RCCE_IAM != comm.member[root]) { + // non-root UEs send their source buffers to the root + if (ierr=RCCE_send(inbuf, num*type_size, comm.member[root])) + return(ierr); + // in case of allreduce they also receive the reduced buffer + if (all) if (ierr=RCCE_recv(outbuf, num*type_size, comm.member[root])) + return(ierr); + } + else { + // the root can copy directly from source to target buffer + memcpy(outbuf, inbuf, num*type_size); + for (ue=0; ue= comm.size) + return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ID)); + + return(RCCE_error_return(RCCE_debug_comm, + RCCE_reduce_general(inbuf, outbuf, num, type, op, root, all, comm))); +} + diff --git a/hermit/usr/ircce/RCCE_send.c b/hermit/usr/ircce/RCCE_send.c new file mode 100644 index 000000000..5d54a6a65 --- /dev/null +++ b/hermit/usr/ircce/RCCE_send.c @@ -0,0 +1,992 @@ +//*************************************************************************************** +// Synchronized receive routines. +//*************************************************************************************** +// +// Author: Rob F. Van der Wijngaart +// Intel Corporation +// Date: 008/30/2010 +// +//*************************************************************************************** +// +// Copyright 2010 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// [2010-10-25] added support for non-blocking send/recv operations +// - RCCE_isend(), ..._test(), ..._wait(), ..._push() +// - RCCE_irecv(), ..._test(), ..._wait(), ..._push() +// by Carsten Clauss, Chair for Operating Systems, +// RWTH Aachen University +// +// [2012-09-10] added support for "tagged" flags +// by Carsten Clauss, Chair for Operating Systems, +// RWTH Aachen University +// +#include "RCCE_lib.h" +#if defined(COPPERRIDGE) && !defined(__hermit__) +#include "scc_memcpy.h" +#else +#define memcpy_scc memcpy +#endif + +#include +#include + + +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_send_general +//-------------------------------------------------------------------------------------- +// Synchronized send function (gory and non-gory mode) +//-------------------------------------------------------------------------------------- +static int RCCE_send_general( + char *privbuf, // source buffer in local private memory (send buffer) + t_vcharp combuf, // intermediate buffer in MPB + size_t chunk, // size of MPB available for this message (bytes) + RCCE_FLAG *ready, // flag indicating whether receiver is ready + RCCE_FLAG *sent, // flag indicating whether message has been sent by source + size_t size, // size of message (bytes) + int dest, // UE that will receive the message + int copy, // set to 0 for synchronization only (no copying/sending) + int pipe, // use pipelining? + int mcast, // multicast? + void* tag, // additional tag? 
+ int len, // length of additional tag + RCCE_FLAG *probe // flag for probing for incoming messages + ) { + + char padline[RCCE_LINE_SIZE]; // copy buffer, used if message not multiple of line size + size_t wsize, // offset within send buffer when putting in "chunk" bytes + remainder, // bytes remaining to be sent + nbytes; // number of bytes to be sent in single RCCE_put call + char *bufptr; // running pointer inside privbuf for current location + +#ifdef USE_REMOTE_PUT_LOCAL_GET + if(mcast) return(RCCE_error_return(1, RCCE_ERROR_NO_MULTICAST_SUPPORT)); +#endif + + if(probe) +#ifdef USE_TAGGED_FLAGS + RCCE_flag_write_tagged(probe, RCCE_FLAG_SET, dest, tag, len); +#else + RCCE_flag_write(probe, RCCE_FLAG_SET, dest); +#endif + +#ifdef USE_SYNCH_FOR_ZERO_BYTE + // synchronize even in case of zero byte messages: + if(size == 0) { +#ifdef USE_REMOTE_PUT_LOCAL_GET + RCCE_wait_until(*ready, RCCE_FLAG_SET); + RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM); +#ifdef USE_TAGGED_FLAGS + if(!probe) + RCCE_flag_write_tagged(sent, RCCE_FLAG_SET, dest, tag, len); + else +#endif + RCCE_flag_write(sent, RCCE_FLAG_SET, dest); +#else // LOCAL PUT / REMOTE GET: (standard) +#ifdef USE_TAGGED_FLAGS + if(!probe) + RCCE_flag_write_tagged(sent, RCCE_FLAG_SET, dest, tag, len); + else +#endif + RCCE_flag_write(sent, RCCE_FLAG_SET, dest); + RCCE_wait_until(*ready, RCCE_FLAG_SET); + RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM); +#endif // !USE_REMOTE_PUT_LOCAL_GET + return(RCCE_SUCCESS); + } +#endif // USE_SYNCH_FOR_ZERO_BYTE + + if(!pipe) { + // send data in units of available chunk size of comm buffer + for (wsize=0; wsize< (size/chunk)*chunk; wsize+=chunk) { + bufptr = privbuf + wsize; + nbytes = chunk; + +#ifdef USE_REMOTE_PUT_LOCAL_GET + + RCCE_wait_until(*ready, RCCE_FLAG_SET); + RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM); + + // copy private data to remote comm buffer + if(copy) RCCE_put(combuf, (t_vcharp) bufptr, nbytes, dest); + +#ifdef USE_TAGGED_FLAGS + if( (wsize == 
0) && (!probe) ) + RCCE_flag_write_tagged(sent, RCCE_FLAG_SET, dest, tag, len); + else +#endif + RCCE_flag_write(sent, RCCE_FLAG_SET, dest); + +#else // LOCAL PUT / REMOTE GET: (standard) + + // copy private data to own comm buffer + if(copy) RCCE_put(combuf, (t_vcharp) bufptr, nbytes, RCCE_IAM); + + if(!mcast) { +#ifdef USE_TAGGED_FLAGS + if( (wsize == 0) && (!probe) ) + RCCE_flag_write_tagged(sent, RCCE_FLAG_SET, dest, tag, len); + else +#endif + RCCE_flag_write(sent, RCCE_FLAG_SET, dest); + + // wait for the destination to be ready to receive a message + RCCE_wait_until(*ready, RCCE_FLAG_SET); + RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM); + } + else { + RCCE_TNS_barrier(&RCCE_COMM_WORLD); + RCCE_TNS_barrier(&RCCE_COMM_WORLD); + } +#endif // !USE_REMOTE_PUT_LOCAL_GET + + } // for + } + else // if(!pipe) -> if(pipe) + { + // pipelined version of send/recv: + size_t subchunk1, subchunk2; + + for(wsize = 0; wsize < (size/chunk)*chunk; wsize+=chunk) { + + if(wsize == 0) { + // allign sub-chunks to cache line granularity: + subchunk1 = ( (chunk / 2) / RCCE_LINE_SIZE ) * RCCE_LINE_SIZE; + subchunk2 = chunk - subchunk1; + } + + bufptr = privbuf + wsize; + nbytes = subchunk1; + +#ifdef USE_REMOTE_PUT_LOCAL_GET + + RCCE_wait_until(*ready, RCCE_FLAG_SET); + RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM); + + // copy private data chunk 1 to remote comm buffer + if(copy) RCCE_put(combuf, (t_vcharp) bufptr, nbytes, dest); + +#ifdef USE_TAGGED_FLAGS + if( (wsize == 0) && (!probe) ) + RCCE_flag_write_tagged(sent, RCCE_FLAG_SET, dest, tag, len); + else +#endif + RCCE_flag_write(sent, RCCE_FLAG_SET, dest); + +#else // LOCAL PUT / REMOTE GET: (standard) + + // copy private data chunk 1 to own comm buffer + if(copy) RCCE_put(combuf, (t_vcharp) bufptr, nbytes, RCCE_IAM); + +#ifdef USE_TAGGED_FLAGS + if( (wsize == 0) && (!probe) ) + RCCE_flag_write_tagged(sent, RCCE_FLAG_SET, dest, tag, len); + else +#endif + RCCE_flag_write(sent, RCCE_FLAG_SET, dest); + + 
RCCE_wait_until(*ready, RCCE_FLAG_SET); + RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM); + +#endif // !USE_REMOTE_PUT_LOCAL_GET + + bufptr = privbuf + wsize + subchunk1; + nbytes = subchunk2; + +#ifdef USE_REMOTE_PUT_LOCAL_GET + + RCCE_wait_until(*ready, RCCE_FLAG_SET); + RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM); + + // copy private data chunk 2 to remote comm buffer + if(copy) RCCE_put(combuf + subchunk1, (t_vcharp) bufptr, nbytes, dest); + + RCCE_flag_write(sent, RCCE_FLAG_SET, dest); + +#else // LOCAL PUT / REMOTE GET: (standard) + + // copy private data chunk 2 to own comm buffer + if(copy) RCCE_put(combuf + subchunk1, (t_vcharp) bufptr, nbytes, RCCE_IAM); + + RCCE_flag_write(sent, RCCE_FLAG_SET, dest); + + RCCE_wait_until(*ready, RCCE_FLAG_SET); + RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM); + +#endif // !USE_REMOTE_PUT_LOCAL_GET + + } //for + + } // if(pipe) + + remainder = size%chunk; + // if nothing is left over, we are done + if (!remainder) return(RCCE_SUCCESS); + + // send remainder of data--whole cache lines + bufptr = privbuf + (size/chunk)*chunk; + nbytes = remainder - remainder%RCCE_LINE_SIZE; + + if (nbytes) { + +#ifdef USE_REMOTE_PUT_LOCAL_GET + + RCCE_wait_until(*ready, RCCE_FLAG_SET); + RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM); + + // copy private data to remote comm buffer + if(copy) RCCE_put(combuf, (t_vcharp) bufptr, nbytes, dest); + +#ifdef USE_TAGGED_FLAGS + if( (wsize == 0) && (!probe) ) + RCCE_flag_write_tagged(sent, RCCE_FLAG_SET, dest, tag, len); + else +#endif + RCCE_flag_write(sent, RCCE_FLAG_SET, dest); + +#else // LOCAL PUT / REMOTE GET: (standard) + + // copy private data to own comm buffer + if(copy) RCCE_put(combuf, (t_vcharp)bufptr, nbytes, RCCE_IAM); + + if(!mcast) { +#ifdef USE_TAGGED_FLAGS + if( (wsize == 0) && (!probe) ) + RCCE_flag_write_tagged(sent, RCCE_FLAG_SET, dest, tag, len); + else +#endif + RCCE_flag_write(sent, RCCE_FLAG_SET, dest); + + // wait for the destination to be ready to receive a 
message + RCCE_wait_until(*ready, RCCE_FLAG_SET); + RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM); + } + else { + RCCE_TNS_barrier(&RCCE_COMM_WORLD); + RCCE_TNS_barrier(&RCCE_COMM_WORLD); + } +#endif // !USE_REMOTE_PUT_LOCAL_GET + + } // if(nbytes) + + remainder = remainder%RCCE_LINE_SIZE; + if (!remainder) return(RCCE_SUCCESS); + + // remainder is less than a cache line. This must be copied into appropriately sized + // intermediate space before it can be sent to the receiver + bufptr = privbuf + (size/chunk)*chunk + nbytes; + nbytes = RCCE_LINE_SIZE; + + if(copy) { +#ifdef COPPERRIDGE + memcpy_scc(padline,bufptr,remainder); +#else + memcpy(padline,bufptr,remainder); +#endif + } + +#ifdef USE_REMOTE_PUT_LOCAL_GET + + RCCE_wait_until(*ready, RCCE_FLAG_SET); + RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM); + + // copy private data to remote comm buffer + if(copy) RCCE_put(combuf, (t_vcharp) padline, nbytes, dest); + +#ifdef USE_TAGGED_FLAGS + if( (wsize == 0) && (!probe) ) + RCCE_flag_write_tagged(sent, RCCE_FLAG_SET, dest, tag, len); + else +#endif + RCCE_flag_write(sent, RCCE_FLAG_SET, dest); + +#else // LOCAL PUT / REMOTE GET: (standard) + + // copy private data to own comm buffer + if(copy) RCCE_put(combuf, (t_vcharp)padline, nbytes, RCCE_IAM); + + if(!mcast) { +#ifdef USE_TAGGED_FLAGS + if( (wsize == 0) && (!probe) ) + RCCE_flag_write_tagged(sent, RCCE_FLAG_SET, dest, tag, len); + else +#endif + RCCE_flag_write(sent, RCCE_FLAG_SET, dest); + + // wait for the destination to be ready to receive a message + RCCE_wait_until(*ready, RCCE_FLAG_SET); + RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM); + } + else { + RCCE_TNS_barrier(&RCCE_COMM_WORLD); + RCCE_TNS_barrier(&RCCE_COMM_WORLD); + } + +#endif // !USE_REMOTE_PUT_LOCAL_GET + + return(RCCE_SUCCESS); +} + +static int RCCE_push_send_request(RCCE_SEND_REQUEST *request) { + + char padline[RCCE_LINE_SIZE]; // copy buffer, used if message not multiple of line size + int test; // flag for calling 
RCCE_test_flag() + + if(request->finished) return(RCCE_SUCCESS); + + if(request->label == 1) goto label1; + if(request->label == 2) goto label2; + if(request->label == 3) goto label3; + if(request->label == 4) goto label4; + + if(request->probe) +#ifdef USE_TAGGED_FLAGS + RCCE_flag_write_tagged(request->probe, RCCE_FLAG_SET, request->dest, request->tag, request->len); +#else + RCCE_flag_write(request->probe, RCCE_FLAG_SET, request->dest); +#endif + +#ifdef USE_SYNCH_FOR_ZERO_BYTE + // synchronize even in case of zero byte messages: + if(request->size == 0) { +#ifdef USE_REMOTE_PUT_LOCAL_GET + label1: + RCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test); + if(!test) { + request->label = 1; + return(RCCE_PENDING); + } + RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM); +#ifdef USE_TAGGED_FLAGS + if(!request->probe) + RCCE_flag_write_tagged(request->sent, RCCE_FLAG_SET, request->dest, request->tag, request->len); + else +#endif + RCCE_flag_write(request->sent, RCCE_FLAG_SET, request->dest); +#else // LOCAL PUT / REMOTE GET: (standard) +#ifdef USE_TAGGED_FLAGS + if(!request->probe) + RCCE_flag_write_tagged(request->sent, RCCE_FLAG_SET, request->dest, request->tag, request->len); + else +#endif + RCCE_flag_write(request->sent, RCCE_FLAG_SET, request->dest); + label1: + RCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test); + if(!test) { + request->label = 1; + return(RCCE_PENDING); + } + RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM); +#endif // !USE_REMOTE_PUT_LOCAL_GET + request->finished = 1; + return(RCCE_SUCCESS); + } +#endif // USE_SYNCH_FOR_ZERO_BYTE + + // send data in units of available chunk size of comm buffer + for (; request->wsize < (request->size / request->chunk) * request->chunk; request->wsize += request->chunk) { + request->bufptr = request->privbuf + request->wsize; + request->nbytes = request->chunk; + +#ifdef USE_REMOTE_PUT_LOCAL_GET + + // wait for the destination to be ready to receive a message + label2: + 
RCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test); + if(!test) { + request->label = 2; + return(RCCE_PENDING); + } + RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM); + + // copy private data to remote comm buffer + if(request->copy) RCCE_put(request->combuf, (t_vcharp) request->bufptr, request->nbytes, request->dest); + +#ifdef USE_TAGGED_FLAGS + if( (request->wsize == 0) && (!request->probe) ) + RCCE_flag_write_tagged(request->sent, RCCE_FLAG_SET, request->dest, request->tag, request->len); + else +#endif + RCCE_flag_write(request->sent, RCCE_FLAG_SET, request->dest); + +#else // LOCAL PUT / REMOTE GET: (standard) + + // copy private data to own comm buffer + if(request->copy) RCCE_put(request->combuf, (t_vcharp) request->bufptr, request->nbytes, RCCE_IAM); + +#ifdef USE_TAGGED_FLAGS + if( (request->wsize == 0) && (!request->probe) ) + RCCE_flag_write_tagged(request->sent, RCCE_FLAG_SET, request->dest, request->tag, request->len); + else +#endif + RCCE_flag_write(request->sent, RCCE_FLAG_SET, request->dest); + + // wait for the destination to be ready to receive a message + label2: + RCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test); + if(!test) { + request->label = 2; + return(RCCE_PENDING); + } + RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM); + +#endif // !USE_REMOTE_PUT_LOCAL_GET + + } // for + + request->remainder = request->size % request->chunk; + // if nothing is left over, we are done + if (!request->remainder) { + request->finished = 1; + return(RCCE_SUCCESS); + } + + // send remainder of data--whole cache lines + request->bufptr = request->privbuf + (request->size / request->chunk) * request->chunk; + request->nbytes = request->remainder - request->remainder % RCCE_LINE_SIZE; + + if (request->nbytes) { + +#ifdef USE_REMOTE_PUT_LOCAL_GET + + // wait for the destination to be ready to receive a message + label3: + RCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test); + if(!test) { + request->label = 3; + 
return(RCCE_PENDING); + } + RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM); + + // copy private data to remote comm buffer + if(request->copy) RCCE_put(request->combuf, (t_vcharp) request->bufptr, request->nbytes, request->dest); + +#ifdef USE_TAGGED_FLAGS + if( (request->wsize == 0) && (!request->probe) ) + RCCE_flag_write_tagged(request->sent, RCCE_FLAG_SET, request->dest, request->tag, request->len); + else +#endif + RCCE_flag_write(request->sent, RCCE_FLAG_SET, request->dest); + +#else // LOCAL PUT / REMOTE GET: (standard) + + // copy private data to own comm buffer + if(request->copy) RCCE_put(request->combuf, (t_vcharp)request->bufptr, request->nbytes, RCCE_IAM); + +#ifdef USE_TAGGED_FLAGS + if( (request->wsize == 0) && (!request->probe) ) + RCCE_flag_write_tagged(request->sent, RCCE_FLAG_SET, request->dest, request->tag, request->len); + else +#endif + RCCE_flag_write(request->sent, RCCE_FLAG_SET, request->dest); + + // wait for the destination to be ready to receive a message + label3: + RCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test); + if(!test) { + request->label = 3; + return(RCCE_PENDING); + } + RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM); + +#endif // !USE_REMOTE_PUT_LOCAL_GET + + } // if(request->nbytes) + + request->remainder = request->size % request->chunk; + request->remainder = request->remainder%RCCE_LINE_SIZE; + + // if nothing is left over, we are done + if (!request->remainder) + { + request->finished = 1; + return(RCCE_SUCCESS); + } + + // remainder is less than a cache line. 
This must be copied into appropriately sized + // intermediate space before it can be sent to the receiver + request->bufptr = request->privbuf + (request->size / request->chunk) * request->chunk + request->nbytes; + request->nbytes = RCCE_LINE_SIZE; + +#ifdef USE_REMOTE_PUT_LOCAL_GET + + // wait for the destination to be ready to receive a message + label4: + RCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test); + if(!test) { + request->label = 4; + return(RCCE_PENDING); + } + RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM); + + // copy private data to remote comm buffer + if(request->copy) { +#ifdef COPPERRIDGE + memcpy_scc(padline,request->bufptr,request->remainder); +#else + memcpy(padline,request->bufptr,request->remainder); +#endif + RCCE_put(request->combuf, (t_vcharp) padline, request->nbytes, request->dest); + } + +#ifdef USE_TAGGED_FLAGS +#ifdef USE_PROBE_FLAGS_SHORTCUT + if(request->privbuf == NULL) + { + request->finished = 1; + return(RCCE_SUCCESS); + } +#endif + if( (request->wsize == 0) && (!request->probe) ) + RCCE_flag_write_tagged(request->sent, RCCE_FLAG_SET, request->dest, request->tag, request->len); + else +#endif + RCCE_flag_write(request->sent, RCCE_FLAG_SET, request->dest); + +#else // LOCAL PUT / REMOTE GET: (standard) + + // copy private data to own comm buffer + if(request->copy) { +#ifdef COPPERRIDGE + memcpy_scc(padline,request->bufptr,request->remainder); +#else + memcpy(padline,request->bufptr,request->remainder); +#endif + RCCE_put(request->combuf, (t_vcharp)padline, request->nbytes, RCCE_IAM); + } + +#ifdef USE_TAGGED_FLAGS + if( (request->wsize == 0) && (!request->probe) ) + RCCE_flag_write_tagged(request->sent, RCCE_FLAG_SET, request->dest, request->tag, request->len); + else +#endif + RCCE_flag_write(request->sent, RCCE_FLAG_SET, request->dest); + + // wait for the destination to be ready to receive a message + label4: + RCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test); + if(!test) { + request->label = 4; + 
return(RCCE_PENDING); + } + RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM); + +#endif // !USE_REMOTE_PUT_LOCAL_GET + + request->finished = 1; + return(RCCE_SUCCESS); +} + +static void RCCE_init_send_request( + char *privbuf, // source buffer in local private memory (send buffer) + t_vcharp combuf, // intermediate buffer in MPB + size_t chunk, // size of MPB available for this message (bytes) + RCCE_FLAG *ready, // flag indicating whether receiver is ready + RCCE_FLAG *sent, // flag indicating whether message has been sent by source + size_t size, // size of message (bytes) + int dest, // UE that will receive the message + int copy, // set to 0 for synchronization only (no copying/sending) + void* tag, // additional tag? + int len, // length of additional tag + RCCE_FLAG *probe, // flag for probing for incoming messages + RCCE_SEND_REQUEST *request + ) { + + request->privbuf = privbuf; + request->combuf = combuf; + request->chunk = chunk; + request->ready = ready; + request->sent = sent; + request->size = size; + request->dest = dest; + + request->copy = copy; + request->tag = tag; + request->len = len; + request->probe = probe; + + request->wsize = 0; + request->remainder = 0; + request->nbytes = 0; + request->bufptr = NULL; + + request->label = 0; + + request->finished = 0; + + request->next = NULL; + + return; +} + +#ifndef GORY +// this is the LfBS-customized synchronized message passing API + +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_send +//-------------------------------------------------------------------------------------- +// send function for simplified API; use library-maintained variables for synchronization +//-------------------------------------------------------------------------------------- +int RCCE_send(char *privbuf, size_t size, int dest) { + +#ifdef USE_PROBE_FLAGS + RCCE_FLAG* probe = &RCCE_probe_flag[RCCE_IAM]; +#else + RCCE_FLAG* probe = NULL; +#endif + +#ifndef 
USE_REMOTE_PUT_LOCAL_GET + if(RCCE_send_queue != NULL) +#else + if(RCCE_send_queue[dest] != NULL) +#endif + return(RCCE_REJECTED); + +#ifdef USE_TAGGED_FOR_SHORT + if(size <= (RCCE_LINE_SIZE - sizeof(int))) + { +#ifdef USE_PROBE_FLAGS + RCCE_flag_write_tagged(probe, RCCE_FLAG_SET, dest, privbuf, size); +#endif + +#ifdef USE_REMOTE_PUT_LOCAL_GET + + RCCE_wait_until(RCCE_ready_flag[dest], RCCE_FLAG_SET); + RCCE_flag_write(&RCCE_ready_flag[dest], RCCE_FLAG_UNSET, RCCE_IAM); + +#ifndef USE_PROBE_FLAGS_SHORTCUT +#ifdef USE_PROBE_FLAGS + RCCE_flag_write(&RCCE_sent_flag[RCCE_IAM], RCCE_FLAG_SET, dest); +#else + RCCE_flag_write_tagged(&RCCE_sent_flag[RCCE_IAM], RCCE_FLAG_SET, dest, privbuf, size); +#endif +#endif + +#else // LOCAL PUT / REMOTE GET: (standard) + +#ifdef USE_PROBE_FLAGS + RCCE_flag_write(&RCCE_sent_flag[RCCE_IAM], RCCE_FLAG_SET, dest); +#else + RCCE_flag_write_tagged(&RCCE_sent_flag[RCCE_IAM], RCCE_FLAG_SET, dest, privbuf, size); +#endif + + RCCE_wait_until(RCCE_ready_flag[dest], RCCE_FLAG_SET); + RCCE_flag_write(&RCCE_ready_flag[dest], RCCE_FLAG_UNSET, RCCE_IAM); + +#endif // !USE_REMOTE_PUT_LOCAL_GET + + return(RCCE_SUCCESS); + } + else +#endif + + return(RCCE_send_general(privbuf, RCCE_buff_ptr, RCCE_chunk, + &RCCE_ready_flag[dest], &RCCE_sent_flag[RCCE_IAM], + size, dest, + 1, 0, 0, // copy, pipe, mcast + NULL, 0, probe)); // tag, len +} + +int RCCE_send_tagged(char *privbuf, size_t size, int dest, void* tag, int len) { + +#ifdef USE_PROBE_FLAGS + RCCE_FLAG* probe = &RCCE_probe_flag[RCCE_IAM]; +#else + RCCE_FLAG* probe = NULL; +#endif + +#ifndef USE_REMOTE_PUT_LOCAL_GET + if(RCCE_send_queue != NULL) +#else + if(RCCE_send_queue[dest] != NULL) +#endif + return(RCCE_REJECTED); + +#ifdef USE_TAGGED_FLAGS + return(RCCE_send_general(privbuf, RCCE_buff_ptr, RCCE_chunk, + &RCCE_ready_flag[dest], &RCCE_sent_flag[RCCE_IAM], + size, dest, + 1, 0, 0, // copy, pipe, mcast + tag, len, probe)); // tag, len, probe +#else + + RCCE_send_general(tag, RCCE_buff_ptr, 
RCCE_chunk, + &RCCE_ready_flag[dest], &RCCE_sent_flag[RCCE_IAM], + len, dest, + 1, 0, 0, // copy, pipe, mcast + NULL, 0, probe); // tag, len, probe + + return(RCCE_send_general(privbuf, RCCE_buff_ptr, RCCE_chunk, + &RCCE_ready_flag[dest], &RCCE_sent_flag[RCCE_IAM], + size, dest, + 1, 0, 0, // copy, pipe, mcast + NULL, 0, NULL)); // tag, len, probe +#endif +} + +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_send_pipe +//-------------------------------------------------------------------------------------- +// send function for simplified API; use library-maintained variables for synchronization +//-------------------------------------------------------------------------------------- +int RCCE_send_pipe(char *privbuf, size_t size, int dest) { + +#ifdef USE_PROBE_FLAGS + RCCE_FLAG* probe = &RCCE_probe_flag[RCCE_IAM]; +#else + RCCE_FLAG* probe = NULL; +#endif + +#ifndef USE_REMOTE_PUT_LOCAL_GET + if(RCCE_send_queue != NULL) +#else + if(RCCE_send_queue[dest] != NULL) +#endif + return(RCCE_REJECTED); + +#ifdef USE_PIPELINE_FLAGS + return(RCCE_send_general(privbuf, RCCE_buff_ptr, RCCE_chunk, + &RCCE_ready_flag_pipe[dest], &RCCE_sent_flag_pipe[RCCE_IAM], + size, dest, + 1, 1, 0, // copy, pipe, mcast + NULL, 0, probe)); // tag, len, probe +#else + return(RCCE_send_general(privbuf, RCCE_buff_ptr, RCCE_chunk, + &RCCE_ready_flag[dest], &RCCE_sent_flag[RCCE_IAM], + size, dest, + 1, 1, 0, // copy, pipe, mcast + NULL, 0, probe)); // tag, len, probe +#endif +} + +int RCCE_send_mcast(char *privbuf, size_t size) { + +#ifdef USE_PROBE_FLAGS + RCCE_FLAG* probe = &RCCE_probe_flag[RCCE_IAM]; +#else + RCCE_FLAG* probe = NULL; +#endif + +#ifndef USE_REMOTE_PUT_LOCAL_GET + if(RCCE_send_queue != NULL) +#else + if(RCCE_send_queue != NULL) +#endif + return(RCCE_REJECTED); + + return(RCCE_send_general(privbuf, RCCE_buff_ptr, RCCE_chunk, + NULL, NULL, + size, -1, + 1, 0, 1, // copy, pipe, mcast + NULL, 0, probe)); // tag, len +} + 
+//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_isend +//-------------------------------------------------------------------------------------- +// non-blocking send function; returns an handle of type RCCE_SEND_REQUEST +//-------------------------------------------------------------------------------------- +int RCCE_isend(char *privbuf, size_t size, int dest, RCCE_SEND_REQUEST *request) { + +#ifdef USE_PROBE_FLAGS + RCCE_FLAG* probe = &RCCE_probe_flag[RCCE_IAM]; +#else + RCCE_FLAG* probe = NULL; +#endif + +#ifdef USE_TAGGED_FOR_SHORT + if(size <= (RCCE_LINE_SIZE - sizeof(int))) + { + RCCE_init_send_request(NULL, RCCE_buff_ptr, RCCE_chunk, + &RCCE_ready_flag[dest], &RCCE_sent_flag[RCCE_IAM], + size, dest, 0, privbuf, size, probe, request); + } + else +#endif + + RCCE_init_send_request(privbuf, RCCE_buff_ptr, RCCE_chunk, + &RCCE_ready_flag[dest], &RCCE_sent_flag[RCCE_IAM], + size, dest, 1, NULL, 0, probe, request); + +#ifndef USE_REMOTE_PUT_LOCAL_GET + if(RCCE_send_queue == NULL) { +#else + if(RCCE_send_queue[dest] == NULL) { +#endif + + if(RCCE_push_send_request(request) == RCCE_SUCCESS) { + return(RCCE_SUCCESS); + } + else { +#ifndef USE_REMOTE_PUT_LOCAL_GET + RCCE_send_queue = request; +#else + RCCE_send_queue[dest] = request; +#endif + return(RCCE_PENDING); + } + } + else { +#ifndef USE_REMOTE_PUT_LOCAL_GET + if(RCCE_send_queue->next == NULL) { + RCCE_send_queue->next = request; + } +#else + if(RCCE_send_queue[dest]->next == NULL) { + RCCE_send_queue[dest]->next = request; + } +#endif + else { +#ifndef USE_REMOTE_PUT_LOCAL_GET + RCCE_SEND_REQUEST *run = RCCE_send_queue; +#else + RCCE_SEND_REQUEST *run = RCCE_send_queue[dest]; +#endif + while(run->next != NULL) run = run->next; + run->next = request; + } + return(RCCE_RESERVED); + } +} + +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_isend_test 
+//-------------------------------------------------------------------------------------- +// test function for completion of the requestes non-blocking send operation +//-------------------------------------------------------------------------------------- +int RCCE_isend_test(RCCE_SEND_REQUEST *request, int *test) { + + if(request->finished) { + (*test) = 1; + return(RCCE_SUCCESS); + } + +#ifndef USE_REMOTE_PUT_LOCAL_GET + if(RCCE_send_queue != request) { +#else + if(RCCE_send_queue[request->dest] != request) { +#endif + (*test) = 0; + return(RCCE_RESERVED); + } + + RCCE_push_send_request(request); + + if(request->finished) { +#ifndef USE_REMOTE_PUT_LOCAL_GET + RCCE_send_queue = request->next; +#else + RCCE_send_queue[request->dest] = request->next; +#endif + + (*test) = 1; + return(RCCE_SUCCESS); + } + + (*test) = 0; + return(RCCE_PENDING); +} + + +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_isend_push +//-------------------------------------------------------------------------------------- +// progress function for pending requests in the isend queue +//-------------------------------------------------------------------------------------- +int RCCE_isend_push(int dest) { + +#ifndef USE_REMOTE_PUT_LOCAL_GET + RCCE_SEND_REQUEST *request = RCCE_send_queue; +#else + RCCE_SEND_REQUEST *request = RCCE_send_queue[dest]; +#endif + + if(request == NULL) { + return(RCCE_SUCCESS); + } + + if(request->finished) { + return(RCCE_SUCCESS); + } + + RCCE_push_send_request(request); + + if(request->finished) { +#ifndef USE_REMOTE_PUT_LOCAL_GET + RCCE_send_queue = request->next; +#else + RCCE_send_queue[request->dest] = request->next; +#endif + return(RCCE_SUCCESS); + } + + return(RCCE_PENDING); +} + +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_isend_wait +//-------------------------------------------------------------------------------------- +// just 
wait for completion of the requested non-blocking send operation +//-------------------------------------------------------------------------------------- +int RCCE_isend_wait(RCCE_SEND_REQUEST *request) { + + int ue; + +#ifndef USE_REMOTE_PUT_LOCAL_GET + while(!request->finished) { + + RCCE_isend_push(-1); + + if(!request->finished) { + + for(ue=0; uefinished) { + + RCCE_isend_push(request->dest); + + if(!request->finished) { + + RCCE_irecv_push(-1); + + for(ue=0; ue=0 && + cflag+RCCE_LINE_SIZE - (RCCE_comm_buffer[RCCE_IAM] + RCCE_BUFF_SIZE)<0){} + else { + return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_NOT_IN_COMM_BUFFER)); + } +#endif + + // always flush/invalidate to ensure we read the most recent value of *flag + // keep reading it until it has the required value + do { +#ifdef _OPENMP + #pragma omp flush +#endif + RC_cache_invalidate(); + } + while ((RCCE_bit_value(cflag, flag.location) != val)); + + return(RCCE_SUCCESS); +} + +int RCCE_test_flag(RCCE_FLAG flag, RCCE_FLAG_STATUS val, int *result) { + t_vcharp cflag; + + cflag = flag.line_address; + +// avoid tests if we use the simplified API +#ifdef GORY + if (val != RCCE_FLAG_UNSET && val != RCCE_FLAG_SET) + return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_STATUS_UNDEFINED)); + if (!cflag) + return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_NOT_ALLOCATED)); + // check to see if flag is properly contained in the local comm buffer + if (cflag - RCCE_comm_buffer[RCCE_IAM]>=0 && + cflag+RCCE_LINE_SIZE - (RCCE_comm_buffer[RCCE_IAM] + RCCE_BUFF_SIZE)<0){} + else { + return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_NOT_IN_COMM_BUFFER)); + } +#endif + + // always flush/invalidate to ensure we read the most recent value of *flag + // keep reading it until it has the required value + +#ifdef _OPENMP + #pragma omp flush +#endif + RC_cache_invalidate(); + + if(RCCE_bit_value(cflag, flag.location) != val) { + (*result) = 0; + } + else { + (*result) = 1; + } + + return(RCCE_SUCCESS); 
+} + +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_barrier +//-------------------------------------------------------------------------------------- +// very simple, linear barrier +//-------------------------------------------------------------------------------------- +int RCCE_barrier(RCCE_COMM *comm) { + + int counter, i, error; + int ROOT = 0; + t_vchar cyclechar[RCCE_LINE_SIZE]; + t_vchar valchar [RCCE_LINE_SIZE]; + t_vcharp gatherp, releasep; + RCCE_FLAG_STATUS cycle; + + counter = 0; + gatherp = comm->gather.line_address; + if (RCCE_debug_synch) + fprintf(STDERR,"UE %d has checked into barrier\n", RCCE_IAM); + // flip local barrier variable + if (error = RCCE_get(cyclechar, gatherp, RCCE_LINE_SIZE, RCCE_IAM)) + return(RCCE_error_return(RCCE_debug_synch,error)); + cycle = RCCE_flip_bit_value(cyclechar, comm->gather.location); + if (error = RCCE_put(comm->gather.line_address, cyclechar, RCCE_LINE_SIZE, RCCE_IAM)) + return(RCCE_error_return(RCCE_debug_synch,error)); + + if (RCCE_IAM==comm->member[ROOT]) { + // read "remote" gather flags; once all equal "cycle" (i.e counter==comm->size), + // we know all UEs have reached the barrier + while (counter != comm->size) { + // skip the first member (#0), because that is the ROOT + for (counter=i=1; isize; i++) { + // copy flag values out of comm buffer + if (error = RCCE_get(valchar, comm->gather.line_address, RCCE_LINE_SIZE, + comm->member[i])) + return(RCCE_error_return(RCCE_debug_synch,error)); + if (RCCE_bit_value(valchar, comm->gather.location) == cycle) counter++; + } + } + // set release flags + for (i=1; isize; i++) + if (error = RCCE_flag_write(&(comm->release), cycle, comm->member[i])) + return(RCCE_error_return(RCCE_debug_synch,error)); + } + else { + if (error = RCCE_wait_until(comm->release, cycle)) + return(RCCE_error_return(RCCE_debug_synch,error)); + } + if (RCCE_debug_synch) fprintf(STDERR,"UE %d has cleared barrier\n", RCCE_IAM); + 
return(RCCE_SUCCESS); +} + +#else + +////////////////////////////////////////////////////////////////// +// LOCKLESS SYNCHRONIZATION USING ONE WHOLE CACHE LINE PER FLAG // +////////////////////////////////////////////////////////////////// + +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_wait_until +//-------------------------------------------------------------------------------------- +// wait until flag in local MPB becomes set or unset. To avoid reading stale data from +// the cache instead of new flag value from the MPB, issue MPB cache invalidation before +// each read, including within the spin cycle +//-------------------------------------------------------------------------------------- +int RCCE_wait_until(RCCE_FLAG flag, RCCE_FLAG_STATUS val) { + t_vcharp cflag; + + cflag = (t_vcharp) flag; +#ifdef GORY + if (val != RCCE_FLAG_UNSET && val != RCCE_FLAG_SET) + return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_STATUS_UNDEFINED)); + if (!cflag) + return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_NOT_ALLOCATED)); + // check to see if flag is properly contained in the local comm buffer + if (cflag - RCCE_comm_buffer[RCCE_IAM]>=0 && + cflag+RCCE_LINE_SIZE - (RCCE_comm_buffer[RCCE_IAM] + RCCE_BUFF_SIZE)<0){} + else { + return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_NOT_IN_COMM_BUFFER)); + } +#endif + +#ifdef USE_REVERTED_FLAGS + flag = flag + RCCE_LINE_SIZE / sizeof(int) - 1; +#endif + + // always flush/invalidate to ensure we read the most recent value of *flag + // keep reading it until it has the required value. 
We only need to read the + // first int of the MPB cache line containing the flag +#ifndef USE_FLAG_EXPERIMENTAL + do { +#ifdef _OPENMP + #pragma omp flush +#endif + RC_cache_invalidate(); + } while ((*flag) != val); +#else + if (RCCE_debug_synch) + fprintf(STDERR,"UE %d wait flag: %x from address %X \n", RCCE_IAM,val,flag); + flag = RCCE_flag_buffer[RCCE_IAM]+(flag-RCCE_comm_buffer[RCCE_IAM]); + while ((*flag) != val); +#endif + return(RCCE_SUCCESS); +} + +#ifdef USE_TAGGED_FLAGS +int RCCE_wait_tagged(RCCE_FLAG flag, RCCE_FLAG_STATUS val, void *tag, int len) { + + int i, j; + RCCE_FLAG flag_pos; + +#ifndef USE_REVERTED_FLAGS + flag_pos = flag; +#else + flag_pos = flag + RCCE_LINE_SIZE / sizeof(int) - 1; +#endif + + do { +#ifdef _OPENMP +#pragma omp flush +#endif + RC_cache_invalidate(); + } while ((*flag_pos) != val); + + if(tag) { + if( len > ( RCCE_LINE_SIZE - sizeof(int) ) ) len = RCCE_LINE_SIZE - sizeof(int); +#ifndef USE_REVERTED_FLAGS + memcpy_scc(tag, &((char*)flag)[sizeof(int)], len); +#else + memcpy_scc(tag, &((char*)flag)[0], len); +#endif + } + + return(RCCE_SUCCESS); +} +#endif + +int RCCE_test_flag(RCCE_FLAG flag, RCCE_FLAG_STATUS val, int *result) { + t_vcharp cflag; + + cflag = (t_vcharp) flag; +#ifdef GORY + if (val != RCCE_FLAG_UNSET && val != RCCE_FLAG_SET) + return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_STATUS_UNDEFINED)); + if (!cflag) + return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_NOT_ALLOCATED)); + // check to see if flag is properly contained in the local comm buffer + if (cflag - RCCE_comm_buffer[RCCE_IAM]>=0 && + cflag+RCCE_LINE_SIZE - (RCCE_comm_buffer[RCCE_IAM] + RCCE_BUFF_SIZE)<0){} + else { + return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_NOT_IN_COMM_BUFFER)); + } +#endif + +#ifdef USE_REVERTED_FLAGS + flag = flag + RCCE_LINE_SIZE / sizeof(int) - 1; +#endif + + // always flush/invalidate to ensure we read the most recent value of *flag + // keep reading it until it has the required value. 
We only need to read the + // first int of the MPB cache line containing the flag +#ifdef _OPENMP +#pragma omp flush +#endif +#ifndef USE_FLAG_EXPERIMENTAL + RC_cache_invalidate(); +#endif + if((*flag) != val) { + (*result) = 0; + } + else { + (*result) = 1; + } + + return(RCCE_SUCCESS); +} + +#ifdef USE_TAGGED_FLAGS +int RCCE_test_tagged(RCCE_FLAG flag, RCCE_FLAG_STATUS val, int *result, void *tag, int len) { + + int i, j; + RCCE_FLAG flag_pos; + +#ifndef USE_REVERTED_FLAGS + flag_pos = flag; +#else + flag_pos = flag + RCCE_LINE_SIZE / sizeof(int) -1; +#endif + + RC_cache_invalidate(); + + if((*flag_pos) != val) { + (*result) = 0; + } + else { + (*result) = 1; + } + + if((*result) && tag) { + if( len > ( RCCE_LINE_SIZE - sizeof(int) ) ) len = RCCE_LINE_SIZE - sizeof(int); +#ifndef USE_REVERTED_FLAGS + memcpy_scc(tag, &((char*)flag)[sizeof(int)], len); +#else + memcpy_scc(tag, &((char*)flag)[0], len); +#endif + } + + return(RCCE_SUCCESS); +} +#endif + + +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_barrier +//-------------------------------------------------------------------------------------- +// very simple, linear barrier +//-------------------------------------------------------------------------------------- +int RCCE_barrier(RCCE_COMM *comm) { + + int counter, i, error; + int ROOT = 0; + volatile unsigned char cyclechar[RCCE_LINE_SIZE]; + volatile unsigned char valchar[RCCE_LINE_SIZE]; + volatile char *cycle; + volatile char *val; + + counter = 0; + cycle = (volatile char *)cyclechar; + val = (volatile char *)valchar; + + if (RCCE_debug_synch) + fprintf(STDERR,"UE %d has checked into barrier\n", RCCE_IAM); + +#ifdef USE_FAT_BARRIER + + // flip local barrier variable +#ifndef USE_FLAG_EXPERIMENTAL + if ((error = RCCE_get(cyclechar, (t_vcharp)(comm->gather[RCCE_IAM]), RCCE_LINE_SIZE, RCCE_IAM))) +#else + if ((error = RCCE_get_flag(cyclechar, (t_vcharp)(comm->gather[RCCE_IAM]), RCCE_LINE_SIZE, 
RCCE_IAM))) +#endif + return(RCCE_error_return(RCCE_debug_synch,error)); + *cycle = !(*cycle); +#ifndef USE_FLAG_EXPERIMENTAL + if ((error = RCCE_put((t_vcharp)(comm->gather[RCCE_IAM]), cyclechar, RCCE_LINE_SIZE, RCCE_IAM))) +#else + if ((error = RCCE_put_flag((t_vcharp)(comm->gather[RCCE_IAM]), cyclechar, RCCE_LINE_SIZE, RCCE_IAM))) +#endif + return(RCCE_error_return(RCCE_debug_synch,error)); + if ((error = RCCE_put((t_vcharp)(comm->gather[RCCE_IAM]), cyclechar, RCCE_LINE_SIZE, comm->member[ROOT]))) + return(RCCE_error_return(RCCE_debug_synch,error)); + + if (RCCE_IAM==comm->member[ROOT]) { + // read "remote" gather flags; once all equal "cycle" (i.e counter==comm->size), + // we know all UEs have reached the barrier + while (counter != comm->size) { + // skip the first member (#0), because that is the ROOT + for (counter=i=1; isize; i++) { + /* copy flag values out of comm buffer */ +#ifndef USE_FLAG_EXPERIMENTAL + if ((error = RCCE_get(valchar, (t_vcharp)(comm->gather[i]), RCCE_LINE_SIZE, RCCE_IAM))) +#else + if ((error = RCCE_get_flag(valchar, (t_vcharp)(comm->gather[i]), RCCE_LINE_SIZE, RCCE_IAM))) +#endif + return(RCCE_error_return(RCCE_debug_synch,error)); + if (*val == *cycle) counter++; + } + } + // set release flags + for (i=1; isize; i++) { + if ((error = RCCE_flag_write(&(comm->release), *cycle, comm->member[i]))) + return(RCCE_error_return(RCCE_debug_synch,error)); + } + } + else { + if ((error = RCCE_wait_until(comm->release, *cycle))) + return(RCCE_error_return(RCCE_debug_synch,error)); + } + +#else // !USE_FAT_BARRIER + + // flip local barrier variable +#ifndef USE_FLAG_EXPERIMENTAL + if (error = RCCE_get(cyclechar, (t_vcharp)(comm->gather), RCCE_LINE_SIZE, RCCE_IAM)) +#else + if (error = RCCE_get_flag(cyclechar, (t_vcharp)(comm->gather), RCCE_LINE_SIZE, RCCE_IAM)) +#endif + return(RCCE_error_return(RCCE_debug_synch,error)); + *cycle = !(*cycle); +#ifndef USE_FLAG_EXPERIMENTAL + if (error = RCCE_put((t_vcharp)(comm->gather), cyclechar, 
RCCE_LINE_SIZE, RCCE_IAM)) +#else + if (error = RCCE_put_flag((t_vcharp)(comm->gather), cyclechar, RCCE_LINE_SIZE, RCCE_IAM)) +#endif + return(RCCE_error_return(RCCE_debug_synch,error)); + + if (RCCE_IAM==comm->member[ROOT]) { + // read "remote" gather flags; once all equal "cycle" (i.e counter==comm->size), + // we know all UEs have reached the barrier + while (counter != comm->size) { + // skip the first member (#0), because that is the ROOT + for (counter=i=1; isize; i++) { + /* copy flag values out of comm buffer */ +#ifndef USE_FLAG_EXPERIMENTAL + if (error = RCCE_get(valchar, (t_vcharp)(comm->gather), RCCE_LINE_SIZE, + comm->member[i])) +#else + if (error = RCCE_get_flag(valchar, (t_vcharp)(comm->gather), RCCE_LINE_SIZE, + comm->member[i])) +#endif + return(RCCE_error_return(RCCE_debug_synch,error)); + if (*val == *cycle) counter++; + } + } + // set release flags + for (i=1; isize; i++) { + if (error = RCCE_flag_write(&(comm->release), *cycle, comm->member[i])) + return(RCCE_error_return(RCCE_debug_synch,error)); + } + } + else { + if (error = RCCE_wait_until(comm->release, *cycle)) { + return(RCCE_error_return(RCCE_debug_synch,error)); + } + } + +#endif // !USE_FAT_BARRIER + if (RCCE_debug_synch) fprintf(STDERR,"UE %d has cleared barrier\n", RCCE_IAM); + return(RCCE_SUCCESS); +} + +//-------------------------------------------------------------------------------------- +// FUNCTION: RCCE_nb_barrier +//-------------------------------------------------------------------------------------- +// non-blocking version of the linear barrier +//-------------------------------------------------------------------------------------- +int RCCE_nb_barrier(RCCE_COMM *comm) { + + int i, error; + int ROOT = 0; + volatile unsigned char cyclechar[RCCE_LINE_SIZE]; + volatile unsigned char valchar[RCCE_LINE_SIZE]; +#ifdef USE_FLAG_EXPERIMENTAL + volatile char *cycle; + volatile char *val; + cycle = (volatile char *)cyclechar; + val = (volatile char *)valchar; +#else + volatile 
int *cycle; + volatile int *val; + cycle = (volatile int *)cyclechar; + val = (volatile int *)valchar; +#endif + + if(comm->label == 1) goto label1; + if(comm->label == 2) goto label2; + + comm->count = 0; + + if (RCCE_debug_synch) + fprintf(STDERR,"UE %d has checked into barrier\n", RCCE_IAM); + +#ifdef USE_FAT_BARRIER + + // flip local barrier variable +#ifndef USE_FLAG_EXPERIMENTAL + if ((error = RCCE_get(cyclechar, (t_vcharp)(comm->gather[RCCE_IAM]), RCCE_LINE_SIZE, RCCE_IAM))) +#else + if ((error = RCCE_get_flag(cyclechar, (t_vcharp)(comm->gather[RCCE_IAM]), RCCE_LINE_SIZE, RCCE_IAM))) +#endif + return(RCCE_error_return(RCCE_debug_synch,error)); + *cycle = !(*cycle); +#ifndef USE_FLAG_EXPERIMENTAL + if ((error = RCCE_put((t_vcharp)(comm->gather[RCCE_IAM]), cyclechar, RCCE_LINE_SIZE, RCCE_IAM))) +#else + if ((error = RCCE_put_flag((t_vcharp)(comm->gather[RCCE_IAM]), cyclechar, RCCE_LINE_SIZE, RCCE_IAM))) +#endif + return(RCCE_error_return(RCCE_debug_synch,error)); + if ((error = RCCE_put((t_vcharp)(comm->gather[RCCE_IAM]), cyclechar, RCCE_LINE_SIZE, comm->member[ROOT]))) + return(RCCE_error_return(RCCE_debug_synch,error)); + + if (RCCE_IAM==comm->member[ROOT]) { + // read "remote" gather flags; once all equal "cycle" (i.e counter==comm->size), + // we know all UEs have reached the barrier + comm->cycle = *cycle; +label1: + while (comm->count != comm->size) { + // skip the first member (#0), because that is the ROOT + for (comm->count=i=1; isize; i++) { + /* copy flag values out of comm buffer */ +#ifndef USE_FLAG_EXPERIMENTAL + if ((error = RCCE_get(valchar, (t_vcharp)(comm->gather[i]), RCCE_LINE_SIZE, RCCE_IAM))) +#else + if ((error = RCCE_get_flag(valchar, (t_vcharp)(comm->gather[i]), RCCE_LINE_SIZE, RCCE_IAM))) +#endif + return(RCCE_error_return(RCCE_debug_synch,error)); + if (*val == comm->cycle) comm->count++; + } + if(comm->count != comm->size) { + comm->label = 1; + return(RCCE_PENDING); + } + } + // set release flags + for (i=1; isize; i++) { + if 
((error = RCCE_flag_write(&(comm->release), comm->cycle, comm->member[i]))) + return(RCCE_error_return(RCCE_debug_synch,error)); + } + } + else { + int test; + comm->cycle = *cycle; +label2: + RCCE_test_flag(comm->release, comm->cycle, &test); + if(!test) { + comm->label = 2; + return(RCCE_PENDING); + } + } + + comm->label = 0; + +#else // !USE_FAT_BARRIER + + // flip local barrier variable +#ifndef USE_FLAG_EXPERIMENTAL + if (error = RCCE_get(cyclechar, (t_vcharp)(comm->gather[0]), RCCE_LINE_SIZE, RCCE_IAM)) +#else + if (error = RCCE_get_flag(cyclechar, (t_vcharp)(comm->gather[0]), RCCE_LINE_SIZE, RCCE_IAM)) +#endif + return(RCCE_error_return(RCCE_debug_synch,error)); + *cycle = !(*cycle); +#ifndef USE_FLAG_EXPERIMENTAL + if (error = RCCE_put((t_vcharp)(comm->gather[0]), cyclechar, RCCE_LINE_SIZE, RCCE_IAM)) +#else + if (error = RCCE_put_flag((t_vcharp)(comm->gather[0]), cyclechar, RCCE_LINE_SIZE, RCCE_IAM)) +#endif + return(RCCE_error_return(RCCE_debug_synch,error)); + + if (RCCE_IAM==comm->member[ROOT]) { + // read "remote" gather flags; once all equal "cycle" (i.e counter==comm->size), + // we know all UEs have reached the barrier + comm->cycle = *cycle; +label1: + while (comm->count != comm->size) { + // skip the first member (#0), because that is the ROOT + for (comm->count=i=1; isize; i++) { + /* copy flag values out of comm buffer */ +#ifndef USE_FLAG_EXPERIMENTAL + if (error = RCCE_get(valchar, (t_vcharp)(comm->gather[0]), RCCE_LINE_SIZE, + comm->member[i])) +#else + if (error = RCCE_get_flag(valchar, (t_vcharp)(comm->gather[0]), RCCE_LINE_SIZE, + comm->member[i])) +#endif + return(RCCE_error_return(RCCE_debug_synch,error)); + if (*val == comm->cycle) comm->count++; + } + if(comm->count != comm->size) { + comm->label = 1; + return(RCCE_PENDING); + } + } + // set release flags + for (i=1; isize; i++) { + if (error = RCCE_flag_write(&(comm->release), comm->cycle, comm->member[i])) + return(RCCE_error_return(RCCE_debug_synch,error)); + } + } + else { + int 
test; + comm->cycle = *cycle; +label2: + RCCE_test_flag(comm->release, comm->cycle, &test); + if(!test) { + comm->label = 2; + return(RCCE_PENDING); + } + } + + comm->label = 0; + +#endif // !USE_FAT_BARRIER + if (RCCE_debug_synch) fprintf(STDERR,"UE %d has cleared barrier\n", RCCE_IAM); + return(RCCE_SUCCESS); +} + +#endif + +void RCCE_fence() { + return; +} + +#endif diff --git a/hermit/usr/ircce/iRCCE.h b/hermit/usr/ircce/iRCCE.h new file mode 100644 index 000000000..777e5e706 --- /dev/null +++ b/hermit/usr/ircce/iRCCE.h @@ -0,0 +1,290 @@ +// +// Copyright 2010 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// [2010-10-25] added support for non-blocking send/recv operations +// - iRCCE_isend(), ..._test(), ..._wait(), ..._push() +// - iRCCE_irecv(), ..._test(), ..._wait(), ..._push() +// by Carsten Clauss, Chair for Operating Systems, +// RWTH Aachen University +// +// [2010-11-12] extracted non-blocking code into separate library +// by Carsten Scholtes, University of Bayreuth +// +// [2010-12-09] added functions for a convenient handling of multiple +// pending non-blocking requests +// by Jacek Galowicz, Chair for Operating Systems +// RWTH Aachen University +// +// [2011-04-19] added wildcard mechanism (iRCCE_ANY_SOURCE) for receiving +// a message from an arbitrary remote rank +// by Simon Pickartz, Chair for Operating Systems, +// RWTH Aachen University +// +// [2011-06-16] iRCCE_ANY_LENGTH wildcard mechanism can only be used in +// the SINGLEBITFLAGS=0 case (-> bigflags must be enabled!) +// +// [2011-06-27] merged iRCCE_ANY_SOURCE branch with trunk (iRCCE_ANY_LENGTH) +// +// [2011-11-03] - renamed blocking (pipelined) send/recv functions to +// iRCCE_ssend() / iRCCE_srecv() (strictly synchronous!) 
//                - added non-blocking by synchronous send/recv functions:
//                  iRCCE_issend() / iRCCE_isrecv()
//                by Carsten Clauss, Chair for Operating Systems,
//                                   RWTH Aachen University
//
// [2012-10-29] - added functions for handling "Tagged Flags"
//                iRCCE_flag_read/write_tagged(), iRCCE_test/wait_tagged()
//                by Carsten Clauss, Chair for Operating Systems,
//                                   RWTH Aachen University
//

#ifndef IRCCE_H
#define IRCCE_H

#include "RCCE.h"

#define iRCCE_VERSION "2.0"
#define iRCCE_FLAIR

// return codes of the iRCCE functions
#define iRCCE_SUCCESS  RCCE_SUCCESS
#define iRCCE_ERROR       -1
#define iRCCE_PENDING     -2
#define iRCCE_RESERVED    -3
#define iRCCE_NOT_ENQUEUED -4

// the iRCCE_ANY_LENGTH wildcard is only available with whole-line flags
#if !defined(SINGLEBITFLAGS) && !defined(RCCE_VERSION)
#define _iRCCE_ANY_LENGTH_
extern const int iRCCE_ANY_LENGTH;
#endif

// number of tag bytes that fit into one flag line next to the flag word
// (one more int is subtracted in the OpenMP build)
#if !defined(SINGLEBITFLAGS)
#ifdef _OPENMP
#define iRCCE_MAX_TAGGED_LEN (RCCE_LINE_SIZE - 2 * sizeof(int))
#else
#define iRCCE_MAX_TAGGED_LEN (RCCE_LINE_SIZE - sizeof(int))
#endif
#endif

extern const int iRCCE_ANY_SOURCE;

// handle describing one non-blocking send operation
typedef struct _iRCCE_SEND_REQUEST {
  char *privbuf;    // source buffer in local private memory (send buffer)
  t_vcharp combuf;  // intermediate buffer in MPB
  size_t chunk;     // size of MPB available for this message (bytes)
  size_t subchunk1; // sub-chunks for the pipelined message transfer
  size_t subchunk2;
  RCCE_FLAG *ready; // flag indicating whether receiver is ready
  RCCE_FLAG *sent;  // flag indicating whether message has been sent by source
  RCCE_FLAG_STATUS flag_set_value; // used for iRCCE_ANY_LENGTH wildcard
  size_t size;      // size of message (bytes)
  int dest;         // UE that will receive the message
  int sync;         // flag indicating whether send is synchronous or not

  size_t wsize;     // offset within send buffer when putting in "chunk" bytes
  size_t remainder; // bytes remaining to be sent
  size_t nbytes;    // number of bytes to be sent in single RCCE_put call
  char *bufptr;     // running pointer inside privbuf for current location

  int label;        // jump/goto label for the reentrance of the respective poll function
  int finished;     // flag that indicates whether the request has already been finished

  struct _iRCCE_SEND_REQUEST *next; // next request in the same queue
} iRCCE_SEND_REQUEST;


// handle describing one non-blocking receive operation
// NOTE(review): several field comments below were evidently copied from the send
// request ("send buffer", "to be sent", "RCCE_put"); presumably they refer to the
// receive side here - confirm against the irecv implementation
typedef struct _iRCCE_RECV_REQUEST {
  char *privbuf;    // buffer in local private memory (original comment: "send buffer")
  t_vcharp combuf;  // intermediate buffer in MPB
  size_t chunk;     // size of MPB available for this message (bytes)
  size_t subchunk1; // sub-chunks for the pipelined message transfer
  size_t subchunk2;
  RCCE_FLAG *ready; // flag indicating whether receiver is ready
  RCCE_FLAG *sent;  // flag indicating whether message has been sent by source
  RCCE_FLAG_STATUS flag_set_value; // used for iRCCE_ANY_LENGTH wildcard
  size_t size;      // size of message (bytes)
  int source;       // UE that will send the message
  int sync;         // flag indicating whether recv is synchronous or not

  size_t wsize;     // offset within send buffer when putting in "chunk" bytes
  size_t remainder; // bytes remaining to be sent
  size_t nbytes;    // number of bytes to be sent in single RCCE_put call
  char *bufptr;     // running pointer inside privbuf for current location

  int label;        // jump/goto label for the reentrance of the respective poll function
  int finished;     // flag that indicates whether the request has already been finished
  int started;      // flag that indicates whether message parts have already been received

  struct _iRCCE_RECV_REQUEST *next; // next request in the same queue
} iRCCE_RECV_REQUEST;

// values of iRCCE_WAIT_LISTELEM.type
#define iRCCE_WAIT_LIST_RECV_TYPE 0
#define iRCCE_WAIT_LIST_SEND_TYPE 1

// one entry of a wait list; "type" tells whether "req" is a send or a recv request
typedef struct _iRCCE_WAIT_LISTELEM {
    int type;
    struct _iRCCE_WAIT_LISTELEM * next;
    void * req;
} iRCCE_WAIT_LISTELEM;

// singly linked list of pending requests with pointers to its first and last entry
typedef struct _iRCCE_WAIT_LIST {
    iRCCE_WAIT_LISTELEM * first;
    iRCCE_WAIT_LISTELEM * last;
} iRCCE_WAIT_LIST;

#ifdef AIR
// Atomic Increment Register: pointers to the registers in the non-OpenMP build,
// plain integers in the OpenMP build
typedef volatile struct _iRCCE_AIR {
#ifndef _OPENMP
    int * counter;
    int * init;
#else
    int counter;
    int init;
#endif
} iRCCE_AIR;
#endif

///////////////////////////////////////////////////////////////
//
//                       THE iRCCE API:
//
//  Initialize function:
int   iRCCE_init(void);
//
//  Non-blocking send/recv functions:
int   iRCCE_isend(char *, ssize_t, int, iRCCE_SEND_REQUEST *);
int   iRCCE_isend_test(iRCCE_SEND_REQUEST *, int *);
int   iRCCE_isend_wait(iRCCE_SEND_REQUEST *);
int   iRCCE_isend_push(void);
int   iRCCE_irecv(char *, ssize_t, int, iRCCE_RECV_REQUEST *);
int   iRCCE_irecv_test(iRCCE_RECV_REQUEST *, int *);
int   iRCCE_irecv_wait(iRCCE_RECV_REQUEST *);
int   iRCCE_irecv_push(void);
//
//  Pipelined send/recv functions: (synchronous and blocking)
int   iRCCE_ssend(char *, ssize_t, int);
int   iRCCE_srecv(char *, ssize_t, int);
int   iRCCE_srecv_test(char *, ssize_t, int, int*);
//
//  Non-blocking pipelined send/recv functions:
int   iRCCE_issend(char *, ssize_t, int, iRCCE_SEND_REQUEST *);
int   iRCCE_isrecv(char *, ssize_t, int, iRCCE_RECV_REQUEST *);
//
//  SCC-customized put/get and memcpy functions:
int   iRCCE_put(t_vcharp, t_vcharp, int, int);
int   iRCCE_get(t_vcharp, t_vcharp, int, int);
void* iRCCE_memcpy_put(void*, const void*, size_t);
void* iRCCE_memcpy_get(void*, const void*, size_t);
t_vcharp iRCCE_malloc(size_t);
#define iRCCE_memcpy iRCCE_memcpy_put
//
//  Blocking and non-blocking 'probe' functions for incoming messages:
int   iRCCE_probe(int, int*);
int   iRCCE_iprobe(int, int*, int*);
//
//  Wait/test-all/any functions:
void  iRCCE_init_wait_list(iRCCE_WAIT_LIST*);
void  iRCCE_add_to_wait_list(iRCCE_WAIT_LIST*, iRCCE_SEND_REQUEST *, iRCCE_RECV_REQUEST *);
int   iRCCE_test_all(iRCCE_WAIT_LIST*, int *);
int   iRCCE_wait_all(iRCCE_WAIT_LIST*);
int   iRCCE_test_any(iRCCE_WAIT_LIST*, iRCCE_SEND_REQUEST **, iRCCE_RECV_REQUEST **);
int   iRCCE_wait_any(iRCCE_WAIT_LIST*, iRCCE_SEND_REQUEST **, iRCCE_RECV_REQUEST **);
//
//  Query functions for request handle parameters:
int   iRCCE_get_dest(iRCCE_SEND_REQUEST*);
int   iRCCE_get_source(iRCCE_RECV_REQUEST*);
int   iRCCE_get_size(iRCCE_SEND_REQUEST*, iRCCE_RECV_REQUEST*);
int   iRCCE_get_length(void);
//
//  Cancel functions for non-blocking requests that have not been started yet:
int   iRCCE_isend_cancel(iRCCE_SEND_REQUEST *, int *);
int   iRCCE_irecv_cancel(iRCCE_RECV_REQUEST *, int *);
//
//  Functions for handling tagged flags: (need whole cache line per flag)
#ifndef SINGLEBITFLAGS
int   iRCCE_flag_alloc_tagged(RCCE_FLAG *);
int   iRCCE_flag_write_tagged(RCCE_FLAG *, RCCE_FLAG_STATUS, int, void *, int);
int   iRCCE_flag_read_tagged(RCCE_FLAG, RCCE_FLAG_STATUS *, int, void *, int);
int   iRCCE_wait_tagged(RCCE_FLAG, RCCE_FLAG_STATUS, void *, int);
int   iRCCE_test_tagged(RCCE_FLAG, RCCE_FLAG_STATUS, int *, void *, int);
int   iRCCE_get_max_tagged_len(void);
#endif
//
//  Functions for handling Atomic Increment Registers (AIR):
#ifdef AIR
int   iRCCE_atomic_alloc(iRCCE_AIR **);
int   iRCCE_atomic_inc(iRCCE_AIR*, int*);
int   iRCCE_atomic_read(iRCCE_AIR*, int*);
int   iRCCE_atomic_write(iRCCE_AIR*, int);
#endif
//
//  Improved Collectives:
int   iRCCE_barrier(RCCE_COMM*);
int   iRCCE_bcast(char *, size_t, int, RCCE_COMM);
int   iRCCE_mcast(char *, size_t, int);
int   iRCCE_msend(char *, ssize_t);
int   iRCCE_mrecv(char *, ssize_t, int);
//
//  Functions from the GORY RCCE interface mapped to iRCCE:
//  (iRCCE_malloc is declared a second time here; the redeclaration is identical)
t_vcharp iRCCE_malloc(size_t);
int   iRCCE_flag_alloc(RCCE_FLAG *);
int   iRCCE_flag_write(RCCE_FLAG *, RCCE_FLAG_STATUS, int);
int   iRCCE_flag_read(RCCE_FLAG, RCCE_FLAG_STATUS *, int);
int   iRCCE_wait_until(RCCE_FLAG, RCCE_FLAG_STATUS);
//
//  Please Note: Since we're running in NON-GORY mode, there are no "free()" functions!
//
///////////////////////////////////////////////////////////////
//
//      Just for convenience:
//      map the RCCE_* names of the non-blocking interface onto iRCCE
#if 1
#define RCCE_isend        iRCCE_isend
#define RCCE_isend_test   iRCCE_isend_test
#define RCCE_isend_wait   iRCCE_isend_wait
#define RCCE_isend_push   iRCCE_isend_push
#define RCCE_irecv        iRCCE_irecv
#define RCCE_irecv_test   iRCCE_irecv_test
#define RCCE_irecv_wait   iRCCE_irecv_wait
#define RCCE_irecv_push   iRCCE_irecv_push
#define RCCE_SEND_REQUEST iRCCE_SEND_REQUEST
#define RCCE_RECV_REQUEST iRCCE_RECV_REQUEST
// NOTE(review): _iRCCE_TAGGED_FLAGS_ is not defined in this header, and no
// iRCCE_flag_free_tagged() is declared above - confirm whether this group is live
#ifdef _iRCCE_TAGGED_FLAGS_
#define RCCE_flag_write_tagged iRCCE_flag_write_tagged
#define RCCE_flag_read_tagged  iRCCE_flag_read_tagged
#define RCCE_wait_tagged       iRCCE_wait_tagged
#define RCCE_test_tagged       iRCCE_test_tagged
#define RCCE_flag_alloc_tagged iRCCE_flag_alloc_tagged
#define RCCE_flag_free_tagged  iRCCE_flag_free_tagged
#endif
#endif
//
//      plain send/recv are the strictly synchronous, pipelined variants
#if 1
#define iRCCE_send        iRCCE_ssend
#define iRCCE_recv        iRCCE_srecv
#define iRCCE_recv_test   iRCCE_srecv_test
#endif
//
//      requests created by issend/isrecv are progressed by the isend/irecv functions
#if 1
#define iRCCE_issend_test iRCCE_isend_test
#define iRCCE_issend_wait iRCCE_isend_wait
#define iRCCE_issend_push iRCCE_isend_push
#define iRCCE_isrecv_test iRCCE_irecv_test
#define iRCCE_isrecv_wait iRCCE_irecv_wait
#define iRCCE_isrecv_push iRCCE_irecv_push
#endif
//
///////////////////////////////////////////////////////////////

#endif

diff --git a/hermit/usr/ircce/iRCCE_admin.c b/hermit/usr/ircce/iRCCE_admin.c
new file mode 100644
index 000000000..688aeafeb
--- /dev/null
+++ b/hermit/usr/ircce/iRCCE_admin.c
@@ -0,0 +1,195 @@
//***************************************************************************************
// Administrative routines.
//***************************************************************************************
//
// Author: Rob F.
Van der Wijngaart +// Intel Corporation +// Date: 008/30/2010 +// +//*************************************************************************************** +// +// +// Copyright 2010 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// [2010-10-25] added support for non-blocking send/recv operations +// - iRCCE_isend(), ..._test(), ..._wait(), ..._push() +// - iRCCE_irecv(), ..._test(), ..._wait(), ..._push() +// by Carsten Clauss, Chair for Operating Systems, +// RWTH Aachen University +// +// [2010-11-12] extracted non-blocking code into separate library +// by Carsten Scholtes +// +// [2011-02-21] added support for multiple incoming queues +// (one recv queue per remote rank) +// +// [2011-04-19] added wildcard mechanism (iRCCE_ANY_SOURCE) for receiving +// a message from an arbitrary remote rank +// by Simon Pickartz, Chair for Operating Systems, +// RWTH Aachen University +// +// [2011-06-27] merged iRCCE_ANY_SOURCE branch with trunk (iRCCE_ANY_LENGTH) +// + +#include "RCCE.h" +#if defined(SCC) && !defined(__hermit__) +#include "SCC_API.h" +#endif +#include "iRCCE_lib.h" + +// send request queue +iRCCE_SEND_REQUEST* iRCCE_isend_queue; +// recv request queue +iRCCE_RECV_REQUEST* iRCCE_irecv_queue[RCCE_MAXNP]; + +// recv request queue for those with source = iRCCE_ANY_SOURCE +iRCCE_RECV_REQUEST* iRCCE_irecv_any_source_queue; + +// global variables for for inquiring recent source rank and recent message length +int 
iRCCE_recent_source = -1; +int iRCCE_recent_length = 0; + +#ifdef _iRCCE_ANY_LENGTH_ +const int iRCCE_ANY_LENGTH = -1 >> 1; +#endif + +const int iRCCE_ANY_SOURCE = -1; + +#ifdef AIR +iRCCE_AIR iRCCE_atomic_inc_regs[2*RCCE_MAXNP]; +int iRCCE_atomic_alloc_counter = 0; +iRCCE_AIR* iRCCE_atomic_barrier[2]; +#endif + +//-------------------------------------------------------------------------------------- +// FUNCTION: iRCCE_init +//-------------------------------------------------------------------------------------- +// initialize the library +//-------------------------------------------------------------------------------------- +int iRCCE_init(void) +{ + int i; + +#ifdef AIR +#ifndef _OPENMP + int * air_base = (int *) MallocConfigReg(FPGA_BASE + 0xE000); +#endif +#endif + + for(i=0; i RCCE_chunk) return NULL; + + result = RCCE_flags_start; + + // reduce maximum size of message payload chunk + RCCE_chunk -= size; + + // move running pointer to next available flags line + RCCE_flags_start += size; + + // move running pointer to new start of payload data area + RCCE_buff_ptr += size; + + return result; +} + +int iRCCE_flag_alloc(RCCE_FLAG *flag) +{ +#if !defined(SINGLEBITFLAGS) + return iRCCE_flag_alloc_tagged(flag); +#else + return RCCE_flag_alloc(flag); +#endif +} + +int iRCCE_flag_write(RCCE_FLAG *flag, RCCE_FLAG_STATUS val, int ID) +{ +#if !defined(SINGLEBITFLAGS) + return iRCCE_flag_write_tagged(flag, val, ID, NULL, 0); +#else + return RCCE_flag_write(flag, val, ID); +#endif +} + +int iRCCE_flag_read(RCCE_FLAG flag, RCCE_FLAG_STATUS *val, int ID) +{ +#if !defined(SINGLEBITFLAGS) + return iRCCE_flag_read_tagged(flag, val, ID, NULL, 0); +#else + return RCCE_flag_read(flag, val, ID); +#endif +} + +int iRCCE_wait_until(RCCE_FLAG flag, RCCE_FLAG_STATUS val) +{ +#if !defined(SINGLEBITFLAGS) + return iRCCE_wait_tagged(flag, val, NULL, 0); +#else + return iRCCE_wait_until(flag, val); +#endif +} diff --git a/hermit/usr/ircce/iRCCE_atomic.c 
b/hermit/usr/ircce/iRCCE_atomic.c new file mode 100644 index 000000000..5253d8205 --- /dev/null +++ b/hermit/usr/ircce/iRCCE_atomic.c @@ -0,0 +1,195 @@ +//*************************************************************************************** +// Functions for handling Atomic Increment Registers (AIR). +//*************************************************************************************** +// +// Copyright 2012, Chair for Operating Systems, RWTH Aachen University +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//
+
+
+#include "iRCCE_lib.h"
+
+#ifdef AIR
+
+//--------------------------------------------------------------------------------------
+// FUNCTION: iRCCE_atomic_alloc
+//--------------------------------------------------------------------------------------
+// Allocates a new AIR register and initializes it to 0; the register is handed back
+// through *reg. Returns iRCCE_SUCCESS, or iRCCE_ERROR if 2 * RCCE_NP registers have
+// already been allocated (in which case *reg is left untouched)
+//--------------------------------------------------------------------------------------
+int iRCCE_atomic_alloc(iRCCE_AIR** reg)
+{
+	if(iRCCE_atomic_alloc_counter < 2 * RCCE_NP) {
+
+		// NOTE(review): RC_COREID is indexed with the raw allocation counter (which can
+		// reach 2*RCCE_NP-1) and the RCCE_MAXNP offset only starts at counter RCCE_NP+1,
+		// not RCCE_NP -- confirm that both are intended
+		int next_reg = RC_COREID[iRCCE_atomic_alloc_counter];
+
+		if(iRCCE_atomic_alloc_counter > RCCE_NP) next_reg += RCCE_MAXNP;
+
+		(*reg) = &iRCCE_atomic_inc_regs[next_reg];
+
+#ifdef _OPENMP
+		// only the master thread advances the shared counter; all threads wait for it
+#pragma omp master
+		{
+			iRCCE_atomic_alloc_counter++;
+		}
+#pragma omp barrier
+#else
+		iRCCE_atomic_alloc_counter++;
+#endif
+
+		iRCCE_atomic_write((*reg), 0);
+
+		return iRCCE_SUCCESS;
+	}
+	else {
+
+		return iRCCE_ERROR;
+	}
+}
+
+//--------------------------------------------------------------------------------------
+// FUNCTION: iRCCE_atomic_inc
+//--------------------------------------------------------------------------------------
+// Increments an AIR register and returns its previous content in *value;
+// value may be NULL if the caller is not interested in the previous content.
+// Always returns iRCCE_SUCCESS
+//--------------------------------------------------------------------------------------
+int iRCCE_atomic_inc(iRCCE_AIR* reg, int* value)
+{
+	int discard;
+
+	// redirect the result into a local scratch variable if the caller passed NULL
+	if(value == NULL) value = &discard;
+
+#ifndef _OPENMP
+	// presumably the read of the counter register performs the increment -- confirm
+	(*value) = (*reg->counter);
+#else
+#pragma omp critical
+	{
+		(*value) = reg->counter;
+		reg->counter++;
+		reg->init = reg->counter;
+	}
+#endif
+
+	return iRCCE_SUCCESS;
+}
+
+//--------------------------------------------------------------------------------------
+// FUNCTION: iRCCE_atomic_read
+//--------------------------------------------------------------------------------------
+// Returns the current value of an AIR register in *value (value must not be NULL);
+// always returns iRCCE_SUCCESS
+//--------------------------------------------------------------------------------------
+int iRCCE_atomic_read(iRCCE_AIR* reg, int* value)
+{
+#ifndef _OPENMP
+	(*value) = (*reg->init);
+#else
+#pragma omp critical
+	{
+		(*value) = reg->init;
+	}
+#endif
+
+	return iRCCE_SUCCESS;
+}
+
+//--------------------------------------------------------------------------------------
+// FUNCTION: iRCCE_atomic_write
+//--------------------------------------------------------------------------------------
+// Initializes an AIR register by writing a start value; always returns iRCCE_SUCCESS
+//--------------------------------------------------------------------------------------
+int iRCCE_atomic_write(iRCCE_AIR* reg, int value)
+{
+#ifndef _OPENMP
+	(*reg->init) = value;
+#else
+#pragma omp critical
+	{
+		// keep both fields of the emulated register in step
+		reg->init = value;
+		reg->counter = value;
+	}
+#endif
+
+	return iRCCE_SUCCESS;
+}
+
+
+//--------------------------------------------------------------------------------------
+// FUNCTION: iRCCE_barrier
+//--------------------------------------------------------------------------------------
+// A barrier version based on the Atomic Increment Registers (AIR); if AIRs are not
+// supported, the function makes a fall-back to the common RCCE_barrier().
+//-------------------------------------------------------------------------------------- + +static void RC_wait(int wait) { +#ifndef _OPENMP + asm volatile( "movl %%eax,%%ecx\n\t" + "test:nop\n\t" + "loop test" + : /* no output registers */ + : "a" (wait) + : "%ecx" ); +#endif + return; +} + +static int idx = 0; +static unsigned int rnd = 0; +#ifdef _OPENMP +#pragma omp threadprivate (idx, rnd) +#endif + +int iRCCE_barrier(RCCE_COMM *comm) +{ + int backoff = BACKOFF_MIN, wait, i = 0; + int counter; + + if(comm == NULL) comm = &RCCE_COMM_WORLD; + + if (comm == &RCCE_COMM_WORLD) { + + iRCCE_atomic_inc(iRCCE_atomic_barrier[idx], &counter); + if (counter < (comm->size-1)) + { + iRCCE_atomic_read(iRCCE_atomic_barrier[idx], &counter); + while (counter > 0) + { + rnd = rnd * 1103515245u + 12345u; + wait = BACKOFF_MIN + (rnd % (backoff << i)); + RC_wait(wait); + if (wait < BACKOFF_MAX) i++; + + iRCCE_atomic_read(iRCCE_atomic_barrier[idx], &counter); + } + } + else + { + iRCCE_atomic_write(iRCCE_atomic_barrier[idx], 0); + } + + idx = !idx; + + return(RCCE_SUCCESS); + } + else + { + return RCCE_barrier(comm); + } +} + +#else // !AIR + +int iRCCE_barrier(RCCE_COMM *comm) +{ + if(comm == NULL) return RCCE_barrier(&RCCE_COMM_WORLD); + else return RCCE_barrier(comm); +} + +#endif // !AIR diff --git a/hermit/usr/ircce/iRCCE_get.c b/hermit/usr/ircce/iRCCE_get.c new file mode 100644 index 000000000..ee90950ea --- /dev/null +++ b/hermit/usr/ircce/iRCCE_get.c @@ -0,0 +1,78 @@ +//*************************************************************************************** +// Get data from communication buffer. +//*************************************************************************************** +// +// Author: Rob F. 
Van der Wijngaart +// Intel Corporation +// Date: 008/30/2010 +// +//*************************************************************************************** +// +// Copyright 2010 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// [2010-11-03] switched to SCC-optimized memcpy() functions in scc_memcpy.h: +// - memcpy_to_mpb() +// - memcpy_from_mpb() +// by Stefan Lankes, Carsten Clauss, Chair for Operating Systems, +// RWTH Aachen University +// +#include "iRCCE_lib.h" + +#if (defined COPPERRIDGE || defined SCC) && !defined(__hermit__) +#include "scc_memcpy.h" +#endif + +void* iRCCE_memcpy_get(void *dest, const void *src, size_t count) +{ +#if (defined COPPERRIDGE || defined SCC) && !defined(__hermit__) + return memcpy_from_mpb(dest, src, count); +#else + return memcpy(dest, src, count); +#endif +} + +//-------------------------------------------------------------------------------------- +// FUNCTION: iRCCE_get +//-------------------------------------------------------------------------------------- +// copy data from address "source" in the remote MPB to address "target" in either the +// local MPB, or in the calling UE's private memory. 
We do not test to see if a move +// into the calling UE's private memory stays within allocated memory * +//-------------------------------------------------------------------------------------- +int iRCCE_get( + t_vcharp target, // target buffer, MPB or private memory + t_vcharp source, // source buffer, MPB + int num_bytes, // number of bytes to copy (must be multiple of cache line size + int ID // rank of source UE + ) { + + // in non-GORY mode we only need to retain the MPB source shift; we + // already know the source is in the MPB, not private memory + source = RCCE_comm_buffer[ID]+(source-RCCE_comm_buffer[RCCE_IAM]); + + // do the actual copy, making sure we copy fresh data +#ifdef _OPENMP + #pragma omp flush +#endif + RC_cache_invalidate(); + + iRCCE_memcpy_get((void *)target, (void *)source, num_bytes); + + // flush data to make sure it is visible to all threads; cannot use a flush list + // because it concerns malloced space +#ifdef _OPENMP + #pragma omp flush +#endif + return(iRCCE_SUCCESS); +} diff --git a/hermit/usr/ircce/iRCCE_irecv.c b/hermit/usr/ircce/iRCCE_irecv.c new file mode 100644 index 000000000..57829e49a --- /dev/null +++ b/hermit/usr/ircce/iRCCE_irecv.c @@ -0,0 +1,709 @@ +//*************************************************************************************** +// Synchronized receive routines. +//*************************************************************************************** +// +// Author: Rob F. Van der Wijngaart +// Intel Corporation +// Date: 008/30/2010 +// +//*************************************************************************************** +// +// Copyright 2010 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// [2010-10-25] added support for non-blocking send/recv operations +// - iRCCE_isend(), ..._test(), ..._wait(), ..._push() +// - iRCCE_irecv(), ..._test(), ..._wait(), ..._push() +// by Carsten Clauss, Chair for Operating Systems, +// RWTH Aachen University +// +// [2010-11-12] extracted non-blocking code into separate library +// by Carsten Scholtes +// +// [2010-12-09] added cancel functions for non-blocking send/recv requests +// by Carsten Clauss +// +// [2011-02-21] added support for multiple incoming queues +// (one recv queue per remote rank) +// +// [2011-04-19] added wildcard mechanism (iRCCE_ANY_SOURCE) for receiving +// a message from an arbitrary remote rank +// by Simon Pickartz, Chair for Operating Systems, +// RWTH Aachen University +// +// [2011-06-27] merged iRCCE_ANY_SOURCE branch with trunk (iRCCE_ANY_LENGTH) +// +// [2011-08-02] added iRCCE_iprobe() function for probing for incomming messages +// +// [2011-11-03] added non-blocking by synchronous send/recv functions: +// iRCCE_issend() / iRCCE_isrecv() +// + +#include "iRCCE_lib.h" + +#if (defined COPPERRIDGE || defined SCC) && !defined(__hermit__) +#include "scc_memcpy.h" +#else +#define memcpy_scc memcpy +#endif + +#ifdef SINGLEBITFLAGS +#warning iRCCE_ANY_LENGTH: for using this wildcard, SINGLEBITFLAGS must be disabled! (make SINGLEBITFLAGS=0) +#endif + +#ifdef RCCE_VERSION +#warning iRCCE_ANY_LENGTH: for using this wildcard, iRCCE must be built against RCCE release V1.0.13! 
+#endif + +static int iRCCE_push_recv_request(iRCCE_RECV_REQUEST *request) { + + char padline[RCCE_LINE_SIZE]; // copy buffer, used if message not multiple of line size + int test; // flag for calling iRCCE_test_flag() + + if(request->finished) return(iRCCE_SUCCESS); + + if(request->sync) return iRCCE_push_srecv_request(request); + + if(request->label == 1) goto label1; + if(request->label == 2) goto label2; + if(request->label == 3) goto label3; + +#ifdef _iRCCE_ANY_LENGTH_ + RCCE_flag_read(*(request->sent), &(request->flag_set_value), RCCE_IAM); + if(request->flag_set_value == 0) { + return(iRCCE_PENDING); + } + request->size = (size_t)request->flag_set_value; +#endif + + // receive data in units of available chunk size of MPB + for (; request->wsize < (request->size / request->chunk) * request->chunk; request->wsize += request->chunk) { + request->bufptr = request->privbuf + request->wsize; + request->nbytes = request->chunk; +label1: + iRCCE_test_flag(*(request->sent), request->flag_set_value, &test); + if(!test) { + request->label = 1; + return(iRCCE_PENDING); + } + request->started = 1; + + RCCE_flag_write(request->sent, RCCE_FLAG_UNSET, RCCE_IAM); + // copy data from source's MPB space to private memory + iRCCE_get((t_vcharp)request->bufptr, request->combuf, request->nbytes, request->source); + + // tell the source I have moved data out of its comm buffer + RCCE_flag_write(request->ready, request->flag_set_value, request->source); + } + + request->remainder = request->size % request->chunk; + // if nothing is left over, we are done + if (!request->remainder) { + if(iRCCE_recent_source != request->source) iRCCE_recent_source = request->source; + if(iRCCE_recent_length != request->size) iRCCE_recent_length = request->size; + request->finished = 1; + return(iRCCE_SUCCESS); + } + + // receive remainder of data--whole cache lines + request->bufptr = request->privbuf + (request->size / request->chunk) * request->chunk; + request->nbytes = request->remainder - 
request->remainder % RCCE_LINE_SIZE; + if (request->nbytes) { +label2: + iRCCE_test_flag(*(request->sent), request->flag_set_value, &test); + if(!test) { + request->label = 2; + return(iRCCE_PENDING); + } + request->started = 1; + + RCCE_flag_write(request->sent, RCCE_FLAG_UNSET, RCCE_IAM); + // copy data from source's MPB space to private memory + iRCCE_get((t_vcharp)request->bufptr, request->combuf, request->nbytes, request->source); + + // tell the source I have moved data out of its comm buffer + RCCE_flag_write(request->ready, request->flag_set_value, request->source); + } + + request->remainder = request->size % request->chunk; + request->remainder = request->remainder % RCCE_LINE_SIZE; + if (!request->remainder) { + if(iRCCE_recent_source != request->source) iRCCE_recent_source = request->source; + if(iRCCE_recent_length != request->size) iRCCE_recent_length = request->size; + request->finished = 1; + return(iRCCE_SUCCESS); + } + + // remainder is less than cache line. This must be copied into appropriately sized + // intermediate space before exact number of bytes get copied to the final destination + request->bufptr = request->privbuf + (request->size / request->chunk) * request->chunk + request->nbytes; + request->nbytes = RCCE_LINE_SIZE; +label3: + iRCCE_test_flag(*(request->sent), request->flag_set_value, &test); + if(!test) { + request->label = 3; + return(iRCCE_PENDING); + } + request->started = 1; + + RCCE_flag_write(request->sent, RCCE_FLAG_UNSET, RCCE_IAM); + // copy data from source's MPB space to private memory + iRCCE_get((t_vcharp)padline, request->combuf, request->nbytes, request->source); + memcpy_scc(request->bufptr,padline,request->remainder); + + // tell the source I have moved data out of its comm buffer + RCCE_flag_write(request->ready, request->flag_set_value, request->source); + + if(iRCCE_recent_source != request->source) iRCCE_recent_source = request->source; + if(iRCCE_recent_length != request->size) iRCCE_recent_length = 
request->size; + request->finished = 1; + return(iRCCE_SUCCESS); +} + +static void iRCCE_init_recv_request( + char *privbuf, // source buffer in local private memory (send buffer) + t_vcharp combuf, // intermediate buffer in MPB + size_t chunk, // size of MPB available for this message (bytes) + RCCE_FLAG *ready, // flag indicating whether receiver is ready + RCCE_FLAG *sent, // flag indicating whether message has been sent by source + size_t size, // size of message (bytes) + int source, // UE that will send the message + int sync, // flag indicating whether recv is synchronous or not + iRCCE_RECV_REQUEST *request + ) { + + request->privbuf = privbuf; + request->combuf = combuf; + request->chunk = chunk; + request->ready = ready; + request->sent = sent; + request->size = size; + request->source = source; + + request->sync = sync; + request->subchunk1 = chunk / 2; + request->subchunk1 = ( (chunk / 2) / RCCE_LINE_SIZE ) * RCCE_LINE_SIZE; + request->subchunk2 = chunk - request->subchunk1; + + request->wsize = 0; + request->remainder = 0; + request->nbytes = 0; + request->bufptr = NULL; + + request->label = 0; + request->finished = 0; + request->started = 0; + + request->next = NULL; + +#ifndef _iRCCE_ANY_LENGTH_ + request->flag_set_value = RCCE_FLAG_SET; +#else + request->flag_set_value = (RCCE_FLAG_STATUS)size; +#endif + + return; +} + +static int iRCCE_irecv_search_source() { + int i, j; + int res = iRCCE_ANY_SOURCE; + + for( i=0; ifinished = 1; + return(iRCCE_SUCCESS); + } + } + + if( source == iRCCE_ANY_SOURCE ) { + source = iRCCE_irecv_search_source(); // first try to find a source + + if( source == iRCCE_ANY_SOURCE ){ // queue request if no source available + + iRCCE_init_recv_request(privbuf, RCCE_buff_ptr, RCCE_chunk, + &RCCE_ready_flag[RCCE_IAM], NULL, + size, iRCCE_ANY_SOURCE, sync, request); + + // put anysource-request in irecv_any_source_queue + if( iRCCE_irecv_any_source_queue == NULL ){ + iRCCE_irecv_any_source_queue = request; + } + else { + if( 
iRCCE_irecv_any_source_queue->next == NULL ) { + iRCCE_irecv_any_source_queue->next = request; + } + else { + iRCCE_RECV_REQUEST* run = iRCCE_irecv_any_source_queue; + while( run->next != NULL ) run = run->next; + run->next = request; + } + } + return iRCCE_RESERVED; + } + } + + if (source<0 || source >= RCCE_NP) + return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ID)); + else { + iRCCE_init_recv_request(privbuf, RCCE_buff_ptr, RCCE_chunk, + &RCCE_ready_flag[RCCE_IAM], &RCCE_sent_flag[source], + size, source, sync, request); + + if(iRCCE_irecv_queue[source] == NULL) { + + if(iRCCE_push_recv_request(request) == iRCCE_SUCCESS) { + return(iRCCE_SUCCESS); + } + else { + iRCCE_irecv_queue[source] = request; + + if(request == &blocking_irecv_request) { + iRCCE_irecv_wait(request); + return(iRCCE_SUCCESS); + } + + return(iRCCE_PENDING); + } + } + else { + if(iRCCE_irecv_queue[source]->next == NULL) { + iRCCE_irecv_queue[source]->next = request; + } + else { + iRCCE_RECV_REQUEST *run = iRCCE_irecv_queue[source]; + while(run->next != NULL) run = run->next; + run->next = request; + } + + if(request == &blocking_irecv_request) { + iRCCE_irecv_wait(request); + return(iRCCE_SUCCESS); + } + + return(iRCCE_RESERVED); + } + } +} + +int iRCCE_irecv(char *privbuf, ssize_t size, int dest, iRCCE_RECV_REQUEST *request) { + + return iRCCE_irecv_generic(privbuf, size, dest, request, 0); +} + +int iRCCE_isrecv(char *privbuf, ssize_t size, int dest, iRCCE_RECV_REQUEST *request) { + + return iRCCE_irecv_generic(privbuf, size, dest, request, 1); +} + + +//-------------------------------------------------------------------------------------- +// FUNCTION: iRCCE_probe +//-------------------------------------------------------------------------------------- +// probe for incomming messages (non-blocking / does not receive) +//-------------------------------------------------------------------------------------- +int iRCCE_iprobe(int source, int* test_rank, int* test_flag) +{ + // determine 
source of request if given source = iRCCE_ANY_SOURCE
+	if( source == iRCCE_ANY_SOURCE ) {
+
+		source = iRCCE_irecv_search_source(); // first try to find a source
+	}
+	else {
+		int res;
+		iRCCE_test_flag(RCCE_sent_flag[source], RCCE_FLAG_SET, &res);
+
+		if(!res) source = iRCCE_ANY_SOURCE;
+	}
+
+	if(source != iRCCE_ANY_SOURCE) { // message found:
+
+		if (test_rank != NULL) (*test_rank) = source;
+		if (test_flag != NULL) (*test_flag) = 1;
+
+#ifdef _iRCCE_ANY_LENGTH_
+		{
+			int size = iRCCE_ANY_LENGTH;
+			RCCE_flag_read(RCCE_sent_flag[source], &size, RCCE_IAM);
+			if(iRCCE_recent_length != size) iRCCE_recent_length = size;
+		}
+#endif
+		if(iRCCE_recent_source != source) iRCCE_recent_source = source;
+	}
+	else {
+		if (test_rank != NULL) (*test_rank) = iRCCE_ANY_SOURCE;
+		if (test_flag != NULL) (*test_flag) = 0;
+	}
+
+	return iRCCE_SUCCESS;
+}
+
+
+//--------------------------------------------------------------------------------------
+// FUNCTION: iRCCE_irecv_test
+//--------------------------------------------------------------------------------------
+// test function for completion of the requested non-blocking recv operation
+// Just provide NULL instead of the testvar if you don't need it
+//
+// request == NULL: progress all recv queues via iRCCE_irecv_push()
+// returns iRCCE_SUCCESS      (*test = 1) if the request has been completed,
+//         iRCCE_PENDING      (*test = 0) if it is first in its queue but not yet done,
+//         iRCCE_RESERVED     (*test = 0) if it still waits for a source or is queued
+//                                        behind other requests,
+//         iRCCE_NOT_ENQUEUED (*test = 0) if a wildcard request is not found in the
+//                                        any-source queue (e.g. it was canceled before)
+//--------------------------------------------------------------------------------------
+int iRCCE_irecv_test(iRCCE_RECV_REQUEST *request, int *test) {
+
+	int source;
+
+	if(request == NULL) {
+
+		if(iRCCE_irecv_push() == iRCCE_SUCCESS) {
+			if (test) (*test) = 1;
+			return(iRCCE_SUCCESS);
+		}
+		else {
+			if (test) (*test) = 0;
+			return(iRCCE_PENDING);
+		}
+	}
+
+	// does request still have no source?
+	if( request->source == iRCCE_ANY_SOURCE ) {
+		request->source = iRCCE_irecv_search_source();
+
+		if( request->source == iRCCE_ANY_SOURCE ) {
+			if (test) (*test) = 0;
+			return iRCCE_RESERVED;
+		}
+		else { // take request out of wait_any_source-list
+
+			// find request in queue
+			if( request == iRCCE_irecv_any_source_queue ) {
+				iRCCE_irecv_any_source_queue = iRCCE_irecv_any_source_queue->next;
+			}
+			else {
+				iRCCE_RECV_REQUEST* run = iRCCE_irecv_any_source_queue;
+				while( (run != NULL) && (run->next != request) ) run = run->next;
+
+				if( run == NULL ) {
+					// request is not (or no longer) in the any-source queue:
+					// undo the source assignment instead of walking off the list
+					request->source = iRCCE_ANY_SOURCE;
+					if (test) (*test) = 0;
+					return iRCCE_NOT_ENQUEUED;
+				}
+				run->next = request->next;
+			}
+
+			request->next = NULL;
+			request->sent = &RCCE_sent_flag[request->source]; // set senders flag
+			source = request->source;
+
+			// queue request in iRCCE_irecv_queue
+			if(iRCCE_irecv_queue[source] == NULL) {
+
+				if(iRCCE_push_recv_request(request) == iRCCE_SUCCESS) {
+					if (test) (*test) = 1;
+					return(iRCCE_SUCCESS);
+				}
+				else {
+					iRCCE_irecv_queue[source] = request;
+
+					if(request == &blocking_irecv_request) {
+						iRCCE_irecv_wait(request);
+						if (test) (*test) = 1;
+						return(iRCCE_SUCCESS);
+					}
+					if (test) (*test) = 0;
+					return(iRCCE_PENDING);
+				}
+			}
+			else {
+				if(iRCCE_irecv_queue[source]->next == NULL) {
+					iRCCE_irecv_queue[source]->next = request;
+				}
+				else {
+					iRCCE_RECV_REQUEST *run = iRCCE_irecv_queue[source];
+					while(run->next != NULL) run = run->next;
+					run->next = request;
+				}
+
+				if(request == &blocking_irecv_request) {
+					iRCCE_irecv_wait(request);
+					if (test) (*test) = 1;
+					return(iRCCE_SUCCESS);
+				}
+
+				// request is only queued behind others, it is not complete yet
+				if (test) (*test) = 0;
+				return(iRCCE_RESERVED);
+			}
+
+
+		}
+	}
+	else {
+
+		source = request->source;
+
+		if(request->finished) {
+			if (test) (*test) = 1;
+			return(iRCCE_SUCCESS);
+		}
+
+		// only the request at the head of its queue may make progress
+		if(iRCCE_irecv_queue[source] != request) {
+			if (test) (*test) = 0;
+			return(iRCCE_RESERVED);
+		}
+
+		iRCCE_push_recv_request(request);
+
+		if(request->finished) {
+			iRCCE_irecv_queue[source] = request->next;
+
+			if (test) (*test) = 1;
+			return(iRCCE_SUCCESS);
+		}
+
+		if (test) (*test) = 0;
+		return(iRCCE_PENDING);
+	}
+}
+
+
+//-------------------------------------------------------------------------------------- +// FUNCTION: iRCCE_irecv_push +//-------------------------------------------------------------------------------------- +// progress function for pending requests in the irecv queue +//-------------------------------------------------------------------------------------- +static int iRCCE_irecv_push_source(int source) { + + iRCCE_RECV_REQUEST *request = iRCCE_irecv_queue[source]; + + if(request == NULL) { + return(iRCCE_SUCCESS); + } + + if(request->finished) { + return(iRCCE_SUCCESS); + } + + iRCCE_push_recv_request(request); + + if(request->finished) { + iRCCE_irecv_queue[source] = request->next; + return(iRCCE_SUCCESS); + } + + return(iRCCE_PENDING); +} + +int iRCCE_irecv_push(void) { + iRCCE_RECV_REQUEST* help_request; + + // first check sourceless requests + if( iRCCE_irecv_any_source_queue != NULL) { + while( iRCCE_irecv_any_source_queue != NULL ) { + iRCCE_irecv_any_source_queue->source = iRCCE_irecv_search_source(); + + if( iRCCE_irecv_any_source_queue->source == iRCCE_ANY_SOURCE ) { + + break; + } + // source found for first request in iRCCE_irecv_any_source_queue + else { + // set senders flag + iRCCE_irecv_any_source_queue->sent = &RCCE_sent_flag[iRCCE_irecv_any_source_queue->source]; + + // take request out of irecv_any_source_queue + help_request = iRCCE_irecv_any_source_queue; + iRCCE_irecv_any_source_queue = iRCCE_irecv_any_source_queue->next; + help_request->next = NULL; + + // put request into irecv_queue + if(iRCCE_irecv_queue[help_request->source] == NULL) { + iRCCE_irecv_queue[help_request->source] = help_request; + } + else { + iRCCE_RECV_REQUEST *run = iRCCE_irecv_queue[help_request->source]; + while(run->next != NULL) run = run->next; + run->next = help_request; + } + } + } + + } + + int i, j; + int retval = iRCCE_SUCCESS; + + for(i=0; ifinished) { + iRCCE_irecv_push(); + iRCCE_isend_push(); + } + } + else { + do { + iRCCE_isend_push(); + } + while( 
iRCCE_irecv_push() != iRCCE_SUCCESS );
+	}
+
+	return(iRCCE_SUCCESS);
+}
+
+
+//--------------------------------------------------------------------------------------
+// FUNCTION: iRCCE_irecv_cancel
+//--------------------------------------------------------------------------------------
+// try to cancel a pending non-blocking recv request
+// Just provide NULL instead of the testvar if you don't need it
+//
+// returns iRCCE_SUCCESS      (*test = 1) if the request was removed from its queue,
+//         iRCCE_PENDING      (*test = 0) if it is first in its queue and parts of the
+//                                        message have already been received,
+//         iRCCE_NOT_ENQUEUED (*test = 0) if request is NULL, already finished, or not
+//                                        found in the queue it belongs to
+//--------------------------------------------------------------------------------------
+int iRCCE_irecv_cancel(iRCCE_RECV_REQUEST *request, int *test) {
+
+	int source;
+	iRCCE_RECV_REQUEST *run;
+
+	if( (request == NULL) || (request->finished) ) {
+		if (test) (*test) = 0;
+		return iRCCE_NOT_ENQUEUED;
+	}
+
+
+	// does request have any source specified?
+	if( request->source == iRCCE_ANY_SOURCE ) {
+
+		// nothing is waiting for a source, so the request cannot be enqueued
+		if( iRCCE_irecv_any_source_queue == NULL ) {
+			if (test) (*test) = 0;
+			return iRCCE_NOT_ENQUEUED;
+		}
+
+		// request is the first one in the any-source queue
+		// (it has no source yet, hence nothing can have been received so far)
+		if( iRCCE_irecv_any_source_queue == request ) {
+			iRCCE_irecv_any_source_queue = request->next;
+
+			if (test) (*test) = 1;
+			return iRCCE_SUCCESS;
+		}
+
+		for( run = iRCCE_irecv_any_source_queue; run->next != NULL; run = run->next ) {
+			if( run->next == request ) {
+				run->next = run->next->next;
+
+				if (test) (*test) = 1;
+				return iRCCE_SUCCESS;
+			}
+		}
+
+		if (test) (*test) = 0;
+		return iRCCE_NOT_ENQUEUED;
+	}
+
+
+
+	source = request->source;
+
+	if(iRCCE_irecv_queue[source] == NULL) {
+		if (test) (*test) = 0;
+		return iRCCE_NOT_ENQUEUED;
+	}
+
+	if(iRCCE_irecv_queue[source] == request) {
+
+		// have parts of the message already been received?
+		if(request->started) {
+			if (test) (*test) = 0;
+			return iRCCE_PENDING;
+		}
+		else {
+			// no, thus request can be canceled just in time:
+			iRCCE_irecv_queue[source] = request->next;
+			if (test) (*test) = 1;
+			return iRCCE_SUCCESS;
+		}
+	}
+
+	for(run = iRCCE_irecv_queue[source]; run->next != NULL; run = run->next) {
+
+		// request found --> remove it from recv queue:
+		if(run->next == request) {
+
+			run->next = run->next->next;
+
+			if (test) (*test) = 1;
+			return iRCCE_SUCCESS;
+		}
+	}
+
+	if (test) (*test) = 0;
+	return iRCCE_NOT_ENQUEUED;
+}
+
+
diff --git a/hermit/usr/ircce/iRCCE_isend.c b/hermit/usr/ircce/iRCCE_isend.c
new file mode 100644
index 000000000..e55686b7c
--- /dev/null
+++ b/hermit/usr/ircce/iRCCE_isend.c
@@ -0,0 +1,411 @@
+//***************************************************************************************
+// Non-blocking send routines.
+//***************************************************************************************
+//
+// Author: Rob F. Van der Wijngaart
+// Intel Corporation
+// Date: 008/30/2010
+//
+//***************************************************************************************
+//
+// Copyright 2010 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// +// [2010-10-25] added support for non-blocking send/recv operations +// - iRCCE_isend(), ..._test(), ..._wait(), ..._push() +// - iRCCE_irecv(), ..._test(), ..._wait(), ..._push() +// by Carsten Clauss, Chair for Operating Systems, +// RWTH Aachen University +// +// [2010-11-12] extracted non-blocking code into separate library +// by Carsten Scholtes +// +// [2010-12-09] added cancel functions for non-blocking send/recv requests +// by Carsten Clauss +// +// [2011-11-03] added non-blocking by synchronous send/recv functions: +// iRCCE_issend() / iRCCE_isrecv() +// + +#ifdef GORY +#error iRCCE _cannot_ be built in GORY mode! +#endif + +#include "iRCCE_lib.h" + +#if (defined COPPERRIDGE || defined SCC) && !defined(__hermit__) +#include "scc_memcpy.h" +#else +#define memcpy_scc memcpy +#endif + +static int iRCCE_push_send_request(iRCCE_SEND_REQUEST *request) { + + char padline[RCCE_LINE_SIZE]; // copy buffer, used if message not multiple of line size + int test; // flag for calling iRCCE_test_flag() + + if(request->finished) return(iRCCE_SUCCESS); + + if(request->sync) return iRCCE_push_ssend_request(request); + + if(request->label == 1) goto label1; + if(request->label == 2) goto label2; + if(request->label == 3) goto label3; + + // send data in units of available chunk size of comm buffer + for (; request->wsize< (request->size / request->chunk) * request->chunk; request->wsize += request->chunk) { + request->bufptr = request->privbuf + request->wsize; + request->nbytes = request->chunk; + // copy private data to own comm buffer + iRCCE_put(request->combuf, (t_vcharp) request->bufptr, request->nbytes, RCCE_IAM); + RCCE_flag_write(request->sent, request->flag_set_value, request->dest); + // wait for the destination to be ready to receive a message +label1: + iRCCE_test_flag(*(request->ready), request->flag_set_value, &test); + if(!test) { + request->label = 1; + return(iRCCE_PENDING); + } + RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM); + } + + 
request->remainder = request->size % request->chunk; + // if nothing is left over, we are done + if (!request->remainder) { + request->finished = 1; + return(iRCCE_SUCCESS); + } + + // send remainder of data--whole cache lines + request->bufptr = request->privbuf + (request->size / request->chunk) * request->chunk; + request->nbytes = request->remainder - request->remainder % RCCE_LINE_SIZE; + if (request->nbytes) { + // copy private data to own comm buffer + iRCCE_put(request->combuf, (t_vcharp)request->bufptr, request->nbytes, RCCE_IAM); + RCCE_flag_write(request->sent, request->flag_set_value, request->dest); + // wait for the destination to be ready to receive a message +label2: + iRCCE_test_flag(*(request->ready), request->flag_set_value, &test); + if(!test) { + request->label = 2; + return(iRCCE_PENDING); + } + RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM); + } + + request->remainder = request->size % request->chunk; + request->remainder = request->remainder%RCCE_LINE_SIZE; + // if nothing is left over, we are done + if (!request->remainder) + { + request->finished = 1; + return(iRCCE_SUCCESS); + } + + // remainder is less than a cache line. 
This must be copied into appropriately sized + // intermediate space before it can be sent to the receiver + request->bufptr = request->privbuf + (request->size / request->chunk) * request->chunk + request->nbytes; + request->nbytes = RCCE_LINE_SIZE; + // copy private data to own comm buffer + memcpy_scc(padline,request->bufptr,request->remainder); + iRCCE_put(request->combuf, (t_vcharp)padline, request->nbytes, RCCE_IAM); + RCCE_flag_write(request->sent, request->flag_set_value, request->dest); + // wait for the destination to be ready to receive a message +label3: + iRCCE_test_flag(*(request->ready), request->flag_set_value, &test); + if(!test) { + request->label = 3; + return(iRCCE_PENDING); + } + RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM); + + request->finished = 1; + return(iRCCE_SUCCESS); +} + +static void iRCCE_init_send_request( + char *privbuf, // source buffer in local private memory (send buffer) + t_vcharp combuf, // intermediate buffer in MPB + size_t chunk, // size of MPB available for this message (bytes) + RCCE_FLAG *ready, // flag indicating whether receiver is ready + RCCE_FLAG *sent, // flag indicating whether message has been sent by source + size_t size, // size of message (bytes) + int dest, // UE that will receive the message + int sync, // flag indicating whether send is synchronous or not + iRCCE_SEND_REQUEST *request + ) { + + request->privbuf = privbuf; + request->combuf = combuf; + request->chunk = chunk; + request->ready = ready; + request->sent = sent; + request->size = size; + request->dest = dest; + + request->sync = sync; + request->subchunk1 = ( (chunk / 2) / RCCE_LINE_SIZE ) * RCCE_LINE_SIZE; + request->subchunk2 = chunk - request->subchunk1; + + request->wsize = 0; + request->remainder = 0; + request->nbytes = 0; + request->bufptr = NULL; + + request->label = 0; + + request->finished = 0; + + request->next = NULL; + +#ifndef _iRCCE_ANY_LENGTH_ + request->flag_set_value = RCCE_FLAG_SET; +#else + 
request->flag_set_value = (RCCE_FLAG_STATUS)size; +#endif + + return; +} + +//-------------------------------------------------------------------------------------- +// FUNCTION: iRCCE_isend +//-------------------------------------------------------------------------------------- +// non-blocking send function; returns a handle of type iRCCE_SEND_REQUEST +//-------------------------------------------------------------------------------------- +static iRCCE_SEND_REQUEST blocking_isend_request; +#ifdef _OPENMP + #pragma omp threadprivate (blocking_isend_request) +#endif +inline static int iRCCE_isend_generic(char *privbuf, ssize_t size, int dest, iRCCE_SEND_REQUEST *request, int sync) { + + if(request == NULL) request = &blocking_isend_request; + + if(size == 0) { + if(sync) { + // just synchronize: + size = 1; + privbuf = (char*)&size; + } else + size = -1; + } + + if(size < 0) { + iRCCE_init_send_request(privbuf, RCCE_buff_ptr, RCCE_chunk, + &RCCE_ready_flag[dest], &RCCE_sent_flag[RCCE_IAM], + size, dest, sync, request); + request->finished = 1; + return(iRCCE_SUCCESS); + } + + if (dest<0 || dest >= RCCE_NP) + return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ID)); + else { + iRCCE_init_send_request(privbuf, RCCE_buff_ptr, RCCE_chunk, + &RCCE_ready_flag[dest], &RCCE_sent_flag[RCCE_IAM], + size, dest, sync, request); + + if(iRCCE_isend_queue == NULL) { + + if(iRCCE_push_send_request(request) == iRCCE_SUCCESS) { + return(iRCCE_SUCCESS); + } + else { + iRCCE_isend_queue = request; + + if(request == &blocking_isend_request) { + iRCCE_isend_wait(request); + return(iRCCE_SUCCESS); + } + + return(iRCCE_PENDING); + } + } + else { + if(iRCCE_isend_queue->next == NULL) { + iRCCE_isend_queue->next = request; + } + else { + iRCCE_SEND_REQUEST *run = iRCCE_isend_queue; + while(run->next != NULL) run = run->next; + run->next = request; + } + + if(request == &blocking_isend_request) { + iRCCE_isend_wait(request); + return(iRCCE_SUCCESS); + } + + return(iRCCE_RESERVED); + } + } 
+} + +int iRCCE_isend(char *privbuf, ssize_t size, int dest, iRCCE_SEND_REQUEST *request) { + + return iRCCE_isend_generic(privbuf, size, dest, request, 0); +} + +int iRCCE_issend(char *privbuf, ssize_t size, int dest, iRCCE_SEND_REQUEST *request) { + + return iRCCE_isend_generic(privbuf, size, dest, request, 1); +} + + + +//-------------------------------------------------------------------------------------- +// FUNCTION: iRCCE_isend_push +//-------------------------------------------------------------------------------------- +// progress function for pending requests in the isend queue +//-------------------------------------------------------------------------------------- +int iRCCE_isend_push(void) { + + iRCCE_SEND_REQUEST *request = iRCCE_isend_queue; + + if(request == NULL) { + return(iRCCE_SUCCESS); + } + + if(request->finished) { + return(iRCCE_SUCCESS); + } + + iRCCE_push_send_request(request); + + if(request->finished) { + iRCCE_isend_queue = request->next; + return(iRCCE_SUCCESS); + } + + return(iRCCE_PENDING); +} + + +//-------------------------------------------------------------------------------------- +// FUNCTION: iRCCE_isend_test +//-------------------------------------------------------------------------------------- +// test function for completion of the requestes non-blocking send operation +// Just provide NULL instead of testvar if you don't need it +//-------------------------------------------------------------------------------------- +int iRCCE_isend_test(iRCCE_SEND_REQUEST *request, int *test) { + + if(request == NULL) { + + iRCCE_isend_push(); + + if(iRCCE_isend_queue == NULL) { + if (test) (*test) = 1; + return(iRCCE_SUCCESS); + } + else { + if (test) (*test) = 0; + return(iRCCE_PENDING); + } + } + + if(request->finished) { + if (test) (*test) = 1; + return(iRCCE_SUCCESS); + } + + if(iRCCE_isend_queue != request) { + + iRCCE_isend_push(); + + if(iRCCE_isend_queue != request) { + if (test) (*test) = 0; + return(iRCCE_RESERVED); + } 
+ } + + iRCCE_push_send_request(request); + + if(request->finished) { + iRCCE_isend_queue = request->next; + + if (test) (*test) = 1; + return(iRCCE_SUCCESS); + } + + if (test) (*test) = 0; + return(iRCCE_PENDING); +} + + +//-------------------------------------------------------------------------------------- +// FUNCTION: iRCCE_isend_wait +//-------------------------------------------------------------------------------------- +// just wait for completion of the requestes non-blocking send operation +//-------------------------------------------------------------------------------------- +int iRCCE_isend_wait(iRCCE_SEND_REQUEST *request) { + + if(request != NULL) { + + while(!request->finished) { + + iRCCE_isend_push(); + iRCCE_irecv_push(); + } + } + else { + + while(iRCCE_isend_queue != NULL) { + + iRCCE_isend_push(); + iRCCE_irecv_push(); + } + } + + return(iRCCE_SUCCESS); +} + + +//-------------------------------------------------------------------------------------- +// FUNCTION: iRCCE_isend_cancel +//-------------------------------------------------------------------------------------- +// try to cancel a pending non-blocking send request +//-------------------------------------------------------------------------------------- +int iRCCE_isend_cancel(iRCCE_SEND_REQUEST *request, int *test) { + + iRCCE_SEND_REQUEST *run; + + if( (request == NULL) || (request->finished) ) { + if (test) (*test) = 0; + return iRCCE_NOT_ENQUEUED; + } + + if(iRCCE_isend_queue == NULL) { + if (test) (*test) = 0; + return iRCCE_NOT_ENQUEUED; + } + + if(iRCCE_isend_queue == request) { + if (test) (*test) = 0; + return iRCCE_PENDING; + } + + for(run = iRCCE_isend_queue; run->next != NULL; run = run->next) { + + // request found --> remove it from send queue: + if(run->next == request) { + + run->next = run->next->next; + + if (test) (*test) = 1; + return iRCCE_SUCCESS; + } + } + + if (test) (*test) = 0; + return iRCCE_NOT_ENQUEUED; +} diff --git a/hermit/usr/ircce/iRCCE_lib.h 
b/hermit/usr/ircce/iRCCE_lib.h new file mode 100644 index 000000000..a55616edc --- /dev/null +++ b/hermit/usr/ircce/iRCCE_lib.h @@ -0,0 +1,62 @@ +// +// Copyright 2010 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// [2010-10-25] added support for non-blocking send/recv operations +// - iRCCE_isend(), ..._test(), ..._wait(), ..._push() +// - iRCCE_irecv(), ..._test(), ..._wait(), ..._push() +// by Carsten Clauss, Chair for Operating Systems, +// RWTH Aachen University +// +// [2010-11-12] extracted non-blocking code into separate library +// by Carsten Scholtes +// +// [2011-04-19] added wildcard mechanism (iRCCE_ANY_SOURCE) for receiving +// a message from an arbitrary remote rank +// by Simon Pickartz, Chair for Operating Systems, +// RWTH Aachen University +// +// [2011-06-27] merged iRCCE_ANY_SOURCE branch with trunk (iRCCE_ANY_LENGTH) +// + +#ifndef IRCCE_LIB_H +#define IRCCE_LIB_H + +#include "RCCE_lib.h" +#include "iRCCE.h" + +#ifdef AIR +#define FPGA_BASE 0xf9000000 +#define BACKOFF_MIN 8 +#define BACKOFF_MAX 256 +extern iRCCE_AIR iRCCE_atomic_inc_regs[]; +extern int iRCCE_atomic_alloc_counter; +extern iRCCE_AIR* iRCCE_atomic_barrier[2]; +#endif + +extern iRCCE_SEND_REQUEST* iRCCE_isend_queue; +extern iRCCE_RECV_REQUEST* iRCCE_irecv_queue[RCCE_MAXNP]; +extern iRCCE_RECV_REQUEST* iRCCE_irecv_any_source_queue; +extern int iRCCE_recent_source; +extern int iRCCE_recent_length; + +#ifdef _OPENMP +#pragma omp threadprivate 
(iRCCE_isend_queue, iRCCE_irecv_queue, iRCCE_irecv_any_source_queue, iRCCE_recent_source, iRCCE_recent_length) +#endif + +int iRCCE_test_flag(RCCE_FLAG, RCCE_FLAG_STATUS, int *); +int iRCCE_push_ssend_request(iRCCE_SEND_REQUEST *request); +int iRCCE_push_srecv_request(iRCCE_RECV_REQUEST *request); + +#endif diff --git a/hermit/usr/ircce/iRCCE_mcast.c b/hermit/usr/ircce/iRCCE_mcast.c new file mode 100644 index 000000000..5be496f93 --- /dev/null +++ b/hermit/usr/ircce/iRCCE_mcast.c @@ -0,0 +1,289 @@ +//*************************************************************************************** +// +// Copyright 2010 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// [2010-11-26] added xxx +// by Carsten Clauss, Chair for Operating Systems, +// RWTH Aachen University +// + +#include "iRCCE_lib.h" +#include +#include + +#if (defined COPPERRIDGE || defined SCC) && !defined(__hermit__) +#include "scc_memcpy.h" +#else +#define memcpy_scc memcpy +#endif + +static int iRCCE_msend_general( + char *privbuf, // source buffer in local private memory (send buffer) + t_vcharp combuf, // intermediate buffer in MPB + size_t chunk, // size of MPB available for this message (bytes) + RCCE_FLAG *sent, // flag indicating whether message has been sent by source + ssize_t size // size of message (bytes) + ) { + + char padline[RCCE_LINE_SIZE]; // copy buffer, used if message not multiple of line size + size_t wsize, // offset within send buffer when putting in "chunk" bytes + remainder, // bytes remaining to be sent + nbytes; // number of bytes to be sent in single iRCCE_put call + char *bufptr; // running pointer inside privbuf for current location + size_t subchunk1, subchunk2; // sub-chunks for the pipelined message transfer + int ue; + +#ifndef _iRCCE_ANY_LENGTH_ +#define FLAG_SET_VALUE RCCE_FLAG_SET +#else + RCCE_FLAG_STATUS FLAG_SET_VALUE = (RCCE_FLAG_STATUS)size; +#endif + // send data in units of available chunk size of comm buffer + for (wsize=0; wsize< (size/chunk)*chunk; wsize+=chunk) { + + bufptr = privbuf + wsize; + nbytes = chunk; + + // copy private data to own comm buffer + RCCE_put(combuf, (t_vcharp) bufptr, nbytes, RCCE_IAM); + + for(ue=0; ue= RCCE_NP) + return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ID)); + else { + return(iRCCE_mrecv_general(privbuf, RCCE_buff_ptr, RCCE_chunk, + &RCCE_sent_flag[source], size, source)); + } +} + + +//-------------------------------------------------------------------------------------- +// FUNCTION: iRCCE_mcast +//-------------------------------------------------------------------------------------- +// multicast based on msend() and mrecv() 
+//-------------------------------------------------------------------------------------- +int iRCCE_mcast(char *buf, size_t size, int root) +{ + if(RCCE_IAM != root) { + return iRCCE_mrecv(buf, size, root); + } else { + return iRCCE_msend(buf, size); + } +} + +//-------------------------------------------------------------------------------------- +// FUNCTION: iRCCE_bcast +//-------------------------------------------------------------------------------------- +// wrapper function for using iRCCE's multicast feature +//-------------------------------------------------------------------------------------- +int iRCCE_bcast(char *buf, size_t size, int root, RCCE_COMM comm) +{ + if(memcmp(&comm, &RCCE_COMM_WORLD, sizeof(RCCE_COMM)) == 0) { + return RCCE_bcast(buf, size, root, comm); + } else { + return iRCCE_mcast(buf, size, root); + } +} diff --git a/hermit/usr/ircce/iRCCE_put.c b/hermit/usr/ircce/iRCCE_put.c new file mode 100644 index 000000000..8d2fff0da --- /dev/null +++ b/hermit/usr/ircce/iRCCE_put.c @@ -0,0 +1,82 @@ +//*************************************************************************************** +// Put data into communication buffer. +//*************************************************************************************** +// +// Author: Rob F. Van der Wijngaart +// Intel Corporation +// Date: 008/30/2010 +// +//*************************************************************************************** +// +// Copyright 2010 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +// [2010-11-03] switched to SCC-optimized memcpy() functions in scc_memcpy.h: +// - memcpy_to_mpb() +// - memcpy_from_mpb() +// by Stefan Lankes, Carsten Clauss, Chair for Operating Systems, +// RWTH Aachen University +// +#include "iRCCE_lib.h" + +#if (defined COPPERRIDGE || defined SCC) && !defined(__hermit__) +#include "scc_memcpy.h" +#else +#define memcpy_to_mpb memcpy +#endif + +void* iRCCE_memcpy_put(void *dest, const void *src, size_t count) +{ +#if defined COPPERRIDGE || defined SCC + return memcpy_to_mpb(dest, src, count); +#else + return memcpy(dest, src, count); +#endif +} + +//-------------------------------------------------------------------------------------- +// FUNCTION: iRCCE_put +//-------------------------------------------------------------------------------------- +// copy data from address "source" in the local MPB or the calling UE's private memory +// to address "target" in the remote MPB. 
We do not test to see if a move from the
+// calling UE's private memory stays within allocated memory
+//
+// returns iRCCE_SUCCESS unconditionally
+//--------------------------------------------------------------------------------------
+int iRCCE_put(
+    t_vcharp target, // target buffer, MPB
+    t_vcharp source, // source buffer, MPB or private memory
+    int num_bytes,   // number of bytes to copy
+    int ID           // UE whose comm buffer (RCCE_comm_buffer[ID]) receives the data
+    ) {
+
+    // in non-GORY mode we only need to retain the MPB target shift; we
+    // already know the target is in the MPB, not private memory
+    // (the offset of "target" inside our own comm buffer is re-applied to the
+    // comm buffer of UE "ID")
+    target = RCCE_comm_buffer[ID]+(target-RCCE_comm_buffer[RCCE_IAM]);
+
+    // make sure that any data that has been put in our MPB by another UE is visible
+#ifdef _OPENMP
+    #pragma omp flush
+#endif
+
+    // do the actual copy
+    // NOTE(review): RC_cache_invalidate() is defined outside this file; presumably it
+    // drops stale cached MPB lines before the copy -- confirm
+    RC_cache_invalidate();
+
+    iRCCE_memcpy_put((void *)target, (void *)source, num_bytes);
+
+    // flush data to make it visible to all threads; cannot use flush list because it
+    // concerns malloced space
+#ifdef _OPENMP
+    #pragma omp flush
+#endif
+    return(iRCCE_SUCCESS);
+}
diff --git a/hermit/usr/ircce/iRCCE_srecv.c b/hermit/usr/ircce/iRCCE_srecv.c
new file mode 100644
index 000000000..421cfe2eb
--- /dev/null
+++ b/hermit/usr/ircce/iRCCE_srecv.c
@@ -0,0 +1,497 @@
+//***************************************************************************************
+// Non-blocking receive routines.
+//***************************************************************************************
+//
+// Author: Rob F. Van der Wijngaart
+// Intel Corporation
+// Date: 008/30/2010
+//
+//***************************************************************************************
+//
+// Copyright 2010 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// [2010-11-26] added a _pipelined_ version of blocking send/recv +// by Carsten Clauss, Chair for Operating Systems, +// RWTH Aachen University +// +// [2011-04-19] added wildcard mechanism (iRCCE_ANY_SOURCE) for receiving +// a message from an arbitrary remote rank +// by Simon Pickartz, Chair for Operating Systems, +// RWTH Aachen University +// +// [2011-05-31] added iRCCE_ANY_LENGTH wildcard mechanism +// by Carsten Clauss +// +// [2011-06-27] merged iRCCE_ANY_SOURCE branch with trunk (iRCCE_ANY_LENGTH) +// +// [2011-08-02] added iRCCE_iprobe() function for probing for incomming messages +// +// [2011-11-03] added internal push function for non-blocking synchronous send +// iRCCE_push_srecv_request() (called by iRCCE_push_recv_request) +// + +#include "iRCCE_lib.h" +#include +#include + +#if (defined COPPERRIDGE || defined SCC) && !defined(__hermit__) +#include "scc_memcpy.h" +#else +#define memcpy_scc memcpy +#endif + +//-------------------------------------------------------------------------------------- +// FUNCTION: iRCCE_srecv_general +//-------------------------------------------------------------------------------------- +// pipelined receive function +//-------------------------------------------------------------------------------------- +static int iRCCE_srecv_general( + char *privbuf, // destination buffer in local private memory (receive buffer) + t_vcharp combuf, // intermediate buffer in MPB + size_t chunk, // size of MPB available for this message (bytes) + RCCE_FLAG *ready, // flag indicating whether receiver is ready + 
RCCE_FLAG *sent, // flag indicating whether message has been sent by source + ssize_t size, // size of message (bytes) + int source, // UE that sent the message + int *test // if 1 upon entry, do nonblocking receive; if message available + // set to 1, otherwise to 0 + ) { + + char padline[RCCE_LINE_SIZE]; // copy buffer, used if message not multiple of line size + size_t wsize, // offset within receive buffer when pulling in "chunk" bytes + remainder, // bytes remaining to be received + nbytes; // number of bytes to be received in single iRCCE_get call + int first_test; // only use first chunk to determine if message has been received yet + char *bufptr; // running pointer inside privbuf for current location + size_t subchunk1, subchunk2; // sub-chunks for the pipelined message transfer + +#ifndef _iRCCE_ANY_LENGTH_ +#define FLAG_SET_VALUE RCCE_FLAG_SET +#else + RCCE_FLAG_STATUS FLAG_SET_VALUE; + + while (1) { + RCCE_flag_read(*sent, &size, RCCE_IAM); + if(size!=0) break; + } + FLAG_SET_VALUE = (RCCE_FLAG_STATUS)size; +#endif + + if(iRCCE_recent_source != source) iRCCE_recent_source = source; + if(iRCCE_recent_length != size) iRCCE_recent_length = size; + + first_test = 1; + + for (wsize=0; wsize < (size/chunk)*chunk; wsize+=chunk) { + + if (*test && first_test) { + first_test = 0; + iRCCE_test_flag(*sent, RCCE_FLAG_SET, test); + if (!(*test)) return(iRCCE_PENDING); + } + + if(wsize == 0) { + // allign sub-chunks to cache line granularity: + subchunk1 = ( (chunk / 2) / RCCE_LINE_SIZE ) * RCCE_LINE_SIZE; + subchunk2 = chunk - subchunk1; + } + + bufptr = privbuf + wsize; + nbytes = subchunk1; + + RCCE_wait_until(*sent, FLAG_SET_VALUE); + RCCE_flag_write(sent, RCCE_FLAG_UNSET, RCCE_IAM); + RCCE_flag_write(ready, RCCE_FLAG_SET, source); + iRCCE_get((t_vcharp)bufptr, combuf, nbytes, source); + + bufptr = privbuf + wsize + subchunk1; + nbytes = subchunk2; + + RCCE_wait_until(*sent, FLAG_SET_VALUE); + RCCE_flag_write(sent, RCCE_FLAG_UNSET, RCCE_IAM); + 
RCCE_flag_write(ready, RCCE_FLAG_SET, source); + iRCCE_get((t_vcharp)bufptr, combuf + subchunk1, nbytes, source); + } + + remainder = size%chunk; + // if nothing is left over, we are done + if (!remainder) return(iRCCE_SUCCESS); + + // receive remainder of data--whole cache lines + bufptr = privbuf + (size/chunk)*chunk; + nbytes = remainder - remainder % RCCE_LINE_SIZE; + if (nbytes) { + // if function is called in test mode, check if first chunk has been sent already. + // If so, proceed as usual. If not, exit immediately + if (*test && first_test) { + first_test = 0; + iRCCE_test_flag(*sent, RCCE_FLAG_SET, test); + if (!(*test)) return(iRCCE_PENDING); + } + + RCCE_wait_until(*sent, FLAG_SET_VALUE); + RCCE_flag_write(sent, RCCE_FLAG_UNSET, RCCE_IAM); + // copy data from local MPB space to private memory + iRCCE_get((t_vcharp)bufptr, combuf, nbytes, source); + + // tell the source I have moved data out of its comm buffer + RCCE_flag_write(ready, RCCE_FLAG_SET, source); + } + + remainder = remainder % RCCE_LINE_SIZE; + if (!remainder) return(iRCCE_SUCCESS); + + // remainder is less than cache line. This must be copied into appropriately sized + // intermediate space before exact number of bytes get copied to the final destination + bufptr = privbuf + (size/chunk)*chunk + nbytes; + nbytes = RCCE_LINE_SIZE; + + // if function is called in test mode, check if first chunk has been sent already. + // If so, proceed as usual. 
If not, exit immediately + if (*test && first_test) { + first_test = 0; + iRCCE_test_flag(*sent, RCCE_FLAG_SET, test); + if (!(*test)) return(iRCCE_PENDING); + } + + RCCE_wait_until(*sent, FLAG_SET_VALUE); + RCCE_flag_write(sent, RCCE_FLAG_UNSET, RCCE_IAM); + + // copy data from local MPB space to private memory + iRCCE_get((t_vcharp)padline, combuf, nbytes, source); + memcpy_scc(bufptr, padline, remainder); + + // tell the source I have moved data out of its comm buffer + RCCE_flag_write(ready, RCCE_FLAG_SET, source); + + return(iRCCE_SUCCESS); +} + + +//-------------------------------------------------------------------------------------- +// FUNCTION: iRCCE_srecv +//-------------------------------------------------------------------------------------- +// pipelined recv function (blocking!) +//-------------------------------------------------------------------------------------- +int iRCCE_srecv(char *privbuf, ssize_t size, int source) { + + int ignore = 0; + + if(size < 0) { +#ifdef _iRCCE_ANY_LENGTH_ + if (size != iRCCE_ANY_LENGTH) +#endif + { + return(iRCCE_SUCCESS); + } + } + + if(size == 0) { + // just synchronize: + size = 1; + privbuf = (char*)&size; + } + + // determine source of request if given source = iRCCE_ANY_SOURCE + if (source == iRCCE_ANY_SOURCE) { + + // wait for completion of _all_ pending non-blocking requests: + iRCCE_irecv_wait(NULL); + + int i, res; + for( i=0;;i=(i+1)%RCCE_NP ){ + iRCCE_test_flag(RCCE_sent_flag[i], RCCE_FLAG_SET, &res); + if ( (i != RCCE_IAM) && (res) ) { + source = i; + break; + } + } + } + + // wait for completion of pending (ans source-related) non-blocking requests: + while(iRCCE_irecv_queue[source] != NULL) { + iRCCE_irecv_push(); + iRCCE_isend_push(); + } + +#if !defined(SINGLEBITFLAGS) && !defined(RCCE_VERSION) + if(size <= iRCCE_MAX_TAGGED_LEN) { +#ifndef _iRCCE_ANY_LENGTH_ +#define FLAG_SET_VALUE RCCE_FLAG_SET +#else + RCCE_FLAG_STATUS FLAG_SET_VALUE; + + if(size == iRCCE_ANY_LENGTH) { + while (1) { + 
RCCE_flag_read(RCCE_sent_flag[source], &size, RCCE_IAM); + if(size!=0) break; + } + } + FLAG_SET_VALUE = (RCCE_FLAG_STATUS)size; +#endif + if(size <= iRCCE_MAX_TAGGED_LEN) { + // just wait and then read the tagged flag with payload: + iRCCE_wait_tagged(RCCE_sent_flag[source], FLAG_SET_VALUE, privbuf, size); + + RCCE_flag_write(&RCCE_sent_flag[source], RCCE_FLAG_UNSET, RCCE_IAM); + RCCE_flag_write(&RCCE_ready_flag[RCCE_IAM], RCCE_FLAG_SET, source); + + return(RCCE_SUCCESS); + } + } +#endif + + if (source<0 || source >= RCCE_NP) + return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ID)); + else { + return(iRCCE_srecv_general(privbuf, RCCE_buff_ptr, RCCE_chunk, + &RCCE_ready_flag[RCCE_IAM], &RCCE_sent_flag[source], + size, source, &ignore)); + } +} + +//-------------------------------------------------------------------------------------- +// FUNCTION: iRCCE_probe +//-------------------------------------------------------------------------------------- +// probe for incomming messages (blocking / does not receive) +//-------------------------------------------------------------------------------------- +int iRCCE_probe(int source, int* test_rank) +{ + // determine source of request if given source = iRCCE_ANY_SOURCE + if (source == iRCCE_ANY_SOURCE) { + + // wait for completion of _all_ pending non-blocking requests: + iRCCE_irecv_wait(NULL); + + int i, res; + for( i=0;;i=(i+1)%RCCE_NP ){ + iRCCE_test_flag(RCCE_sent_flag[i], RCCE_FLAG_SET, &res); + if ( (i != RCCE_IAM) && (res) ) { + source = i; + break; + } + } + } + else { + int res; + do { + iRCCE_test_flag(RCCE_sent_flag[source], RCCE_FLAG_SET, &res); + } + while(!res); + } + + if (test_rank != NULL) { + (*test_rank) = source; + } + +#ifdef _iRCCE_ANY_LENGTH_ + { + int size; + RCCE_flag_read(RCCE_sent_flag[source], &size, RCCE_IAM); + if(iRCCE_recent_length != size) iRCCE_recent_length = size; + } +#endif + if(iRCCE_recent_source != source) iRCCE_recent_source = source; + + return iRCCE_SUCCESS; +} + 
+//-------------------------------------------------------------------------------------- +// FUNCTION: iRCCE_recv +//-------------------------------------------------------------------------------------- +// pipelined recv function (non-blocking / analogous to RCCE_recv_test fuction) +//-------------------------------------------------------------------------------------- +int iRCCE_srecv_test(char *privbuf, ssize_t size, int source, int *test) { + + if(test == NULL) return iRCCE_recv(privbuf, size, source); + + if(size <= 0) { +#ifdef _iRCCE_ANY_LENGTH_ + if(size != iRCCE_ANY_LENGTH) +#endif + { + (*test) = 1; + return(iRCCE_SUCCESS); + } + } + + // determine source of request if given source = iRCCE_ANY_SOURCE + if (source == iRCCE_ANY_SOURCE) { + + // check whether there are still pending non-blocking receive requests: + if(iRCCE_irecv_push() != iRCCE_SUCCESS) { + (*test) = 0; + return(iRCCE_PENDING); + } + + int i, res; + for( i=0; i= RCCE_NP) + return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ID)); + else { + (*test) = 1; + return(iRCCE_srecv_general(privbuf, RCCE_buff_ptr, RCCE_chunk, + &RCCE_ready_flag[RCCE_IAM], &RCCE_sent_flag[source], + size, source, test)); + } +} + +//-------------------------------------------------------------------------------------- +// FUNCTION: iRCCE_push_srecv_request +//-------------------------------------------------------------------------------------- +// pipelined push for recv function (non-blocking and stricly synchronous!) 
+//--------------------------------------------------------------------------------------
+// Progress one receive request as far as possible without blocking.
+// Whenever the expected "sent" flag is not set yet, the current position is stored
+// in request->label and iRCCE_PENDING is returned; the next call jumps straight
+// back to that position (label1..label4). All loop state lives in the request.
+// returns iRCCE_SUCCESS once the whole message has been received (request->finished
+// is set and iRCCE_recent_source / iRCCE_recent_length are updated), else iRCCE_PENDING
+int iRCCE_push_srecv_request(iRCCE_RECV_REQUEST *request) {
+
+    char padline[RCCE_LINE_SIZE]; // copy buffer, used if message not multiple of line size
+    int test; // flag for calling iRCCE_test_flag()
+
+    if(request->finished) return(iRCCE_SUCCESS);
+
+    // resume at the wait point where the previous call gave up
+    if(request->label == 1) goto label1;
+    if(request->label == 2) goto label2;
+    if(request->label == 3) goto label3;
+    if(request->label == 4) goto label4;
+
+#ifdef _iRCCE_ANY_LENGTH_
+    // the flag value doubles as message length; 0 means nothing was announced yet
+    RCCE_flag_read(*(request->sent), &(request->flag_set_value), RCCE_IAM);
+    if(request->flag_set_value == 0) {
+        return(iRCCE_PENDING);
+    }
+    request->size = (size_t)request->flag_set_value;
+#endif
+
+    // receive data in units of available chunk size of MPB
+    for (; request->wsize < (request->size / request->chunk) * request->chunk; request->wsize += request->chunk) {
+
+        // first half of the chunk
+        request->bufptr = request->privbuf + request->wsize;
+        request->nbytes = request->subchunk1;
+label1:
+        iRCCE_test_flag(*(request->sent), request->flag_set_value, &test);
+        if(!test) {
+            request->label = 1;
+            return(iRCCE_PENDING);
+        }
+        request->started = 1;
+
+        // the ready flag is raised before the data is fetched
+        RCCE_flag_write(request->sent, RCCE_FLAG_UNSET, RCCE_IAM);
+        RCCE_flag_write(request->ready, RCCE_FLAG_SET, request->source);
+        iRCCE_get((t_vcharp)request->bufptr, request->combuf, request->nbytes, request->source);
+
+        // second half of the chunk, located behind subchunk1 in the comm buffer
+        request->bufptr = request->privbuf + request->wsize + request->subchunk1;
+        request->nbytes = request->subchunk2;
+
+label2:
+        iRCCE_test_flag(*(request->sent), request->flag_set_value, &test);
+        if(!test) {
+            request->label = 2;
+            return(iRCCE_PENDING);
+        }
+
+        RCCE_flag_write(request->sent, RCCE_FLAG_UNSET, RCCE_IAM);
+        RCCE_flag_write(request->ready, RCCE_FLAG_SET, request->source);
+        iRCCE_get((t_vcharp)request->bufptr, request->combuf + request->subchunk1, request->nbytes, request->source);
+    }
+
+    request->remainder = request->size % request->chunk;
+    // if nothing is left over, we are done
+    if (!request->remainder) {
+        if(iRCCE_recent_source != request->source) iRCCE_recent_source = request->source;
+        if(iRCCE_recent_length != request->size) iRCCE_recent_length = request->size;
+        request->finished = 1;
+        return(iRCCE_SUCCESS);
+    }
+
+    // receive remainder of data--whole cache lines
+    request->bufptr = request->privbuf + (request->size / request->chunk) * request->chunk;
+    request->nbytes = request->remainder - request->remainder % RCCE_LINE_SIZE;
+    if (request->nbytes) {
+label3:
+        iRCCE_test_flag(*(request->sent), request->flag_set_value, &test);
+        if(!test) {
+            request->label = 3;
+            return(iRCCE_PENDING);
+        }
+        request->started = 1;
+
+        RCCE_flag_write(request->sent, RCCE_FLAG_UNSET, RCCE_IAM);
+        // copy data from source's MPB space to private memory
+        iRCCE_get((t_vcharp)request->bufptr, request->combuf, request->nbytes, request->source);
+
+        // tell the source I have moved data out of its comm buffer
+        RCCE_flag_write(request->ready, RCCE_FLAG_SET, request->source);
+    }
+
+    // what is left now is the part of the message that does not fill a cache line
+    request->remainder = request->size % request->chunk;
+    request->remainder = request->remainder % RCCE_LINE_SIZE;
+    if (!request->remainder) {
+        if(iRCCE_recent_source != request->source) iRCCE_recent_source = request->source;
+        if(iRCCE_recent_length != request->size) iRCCE_recent_length = request->size;
+        request->finished = 1;
+        return(iRCCE_SUCCESS);
+    }
+
+    // remainder is less than cache line. This must be copied into appropriately sized
+    // intermediate space before exact number of bytes get copied to the final destination
+    request->bufptr = request->privbuf + (request->size / request->chunk) * request->chunk + request->nbytes;
+    request->nbytes = RCCE_LINE_SIZE;
+label4:
+    iRCCE_test_flag(*(request->sent), request->flag_set_value, &test);
+    if(!test) {
+        request->label = 4;
+        return(iRCCE_PENDING);
+    }
+    request->started = 1;
+
+    RCCE_flag_write(request->sent, RCCE_FLAG_UNSET, RCCE_IAM);
+    // copy data from source's MPB space to private memory
+    // (a full line goes into padline, only "remainder" bytes reach the user buffer)
+    iRCCE_get((t_vcharp)padline, request->combuf, request->nbytes, request->source);
+    memcpy_scc(request->bufptr,padline,request->remainder);
+
+    // tell the source I have moved data out of its comm buffer
+    RCCE_flag_write(request->ready, RCCE_FLAG_SET, request->source);
+
+    if(iRCCE_recent_source != request->source) iRCCE_recent_source = request->source;
+    if(iRCCE_recent_length != request->size) iRCCE_recent_length = request->size;
+    request->finished = 1;
+    return(iRCCE_SUCCESS);
+}
+
diff --git a/hermit/usr/ircce/iRCCE_ssend.c b/hermit/usr/ircce/iRCCE_ssend.c
new file mode 100644
index 000000000..7593de9bb
--- /dev/null
+++ b/hermit/usr/ircce/iRCCE_ssend.c
@@ -0,0 +1,282 @@
+//***************************************************************************************
+// Synchronized send routines.
+//***************************************************************************************
+//
+// Author: Rob F. Van der Wijngaart
+// Intel Corporation
+// Date: 008/30/2010
+//
+//***************************************************************************************
+//
+// Copyright 2010 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// [2010-11-26] added a _pipelined_ version of blocking send/recv
+//              by Carsten Clauss, Chair for Operating Systems,
+//                                 RWTH Aachen University
+//
+// [2011-05-31] added iRCCE_ANY_LENGTH wildcard mechanism
+//              by Carsten Clauss
+//
+// [2011-11-03] added internal push function for non-blocking synchronous send
+//              iRCCE_push_ssend_request() (called by iRCCE_push_send_request)
+//
+
+#include "iRCCE_lib.h"
+#include <stdlib.h>
+#include <string.h>
+
+#if (defined COPPERRIDGE || defined SCC) && !defined(__hermit__)
+#include "scc_memcpy.h"
+#else
+#define memcpy_scc memcpy
+#endif
+
+//--------------------------------------------------------------------------------------
+// FUNCTION: iRCCE_ssend_general
+//--------------------------------------------------------------------------------------
+// pipelined send function
+//
+// Every full chunk is transferred as two sub-chunks. After each iRCCE_put() into the
+// own comm buffer the 'sent' flag is written at the destination; the function then
+// blocks until 'ready' is set and clears 'ready' locally. What does not fill a whole
+// chunk is sent as one block of whole cache lines followed by one padded cache line.
+// Always returns iRCCE_SUCCESS.
+//--------------------------------------------------------------------------------------
+static int iRCCE_ssend_general(
+	char *privbuf,    // source buffer in local private memory (send buffer)
+	t_vcharp combuf,  // intermediate buffer in MPB
+	size_t chunk,     // size of MPB available for this message (bytes)
+	RCCE_FLAG *ready, // flag indicating whether receiver is ready
+	RCCE_FLAG *sent,  // flag indicating whether message has been sent by source
+	ssize_t size,     // size of message (bytes)
+	int dest          // UE that will receive the message
+	) {
+
+	// copy buffer, used if message not multiple of line size; zero-filled because a
+	// whole cache line of it is put into the comm buffer although only 'remainder'
+	// bytes are copied in, so the padding must not carry stale stack contents
+	char padline[RCCE_LINE_SIZE] = {0};
+	size_t wsize,     // offset within send buffer when putting in "chunk" bytes
+	       remainder, // bytes remaining to be sent
+	       nbytes;    // number of bytes to be sent in single iRCCE_put call
+	char *bufptr;     // running pointer inside privbuf for current location
+
+	// sub-chunks for the pipelined message transfer; the first one is rounded down
+	// to cache line granularity, the second one takes the rest of the chunk
+	size_t subchunk1 = ( (chunk / 2) / RCCE_LINE_SIZE ) * RCCE_LINE_SIZE;
+	size_t subchunk2 = chunk - subchunk1;
+
+	// number of bytes that are sent in units of whole chunks
+	size_t full = ((size_t)size / chunk) * chunk;
+
+#ifndef _iRCCE_ANY_LENGTH_
+#define FLAG_SET_VALUE RCCE_FLAG_SET
+#else
+	RCCE_FLAG_STATUS FLAG_SET_VALUE = (RCCE_FLAG_STATUS)size;
+#endif
+
+	// send data in units of available chunk size of comm buffer
+	for (wsize = 0; wsize < full; wsize += chunk) {
+
+		// first half of the chunk
+		bufptr = privbuf + wsize;
+		nbytes = subchunk1;
+
+		iRCCE_put(combuf, (t_vcharp) bufptr, nbytes, RCCE_IAM);
+
+		RCCE_flag_write(sent, FLAG_SET_VALUE, dest);
+
+		RCCE_wait_until(*ready, RCCE_FLAG_SET);
+		RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
+
+		// second half of the chunk, placed behind the first one in the comm buffer
+		bufptr = privbuf + wsize + subchunk1;
+		nbytes = subchunk2;
+
+		iRCCE_put(combuf + subchunk1, (t_vcharp) bufptr, nbytes, RCCE_IAM);
+
+		RCCE_flag_write(sent, FLAG_SET_VALUE, dest);
+
+		RCCE_wait_until(*ready, RCCE_FLAG_SET);
+		RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
+	}
+
+	remainder = (size_t)size % chunk;
+	// if nothing is left over, we are done
+	if (!remainder) return(iRCCE_SUCCESS);
+
+	// send remainder of data--whole cache lines
+	bufptr = privbuf + full;
+	nbytes = remainder - remainder%RCCE_LINE_SIZE;
+	if (nbytes) {
+		// copy private data to own comm buffer
+		iRCCE_put(combuf, (t_vcharp)bufptr, nbytes, RCCE_IAM);
+		RCCE_flag_write(sent, FLAG_SET_VALUE, dest);
+		// wait for the destination to be ready to receive a message
+		RCCE_wait_until(*ready, RCCE_FLAG_SET);
+		RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
+	}
+
+	remainder = remainder%RCCE_LINE_SIZE;
+	if (!remainder) return(iRCCE_SUCCESS);
+
+	// remainder is less than a cache line. This must be copied into appropriately sized
+	// intermediate space before it can be sent to the receiver
+	bufptr = privbuf + full + nbytes;
+	nbytes = RCCE_LINE_SIZE;
+
+	// copy private data to own comm buffer
+	memcpy_scc(padline, bufptr, remainder);
+	iRCCE_put(combuf, (t_vcharp)padline, nbytes, RCCE_IAM);
+	RCCE_flag_write(sent, FLAG_SET_VALUE, dest);
+
+	// wait for the destination to be ready to receive a message
+	RCCE_wait_until(*ready, RCCE_FLAG_SET);
+	RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
+
+	return(iRCCE_SUCCESS);
+}
+
+
+//--------------------------------------------------------------------------------------
+// FUNCTION: iRCCE_ssend
+//--------------------------------------------------------------------------------------
+// pipelined send function (blocking and synchronous!)
+//
+// privbuf: send buffer in private memory
+// size:    message length in bytes; a negative size returns iRCCE_SUCCESS without
+//          sending anything, a size of zero sends a one byte message to synchronize
+// dest:    UE that will receive the message
+//
+// Pending non-blocking requests are pushed until the isend queue is empty before the
+// message is sent. Returns the result of RCCE_error_return(RCCE_debug_comm,
+// RCCE_ERROR_ID) if dest is not in [0, RCCE_NP).
+//--------------------------------------------------------------------------------------
+int iRCCE_ssend(char *privbuf, ssize_t size, int dest) {
+
+	if(size < 0) return(iRCCE_SUCCESS);
+
+	if(size == 0) {
+		// just synchronize:
+		size = 1;
+		privbuf = (char*)&size;
+	}
+
+	while(iRCCE_isend_queue != NULL) {
+
+		// wait for completion of pending non-blocking requests
+		iRCCE_isend_push();
+		iRCCE_irecv_push();
+	}
+
+	// dest indexes RCCE_ready_flag[] below (also in the tagged fast path), so it
+	// has to be validated before either path is taken
+	if (dest<0 || dest >= RCCE_NP)
+		return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ID));
+
+#if !defined(SINGLEBITFLAGS) && !defined(RCCE_VERSION)
+	if(size <= iRCCE_MAX_TAGGED_LEN) {
+		// just write the tagged 'sent' flag (with payload) and wait for 'ready' flag:
+		iRCCE_flag_write_tagged(&RCCE_sent_flag[RCCE_IAM], (RCCE_FLAG_STATUS)size, dest, privbuf, size);
+
+		RCCE_wait_until(RCCE_ready_flag[dest], RCCE_FLAG_SET);
+		RCCE_flag_write(&RCCE_ready_flag[dest], RCCE_FLAG_UNSET, RCCE_IAM);
+
+		return(RCCE_SUCCESS);
+	}
+#endif
+
+	return(iRCCE_ssend_general(privbuf, RCCE_buff_ptr, RCCE_chunk,
+	                           &RCCE_ready_flag[dest], &RCCE_sent_flag[RCCE_IAM],
+	                           size, dest));
+}
+
+
+//--------------------------------------------------------------------------------------
+// FUNCTION: iRCCE_push_ssend_request
+//--------------------------------------------------------------------------------------
+// pipelined push for send function (non-blocking and strictly synchronous!)
+//
+// Advances the send described by 'request' as far as possible without blocking.
+// After every put the 'sent' flag is written at the destination and the 'ready' flag
+// is tested once: if it is not set, the number of the wait point (1..4) is stored in
+// request->label and iRCCE_PENDING is returned; the next call jumps straight back to
+// that wait point. All progress state (wsize, bufptr, nbytes, remainder) therefore
+// lives in the request, not in locals. Returns iRCCE_SUCCESS and sets
+// request->finished once the whole message has been handed over; a request that is
+// already finished returns iRCCE_SUCCESS immediately.
+//--------------------------------------------------------------------------------------
+int iRCCE_push_ssend_request(iRCCE_SEND_REQUEST *request) {
+
+	// copy buffer, used if message not multiple of line size; zero-filled because a
+	// whole cache line of it is put into the comm buffer although only
+	// request->remainder bytes are copied in
+	char padline[RCCE_LINE_SIZE] = {0};
+	int test; // flag for calling iRCCE_test_flag()
+
+	if(request->finished) return(iRCCE_SUCCESS);
+
+	// resume at the wait point where the previous call returned iRCCE_PENDING
+	if(request->label == 1) goto label1;
+	if(request->label == 2) goto label2;
+	if(request->label == 3) goto label3;
+	if(request->label == 4) goto label4;
+
+	// send data in units of available chunk size of comm buffer
+	for (request->wsize = 0; request->wsize < (request->size / request->chunk) * request->chunk; request->wsize += request->chunk) {
+
+		// first sub-chunk
+		request->bufptr = request->privbuf + request->wsize;
+		request->nbytes = request->subchunk1;
+
+		iRCCE_put(request->combuf, (t_vcharp) request->bufptr, request->nbytes, RCCE_IAM);
+		RCCE_flag_write(request->sent, request->flag_set_value, request->dest);
+label1:
+		iRCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test);
+		if(!test) {
+			request->label = 1;
+			return(iRCCE_PENDING);
+		}
+		RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);
+
+		// second sub-chunk, placed behind the first one in the comm buffer
+		request->bufptr = request->privbuf + request->wsize + request->subchunk1;
+		request->nbytes = request->subchunk2;
+
+		iRCCE_put(request->combuf + request->subchunk1, (t_vcharp) request->bufptr, request->nbytes, RCCE_IAM);
+		RCCE_flag_write(request->sent, request->flag_set_value, request->dest);
+label2:
+		iRCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test);
+		if(!test) {
+			request->label = 2;
+			return(iRCCE_PENDING);
+		}
+		RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);
+	}
+
+	request->remainder = request->size % request->chunk;
+	// if nothing is left over, we are done
+	if (!request->remainder) {
+		request->finished = 1;
+		return(iRCCE_SUCCESS);
+	}
+
+	// send remainder of data--whole cache lines
+	request->bufptr = request->privbuf + (request->size / request->chunk) * request->chunk;
+	request->nbytes = request->remainder - request->remainder % RCCE_LINE_SIZE;
+	if (request->nbytes) {
+		// copy private data to own comm buffer
+		iRCCE_put(request->combuf, (t_vcharp)request->bufptr, request->nbytes, RCCE_IAM);
+		RCCE_flag_write(request->sent, request->flag_set_value, request->dest);
+		// wait for the destination to be ready to receive a message
+label3:
+		iRCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test);
+		if(!test) {
+			request->label = 3;
+			return(iRCCE_PENDING);
+		}
+		RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);
+	}
+
+	request->remainder = request->size % request->chunk;
+	request->remainder = request->remainder%RCCE_LINE_SIZE;
+	// if nothing is left over, we are done
+	if (!request->remainder)
+	{
+		request->finished = 1;
+		return(iRCCE_SUCCESS);
+	}
+
+	// remainder is less than a cache line. This must be copied into appropriately sized
+	// intermediate space before it can be sent to the receiver
+	request->bufptr = request->privbuf + (request->size / request->chunk) * request->chunk + request->nbytes;
+	request->nbytes = RCCE_LINE_SIZE;
+	// copy private data to own comm buffer (memcpy_scc, as in iRCCE_ssend_general)
+	memcpy_scc(padline,request->bufptr,request->remainder);
+	iRCCE_put(request->combuf, (t_vcharp)padline, request->nbytes, RCCE_IAM);
+	RCCE_flag_write(request->sent, request->flag_set_value, request->dest);
+	// wait for the destination to be ready to receive a message
+label4:
+	iRCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test);
+	if(!test) {
+		request->label = 4;
+		return(iRCCE_PENDING);
+	}
+	RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);
+
+	request->finished = 1;
+	return(iRCCE_SUCCESS);
+}
diff --git a/hermit/usr/ircce/iRCCE_synch.c b/hermit/usr/ircce/iRCCE_synch.c
new file mode 100644
index 000000000..3f8e55fe3
--- /dev/null
+++ b/hermit/usr/ircce/iRCCE_synch.c
@@ -0,0 +1,279 @@
+///*************************************************************************************
+// Synchronization functions.
+// Single-bit and whole-cache-line flags are sufficiently different that we provide
+// separate implementations of the synchronization routines for each case
+//**************************************************************************************
+//
+// Author: Rob F. Van der Wijngaart
+//         Intel Corporation
+// Date:   008/30/2010
+//
+//**************************************************************************************
+//
+// Copyright 2010 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// [2010-10-25] added support for non-blocking send/recv operations
+//              - iRCCE_isend(), ..._test(), ..._wait(), ..._push()
+//              - iRCCE_irecv(), ..._test(), ..._wait(), ..._push()
+//              by Carsten Clauss, Chair for Operating Systems,
+//                                 RWTH Aachen University
+//
+// [2010-11-12] extracted non-blocking code into separate library
+//              by Carsten Scholtes
+//
+// [2011-01-21] updated the datatype of RCCE_FLAG according to the
+//              recent version of RCCE
+//
+// [2011-04-12] added macro test for rcce version
+//
+// [2012-11-06] add barrier implementation as described in:
+//              USENIX HotPar'12 Eval. Hardw. Synch. Supp. SCC
+//              by Pablo Reble
+//
+#include "iRCCE_lib.h"
+
+#ifdef SINGLEBITFLAGS
+#warning iRCCE_TAGGED_FLAGS: for using this feature, SINGLEBITFLAGS must be disabled! (make SINGLEBITFLAGS=0)
+#endif
+
+#ifdef SINGLEBITFLAGS
+
+//--------------------------------------------------------------------------------------
+// FUNCTION: iRCCE_test_flag (single-bit flags)
+//--------------------------------------------------------------------------------------
+// Non-blocking test of a flag: stores 1 in *result if the flag bit currently equals
+// val and 0 otherwise. Always returns iRCCE_SUCCESS.
+//--------------------------------------------------------------------------------------
+int iRCCE_test_flag(RCCE_FLAG flag, RCCE_FLAG_STATUS val, int *result) {
+
+	t_vcharp cflag;
+
+#ifdef RCCE_VERSION
+	// this is a newer version than V1.0.13
+	t_vcharp flaga;
+#endif
+
+	cflag = flag.line_address;
+
+#ifdef RCCE_VERSION
+	// this is a newer version than V1.0.13
+	flaga = flag.flag_addr;
+#endif
+
+	// always flush/invalidate to ensure we read the most recent value of *flag;
+	// the flag is read exactly once, this function never waits
+
+#ifdef _OPENMP
+#pragma omp flush
+#endif
+	RC_cache_invalidate();
+
+#ifdef RCCE_VERSION
+	// this is a newer version than V1.0.13
+	if(RCCE_bit_value(flaga, (flag.location)%RCCE_FLAGS_PER_BYTE) != val) {
+#else
+	if(RCCE_bit_value(cflag, flag.location) != val) {
+#endif
+		(*result) = 0;
+	}
+	else {
+		(*result) = 1;
+	}
+
+	return(iRCCE_SUCCESS);
+}
+
+#else
+
+//////////////////////////////////////////////////////////////////
+// LOCKLESS SYNCHRONIZATION USING ONE WHOLE CACHE LINE PER FLAG //
+//////////////////////////////////////////////////////////////////
+
+//--------------------------------------------------------------------------------------
+// FUNCTION: iRCCE_test_flag (whole-cache-line flags)
+//--------------------------------------------------------------------------------------
+// Non-blocking test of a flag: stores 1 in *result if the flag currently equals val
+// and 0 otherwise. Always returns iRCCE_SUCCESS.
+//--------------------------------------------------------------------------------------
+int iRCCE_test_flag(RCCE_FLAG flag, RCCE_FLAG_STATUS val, int *result) {
+
+#ifndef RCCE_VERSION
+	RCCE_FLAG flag_pos = flag;
+#endif
+
+#ifdef _OPENMP
+#pragma omp flush
+#endif
+
+	RC_cache_invalidate();
+
+#ifdef RCCE_VERSION
+	if((RCCE_FLAG_STATUS)(*flag.flag_addr) != val) {
+#else
+	if((*flag_pos) != val) {
+#endif
+		(*result) = 0;
+	}
+	else {
+		(*result) = 1;
+	}
+
+	return(iRCCE_SUCCESS);
+}
+
+
+//////////////////////////////////////////////////////////////////////////
+// FUNCTIONS FOR HANDLING TAGGED FLAGS (NEED WHOLE CACHE LINE PER FLAG) //
+//////////////////////////////////////////////////////////////////////////
+
+//--------------------------------------------------------------------------------------
+// FUNCTION: iRCCE_flag_alloc_tagged
+//--------------------------------------------------------------------------------------
+// Allocates a flag that can carry a tag. With RCCE_VERSION defined a full cache line
+// is taken from RCCE_malloc(); otherwise the call is forwarded to RCCE_flag_alloc().
+//--------------------------------------------------------------------------------------
+int iRCCE_flag_alloc_tagged(RCCE_FLAG *flag)
+{
+#ifdef RCCE_VERSION
+	// this is a newer version than V1.0.13
+	flag->flag_addr = RCCE_malloc(RCCE_LINE_SIZE);
+	if (!(flag->flag_addr)) return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_UNDEFINED));
+	return(RCCE_SUCCESS);
+#else
+	return RCCE_flag_alloc(flag);
+#endif
+}
+
+//--------------------------------------------------------------------------------------
+// FUNCTION: iRCCE_flag_write_tagged
+//--------------------------------------------------------------------------------------
+// Builds one cache line holding the flag value in its first int (and, under OpenMP,
+// also in its last int) followed by up to iRCCE_MAX_TAGGED_LEN bytes of 'tag', and
+// puts that line to the flag of UE 'ID'. 'tag' may be NULL; a 'len' that is zero or
+// negative copies no payload. Returns RCCE_error_return() of the put result.
+//--------------------------------------------------------------------------------------
+int iRCCE_flag_write_tagged(RCCE_FLAG *flag, RCCE_FLAG_STATUS val, int ID, void *tag, int len) {
+
+	// whole line is zero-filled so that unused payload bytes are well defined
+	unsigned char val_array[RCCE_LINE_SIZE] = {0};
+	int error;
+
+	*(int *) val_array = val;
+#ifdef _OPENMP
+	*(int *) &val_array[RCCE_LINE_SIZE-sizeof(int)] = val;
+#endif
+
+	if(tag)
+	{
+		if(len > iRCCE_MAX_TAGGED_LEN) len = iRCCE_MAX_TAGGED_LEN;
+		// a negative length must never reach the copy routine
+		if(len > 0) iRCCE_memcpy_put(&val_array[sizeof(int)], tag, len);
+	}
+
+#ifdef RCCE_VERSION
+	error = iRCCE_put(flag->flag_addr, val_array, RCCE_LINE_SIZE, ID);
+#else
+	error = iRCCE_put((t_vcharp)(*flag), val_array, RCCE_LINE_SIZE, ID);
+#endif
+
+	return(RCCE_error_return(RCCE_debug_synch,error));
+}
+
+//--------------------------------------------------------------------------------------
+// FUNCTION: iRCCE_flag_read_tagged
+//--------------------------------------------------------------------------------------
+// Gets the cache line of a tagged flag from UE 'ID'. If 'val' is not NULL it receives
+// the flag value (taken from the last int of the line under OpenMP). The payload is
+// copied to 'tag' only if 'val' and 'tag' are not NULL and the value read is non-zero;
+// at most iRCCE_MAX_TAGGED_LEN bytes are copied and a 'len' that is zero or negative
+// copies nothing. Returns RCCE_SUCCESS, or RCCE_error_return() of a failed get.
+//--------------------------------------------------------------------------------------
+int iRCCE_flag_read_tagged(RCCE_FLAG flag, RCCE_FLAG_STATUS *val, int ID, void *tag, int len) {
+
+	unsigned char val_array[RCCE_LINE_SIZE];
+	int error;
+
+#ifdef RCCE_VERSION
+	error = iRCCE_get(val_array, flag.flag_addr, RCCE_LINE_SIZE, ID);
+#else
+	error = iRCCE_get(val_array, (t_vcharp)flag, RCCE_LINE_SIZE, ID);
+#endif
+	if(error)
+		return(RCCE_error_return(RCCE_debug_synch,error));
+
+	if(val) *val = *(int *)val_array;
+
+#ifdef _OPENMP
+	if(val) *val = *(int *)&val_array[RCCE_LINE_SIZE-sizeof(int)];
+#endif
+
+	if( (val) && (*val) && (tag) ) {
+		if(len > iRCCE_MAX_TAGGED_LEN) len = iRCCE_MAX_TAGGED_LEN;
+		// a negative length must never reach the copy routine
+		if(len > 0) iRCCE_memcpy_put(tag, &val_array[sizeof(int)], len);
+	}
+
+	return(RCCE_SUCCESS);
+}
+
+//--------------------------------------------------------------------------------------
+// FUNCTION: iRCCE_wait_tagged
+//--------------------------------------------------------------------------------------
+// Blocks until the flag equals 'val' (under OpenMP the last int of the flag's cache
+// line is polled instead of its start), then copies up to iRCCE_MAX_TAGGED_LEN payload
+// bytes to 'tag' if 'tag' is not NULL. Always returns RCCE_SUCCESS.
+//--------------------------------------------------------------------------------------
+int iRCCE_wait_tagged(RCCE_FLAG flag, RCCE_FLAG_STATUS val, void *tag, int len) {
+
+#ifndef RCCE_VERSION
+	RCCE_FLAG flag_pos = flag;
+#ifdef _OPENMP
+	flag_pos = flag + RCCE_LINE_SIZE / sizeof(int) - 1;
+#endif
+#endif
+
+	do {
+#ifdef _OPENMP
+#pragma omp flush
+#endif
+		RC_cache_invalidate();
+#ifdef RCCE_VERSION
+		// this is a newer version than V1.0.13
+#ifdef _OPENMP
+	} while ((RCCE_FLAG_STATUS)(*( ((int*)flag.flag_addr) + RCCE_LINE_SIZE / sizeof(int) - 1)) != val);
+#else
+	} while ((RCCE_FLAG_STATUS)(*flag.flag_addr) != val);
+#endif
+#else
+	} while ((*flag_pos) != val);
+#endif
+
+	if(tag) {
+		if(len > iRCCE_MAX_TAGGED_LEN) len = iRCCE_MAX_TAGGED_LEN;
+		// a negative length must never reach the copy routine
+		if(len > 0) {
+#ifdef RCCE_VERSION
+			iRCCE_memcpy_put(tag, &((char*)flag.flag_addr)[sizeof(int)], len);
+#else
+			iRCCE_memcpy_put(tag, &((char*)flag)[sizeof(int)], len);
+#endif
+		}
+	}
+
+	return(RCCE_SUCCESS);
+}
+
+//--------------------------------------------------------------------------------------
+// FUNCTION: iRCCE_test_tagged
+//--------------------------------------------------------------------------------------
+// Non-blocking counterpart of iRCCE_wait_tagged(): stores 1 in *result if the flag
+// equals 'val' and 0 otherwise; only on a match, and if 'tag' is not NULL, up to
+// iRCCE_MAX_TAGGED_LEN payload bytes are copied to 'tag'. Always returns RCCE_SUCCESS.
+//--------------------------------------------------------------------------------------
+int iRCCE_test_tagged(RCCE_FLAG flag, RCCE_FLAG_STATUS val, int *result, void *tag, int len) {
+
+#ifndef RCCE_VERSION
+	RCCE_FLAG flag_pos = flag;
+#ifdef _OPENMP
+	flag_pos = flag + RCCE_LINE_SIZE / sizeof(int) - 1;
+#endif
+#endif
+
+#ifdef _OPENMP
+#pragma omp flush
+#endif
+
+	RC_cache_invalidate();
+
+#ifdef RCCE_VERSION
+	// this is a newer version than V1.0.13
+#ifdef _OPENMP
+	// test the same word that iRCCE_wait_tagged() polls under OpenMP (the last int
+	// of the line), as the non-RCCE_VERSION branch of this function already does
+	if((RCCE_FLAG_STATUS)(*( ((int*)flag.flag_addr) + RCCE_LINE_SIZE / sizeof(int) - 1)) != val) {
+#else
+	if((RCCE_FLAG_STATUS)(*flag.flag_addr) != val) {
+#endif
+#else
+	if((*flag_pos) != val) {
+#endif
+		(*result) = 0;
+	}
+	else {
+		(*result) = 1;
+	}
+
+	if((*result) && tag) {
+		if(len > iRCCE_MAX_TAGGED_LEN) len = iRCCE_MAX_TAGGED_LEN;
+		// a negative length must never reach the copy routine
+		if(len > 0) {
+#ifdef RCCE_VERSION
+			iRCCE_memcpy_put(tag, &((char*)flag.flag_addr)[sizeof(int)], len);
+#else
+			iRCCE_memcpy_put(tag, &((char*)flag)[sizeof(int)], len);
+#endif
+		}
+	}
+
+	return(RCCE_SUCCESS);
+}
+
+//--------------------------------------------------------------------------------------
+// FUNCTION: iRCCE_get_max_tagged_len
+//--------------------------------------------------------------------------------------
+// Returns iRCCE_MAX_TAGGED_LEN, the limit the tagged flag functions clamp 'len' to.
+//--------------------------------------------------------------------------------------
+int iRCCE_get_max_tagged_len(void)
+{
+	return iRCCE_MAX_TAGGED_LEN;
+}
+#endif
diff --git a/hermit/usr/ircce/iRCCE_waitlist.c b/hermit/usr/ircce/iRCCE_waitlist.c
new file mode 100644
index 000000000..4c48e8131
--- /dev/null
+++ b/hermit/usr/ircce/iRCCE_waitlist.c
@@ -0,0 +1,324 @@
+/****************************************************************************************
+ * Functions for a convenient handling of multiple outstanding non-blocking requests
+ ****************************************************************************************
+ *
+ * Authors: Jacek Galowicz, Carsten Clauss
+ *          Chair for Operating Systems, RWTH Aachen University
+ * Date:    2010-12-09
+ *
+
****************************************************************************************
+ *
+ * Copyright 2010 Jacek Galowicz, Chair for Operating Systems,
+ *                                RWTH Aachen University
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#include "iRCCE_lib.h"
+
+//--------------------------------------------------------------------------------------
+// FUNCTION: iRCCE_init_wait_list
+//--------------------------------------------------------------------------------------
+// Puts a wait list into the empty state (no first and no last element)
+//--------------------------------------------------------------------------------------
+void iRCCE_init_wait_list(iRCCE_WAIT_LIST *list)
+{
+	list->first = list->last = NULL;
+}
+
+// Appends an already filled-in element to the tail of the list. The caller is
+// expected to have set elem->next to NULL; it is not touched here.
+static void iRCCE_add_wait_list_generic(iRCCE_WAIT_LIST *list, iRCCE_WAIT_LISTELEM * elem)
+{
+	if (list->first != NULL) {
+		// non-empty list: hook the element behind the current tail
+		list->last->next = elem;
+	}
+	else {
+		// empty list: the element becomes the head as well
+		list->first = elem;
+	}
+
+	list->last = elem;
+}
+
+//--------------------------------------------------------------------------------------
+// FUNCTION: iRCCE_add_send_to_wait_list
+//--------------------------------------------------------------------------------------
+// Function for adding Send requests to the waitall-queue
+// (allocates one list element per call; it is freed again by iRCCE_test_all() or
+//  iRCCE_test_any() once the request has completed)
+//--------------------------------------------------------------------------------------
+void iRCCE_add_send_to_wait_list(iRCCE_WAIT_LIST *list, iRCCE_SEND_REQUEST * req)
+{
+	// NOTE(review): the result of malloc() is used unchecked and this function has
+	// no way to report a failed allocation to its caller
+	iRCCE_WAIT_LISTELEM *entry = (iRCCE_WAIT_LISTELEM*)malloc(sizeof(*entry));
+
+	entry->req  = (void*)req;
+	entry->next = NULL;
+	entry->type = iRCCE_WAIT_LIST_SEND_TYPE;
+
+	iRCCE_add_wait_list_generic(list, entry);
+}
+
+//--------------------------------------------------------------------------------------
+// FUNCTION: iRCCE_add_recv_to_wait_list
+//--------------------------------------------------------------------------------------
+// Function for adding Recv requests to the waitall-queue
+// (allocates one list element per call; it is freed again by iRCCE_test_all() or
+//  iRCCE_test_any() once the request has completed)
+//--------------------------------------------------------------------------------------
+void iRCCE_add_recv_to_wait_list(iRCCE_WAIT_LIST *list, iRCCE_RECV_REQUEST * req)
+{
+	iRCCE_WAIT_LISTELEM *elem;
+	// NOTE(review): the result of malloc() is used unchecked and this function has
+	// no way to report a failed allocation to its caller
+	elem = (iRCCE_WAIT_LISTELEM*)malloc(sizeof(iRCCE_WAIT_LISTELEM));
+
+	elem->type = iRCCE_WAIT_LIST_RECV_TYPE;
+	elem->next = NULL;
+	elem->req = (void*)req;
+	iRCCE_add_wait_list_generic(list, elem);
+
+	return;
+}
+
+//--------------------------------------------------------------------------------------
+// FUNCTION: iRCCE_add_to_wait_list
+//--------------------------------------------------------------------------------------
+// Function for adding Send and/or Recv requests to the waitall-queue
+// (a NULL request is skipped; if both are given the send request is added first)
+//--------------------------------------------------------------------------------------
+void iRCCE_add_to_wait_list(iRCCE_WAIT_LIST *list, iRCCE_SEND_REQUEST * send_req, iRCCE_RECV_REQUEST * recv_req)
+{
+	if (send_req != NULL) iRCCE_add_send_to_wait_list(list, send_req);
+	if (recv_req != NULL) iRCCE_add_recv_to_wait_list(list, recv_req);
+
+	return;
+}
+
+//--------------------------------------------------------------------------------------
+// FUNCTION: iRCCE_wait_all
+//--------------------------------------------------------------------------------------
+// Blocking wait for completion of all enqueued send and recv calls
+// (polls iRCCE_test_all() until it reports iRCCE_SUCCESS; the list is empty afterwards)
+//--------------------------------------------------------------------------------------
+int iRCCE_wait_all(iRCCE_WAIT_LIST *list)
+{
+	while(iRCCE_test_all(list, NULL) != iRCCE_SUCCESS) ;
+
+	return iRCCE_SUCCESS;
+}
+
+//--------------------------------------------------------------------------------------
+// FUNCTION: iRCCE_test_all
+//--------------------------------------------------------------------------------------
+// Nonblocking test for completion of all enqueued send and recv calls
+// Just provide NULL instead of testvar if you don't need it
+//
+// Every request in the list is tested once; elements of completed requests are
+// unlinked and freed. Returns iRCCE_SUCCESS (and *test = 1) if no request is left
+// pending, otherwise iRCCE_PENDING (and *test = 0).
+//--------------------------------------------------------------------------------------
+int iRCCE_test_all(iRCCE_WAIT_LIST *list, int *test)
+{
+	int retval = iRCCE_SUCCESS;
+	int req_state;
+	iRCCE_WAIT_LISTELEM *pElem;
+	iRCCE_WAIT_LISTELEM *pLastElem; // last element that stays in the list
+	iRCCE_WAIT_LISTELEM *pTemp;
+	pLastElem = NULL;
+	pElem = list->first;
+
+	while (pElem != NULL) {
+		if (pElem->type == iRCCE_WAIT_LIST_SEND_TYPE)
+			req_state = iRCCE_isend_test((iRCCE_SEND_REQUEST*)pElem->req, NULL);
+		else
+			req_state = iRCCE_irecv_test((iRCCE_RECV_REQUEST*)pElem->req, NULL);
+
+		if (req_state == iRCCE_SUCCESS) {
+			// Remove this element from the list
+			if (pElem == list->first) {
+				list->first = pElem->next;
+				// the only element is going away: do not leave 'last'
+				// pointing at memory that is freed below
+				if (pElem == list->last) list->last = NULL;
+			}
+			else if (pElem == list->last) {
+				list->last = pLastElem;
+				pLastElem->next = NULL;
+			}
+			else {
+				pLastElem->next = pElem->next;
+			}
+
+			pTemp = pElem->next;
+			free(pElem);
+			pElem = pTemp;
+		}
+		else {
+			retval = iRCCE_PENDING;
+
+			pLastElem = pElem;
+			pElem = pElem->next;
+		}
+	}
+
+	if (test) {
+		if (retval == iRCCE_SUCCESS) {
+			(*test) = 1;
+		}
+		else {
+			(*test) = 0;
+		}
+	}
+
+	return retval;
+}
+
+//--------------------------------------------------------------------------------------
+// FUNCTION: iRCCE_wait_any
+//--------------------------------------------------------------------------------------
+// Blocking wait for completion of any enqueued send and recv request
+// (polls iRCCE_test_any() until it reports iRCCE_SUCCESS)
+//--------------------------------------------------------------------------------------
+int iRCCE_wait_any(iRCCE_WAIT_LIST *list, iRCCE_SEND_REQUEST ** send_request, iRCCE_RECV_REQUEST ** recv_request)
+{
+	while(iRCCE_test_any(list, send_request, recv_request) != iRCCE_SUCCESS) ;
+
+	return iRCCE_SUCCESS;
+}
+
+//--------------------------------------------------------------------------------------
+// FUNCTION: iRCCE_test_any
+//--------------------------------------------------------------------------------------
+// Nonblocking test for completion of any enqueued send or recv request
+//
+// The list is walked from the front; the element of the first completed request is
+// unlinked and freed, the request is handed back through *send_request or
+// *recv_request (the other one is set to NULL) and iRCCE_SUCCESS is returned. If no
+// request has completed both are set to NULL and iRCCE_PENDING is returned. Either
+// output pointer may itself be NULL.
+//--------------------------------------------------------------------------------------
+int iRCCE_test_any(iRCCE_WAIT_LIST *list, iRCCE_SEND_REQUEST ** send_request, iRCCE_RECV_REQUEST ** recv_request)
+{
+	int req_state;
+
+	iRCCE_WAIT_LISTELEM *pElem;
+	iRCCE_WAIT_LISTELEM *pLastElem; // element in front of pElem
+	pLastElem = NULL;
+	pElem = list->first;
+
+	while (pElem != NULL) {
+		if (pElem->type == iRCCE_WAIT_LIST_SEND_TYPE)
+			req_state = iRCCE_isend_test((iRCCE_SEND_REQUEST*)pElem->req, NULL);
+		else
+			req_state = iRCCE_irecv_test((iRCCE_RECV_REQUEST*)pElem->req, NULL);
+
+		if (req_state == iRCCE_SUCCESS) {
+			// Remove this element from the list
+			if (pElem == list->first) {
+				list->first = pElem->next;
+				// the only element is going away: do not leave 'last'
+				// pointing at memory that is freed below
+				if (pElem == list->last) list->last = NULL;
+			}
+			else if (pElem == list->last) {
+				list->last = pLastElem;
+				pLastElem->next = NULL;
+			}
+			else {
+				pLastElem->next = pElem->next;
+			}
+
+			if (pElem->type == iRCCE_WAIT_LIST_SEND_TYPE) {
+				if(send_request) {
+					(*send_request) = (iRCCE_SEND_REQUEST*)pElem->req;
+				}
+				if(recv_request) {
+					(*recv_request) = NULL;
+				}
+			}
+			else {
+				if(send_request) {
+					(*send_request) = NULL;
+				}
+				if(recv_request) {
+					(*recv_request) = (iRCCE_RECV_REQUEST*)pElem->req;
+				}
+			}
+
+			free(pElem);
+
+			return iRCCE_SUCCESS;
+		}
+		else {
+			pLastElem = pElem;
+			pElem = pElem->next;
+		}
+	}
+
+	if(send_request) {
+		(*send_request) = NULL;
+	}
+	if(recv_request) {
+		(*recv_request) = NULL;
+	}
+
+	return iRCCE_PENDING;
+}
+
+
+//--------------------------------------------------------------------------------------
+// FUNCTIONS: iRCCE_get_dest, iRCCE_get_source, iRCCE_get_length, iRCCE_get_status
+//--------------------------------------------------------------------------------------
+// Functions to determine the respective sender/receiver after test_any() / wait_any()
+// (Can also be used after receiving a message via wildcard mechanism!)
+//--------------------------------------------------------------------------------------
+// Destination UE of a send request, or iRCCE_ERROR if no request is given
+int iRCCE_get_dest(iRCCE_SEND_REQUEST *request)
+{
+	return (request == NULL) ? iRCCE_ERROR : request->dest;
+}
+//--------------------------------------------------------------------------------------
+// Source UE of a recv request; without a request the value of iRCCE_recent_source
+int iRCCE_get_source(iRCCE_RECV_REQUEST *request)
+{
+	return (request == NULL) ? iRCCE_recent_source : request->source;
+}
+//--------------------------------------------------------------------------------------
+// Size of the send request if given, else of the recv request if given, else the
+// value of iRCCE_recent_length
+int iRCCE_get_size(iRCCE_SEND_REQUEST * send_req, iRCCE_RECV_REQUEST * recv_req)
+{
+	if(send_req != NULL) {
+		return send_req->size;
+	}
+
+	if(recv_req != NULL) {
+		return recv_req->size;
+	}
+
+	return iRCCE_recent_length;
+}
+//--------------------------------------------------------------------------------------
+// Value of iRCCE_recent_length
+int iRCCE_get_length(void)
+{
+	return iRCCE_recent_length;
+}
+//--------------------------------------------------------------------------------------
+// State of a request; the send request takes precedence if both are given.
+//   iRCCE_SUCCESS  - the request is marked finished
+//   iRCCE_PENDING  - the request is the one its queue (iRCCE_isend_queue, or
+//                    iRCCE_irecv_queue[source] for a recv) currently refers to
+//   iRCCE_RESERVED - the request is neither finished nor referred to by its queue
+//   iRCCE_ERROR    - neither request was given
+int iRCCE_get_status(iRCCE_SEND_REQUEST * send_req, iRCCE_RECV_REQUEST * recv_req)
+{
+	if(send_req != NULL) {
+
+		if(send_req->finished) return(iRCCE_SUCCESS);
+
+		return (iRCCE_isend_queue == send_req) ? iRCCE_PENDING : iRCCE_RESERVED;
+	}
+
+	if(recv_req != NULL) {
+
+		if(recv_req->finished) return(iRCCE_SUCCESS);
+
+		// NOTE(review): recv_req->source is used as an array index here; confirm
+		// that it is always a valid UE number for unfinished wildcard receives
+		return (iRCCE_irecv_queue[recv_req->source] == recv_req) ? iRCCE_PENDING : iRCCE_RESERVED;
+	}
+
+	return iRCCE_ERROR;
+}
diff --git a/hermit/usr/ircce/syscall.h b/hermit/usr/ircce/syscall.h
new file mode 100644
index 000000000..cf4137018
--- /dev/null
+++ b/hermit/usr/ircce/syscall.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2011, Stefan Lankes, RWTH Aachen University
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef __SYSCALL_H__
+#define __SYSCALL_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* system call numbers, passed as the first argument of syscall() */
+#define __NR_exit		0
+#define __NR_write		1
+#define __NR_open		2
+#define __NR_close		3
+#define __NR_read		4
+#define __NR_lseek		5
+#define __NR_unlink		6
+#define __NR_getpid		7
+#define __NR_kill		8
+#define __NR_fstat		9
+#define __NR_sbrk		10
+#define __NR_fork		11
+#define __NR_wait		12
+#define __NR_execve		13
+#define __NR_times		14
+#define __NR_accept		15
+#define __NR_bind		16
+#define __NR_closesocket	17
+#define __NR_connect		18
+#define __NR_listen		19
+#define __NR_recv		20
+#define __NR_send		21
+#define __NR_socket		22
+#define __NR_getsockopt		23
+#define __NR_setsockopt		24
+#define __NR_gethostbyname	25
+#define __NR_sendto		26
+#define __NR_recvfrom		27
+#define __NR_select		28
+#define __NR_stat		29
+#define __NR_dup		30
+#define __NR_dup2		31
+#define __NR_msleep		32
+#define __NR_yield		33
+#define __NR_sem_init		34
+#define __NR_sem_destroy	35
+#define __NR_sem_wait		36
+#define __NR_sem_post		37
+#define __NR_sem_timedwait	38
+#define __NR_getprio		39
+#define __NR_setprio		40
+#define __NR_clone		41
+#define __NR_sem_cancelablewait	42
+#define __NR_get_ticks		43
+#define __NR_rcce_init		44
+#define __NR_rcce_fini		45
+#define __NR_rcce_malloc	46
+
+/*
+ * Issues system call 'nr' with up to three arguments.
+ *
+ * nr is placed in rax, arg0 in rdi, arg1 in rsi and arg2 in rdx; the value left in
+ * rax after the "syscall" instruction is returned.
+ */
+static inline long
+syscall(int nr, unsigned long arg0, unsigned long arg1, unsigned long arg2)
+{
+	long res;
+
+	// note: syscall stores the return address in rcx and rflags in r11
+	// (nr is widened to long so that the whole of rax is defined on entry;
+	//  __asm__/__volatile__ keep the header usable in strict ISO C modes)
+	__asm__ __volatile__ ("syscall"
+		: "=a" (res)
+		: "a" ((long)nr), "D" (arg0), "S" (arg1), "d" (arg2)
+		: "memory", "%rcx", "%r11");
+
+	return res;
+}
+
+/*
+ * Convenience wrappers for 0..3 arguments. Every argument is parenthesized before
+ * it is cast, so that an expression such as "ptr + 1" is converted as a whole.
+ */
+#define SYSCALL0(NR) \
+	syscall((NR), 0, 0, 0)
+#define SYSCALL1(NR, ARG0) \
+	syscall((NR), (unsigned long)(ARG0), 0, 0)
+#define SYSCALL2(NR, ARG0, ARG1) \
+	syscall((NR), (unsigned long)(ARG0), (unsigned long)(ARG1), 0)
+#define SYSCALL3(NR, ARG0, ARG1, ARG2) \
+	syscall((NR), (unsigned long)(ARG0), (unsigned long)(ARG1), (unsigned long)(ARG2))
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif