1
0
Fork 0
mirror of https://github.com/hermitcore/libhermit.git synced 2025-03-09 00:00:03 +01:00

add iRCCE as communication library between isles

- iRCCE was designed for the SCC
- see http://www.lfbs.rwth-aachen.de/publications/files/iRCCE.pdf
- HermitCreate creates virtual message passing buffers to emulate the
  SCC
This commit is contained in:
Stefan Lankes 2015-10-17 11:56:19 +02:00
parent 3f1c5c1d77
commit e339311d05
32 changed files with 10724 additions and 0 deletions

View file

@ -55,6 +55,9 @@ libs:
$Q$(MAKE) TARGET=$(TARGET) CC_FOR_TARGET=$(CC_FOR_TARGET) AR_FOR_TARGET=$(AR_FOR_TARGET) CFLAGS_FOR_TARGET+="-I. -Iplatform/hermit -Iplatform/helper -Wall" -C pte
$Q$(MAKE) TARGET=$(TARGET) CC_FOR_TARGET=$(CC_FOR_TARGET) AR_FOR_TARGET=$(AR_FOR_TARGET) CFLAGS_FOR_TARGET+="-I. -Wall -pthread" -C libgomp depend
$Q$(MAKE) TARGET=$(TARGET) CC_FOR_TARGET=$(CC_FOR_TARGET) AR_FOR_TARGET=$(AR_FOR_TARGET) CFLAGS_FOR_TARGET+="-I. -Wall -pthread" -C libgomp
$Q$(MAKE) TARGET=$(TARGET) CC_FOR_TARGET=$(CC_FOR_TARGET) AR_FOR_TARGET=$(AR_FOR_TARGET) CFLAGS_FOR_TARGET+="-I. -Wall" -C ircce depend
$Q$(MAKE) TARGET=$(TARGET) CC_FOR_TARGET=$(CC_FOR_TARGET) AR_FOR_TARGET=$(AR_FOR_TARGET) CFLAGS_FOR_TARGET+="-I. -Wall" -C ircce
$(TMP)/gcc:
@echo Build final gcc
@ -73,6 +76,8 @@ veryclean:
@echo Propper cleaning of the toolchain
$Q$(MAKE) -C pte veryclean
$Q$(MAKE) -C libgomp veryclean
$Q$(MAKE) -C ircce veryclean
$Q$(MAKE) -C tests veryclean
$Q$(MAKE) -C benchmarks veryclean
$Q$(RM) $(TOPDIR)/$(ARCH)
$Q$(RM) $(TMP)

43
hermit/usr/ircce/Makefile Normal file
View file

@ -0,0 +1,43 @@
# Builds libircce.a (the iRCCE communication library) from every C file in this
# directory and installs the archive and the public headers into the newlib tree.
# CC_FOR_TARGET, AR_FOR_TARGET and CFLAGS_FOR_TARGET are expected from the caller.
#
# NOTE: MAKE is intentionally not assigned here. Overriding the built-in $(MAKE)
# with a literal "make" would break jobserver (-j) and -n propagation.
NEWLIB = ../x86/x86_64-hermit
ARFLAGS_FOR_TARGET = rsv
CP = cp
# simply-expanded so the directory is globbed once, at parse time
C_source := $(wildcard *.c)
NAME = libircce.a
OBJS := $(C_source:.c=.o)

#
# Prettify output
V = 0
ifeq ($(V),0)
Q = @
P = > /dev/null
endif

# these targets are commands, not files; without this declaration a stray file
# named e.g. "clean" or "depend" would silently disable the target
.PHONY: default all clean veryclean depend

# other implicit rules
%.o : %.c
	@echo "[CC] $@"
	$Q$(CC_FOR_TARGET) -c $(CFLAGS_FOR_TARGET) -o $@ $<

default: all

all: $(NAME)

# create the archive, then install it together with the headers
$(NAME): $(OBJS)
	$Q$(AR_FOR_TARGET) $(ARFLAGS_FOR_TARGET) $@ $(OBJS)
	$Q$(CP) $@ $(NEWLIB)/lib
	$Q$(CP) *.h $(NEWLIB)/include

clean:
	@echo Cleaning examples
	$Q$(RM) $(NAME) *.o *~

# like clean, but also drops the dependency file generated by "depend"
veryclean:
	@echo Propper cleaning examples
	$Q$(RM) $(NAME) *.o *~ Makefile.dep

# regenerate the header dependencies of all C files
depend:
	$Q$(CC_FOR_TARGET) -MM $(CFLAGS_FOR_TARGET) *.c > Makefile.dep

# the leading '-' keeps the first build (before "depend" ran) from failing
-include Makefile.dep

# DO NOT DELETE

374
hermit/usr/ircce/RCCE.h Normal file
View file

@ -0,0 +1,374 @@
//
// Copyright 2010 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// [2010-10-25] added support for non-blocking send/recv operations
// - RCCE_isend(), ..._test(), ..._wait(), ..._push()
// - RCCE_irecv(), ..._test(), ..._wait(), ..._push()
// by Carsten Clauss, Chair for Operating Systems,
// RWTH Aachen University
//
// [2012-09-10] added support for "tagged" flags
// - RCCE_send_tagged(), RCCE_recv_tagged(), RCCE_recv_probe_tagged()
// by Carsten Clauss, Chair for Operating Systems,
// RWTH Aachen University
//
#ifndef RCCE_H
#define RCCE_H
#include <stdlib.h>
#include <stdio.h>
#ifdef __hermit__
#define SCC
#define COPPERRIDGE
#define USE_REMOTE_PUT_LOCAL_GET
#undef SHMADD
#endif
#define _RCCE "1.0.13 release"
// #define USE_BYTE_FLAGS
// #define USE_FLAG_EXPERIMENTAL
// absolute value of x; the argument and the result are fully parenthesized so that
// expression arguments such as ABS(a-b) expand correctly. Note that x is evaluated
// twice, so it must not have side effects.
#define ABS(x) (((x) > 0) ? (x) : -(x))
// little trick to allow the application to be called "RCCE_APP" under
// OpenMP, and "main" otherwise
#ifndef _OPENMP
#define RCCE_APP main
#endif
// modify next line for Intel BareMetal, which supports stdout, but not stderr
#define STDERR stdout
#ifdef __hermit__
#define LOG2_LINE_SIZE 6
#else
#define LOG2_LINE_SIZE 5
#endif
#define RCCE_LINE_SIZE (1<<LOG2_LINE_SIZE)
// RCCE_BUFF_SIZE_MAX is space per UE, which is half of the space per tile
#define RCCE_BUFF_SIZE_MAX (1<<13)
#ifdef SHMADD
//64MB
//#define RCCE_SHM_SIZE_MAX 0x4000000
// 128MB
//#define RCCE_SHM_SIZE_MAX 0x8000000
// 256MB
//#define RCCE_SHM_SIZE_MAX 0x10000000
// 512MB
#define RCCE_SHM_SIZE_MAX 0x20000000
// 960MB
//#define RCCE_SHM_SIZE_MAX 0x3C000000
#else
#ifndef SCC_COUPLED_SYSTEMS
// 64MB
#define RCCE_SHM_SIZE_MAX (1<<26)
#else
// In Coupled Mode only 4MB
#define RCCE_SHM_SIZE_MAX (1<<22)
#endif
#endif
#ifdef __hermit__
#define RCCE_MAX_BOARDS 1
#define RCCE_MAXNP_PER_BOARD 8
#else
#define RCCE_MAX_BOARDS 2 /* allow up to 2 SCC boards for now */
#define RCCE_MAXNP_PER_BOARD 48
#endif
#define RCCE_MAXNP (RCCE_MAX_BOARDS * RCCE_MAXNP_PER_BOARD)
#define RCCE_SUCCESS 0
#define RCCE_PENDING -1
#define RCCE_RESERVED -2
#define RCCE_REJECTED -3
#define RCCE_ERROR_BASE 1234321
#define RCCE_ERROR_TARGET (RCCE_ERROR_BASE + 1)
#define RCCE_ERROR_SOURCE (RCCE_ERROR_BASE + 2)
#define RCCE_ERROR_ID (RCCE_ERROR_BASE + 3)
#define RCCE_ERROR_MESSAGE_LENGTH (RCCE_ERROR_BASE + 4)
#define RCCE_ERROR_FLAG_UNDEFINED (RCCE_ERROR_BASE + 5)
#define RCCE_ERROR_NUM_UES (RCCE_ERROR_BASE + 6)
#define RCCE_ERROR_DATA_OVERLAP (RCCE_ERROR_BASE + 7)
#define RCCE_ERROR_ALIGNMENT (RCCE_ERROR_BASE + 8)
#define RCCE_ERROR_DEBUG_FLAG (RCCE_ERROR_BASE + 9)
#define RCCE_ERROR_FLAG_NOT_IN_COMM_BUFFER (RCCE_ERROR_BASE + 10)
#define RCCE_ERROR_FLAG_STATUS_UNDEFINED (RCCE_ERROR_BASE + 11)
#define RCCE_ERROR_FLAG_NOT_ALLOCATED (RCCE_ERROR_BASE + 12)
#define RCCE_ERROR_VAL_UNDEFINED (RCCE_ERROR_BASE + 13)
#define RCCE_ERROR_INVALID_ERROR_CODE (RCCE_ERROR_BASE + 14)
#define RCCE_ERROR_RPC_NOT_ALLOCATED (RCCE_ERROR_BASE + 15)
#define RCCE_ERROR_RPC_INTERNAL (RCCE_ERROR_BASE + 16)
#define RCCE_ERROR_MULTIPLE_RPC_REQUESTS (RCCE_ERROR_BASE + 17)
#define RCCE_ERROR_FDIVIDER (RCCE_ERROR_BASE + 18)
#define RCCE_ERROR_FREQUENCY_EXCEEDED (RCCE_ERROR_BASE + 19)
#define RCCE_ERROR_NO_ACTIVE_RPC_REQUEST (RCCE_ERROR_BASE + 20)
#define RCCE_ERROR_STALE_RPC_REQUEST (RCCE_ERROR_BASE + 21)
#define RCCE_ERROR_COMM_UNDEFINED (RCCE_ERROR_BASE + 22)
#define RCCE_ERROR_ILLEGAL_OP (RCCE_ERROR_BASE + 23)
#define RCCE_ERROR_ILLEGAL_TYPE (RCCE_ERROR_BASE + 24)
#define RCCE_ERROR_MALLOC (RCCE_ERROR_BASE + 25)
#define RCCE_ERROR_COMM_INITIALIZED (RCCE_ERROR_BASE + 26)
#define RCCE_ERROR_CORE_NOT_IN_HOSTFILE (RCCE_ERROR_BASE + 27)
#define RCCE_ERROR_NO_MULTICAST_SUPPORT (RCCE_ERROR_BASE + 28)
// size in bytes of the buffer a caller hands to RCCE_error_string(); it must hold
// the longest message of RCCE_estrings (55 characters) plus the terminating NUL
#define RCCE_MAX_ERROR_STRING 64
#define RCCE_DEBUG_ALL 111111
#define RCCE_DEBUG_SYNCH 111444
#define RCCE_DEBUG_COMM 111555
#define RCCE_DEBUG_RPC 111666
#define RCCE_DEBUG_DEBUG 111888
#define RCCE_FLAG_SET 1
#define RCCE_FLAG_UNSET 0
#define RCCE_NUM_OPS 4
#define RCCE_OP_BASE 23232323
#define RCCE_SUM (RCCE_OP_BASE)
#define RCCE_MIN (RCCE_OP_BASE+1)
#define RCCE_MAX (RCCE_OP_BASE+2)
#define RCCE_PROD (RCCE_OP_BASE+3)
#define RCCE_TYPE_BASE 63636363
#define RCCE_INT (RCCE_TYPE_BASE)
#define RCCE_LONG (RCCE_TYPE_BASE+1)
#define RCCE_FLOAT (RCCE_TYPE_BASE+2)
#define RCCE_DOUBLE (RCCE_TYPE_BASE+3)
// MPB pointer type
typedef volatile unsigned char* t_vcharp;
// Representation of a synchronization flag, selected at compile time:
//  - SINGLEBITFLAGS or USE_BYTE_FLAGS (and not USE_FLAG_EXPERIMENTAL): a descriptor
//    struct that locates the flag inside a cache line
//  - USE_FLAG_EXPERIMENTAL: a pointer to a volatile byte
//  - otherwise: a pointer to a volatile int
#if (defined(SINGLEBITFLAGS) || defined(USE_BYTE_FLAGS)) && !defined(USE_FLAG_EXPERIMENTAL)
typedef struct {
int location; /* location of bit within line (0-255) */
t_vcharp flag_addr; /* address of byte containing flag inside cache line */
t_vcharp line_address; /* start of cache line containing flag */
} RCCE_FLAG;
#else
#ifdef USE_FLAG_EXPERIMENTAL
typedef volatile unsigned char *RCCE_FLAG;
#else
typedef volatile int *RCCE_FLAG;
#endif
#endif
// Type of the value stored in a flag (see RCCE_FLAG_SET / RCCE_FLAG_UNSET); a single
// byte in the experimental flag implementation, an int otherwise
#ifdef USE_FLAG_EXPERIMENTAL
typedef unsigned char RCCE_FLAG_STATUS;
#else
typedef int RCCE_FLAG_STATUS;
#endif
// Communicator: a group of UEs together with the flags used to synchronize them.
// Filled in by RCCE_comm_split().
typedef struct {
int size; // number of UEs that are members of this communicator
int my_rank; // rank of the calling UE within the communicator
int initialized; // compared against RCCE_COMM_INITIALIZED before the communicator is used
int member[RCCE_MAXNP]; // global UE id of each member, indexed by rank
#ifdef USE_FAT_BARRIER
RCCE_FLAG gather[RCCE_MAXNP]; // one gather flag per UE (fat barrier variant)
#else
RCCE_FLAG gather; // single gather flag, allocated in RCCE_comm_split()
#endif
RCCE_FLAG release; // release flag, allocated in RCCE_comm_split()
volatile int cycle; // NOTE(review): presumably barrier state; not used in the visible code
volatile int count; // NOTE(review): presumably barrier state; not used in the visible code
int step; // NOTE(review): presumably progress state of a non-blocking barrier; confirm
int label; // reset to 0 by RCCE_comm_split(); presumably a re-entrance label; confirm
} RCCE_COMM;
// State of one non-blocking send operation; this is the request handle passed to
// RCCE_isend(), RCCE_isend_test() and RCCE_isend_wait(). Requests can be chained
// into a singly linked list through "next".
typedef struct _RCCE_SEND_REQUEST {
char *privbuf; // source buffer in local private memory (send buffer)
t_vcharp combuf; // intermediate buffer in MPB
size_t chunk; // size of MPB available for this message (bytes)
RCCE_FLAG *ready; // flag indicating whether receiver is ready
RCCE_FLAG *sent; // flag indicating whether message has been sent by source
size_t size; // size of message (bytes)
int dest; // UE that will receive the message
int copy; // set to 0 for synchronization only (no copying/sending)
void* tag; // additional tag (NOTE(review): presumably the tag of the "tagged" send variants; confirm)
int len; // length of additional tag
RCCE_FLAG *probe; // flag for probing for incoming messages
size_t wsize; // offset within send buffer when putting in "chunk" bytes
size_t remainder; // bytes remaining to be sent
size_t nbytes; // number of bytes to be sent in single RCCE_put call
char *bufptr; // running pointer inside privbuf for current location
int label; // jump/goto label for the reentrance of the respective poll function
int finished; // flag that indicates whether the request has already been finished
struct _RCCE_SEND_REQUEST *next; // next request in the list
} RCCE_SEND_REQUEST;
// State of one non-blocking receive operation; this is the request handle passed to
// RCCE_irecv(), RCCE_irecv_test() and RCCE_irecv_wait(). Requests can be chained
// into a singly linked list through "next".
// NOTE(review): several field comments in the original were copied verbatim from the
// send request; the receive-side wording below is inferred and should be confirmed.
typedef struct _RCCE_RECV_REQUEST {
char *privbuf; // buffer in local private memory (presumably the receive/destination buffer)
t_vcharp combuf; // intermediate buffer in MPB
size_t chunk; // size of MPB available for this message (bytes)
RCCE_FLAG *ready; // flag indicating whether receiver is ready
RCCE_FLAG *sent; // flag indicating whether message has been sent by source
size_t size; // size of message (bytes)
int source; // UE that will send the message
int copy; // set to 0 for cancel function
void* tag; // additional tag (NOTE(review): presumably the tag of the "tagged" receive variants; confirm)
int len; // length of additional tag
RCCE_FLAG *probe; // flag for probing for incoming messages
size_t wsize; // offset within the private buffer when transferring "chunk" bytes
size_t remainder; // bytes remaining to be transferred (presumably: still to be received)
size_t nbytes; // number of bytes moved in a single transfer call (presumably RCCE_get; confirm)
char *bufptr; // running pointer inside privbuf for current location
int label; // jump/goto label for the reentrance of the respective poll function
int finished; // flag that indicates whether the request has already been finished
struct _RCCE_RECV_REQUEST *next; // next request in the list
} RCCE_RECV_REQUEST;
// Position of one UE inside a tree of UEs; this is the type taken by
// RCCE_tree_init() and RCCE_tree_barrier()
typedef struct tree_s {
int parent; // UE of parent
int num_children; // number of valid entries in child[]
int child[RCCE_MAXNP]; // UEs of children
} tree_t;
#ifdef RC_POWER_MANAGEMENT
typedef struct{
int release;
int old_voltage_level;
int new_voltage_level;
int old_frequency_divider;
int new_frequency_divider;
long long start_cycle;
} RCCE_REQUEST;
int RCCE_power_domain(void);
int RCCE_iset_power(int, RCCE_REQUEST *, int *, int *);
int RCCE_wait_power(RCCE_REQUEST *);
int RCCE_set_frequency_divider(int, int *);
int RCCE_power_domain_master(void);
int RCCE_power_domain_size(void);
#endif
int RCCE_init(int *, char***);
int RCCE_finalize(void);
double RCCE_wtime(void);
int RCCE_ue(void);
int RCCE_num_ues(void);
#ifdef SCC_COUPLED_SYSTEMS
int RCCE_dev(void);
int RCCE_dev_ue(void);
int RCCE_num_dev(void);
int RCCE_num_ues_dev(int);
int RCCE_ue_to_dev(int);
#endif
#ifdef GORY
t_vcharp RCCE_malloc(size_t);
t_vcharp RCCE_malloc_request(size_t, size_t *);
t_vcharp RCCE_palloc(size_t,int);
void RCCE_free(t_vcharp);
int RCCE_put(t_vcharp, t_vcharp, int, int);
int RCCE_get(t_vcharp, t_vcharp, int, int);
int RCCE_wait_until(RCCE_FLAG, RCCE_FLAG_STATUS);
int RCCE_test_flag(RCCE_FLAG, RCCE_FLAG_STATUS, int *);
int RCCE_flag_alloc(RCCE_FLAG *);
int RCCE_flag_free(RCCE_FLAG *);
int RCCE_flag_write(RCCE_FLAG *, RCCE_FLAG_STATUS, int);
int RCCE_flag_read(RCCE_FLAG, RCCE_FLAG_STATUS *, int);
int RCCE_flag_write_tagged(RCCE_FLAG *, RCCE_FLAG_STATUS, int, char*, int);
int RCCE_flag_read_tagged(RCCE_FLAG, RCCE_FLAG_STATUS *, int, char*, int);
int RCCE_send(char *, t_vcharp, size_t, RCCE_FLAG *, RCCE_FLAG *, size_t, int);
int RCCE_recv(char *, t_vcharp, size_t, RCCE_FLAG *, RCCE_FLAG *, size_t, int, RCCE_FLAG *);
int RCCE_recv_test(char *, t_vcharp, size_t, RCCE_FLAG *, RCCE_FLAG *, size_t, int, int *, RCCE_FLAG *);
#ifdef USE_FLAG_EXPERIMENTAL
int RCCE_put_flag(t_vcharp, t_vcharp, int, int);
int RCCE_get_flag(t_vcharp, t_vcharp, int, int);
#endif
#else
// standard non-gory functions:
t_vcharp RCCE_malloc(size_t);
int RCCE_flag_write(RCCE_FLAG *, RCCE_FLAG_STATUS, int);
int RCCE_flag_read(RCCE_FLAG, RCCE_FLAG_STATUS *, int);
int RCCE_send(char *, size_t, int);
int RCCE_recv(char *, size_t, int);
int RCCE_recv_test(char *, size_t, int, int *);
int RCCE_send_pipe(char *, size_t, int);
int RCCE_recv_pipe(char *, size_t, int);
int RCCE_send_mcast(char *, size_t);
int RCCE_recv_mcast(char *, size_t, int);
int RCCE_send_tagged(char *, size_t, int, void *, int);
int RCCE_recv_tagged(char *, size_t, int, void *, int);
int RCCE_recv_probe_tagged(int, int *, t_vcharp *, void *, int);
int RCCE_allreduce(char *, char *, int, int, int, RCCE_COMM);
int RCCE_reduce(char *, char *, int, int, int, int, RCCE_COMM);
int RCCE_bcast(char *, size_t, int, RCCE_COMM);
int RCCE_recv_probe(int, int *, t_vcharp *);
int RCCE_recv_cancel(size_t, int);
int RCCE_isend(char *, size_t, int, RCCE_SEND_REQUEST *);
int RCCE_isend_test(RCCE_SEND_REQUEST *, int *);
int RCCE_isend_wait(RCCE_SEND_REQUEST *);
int RCCE_isend_push(int);
int RCCE_irecv(char *, size_t, int, RCCE_RECV_REQUEST *);
int RCCE_irecv_test(RCCE_RECV_REQUEST *, int *);
int RCCE_irecv_wait(RCCE_RECV_REQUEST *);
int RCCE_irecv_push(int);
#endif
t_vcharp RCCE_shmalloc(size_t);
void RCCE_shfree(t_vcharp);
void RCCE_shflush(void);
t_vcharp RCCE_shrealloc(t_vcharp, size_t);
// LfBS-customized functions:
void* RCCE_memcpy_get(void *, const void *, size_t);
void* RCCE_memcpy_put(void *, const void *, size_t);
#define RCCE_memcpy(a,b,c) RCCE_memcpy_put(a,b,c)
int RCCE_comm_split(int (*)(int, void *), void *, RCCE_COMM *);
int RCCE_comm_free(RCCE_COMM *);
int RCCE_comm_size(RCCE_COMM, int *);
int RCCE_comm_rank(RCCE_COMM, int *);
void RCCE_fence(void);
int RCCE_barrier(RCCE_COMM *);
int RCCE_tree_init(RCCE_COMM *, tree_t *, int);
int RCCE_tree_barrier(RCCE_COMM *, tree_t *);
int RCCE_tournament_barrier(RCCE_COMM *);
int RCCE_tournament_fixed_barrier(RCCE_COMM *);
int RCCE_dissemination_barrier(RCCE_COMM *);
int RCCE_TNS_barrier(RCCE_COMM *);
int RCCE_AIR_barrier(RCCE_COMM *);
int RCCE_AIR_barrier2(RCCE_COMM *);
int RCCE_nb_barrier(RCCE_COMM *);
int RCCE_nb_TNS_barrier(RCCE_COMM *);
int RCCE_nb_AIR_barrier(RCCE_COMM *);
int RCCE_error_string(int, char *, int *);
int RCCE_debug_set(int);
int RCCE_debug_unset(int);
extern RCCE_COMM RCCE_COMM_WORLD;
#ifdef RC_POWER_MANAGEMENT
extern RCCE_COMM RCCE_P_COMM;
#define RCCE_POWER_DEFAULT -99999
#endif
#ifdef _OPENMP
#pragma omp threadprivate (RCCE_COMM_WORLD)
#ifdef RC_POWER_MANAGEMENT
#pragma omp threadprivate (RCCE_P_COMM)
#endif
#endif
#endif

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,73 @@
//***************************************************************************************
// Broadcast functions.
//***************************************************************************************
// Since only collective operations require communication domains, they are the only ones
// that use communicators. All collectives implementations are naive, linear operations.
// There may not be any overlap between target and source.
//
// Author: Rob F. Van der Wijngaart
// Intel Corporation
// Date: 008/30/2010
//
//**************************************************************************************
//
// Copyright 2010 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "RCCE_lib.h"
#ifdef USE_RCCE_COMM
#ifndef GORY
#include "RCCE_comm/RCCE_bcast.c"
#endif
#else
#include <stdlib.h>
#include <string.h>
//--------------------------------------------------------------------------------------
// RCCE_bcast
//--------------------------------------------------------------------------------------
// function that sends data from UE root to all other UEs in the communicator
//--------------------------------------------------------------------------------------
int RCCE_bcast(
  char *buf,      // private memory: data to send on the root, receive buffer elsewhere
  size_t num,     // number of bytes to be broadcast
  int root,       // rank within "comm" of the UE that owns the data
  RCCE_COMM comm  // communication domain
  ) {
#ifdef GORY
  // collectives are not available in the gory API
  printf("Collectives only implemented for simplified API\n");
  return(1);
#else
  int rank, status;

  // the root must be a valid rank of the communicator
  if (root < 0 || root >= comm.size)
    return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ID));

  // every UE except the root just receives the data from the root
  if (RCCE_IAM != comm.member[root]) {
    status = RCCE_recv(buf, num, comm.member[root]);
    if (status)
      return(RCCE_error_return(RCCE_debug_comm,status));
    return(RCCE_SUCCESS);
  }

  // the root sends to all other members one after another (naive, linear scheme)
  for (rank = 0; rank < comm.size; rank++) {
    if (rank == root) continue;
    status = RCCE_send(buf, num, comm.member[rank]);
    if (status)
      return(RCCE_error_return(RCCE_debug_comm,status));
  }
  return(RCCE_SUCCESS);
#endif
}
#endif

View file

@ -0,0 +1,159 @@
//***************************************************************************************
// Communicator manipulation and accessor routines.
//***************************************************************************************
//
// Author: Rob F. Van der Wijngaart
// Intel Corporation
// Date: 008/30/2010
//
//***************************************************************************************
//
// Copyright 2010 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "RCCE_lib.h"
#if defined(COPPERRIDGE) && !defined(__hermit__)
#define RCCE_memcpy_put(a,b,c) memcpy_to_mpb(a, b, c)
#include "scc_memcpy.h"
#else
#define RCCE_memcpy_put(a,b,c) memcpy(a, b, c)
#endif
#ifdef USE_RCCE_COMM
#ifndef GORY
#include "RCCE_comm/RCCE_scatter.c"
#include "RCCE_comm/RCCE_gather.c"
#include "RCCE_comm/RCCE_allgather.c"
#endif
#endif
//--------------------------------------------------------------------------------------
// FUNCTION: RCCE_comm_split
// RCCE_comm_split works like MPI_Comm_split, but:
// 1. Always uses the default global communicator as the basis, not an
// arbitrary communicator
// 2. Uses the rank of the UE in the global communicator as the key
// 3. Uses a function, operating on UE's global rank, to compute color
//--------------------------------------------------------------------------------------
int RCCE_comm_split(
  int (*color)(int, void *), // maps a global UE rank (plus aux) to a color value
  void *aux,                 // optional user data handed through to color()
  RCCE_COMM *comm            // communicator to be (re)defined
  ) {
  int ue, own_color, status;

  if (comm == NULL)
    return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_COMM_UNDEFINED));

  // Synchronize all UEs first, except while the global communicator itself is being
  // defined: that happens in RCCE_init, which every core must call before any other
  // RCCE function, so skipping the barrier is safe there.
  if (comm != &RCCE_COMM_WORLD) RCCE_barrier(&RCCE_COMM_WORLD);

  // collect all UEs that share the color of the calling UE; ranks inside the new
  // communicator follow the order of the global ranks
  own_color = color(RCCE_IAM, aux);
  comm->size = 0;
  for (ue = 0; ue < RCCE_NP; ue++) {
    if (color(ue, aux) != own_color) continue;
    if (ue == RCCE_IAM) comm->my_rank = comm->size;
    comm->member[comm->size] = ue;
    comm->size++;
  }

  // An already initialized communicator may legally be overwritten: only its
  // membership changes, the synchronization flags allocated earlier are reused.
  if (comm->initialized == RCCE_COMM_INITIALIZED) return(RCCE_SUCCESS);

  // first-time initialization: allocate the synchronization flags
#ifdef USE_FAT_BARRIER
  for (ue = 0; ue < RCCE_NP; ue++) {
    status = RCCE_flag_alloc(&(comm->gather[ue]));
    if (status)
      return(RCCE_error_return(RCCE_debug_comm,status));
  }
#else
  status = RCCE_flag_alloc(&(comm->gather));
  if (status)
    return(RCCE_error_return(RCCE_debug_comm,status));
#endif
  status = RCCE_flag_alloc(&(comm->release));
  if (status)
    return(RCCE_error_return(RCCE_debug_comm,status));

  comm->label = 0;
  comm->initialized = RCCE_COMM_INITIALIZED;
  return(RCCE_SUCCESS);
}
// DO NOT USE THIS FUNCTION IN NON-GORY MODE UNTIL MALLOC_FREE HAS BEEN IMPLEMENTED
// Releases the synchronization flags of a communicator and marks it as not
// initialized. Returns RCCE_SUCCESS, RCCE_ERROR_COMM_UNDEFINED when comm is NULL,
// or RCCE_ERROR_COMM_INITIALIZED when the communicator was never initialized.
int RCCE_comm_free(RCCE_COMM *comm) {
  printf("DO NOT USE IN NON-GORY MODE UNTIL MALLOC_FREE HAS BEEN IMPLEMENTED\n");
  // reject a missing communicator the same way RCCE_comm_split does instead of
  // dereferencing a NULL pointer
  if (!comm)
    return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_COMM_UNDEFINED));
  if (comm->initialized != RCCE_COMM_INITIALIZED)
    return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_COMM_INITIALIZED));
#ifndef USE_FAT_BARRIER
  RCCE_flag_free(&(comm->gather));
#else
  { int i;
    // the fat barrier owns one gather flag per UE
    for (i=0; i<RCCE_NP; i++)
      RCCE_flag_free(&(comm->gather[i]));
  }
#endif
  RCCE_flag_free(&(comm->release));
  comm->initialized = RCCE_COMM_NOT_INITIALIZED;
  return(RCCE_SUCCESS);
}
//--------------------------------------------------------------------------------------
// FUNCTION: RCCE_comm_size
// returns the number of UEs inside the communicator
//--------------------------------------------------------------------------------------
int RCCE_comm_size(
  RCCE_COMM comm, // communicator to query
  int *size       // receives the number of UEs in the communicator
  ) {
  // only an initialized communicator has a meaningful size
  if (comm.initialized != RCCE_COMM_INITIALIZED)
    return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_COMM_INITIALIZED));

  *size = comm.size;
  return(RCCE_SUCCESS);
}
//--------------------------------------------------------------------------------------
// FUNCTION: RCCE_comm_rank
// returns the rank of the calling UE inside the communicator
//--------------------------------------------------------------------------------------
int RCCE_comm_rank(
  RCCE_COMM comm, // communicator to query
  int *rank       // receives the rank of the calling UE in the communicator
  ) {
  // only an initialized communicator has a meaningful rank
  if (comm.initialized != RCCE_COMM_INITIALIZED)
    return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_COMM_INITIALIZED));

  *rank = comm.my_rank;
  return(RCCE_SUCCESS);
}
//--------------------------------------------------------------------------------------
// FUNCTION: RCCE_global_color
// use this trivial color function to define global communicator
//--------------------------------------------------------------------------------------
int RCCE_global_color(int rank, void *nothing) {
  // every UE gets the same color, so all of them end up in one communicator
  (void) rank;
  (void) nothing;
  return 1;
}

View file

@ -0,0 +1,163 @@
//***************************************************************************************
// Diagnostic routines.
//***************************************************************************************
//
// Author: Rob F. Van der Wijngaart
// Intel Corporation
// Date: 008/30/2010
//
//***************************************************************************************
//
// Copyright 2010 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "RCCE_lib.h"
#include "RCCE_debug.h"
#define MAX_ERROR_NUMBER 26
//......................................................................................
// GLOBAL VARIABLES USED BY THE LIBRARY
//......................................................................................
const char *RCCE_estrings[] = {
/* 0 */ "Success",
/* 1 */ "Invalid target buffer",
/* 2 */ "Invalid source buffer",
/* 3 */ "Invalid UE ID",
/* 4 */ "Invalid message length",
/* 5 */ "Flag variable undefined",
/* 6 */ "Emulated NUEs do not match requested NUEs",
/* 7 */ "Message buffers overlap in comm buffer",
/* 8 */ "Data buffer misalignment",
/* 9 */ "Debug flag not defined",
/* 10 */ "RCCE_flag variable not inside comm buffer",
/* 11 */ "Flag status not defined",
/* 12 */ "Flag not allocated",
/* 13 */ "Value not defined",
/* 14 */ "Invalid error code",
/* 15 */ "RPC data structure not allocated",
/* 16 */ "RPC internal error",
/* 17 */ "Multiple outstanding RPC requests",
/* 18 */ "Invalid power step",
/* 19 */ "Maximum allowable frequency exceeded",
/* 20 */ "No active RPC request",
/* 21 */ "Stale RPC request",
/* 22 */ "Communicator undefined",
/* 23 */ "Illegal reduction operator",
/* 24 */ "Illegal data type",
/* 25 */ "Memory allocation error",
/* 26 */ "Communicator initialization error",
/* 27 */ "Multicast is not supported in remote-put/local-get mode"
};
// GLOBAL VARIABLES USED BY THE LIBRARY
//--------------------------------------------------------------------------------------
// FUNCTION: RCCE_error_string
//--------------------------------------------------------------------------------------
// RCCE_error_string returns a descriptive error string
//--------------------------------------------------------------------------------------
int RCCE_error_string(
int err_no, // number of error to be described
char *error_string, // copy of error string
int *string_length // length of error string
) {
if (err_no != RCCE_SUCCESS) {
err_no -= RCCE_ERROR_BASE;
if (err_no < 1 || err_no > MAX_ERROR_NUMBER) {
strcpy(error_string,"");
*string_length=0;
return(RCCE_error_return(RCCE_debug_debug,RCCE_ERROR_INVALID_ERROR_CODE));
}
}
strcpy(error_string,RCCE_estrings[err_no]);
*string_length = strlen(RCCE_estrings[err_no]);
return(RCCE_SUCCESS);
}
//--------------------------------------------------------------------------------------
// FUNCTION: RCCE_error_return
//--------------------------------------------------------------------------------------
// prints diagnostic error string, governed by input flag, also returns the error code
//--------------------------------------------------------------------------------------
int RCCE_error_return(
int debug_flag, // flag that controls diagnostic printing
int err_no // number of error to be printed
) {
char error_string[RCCE_MAX_ERROR_STRING];
int string_length;
if (debug_flag && err_no) {
RCCE_error_string(err_no, error_string, &string_length);
fprintf(STDERR,"Error on UE %d: %s\n", RCCE_IAM, error_string); fflush(NULL);
}
return(err_no);
}
//--------------------------------------------------------------------------------------
// FUNCTION: RCCE_debug_set
//--------------------------------------------------------------------------------------
// turns on debugging of a certain library feature
//--------------------------------------------------------------------------------------
int RCCE_debug_set(
  int flag // selects which library feature is instrumented (RCCE_DEBUG_*)
  ){
  const int everything = (flag == RCCE_DEBUG_ALL);

  // anything but one of the five known selectors is an error
  if (!everything &&
      flag != RCCE_DEBUG_SYNCH && flag != RCCE_DEBUG_COMM &&
      flag != RCCE_DEBUG_DEBUG && flag != RCCE_DEBUG_RPC)
    return(RCCE_error_return(RCCE_debug_debug,
                             RCCE_ERROR_DEBUG_FLAG));

  // RCCE_DEBUG_ALL switches on every category, the others exactly one
  if (everything || flag == RCCE_DEBUG_SYNCH) RCCE_debug_synch = 1;
  if (everything || flag == RCCE_DEBUG_COMM)  RCCE_debug_comm = 1;
  if (everything || flag == RCCE_DEBUG_DEBUG) RCCE_debug_debug = 1;
  if (everything || flag == RCCE_DEBUG_RPC)   RCCE_debug_RPC = 1;
  return(RCCE_SUCCESS);
}
//--------------------------------------------------------------------------------------
// FUNCTION: RCCE_debug_unset
//--------------------------------------------------------------------------------------
// turns off debugging of a certain library feature
//--------------------------------------------------------------------------------------
int RCCE_debug_unset(
  int flag // selects which library feature is uninstrumented (RCCE_DEBUG_*)
  ){
  const int everything = (flag == RCCE_DEBUG_ALL);

  // anything but one of the five known selectors is an error
  if (!everything &&
      flag != RCCE_DEBUG_SYNCH && flag != RCCE_DEBUG_COMM &&
      flag != RCCE_DEBUG_DEBUG && flag != RCCE_DEBUG_RPC)
    return(RCCE_error_return(RCCE_debug_debug,
                             RCCE_ERROR_DEBUG_FLAG));

  // RCCE_DEBUG_ALL switches off every category, the others exactly one
  if (everything || flag == RCCE_DEBUG_SYNCH) RCCE_debug_synch = 0;
  if (everything || flag == RCCE_DEBUG_COMM)  RCCE_debug_comm = 0;
  if (everything || flag == RCCE_DEBUG_DEBUG) RCCE_debug_debug = 0;
  if (everything || flag == RCCE_DEBUG_RPC)   RCCE_debug_RPC = 0;
  return(RCCE_SUCCESS);
}

View file

@ -0,0 +1,26 @@
/**************************************************************
* Change the RCCE_debug_xxx values to get debug info. *
* Change RCCE_comm_init_val to 1 to see what happens if *
* the comm buffers are not properly initialized at startup . *
**************************************************************/
// Diagnostic switches: a non-zero value makes RCCE_error_return() print the error
// for the respective category. They can be toggled at run time with
// RCCE_debug_set() / RCCE_debug_unset().
// NOTE(review): these are definitions, not declarations, so this header should be
// included from exactly one translation unit; confirm against the other includers.
int RCCE_debug_synch=0; // selected by RCCE_DEBUG_SYNCH
int RCCE_debug_comm=0; // selected by RCCE_DEBUG_COMM
int RCCE_debug_debug=0; // selected by RCCE_DEBUG_DEBUG
int RCCE_debug_RPC=0; // selected by RCCE_DEBUG_RPC
// set to 1 to see what happens when the comm buffers are not properly initialized
int RCCE_comm_init_val=0;
//
// Copyright 2010 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

View file

@ -0,0 +1,488 @@
//**************************************************************************************
// Flag manipulation and access functions.
// Single-bit and whole-cache-line flags are sufficiently different that we provide
// separate implementations of all the flag routines for each case
//**************************************************************************************
//
// Author: Rob F. Van der Wijngaart
// Intel Corporation
// Date: 008/30/2010
//
//**************************************************************************************
//
// Copyright 2010 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// [2012-09-07] added support for "tagged" flags
// by Carsten Clauss, Chair for Operating Systems,
// RWTH Aachen University
//
#include "RCCE_lib.h"
#if defined(COPPERRIDGE) && !defined(__hermit__)
#include "scc_memcpy.h"
#else
#define memcpy_scc memcpy
#endif
#ifdef USE_BYTE_FLAGS
#include "RCCE_byte_flags.c"
#else
#ifdef SINGLEBITFLAGS
//////////////////////////////////////////////////////////////////
// LOCKING SYNCHRONIZATION USING ONE BIT PER FLAG
//////////////////////////////////////////////////////////////////
//......................................................................................
// GLOBAL VARIABLES USED BY THE LIBRARY
//......................................................................................
// single bit flags are accessed with the granularity of integers. Compute the
// number of flags per integer
// Number of bits in an int and the index of its most significant bit. In the part of
// this file visible here they are referenced only by the disabled (#if 0) word-based
// bit routines; the active byte-based routines use the literal 8 instead.
int WORDSIZE = sizeof(int)*8;
int LEFTMOSTBIT = sizeof(int)*8-1;
//......................................................................................
// END GLOBAL VARIABLES USED BY THE LIBRARY
//......................................................................................
// Head of the linked list of flag lines, statically initialized as: usage map cleared,
// no MPB line attached yet (line_address NULL), zero members, no successor.
RCCE_FLAG_LINE RCCE_flags =
// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
{{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
NULL, 0, NULL};
// next three utility functions are only used by the library, not the user. We assume
// there will never be errors, so we do not return any meaningful error code. "location"
// of a flag bit inside a cache line is reckoned from the most significant (leftmost)
// bit. Within a byte, flag zero is also in the leftmost bit
//--------------------------------------------------------------------------------------
// FUNCTION: RCCE_bit_value
//--------------------------------------------------------------------------------------
// return status of single bit flag at a specific location within cache line
//--------------------------------------------------------------------------------------
#if 0
// BUGGY VERSION (by Intel):
RCCE_FLAG_STATUS RCCE_bit_value(t_vcharp line_address, int location) {
// NOTE(review): this version is disabled by the surrounding #if 0 and kept for reference.
// location/WORDSIZE is a word index, but it is added to line_address before the cast to
// t_vintp; assuming t_vcharp is a byte pointer, the offset is applied in bytes rather
// than words — presumably the defect the byte-based replacement addresses. Confirm.
t_vintp character = (t_vintp) (line_address + location/WORDSIZE);
int bit_position = (LEFTMOSTBIT-(location%WORDSIZE));
unsigned int mask = 1<<bit_position;
return (((*character) & mask)>>bit_position);
}
#else
// FIXED VERSION (by LfBS):
RCCE_FLAG_STATUS RCCE_bit_value(t_vcharp line_address, int location) {
  // byte of the line that holds the requested flag bit
  t_vcharp byte_ptr = (t_vcharp) (line_address + location/8);
  // flag zero of each byte lives in its most significant bit
  int shift = 7 - location%8;
  // move the requested bit down to position zero and isolate it
  return (((*byte_ptr) >> shift) & 1);
}
#endif
//--------------------------------------------------------------------------------------
// FUNCTION: RCCE_flip_bit_value
//--------------------------------------------------------------------------------------
// flip single bit in cache line and return value of changed bit. The location is that
// of the bit inside the line. To find the byte it is in, divide by 8 (the disabled
// Intel version divided by WORDSIZE to find the word instead).
//--------------------------------------------------------------------------------------
#if 0
// BUGGY VERSION (by Intel):
RCCE_FLAG_STATUS RCCE_flip_bit_value(t_vcharp line_address, int location) {
// NOTE(review): disabled by the surrounding #if 0 and kept for reference. The word index
// location/WORDSIZE is added to line_address before the cast to t_vintp; assuming
// t_vcharp is a byte pointer, the offset is in bytes, not words. Confirm.
t_vintp character = (t_vintp) (line_address + location/WORDSIZE);
int bit_position = (LEFTMOSTBIT-(location%WORDSIZE));
unsigned int mask = 1<<bit_position;
(*character) ^= mask;
return ((mask & (*character))>>bit_position);
}
#else
// FIXED VERSION (by LfBS):
RCCE_FLAG_STATUS RCCE_flip_bit_value(t_vcharp line_address, int location) {
  // locate the byte holding the flag and the bit inside it (bit 7 is flag zero)
  t_vcharp byte_ptr = (t_vcharp) (line_address + location/8);
  int shift = 7 - location%8;
  unsigned char bit_mask = 1<<shift;

  // toggle the bit in place, then read it back and report its new value
  (*byte_ptr) ^= bit_mask;
  return (((*byte_ptr) & bit_mask)>>shift);
}
#endif
//--------------------------------------------------------------------------------------
// FUNCTION: RCCE_write_bit_value
//--------------------------------------------------------------------------------------
// write single bit in cache line; always returns RCCE_SUCCESS. The location is that
// of the bit inside the line. To find the byte it is in, divide by 8 (the disabled
// Intel version divided by WORDSIZE to find the word instead).
//--------------------------------------------------------------------------------------
#if 0
// BUGGY VERSION (by Intel):
int RCCE_write_bit_value(t_vcharp line_address, int location, RCCE_FLAG_STATUS val) {
// NOTE(review): disabled by the surrounding #if 0 and kept for reference. The word index
// location/WORDSIZE is added to line_address before the cast to t_vintp; assuming
// t_vcharp is a byte pointer, the offset is in bytes, not words. Confirm.
t_vintp character = (t_vintp)(line_address + location/WORDSIZE);
int bit_position = (LEFTMOSTBIT-(location%WORDSIZE));
unsigned int mask;
switch (val) {
// clear the bit: AND with a mask that has every bit set except the target
case RCCE_FLAG_UNSET: mask = ~(1<<bit_position);
(*character) &= mask;
break;
// set the bit: OR with a mask that has only the target bit set
case RCCE_FLAG_SET: mask = 1<<bit_position;
(*character) |= mask;
break;
}
return (RCCE_SUCCESS);
}
#else
// FIXED VERSION (by LfBS):
int RCCE_write_bit_value(t_vcharp line_address, int location, RCCE_FLAG_STATUS val) {
  // locate the byte holding the flag and the bit inside it (bit 7 is flag zero)
  t_vcharp byte_ptr = (t_vcharp)(line_address + location/8);
  int shift = 7 - location%8;
  unsigned char bit_mask = 1<<shift;

  if (val == RCCE_FLAG_UNSET) {
    // clear the bit, leaving all others in the byte untouched
    (*byte_ptr) &= (unsigned char) ~bit_mask;
  }
  else if (val == RCCE_FLAG_SET) {
    // raise the bit, leaving all others in the byte untouched
    (*byte_ptr) |= bit_mask;
  }
  // any other value of val leaves the byte unchanged
  return (RCCE_SUCCESS);
}
#endif
//--------------------------------------------------------------------------------------
// FUNCTION: RCCE_flag_alloc
//--------------------------------------------------------------------------------------
// allocate space for single bit flag. Since 256 fit on a single cache line, we only
// need to allocate new MPB space when all existing lines are completely filled. A
// flag line is a data structure that contains an array of characters called "flag",
// of which the first RCCE_LINE_SIZE are used as a bitmap. Each bit in field "flag"
// corresponds to a flag being in use (bit is 1) or not (bit is 0). The actual value
// of the flag is stored in the MPB line pointed to by the field "line_address," at
// the same bit location as in field "flag."
int RCCE_flag_alloc(RCCE_FLAG *flag) {
  // Hands out one single-bit flag. On success fills in flag->location (bit index inside
  // the line) and flag->line_address (MPB line holding the flag value) and returns
  // RCCE_SUCCESS; otherwise returns the result of RCCE_error_return() for
  // RCCE_ERROR_FLAG_NOT_ALLOCATED and leaves the list of flag lines unchanged.
  RCCE_FLAG_LINE *flagp, *newline;
  int c, loc;

  // walk to the first flag line with a free slot, or to the last line if all are full
  flagp = &RCCE_flags;
  while (flagp->members == RCCE_FLAGS_PER_LINE && flagp->next) {
    flagp = flagp->next;
  }

  // a line that has never been used has no MPB line attached yet
  if (!flagp->line_address) flagp->line_address = RCCE_malloc(RCCE_LINE_SIZE);
  if (!flagp->line_address) return(RCCE_error_return(RCCE_debug_synch,
                                   RCCE_ERROR_FLAG_NOT_ALLOCATED));

  if (flagp->members < RCCE_FLAGS_PER_LINE) {
    // there is space in this line for a new flag; claim the first open slot
    for (loc=0; loc<RCCE_LINE_SIZE*8; loc++)
      if (!RCCE_bit_value((t_vcharp)(flagp->flag),loc)) {
        RCCE_flip_bit_value((t_vcharp)(flagp->flag),loc);
        flagp->members++;
        flag->location = loc;
        flag->line_address = flagp->line_address;
        return(RCCE_SUCCESS);
      }
    // the member count and the bitmap disagree; do not report success with an
    // unfilled flag
    return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_NOT_ALLOCATED));
  }

  // every existing line is full: build a new one. It is linked into the list only
  // after it is completely initialized, so a failed allocation cannot leave a
  // half-built node (with garbage "members"/"next") reachable from RCCE_flags.
  newline = (RCCE_FLAG_LINE *) malloc(sizeof(RCCE_FLAG_LINE));
  if (!newline) return(RCCE_error_return(RCCE_debug_synch,
                       RCCE_ERROR_FLAG_NOT_ALLOCATED));
  newline->line_address = RCCE_malloc(RCCE_LINE_SIZE);
  if (!(newline->line_address)) {
    free(newline);
    return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_NOT_ALLOCATED));
  }

  // initialize the flag line: empty bitmap, one member, no successor
  newline->members = 1;
  newline->next = NULL;
  for (c=0; c<RCCE_LINE_SIZE; c++) newline->flag[c] = 0;
  // flip the very first bit to mark slot zero as in use
  RCCE_flip_bit_value((t_vcharp)(newline->flag),0);
  flagp->next = newline;

  flag->location = 0;
  flag->line_address = newline->line_address;
  return(RCCE_SUCCESS);
}
//--------------------------------------------------------------------------------------
// FUNCTION: RCCE_flag_free
//--------------------------------------------------------------------------------------
// free space for single bit flag. Since 256 fit on a single cache line, we only
// need to free claimed MPB space when an existing line is completely emptied.
//--------------------------------------------------------------------------------------
int RCCE_flag_free(RCCE_FLAG *flag) {
  RCCE_FLAG_LINE *line = &RCCE_flags;
  RCCE_FLAG_LINE *prev = NULL;

  // reject a missing flag or one whose location has already been invalidated
  if (!flag || flag->location < 0)
    return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_UNDEFINED));

  // search the global list for the line this flag lives in, remembering the
  // predecessor so the line can be unlinked later
  for (; line->next && flag->line_address != line->line_address; line = line->next)
    prev = line;
  if (flag->line_address != line->line_address)
    return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_UNDEFINED));

  // release the slot in the usage bitmap and adjust the member count
  RCCE_flip_bit_value((t_vcharp)(line->flag),flag->location);
  line->members--;

  // an emptied line is removed only when it has a predecessor; the head of the
  // list (no predecessor) is left in place together with its MPB line
  if (line->members==0 && prev) {
    prev->next = line->next;
    RCCE_free(line->line_address);
    free(line);
  }

  // poison the handle so a second free of the same flag is caught above
  flag->line_address = NULL;
  flag->location = -1;
  return(RCCE_SUCCESS);
}
//--------------------------------------------------------------------------------------
// FUNCTION: RCCE_flag_write
//--------------------------------------------------------------------------------------
// This is the core flag manipulation routine. It requires locking to guarantee atomic
// access while updating one of a line of flags.
//--------------------------------------------------------------------------------------
int RCCE_flag_write(RCCE_FLAG *flag, RCCE_FLAG_STATUS val, int ID) {
  // Sets or clears one single-bit flag on UE "ID" by a locked read-modify-write of the
  // whole MPB line that contains it. Returns the result of RCCE_error_return() for the
  // first error encountered (or for the RCCE_put status on the normal path).
  t_vchar val_array[RCCE_LINE_SIZE];
  int error;

#ifdef GORY
  // check input parameters
  if (!flag || flag->location < 0 || flag->location > 255)
    return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_UNDEFINED));
  if (val != RCCE_FLAG_UNSET && val != RCCE_FLAG_SET)
    return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_STATUS_UNDEFINED));
#endif

  // acquire lock to make sure nobody else fiddles with the flags on the target core
  RCCE_acquire_lock(ID);

  // copy entire MPB cache line containing flag to local space
  error = RCCE_get(val_array, flag->line_address, RCCE_LINE_SIZE, ID);
  if (!error) {
    // overwrite single bit within local copy of cache line
    RCCE_write_bit_value(val_array, flag->location, val);
    // write copy back to the MPB
    error = RCCE_put(flag->line_address, val_array, RCCE_LINE_SIZE, ID);
  }

  // release the lock on every path; returning early after a failed RCCE_get would
  // leave the target core's lock held
  RCCE_release_lock(ID);
  return(RCCE_error_return(RCCE_debug_synch,error));
}
//--------------------------------------------------------------------------------------
// FUNCTION: RCCE_flag_read
//--------------------------------------------------------------------------------------
// This routine is rarely needed. We typically only read a flag when we're waiting for
// it to change value (function RCCE_wait_until). Reading does not require locking. The
// moment the target flag we're trying to read changes value, it is OK to read and
// return that value
//--------------------------------------------------------------------------------------
int RCCE_flag_read(RCCE_FLAG flag, RCCE_FLAG_STATUS *val, int ID) {
  // private copy of the MPB line that contains the flag
  volatile unsigned char line_copy[RCCE_LINE_SIZE];
  int error;

#ifdef GORY
  // validate the flag position and the output pointer
  if (!(flag.location >= 0 && flag.location <= 255))
    return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_UNDEFINED));
  if (val == NULL) return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_VAL_UNDEFINED));
#endif

  // Should be able to use same technique as in RCCE_wait_until, i.e., should not need
  // to copy out of MPB first. However, this function is not time critical
  error = RCCE_get(line_copy, flag.line_address, RCCE_LINE_SIZE, ID);
  if (error)
    return(RCCE_error_return(RCCE_debug_synch,error));

  // pick the flag's bit out of the local copy
  *val = RCCE_bit_value(line_copy, flag.location);
  return(RCCE_SUCCESS);
}
#else
//////////////////////////////////////////////////////////////////
// LOCKLESS SYNCHRONIZATION USING ONE WHOLE CACHE LINE PER FLAG //
//////////////////////////////////////////////////////////////////
//--------------------------------------------------------------------------------------
// FUNCTION: RCCE_flag_alloc
//--------------------------------------------------------------------------------------
// there is no internal structure to whole-cache-line flags; a new flag simply means a
// newly allocated line in the MPB
//--------------------------------------------------------------------------------------
int RCCE_flag_alloc(RCCE_FLAG *flag) {
  // a whole-cache-line flag is nothing more than one freshly allocated MPB line
  *flag = (RCCE_FLAG) RCCE_malloc(RCCE_LINE_SIZE);
  if (*flag) return(RCCE_SUCCESS);
  // allocation came back empty
  return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_UNDEFINED));
}
//--------------------------------------------------------------------------------------
// FUNCTION: RCCE_flag_free
//--------------------------------------------------------------------------------------
// there is no internal structure to whole-cache-line flags; deleting a flag simply
// means deallocating line in the MPB
//--------------------------------------------------------------------------------------
int RCCE_flag_free(RCCE_FLAG *flag) {
  if (flag) {
    // give the flag's MPB line back to the allocator
    RCCE_free((t_vcharp)(*flag));
    return(RCCE_SUCCESS);
  }
  // no flag handle supplied
  return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_UNDEFINED));
}
//--------------------------------------------------------------------------------------
// FUNCTION: RCCE_flag_write
//--------------------------------------------------------------------------------------
// This is the core flag manipulation routine. No locking required. We simply write the
// flag value into the first word of a local (private) buffer of the size of a cache
// line and copy it to the corresponding location in the MPB.
//--------------------------------------------------------------------------------------
int RCCE_flag_write(RCCE_FLAG *flag, RCCE_FLAG_STATUS val, int ID) {
  int error;
#ifdef USE_FLAG_EXPERIMENTAL
  // experimental path: transfer the flag as a single byte
  volatile unsigned char value = val;
  error = RCCE_put_flag(*flag, &value, 1, ID);
#else
  // private, zero-filled image of the cache line that will be written to the MPB
  volatile unsigned char val_array[RCCE_LINE_SIZE] = {0};
#ifdef GORY
  // check input parameters
  if (!flag || !(*flag)) return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_UNDEFINED));
  if (val != RCCE_FLAG_UNSET && val != RCCE_FLAG_SET)
    return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_STATUS_UNDEFINED));
#endif
  // the flag value occupies one int: the last one of the line with reverted flags,
  // the first one otherwise
#ifdef USE_REVERTED_FLAGS
  *(int *) &val_array[RCCE_LINE_SIZE-sizeof(int)] = val;
#else
  *(int *) val_array = val;
#endif
  // push the whole line image to the flag's location on UE "ID"
  error = RCCE_put((t_vcharp)(*flag), val_array, RCCE_LINE_SIZE, ID);
#endif
  return(RCCE_error_return(RCCE_debug_synch,error));
}
#ifdef USE_TAGGED_FLAGS
int RCCE_flag_write_tagged(RCCE_FLAG *flag, RCCE_FLAG_STATUS val, int ID, void* tag, int len) {
  // Writes flag value "val" to the flag on UE "ID" and, if "tag" is non-NULL, piggybacks
  // up to RCCE_LINE_SIZE-sizeof(int) bytes of tag data in the rest of the same cache
  // line. A longer "len" is truncated to that maximum; a "len" of zero or less copies
  // no tag data. Returns the result of RCCE_error_return() for the RCCE_put status.
  unsigned char val_array[RCCE_LINE_SIZE] = {0};
  // room left for the tag once the flag's int has been placed; kept signed so the
  // comparison with "len" is not performed in unsigned arithmetic
  const int max_tag_len = RCCE_LINE_SIZE - (int) sizeof(int);
  int error;

  // the flag value occupies the first int of the line, or the last with reverted flags
#ifndef USE_REVERTED_FLAGS
  *(int *) val_array = val;
#else
  *(int *) &val_array[RCCE_LINE_SIZE-sizeof(int)] = val;
#endif

  if (tag && len > 0)
  {
    if (len > max_tag_len) len = max_tag_len;
    // the tag fills the part of the line not used by the flag value
#ifndef USE_REVERTED_FLAGS
    memcpy_scc(&val_array[sizeof(int)], tag, len);
#else
    memcpy_scc(&val_array[0], tag, len);
#endif
  }

  error = RCCE_put((t_vcharp)(*flag), val_array, RCCE_LINE_SIZE, ID);
  return(RCCE_error_return(RCCE_debug_synch,error));
}
#endif
//--------------------------------------------------------------------------------------
// FUNCTION: RCCE_flag_read
//--------------------------------------------------------------------------------------
// This routine is rarely needed. We typically only read a flag when we're waiting for
// it to change value (function RCCE_wait_until). Reading requires copying the whole
// MPB cache line containing the flag to a private buffer and returning the first int.
//--------------------------------------------------------------------------------------
int RCCE_flag_read(RCCE_FLAG flag, RCCE_FLAG_STATUS *val, int ID) {
  int error;
#ifdef USE_FLAG_EXPERIMENTAL
  // experimental path: fetch the flag as a single byte
  volatile unsigned char value;
  error = RCCE_get_flag(&value, (t_vcharp)flag, 1, ID);
  if (error)
    return(RCCE_error_return(RCCE_debug_synch,error));
  if (val) *val = value;
#else
  // private copy of the flag's cache line
  volatile unsigned char val_array[RCCE_LINE_SIZE];
#ifdef GORY
  if (!flag) return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_UNDEFINED));
  if (!val) return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_VAL_UNDEFINED));
#endif
  error = RCCE_get(val_array, (t_vcharp)flag, RCCE_LINE_SIZE, ID);
  if (error)
    return(RCCE_error_return(RCCE_debug_synch,error));
  // the flag value is the first int of the line, or the last with reverted flags;
  // it is only stored when the caller supplied somewhere to put it
  if (val) {
#ifdef USE_REVERTED_FLAGS
    *val = *(int *)&val_array[RCCE_LINE_SIZE-sizeof(int)];
#else
    *val = *(int *)val_array;
#endif
  }
#endif
  return(RCCE_SUCCESS);
}
#ifdef USE_TAGGED_FLAGS
int RCCE_flag_read_tagged(RCCE_FLAG flag, RCCE_FLAG_STATUS *val, int ID, void *tag, int len) {
  // Reads the flag's cache line from UE "ID", stores the flag value in *val (if "val" is
  // non-NULL) and, when the flag value is non-zero and "tag" is non-NULL, copies up to
  // RCCE_LINE_SIZE-sizeof(int) bytes of piggybacked tag data into "tag". A longer "len"
  // is truncated to that maximum; a "len" of zero or less copies no tag data.
  unsigned char val_array[RCCE_LINE_SIZE];
  // room available for the tag next to the flag's int; kept signed so the comparison
  // with "len" is not performed in unsigned arithmetic
  const int max_tag_len = RCCE_LINE_SIZE - (int) sizeof(int);
  int error;

  if(error=RCCE_get(val_array, (t_vcharp)flag, RCCE_LINE_SIZE, ID))
    return(RCCE_error_return(RCCE_debug_synch,error));

  // the flag value is the first int of the line, or the last with reverted flags
#ifndef USE_REVERTED_FLAGS
  if(val) *val = *(int *)val_array;
#else
  if(val) *val = *(int *)&val_array[RCCE_LINE_SIZE-sizeof(int)];
#endif

  // the tag is only handed out when the flag was read and found non-zero
  if( (val) && (*val) && (tag) && (len > 0) ) {
    if( len > max_tag_len ) len = max_tag_len;
#ifndef USE_REVERTED_FLAGS
    memcpy_scc(tag, &val_array[sizeof(int)], len);
#else
    memcpy_scc(tag, &val_array[0], len);
#endif
  }
  return(RCCE_SUCCESS);
}
#endif
#endif
#endif

169
hermit/usr/ircce/RCCE_get.c Normal file
View file

@ -0,0 +1,169 @@
//***************************************************************************************
// Get data from communication buffer.
//***************************************************************************************
//
// Author: Rob F. Van der Wijngaart
// Intel Corporation
// Date: 008/30/2010
//
//***************************************************************************************
//
// Copyright 2010 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// [2010-11-03] switched to SCC-optimized memcpy() functions in scc_memcpy.h:
// - memcpy_to_mpb()
// - memcpy_from_mpb()
// by Stefan Lankes, Carsten Clauss, Chair for Operating Systems,
// RWTH Aachen University
//
#include "RCCE_lib.h"
#ifdef COPPERRIDGE
#ifdef __hermit__
#define memcpy_from_mpb memcpy
#else
#include "scc_memcpy.h"
#endif
#endif
void *RCCE_memcpy_get(void *dest, const void *src, size_t count)
{ // out-of-line wrapper so that external code can call the copy routine selected below
  void *copied;
#ifdef COPPERRIDGE
  // COPPERRIDGE builds use memcpy_from_mpb
  copied = memcpy_from_mpb(dest, src, count);
#else
  // all other builds use plain memcpy
  copied = memcpy(dest, src, count);
#endif
  return copied;
}
#ifdef COPPERRIDGE
#define RCCE_memcpy_get(a,b,c) memcpy_from_mpb(a,b,c)
#else
#define RCCE_memcpy_get(a,b,c) memcpy(a,b,c)
#endif
//--------------------------------------------------------------------------------------
// FUNCTION: RCCE_get
//--------------------------------------------------------------------------------------
// copy data from address "source" in the remote MPB to address "target" in either the
// local MPB, or in the calling UE's private memory. We do not test to see if a move
// into the calling UE's private memory stays within allocated memory *
//--------------------------------------------------------------------------------------
int RCCE_get(
  t_vcharp target, // target buffer, MPB or private memory
  t_vcharp source, // source buffer, MPB
  int num_bytes,   // number of bytes to copy (must be multiple of cache line size)
  int ID           // rank of source UE
  ) {
  // Copies num_bytes from "source", given as an address in the calling UE's own MPB and
  // shifted here to the same offset in the MPB of UE "ID", to "target". Returns
  // RCCE_SUCCESS, or in GORY mode the result of RCCE_error_return() for the first
  // failed parameter check.

#ifdef GORY
  // we only need to do tests in GORY mode; in non-GORY mode this function is never
  // called by the user, but only by the library
  int copy_mode;
  int start_index;

  // check validity of parameters
  if (!target) return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_TARGET));
  if (!source) return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_SOURCE));
  if (ID<0 || ID>=RCCE_NP) return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ID));
  if (num_bytes <0 || num_bytes%RCCE_LINE_SIZE!=0)
    return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_MESSAGE_LENGTH));

  // determine if source data is in MPB; check using local buffer boundaries
  if (source - RCCE_comm_buffer[RCCE_IAM] >=0 &&
      source+num_bytes - (RCCE_comm_buffer[RCCE_IAM] + RCCE_BUFF_SIZE)<=0)
    // shift source address to point to remote MPB
    source = RCCE_comm_buffer[ID]+(source-RCCE_comm_buffer[RCCE_IAM]);
  else return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_SOURCE));

  // target can be either local MPB or private memory
  if (target -RCCE_comm_buffer[RCCE_IAM] >= 0 &&
      target+num_bytes - (RCCE_comm_buffer[RCCE_IAM] + RCCE_BUFF_SIZE)<=0)
    copy_mode = BOTH_IN_COMM_BUFFER;
  else
    copy_mode = TARGET_IN_PRIVATE_MEMORY;

  // make sure that if the copy is between locations within the same MPB
  // there is no overlap between source and target address ranges: the ranges
  // [source, source+num_bytes) and [target, target+num_bytes) intersect exactly
  // when each one starts before the other one ends
  if (copy_mode == BOTH_IN_COMM_BUFFER) {
    if ((source - (target+num_bytes)) < 0 &&
        (target - (source+num_bytes)) < 0) {
      return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_DATA_OVERLAP));
    }
  }

  // ascertain that the start of the source buffer is cache line aligned
  start_index = source-RCCE_comm_buffer[ID];
  if (start_index%RCCE_LINE_SIZE!=0)
    return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ALIGNMENT));

  // only verify alignment of the target if it is in the MPB; the target lies in the
  // calling UE's own MPB, so measure its offset from that buffer
  if (copy_mode == BOTH_IN_COMM_BUFFER) {
    start_index = target-RCCE_comm_buffer[RCCE_IAM];
    if (start_index%RCCE_LINE_SIZE!=0)
      return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ALIGNMENT));
  }
#else
  // in non-GORY mode we only need to retain the MPB source shift; we
  // already know the source is in the MPB, not private memory
  source = RCCE_comm_buffer[ID]+(source-RCCE_comm_buffer[RCCE_IAM]);
#endif

  // NOTE(review): this unconditional flush of all output streams looks like a leftover
  // from a disabled debug printf; kept to preserve behavior — confirm it can go
  fflush(NULL);

  // do the actual copy, making sure we copy fresh data
#ifdef _OPENMP
  #pragma omp flush
#endif
  RC_cache_invalidate();

  RCCE_memcpy_get((void *)target, (void *)source, num_bytes);

  // pointers must be printed with %p; %X expects an unsigned int
  if (RCCE_debug_synch)
    fprintf(STDERR,"UE %d get data: %d from address %p \n", RCCE_IAM,*target,(void *)source);

  // flush data to make sure it is visible to all threads; cannot use a flush list
  // because it concerns malloced space
#ifdef _OPENMP
  #pragma omp flush
#endif
  return(RCCE_SUCCESS);
}
#ifdef USE_FLAG_EXPERIMENTAL
int RCCE_get_flag(
  t_vcharp target, // target buffer, private memory
  t_vcharp source, // source buffer, MPB ncm mapped
  int num_bytes,   // not used: exactly one byte is copied
  int ID           // rank of source UE
  ) {
  // Reads a single flag byte: "source" is given as an address relative to the calling
  // UE's comm buffer and is shifted to the same offset in the flag buffer of UE "ID".
  // Always returns RCCE_SUCCESS.

  source = RCCE_flag_buffer[ID]+(source-RCCE_comm_buffer[RCCE_IAM]);

  *target = *source;

  // pointers must be printed with %p; %X expects an unsigned int
  if (RCCE_debug_synch)
    fprintf(STDERR,"UE %d get flag: %x from address %p \n", RCCE_IAM,*target,(void *)source);

  return(RCCE_SUCCESS);
}
#endif

382
hermit/usr/ircce/RCCE_lib.h Normal file
View file

@ -0,0 +1,382 @@
//
// Copyright 2010 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef RCCE_LIB_H
#define RCCE_LIB_H
#include "RCCE.h"
#ifdef _OPENMP
#include <omp.h>
#endif
#include <string.h>
//#define AIR
#undef USE_FLAG_EXPERIMENTAL
#undef USE_RCCE_COMM
#undef USE_FAT_BARRIER
#undef USE_PIPELINE_FLAGS
#undef USE_PROBE_FLAGS
#undef USE_TAGGED_FLAGS
#undef USE_TAGGED_FOR_SHORT
#undef USE_REVERTED_FLAGS
#undef USE_REMOTE_PUT_LOCAL_GET
#undef USE_PROBE_FLAGS_SHORTCUT
#define USE_SYNCH_FOR_ZERO_BYTE
// override certain settings for SCC-MPICH:
//#include "scc-mpich-defs.h"
// adjust settings automatically?
#undef AUTO_ADJUST_SETTINGS
////////////////////////////////////////////////////////////////////////////////////////////////
#ifdef AUTO_ADJUST_SETTINGS
#ifdef SINGLEBITFLAGS
#ifdef USE_TAGGED_FLAGS
#warning TAGGED FLAGS CANNOT BE USED WITH SINGLEBITFLAGS! (#undef USE_TAGGED_FLAGS)
#undef USE_TAGGED_FLAGS
#undef USE_TAGGED_FOR_SHORT
#undef USE_PROBE_FLAGS_SHORTCUT
#endif
#ifdef USE_FAT_BARRIER
#warning FAT BARRIER CANNOT BE USED WITH SINGLEBITFLAGS! (#undef USE_FAT_BARRIER)
#undef USE_FAT_BARRIER
#endif
#endif
#ifdef USE_PROBE_FLAGS_SHORTCUT
#ifndef USE_PROBE_FLAGS
#warning THE PROBE FLAGS SHORTCUT REQUIRES PROBE FLAGS! (#define USE_PROBE_FLAGS)
#define USE_PROBE_FLAGS
#endif
#ifndef USE_TAGGED_FOR_SHORT
#warning THE PROBE FLAGS SHORTCUT REQUIRES TAGGED FLAGS! (#define USE_TAGGED_FLAGS)
#define USE_TAGGED_FLAGS
#endif
#endif
#ifdef USE_TAGGED_FOR_SHORT
#ifndef USE_TAGGED_FLAGS
#warning TAGGED SHORT MESSAGES REQUIRE TAGGED FLAGS! (#define USE_TAGGED_FLAGS)
#define USE_TAGGED_FLAGS
#endif
#endif
#ifdef USE_REMOTE_PUT_LOCAL_GET
#ifndef USE_PROBE_FLAGS
#warning PROBING FOR MESSAGES IN REMOTE-PUT/LOCAL-GET NEEDS ADDITIONAL PROBE FLAGS! (#define USE_PROBE_FLAGS)
#define USE_PROBE_FLAGS
#endif
#endif
#ifdef SCC_COUPLED_SYSTEMS
#ifndef USE_REVERTED_FLAGS
#ifdef USE_TAGGED_FLAGS
#warning COUPLED SYSTEMS REQUIRE REVERTED FLAGS WHEN USING TAGGED FLAGS! (#define USE_REVERTED_FLAGS)
#define USE_REVERTED_FLAGS
#endif
#endif
#ifndef USE_REMOTE_PUT_LOCAL_GET
#warning COUPLED SYSTEMS SHOULD USE REMOTE-PUT/LOCAL-GET! (#define USE_REMOTE_PUT_LOCAL_GET)
#define USE_REMOTE_PUT_LOCAL_GET
#endif
#else
#ifdef USE_PROBE_FLAGS
#warning NON-COUPLED SYSTEMS SHOULD NOT USE ADDITIONAL PROBE FLAGS! (#undef USE_PROBE_FLAGS)
#undef USE_PROBE_FLAGS
#endif
#endif
#ifdef USE_PROBE_FLAGS
#ifdef USE_FAT_BARRIER
#warning PROBABLY TOO LITTLE MPB SPACE FOR USING FAT BARRIER WITH PROBE FLAGS ENABLED! (#undef USE_FAT_BARRIER)
#undef USE_FAT_BARRIER
#endif
#endif
////////////////////////////////////////////////////////////////////////////////////////////////
#else // !AUTO_ADJUST_SETTINGS
#ifdef SINGLEBITFLAGS
#ifdef USE_TAGGED_FLAGS
#error TAGGED FLAGS CANNOT BE USED WITH SINGLEBITFLAGS! (#undef USE_TAGGED_FLAGS)
#endif
#undef USE_TAGGED_FLAGS
#undef USE_TAGGED_FOR_SHORT
#undef USE_PROBE_FLAGS_SHORTCUT
#ifdef USE_FAT_BARRIER
#error FAT BARRIER CANNOT BE USED WITH SINGLEBITFLAGS! (#undef USE_FAT_BARRIER)
#endif
#endif
#ifdef USE_PROBE_FLAGS_SHORTCUT
#ifndef USE_PROBE_FLAGS
#error THE PROBE FLAGS SHORTCUT REQUIRES PROBE FLAGS! (#define USE_PROBE_FLAGS)
#endif
#ifndef USE_TAGGED_FOR_SHORT
#error THE PROBE FLAGS SHORTCUT REQUIRES TAGGED FLAGS! (#define USE_TAGGED_FLAGS)
#endif
#endif
#ifdef USE_TAGGED_FOR_SHORT
#ifndef USE_TAGGED_FLAGS
#error TAGGED SHORT MESSAGES REQUIRE TAGGED FLAGS! (#define USE_TAGGED_FLAGS)
#endif
#endif
#ifdef USE_REMOTE_PUT_LOCAL_GET
#ifndef USE_PROBE_FLAGS
#warning PROBING FOR MESSAGES IN REMOTE-PUT/LOCAL-GET NEEDS ADDITIONAL PROBE FLAGS! (#define USE_PROBE_FLAGS)
#endif
#endif
#ifdef SCC_COUPLED_SYSTEMS
#ifdef USE_TAGGED_FLAGS
#ifndef USE_REVERTED_FLAGS
#error COUPLED SYSTEMS REQUIRE REVERTED FLAGS WHEN USING TAGGED FLAGS! (#define USE_REVERTED_FLAGS)
#endif
#endif
#ifndef USE_REMOTE_PUT_LOCAL_GET
#warning COUPLED SYSTEMS SHOULD USE REMOTE-PUT/LOCAL-GET! (#define USE_REMOTE_PUT_LOCAL_GET)
#endif
#else
#ifdef USE_PROBE_FLAGS
#warning NON-COUPLED SYSTEMS SHOULD NOT USE ADDITIONAL PROBE FLAGS! (#undef USE_PROBE_FLAGS)
#endif
#endif
#ifdef USE_PROBE_FLAGS
#ifdef USE_FAT_BARRIER
#warning PROBABLY TOO LITTLE MPB SPACE FOR USING FAT BARRIER WITH PROBE FLAGS ENABLED! (#undef USE_FAT_BARRIER)
#endif
#endif
#endif // !AUTO_ADJUST_SETTINGS
////////////////////////////////////////////////////////////////////////////////////////////////
/* PAD32byte is used to compute a cacheline padded length of n (input) bytes */
#define PAD32byte(n) ((n)%32==0 ? (n) : (n) + 32 - (n)%32)
//#define BITSPERCHAR 8
#define BOTH_IN_COMM_BUFFER 12
#define SOURCE_IN_PRIVATE_MEMORY 34
#define TARGET_IN_PRIVATE_MEMORY 56
#ifdef SINGLEBITFLAGS
#define RCCE_FLAGS_PER_BYTE 8
#else
#define RCCE_FLAGS_PER_BYTE 1
#endif
#define RCCE_FLAGS_PER_LINE (RCCE_LINE_SIZE*RCCE_FLAGS_PER_BYTE)
#define RCCE_SUM_INT (RCCE_SUM+(RCCE_NUM_OPS)*(RCCE_INT))
#define RCCE_SUM_LONG (RCCE_SUM+(RCCE_NUM_OPS)*(RCCE_LONG))
#define RCCE_SUM_FLOAT (RCCE_SUM+(RCCE_NUM_OPS)*(RCCE_FLOAT))
#define RCCE_SUM_DOUBLE (RCCE_SUM+(RCCE_NUM_OPS)*(RCCE_DOUBLE))
#define RCCE_MAX_INT (RCCE_MAX+(RCCE_NUM_OPS)*(RCCE_INT))
#define RCCE_MAX_LONG (RCCE_MAX+(RCCE_NUM_OPS)*(RCCE_LONG))
#define RCCE_MAX_FLOAT (RCCE_MAX+(RCCE_NUM_OPS)*(RCCE_FLOAT))
#define RCCE_MAX_DOUBLE (RCCE_MAX+(RCCE_NUM_OPS)*(RCCE_DOUBLE))
#define RCCE_MIN_INT (RCCE_MIN+(RCCE_NUM_OPS)*(RCCE_INT))
#define RCCE_MIN_LONG (RCCE_MIN+(RCCE_NUM_OPS)*(RCCE_LONG))
#define RCCE_MIN_FLOAT (RCCE_MIN+(RCCE_NUM_OPS)*(RCCE_FLOAT))
#define RCCE_MIN_DOUBLE (RCCE_MIN+(RCCE_NUM_OPS)*(RCCE_DOUBLE))
#define RCCE_PROD_INT (RCCE_PROD+(RCCE_NUM_OPS)*(RCCE_INT))
#define RCCE_PROD_LONG (RCCE_PROD+(RCCE_NUM_OPS)*(RCCE_LONG))
#define RCCE_PROD_FLOAT (RCCE_PROD+(RCCE_NUM_OPS)*(RCCE_FLOAT))
#define RCCE_PROD_DOUBLE (RCCE_PROD+(RCCE_NUM_OPS)*(RCCE_DOUBLE))
#define RCCE_COMM_INITIALIZED 45328976
#define RCCE_COMM_NOT_INITIALIZED -45328976
// auxiliary MPB pointer type
typedef volatile unsigned int* t_vintp;
// Also need dereferenced types
typedef volatile unsigned char t_vchar;
typedef volatile unsigned int t_vint;
// Descriptor of one block of buffer space; blocks are chained through "next".
typedef struct rcce_block {
t_vcharp space; // pointer to space for data in block
size_t free_size; // actual free space in block (0 or whole block)
size_t size; // size of an allocated block
struct rcce_block *next; // pointer to next block in circular linked list
} RCCE_BLOCK;
#if defined(SINGLEBITFLAGS) || defined(USE_BYTE_FLAGS)
// Bookkeeping for one line of flags; lines form a singly linked list through "next".
// Field notes below describe how the single-bit flag code in RCCE_flags.c uses them.
typedef struct rcce_flag_line {
char flag[RCCE_FLAGS_PER_LINE]; // usage map; single-bit code treats it as a bitmap (bit set = slot in use)
t_vcharp line_address; // MPB line holding the flag values; NULL until one is allocated
int members; // number of flags currently handed out from this line
struct rcce_flag_line *next; // next flag line, or NULL at the end of the list
} RCCE_FLAG_LINE;
#endif
// Handle for a list of RCCE_BLOCKs, identified by its tail.
typedef struct {
RCCE_BLOCK *tail; // "last" block in linked list of blocks
} RCCE_BLOCK_S;
#ifdef AIR
#define FPGA_BASE 0xf9000000
#define BACKOFF_MIN 8
#define BACKOFF_MAX 256
typedef volatile struct _RCCE_AIR {
int * counter;
int * init;
} RCCE_AIR;
#endif
#ifndef GORY
extern RCCE_FLAG RCCE_sent_flag[RCCE_MAXNP];
extern RCCE_FLAG RCCE_ready_flag[RCCE_MAXNP];
#ifdef USE_PIPELINE_FLAGS
extern RCCE_FLAG RCCE_sent_flag_pipe[RCCE_MAXNP];
extern RCCE_FLAG RCCE_ready_flag_pipe[RCCE_MAXNP];
#endif
#ifdef USE_PROBE_FLAGS
extern RCCE_FLAG RCCE_probe_flag[RCCE_MAXNP];
#endif
extern t_vcharp RCCE_buff_ptr;
extern size_t RCCE_chunk;
extern t_vcharp RCCE_flags_start;
#ifndef USE_REMOTE_PUT_LOCAL_GET
extern RCCE_SEND_REQUEST* RCCE_send_queue;
extern RCCE_RECV_REQUEST* RCCE_recv_queue[RCCE_MAXNP];
#else
extern RCCE_SEND_REQUEST* RCCE_send_queue[RCCE_MAXNP];
extern RCCE_RECV_REQUEST* RCCE_recv_queue;
#endif
#endif
//#ifdef USE_FLAG_EXPERIMENTAL
extern t_vcharp RCCE_flag_buffer[RCCE_MAXNP];
//#endif
#ifndef __hermit__
extern t_vcharp RCCE_fool_write_combine_buffer;
#endif
extern t_vcharp RCCE_comm_buffer[RCCE_MAXNP];
extern int RCCE_NP;
extern int RCCE_BUFF_SIZE;
#ifndef COPPERRIDGE
extern omp_lock_t RCCE_corelock[RCCE_MAXNP];
extern t_vchar RC_comm_buffer[RCCE_MAXNP*RCCE_BUFF_SIZE_MAX];
extern t_vchar RC_shm_buffer[RCCE_SHM_SIZE_MAX];
#endif
extern int RC_MY_COREID;
extern int RC_COREID[RCCE_MAXNP];
extern double RC_REFCLOCKGHZ;
extern int RCCE_IAM;
extern int RCCE_debug_synch;
extern int RCCE_debug_comm;
extern int RCCE_debug_debug;
extern int RCCE_debug_RPC;
#ifdef SINGLEBITFLAGS
extern RCCE_FLAG_LINE RCCE_flags;
extern int WORDSIZE;
extern int LEFTMOSTBIT;
RCCE_FLAG_STATUS RCCE_bit_value(t_vcharp, int);
RCCE_FLAG_STATUS RCCE_flip_bit_value(t_vcharp, int);
int RCCE_write_bit_value(t_vcharp, int, RCCE_FLAG_STATUS);
#endif
extern int RCCE_comm_init_val;
void RCCE_malloc_init(t_vcharp, size_t);
void RCCE_shmalloc_init(t_vcharp, size_t);
int RCCE_qsort(char *, size_t, size_t, int (*)(const void*, const void*));
int id_compare(const void *, const void *);
#if 0
int RCCE_probe(RCCE_FLAG);
#endif
int RCCE_error_return(int, int);
#ifdef __hermit__
#define RC_cache_invalidate() {}
#else
void RC_cache_invalidate(void);
#endif
int RCCE_acquire_treelock(RCCE_COMM*);
int RCCE_release_treelock(RCCE_COMM*);
int RCCE_TNS_barrier(RCCE_COMM*);
int RCCE_acquire_lock(int);
int RCCE_try_lock(int);
int RCCE_backoff_lock(int);
int RCCE_release_lock(int);
int RCCE_global_color(int, void *);
t_vcharp RC_COMM_BUFFER_START(int);
//#ifdef USE_FLAG_EXPERIMENTAL
t_vcharp RC_FLAG_BUFFER_START(int);
//#endif
#ifndef GORY
t_vcharp RCCE_malloc(size_t);
t_vcharp RCCE_malloc_request(size_t, size_t *);
t_vcharp RCCE_palloc(size_t, int);
void RCCE_free(t_vcharp);
int RCCE_put(t_vcharp, t_vcharp, int, int);
int RCCE_get(t_vcharp, t_vcharp, int, int);
int RCCE_wait_until(RCCE_FLAG, RCCE_FLAG_STATUS);
int RCCE_test_flag(RCCE_FLAG, RCCE_FLAG_STATUS, int *);
int RCCE_flag_alloc(RCCE_FLAG *);
int RCCE_flag_free(RCCE_FLAG *);
int RCCE_flag_write(RCCE_FLAG *, RCCE_FLAG_STATUS, int);
int RCCE_flag_read(RCCE_FLAG, RCCE_FLAG_STATUS *, int);
#ifdef USE_FLAG_EXPERIMENTAL
int RCCE_put_flag(t_vcharp, t_vcharp, int, int);
int RCCE_get_flag(t_vcharp, t_vcharp, int, int);
#endif
#ifdef USE_TAGGED_FLAGS
int RCCE_flag_write_tagged(RCCE_FLAG *, RCCE_FLAG_STATUS, int, void*, int);
int RCCE_flag_read_tagged(RCCE_FLAG, RCCE_FLAG_STATUS *, int, void*, int);
int RCCE_wait_tagged(RCCE_FLAG, RCCE_FLAG_STATUS, void *, int);
int RCCE_test_tagged(RCCE_FLAG, RCCE_FLAG_STATUS, int *, void *, int);
#endif
#endif
#ifdef _OPENMP
#pragma omp threadprivate (RC_COREID, RC_MY_COREID, RC_REFCLOCKGHZ)
#pragma omp threadprivate (RCCE_comm_buffer)
#pragma omp threadprivate (RCCE_BUFF_SIZE)
#pragma omp threadprivate (RCCE_IAM, RCCE_NP)
#pragma omp threadprivate (RCCE_debug_synch, RCCE_debug_comm, RCCE_debug_debug)
#ifdef SINGLEBITFLAGS
#pragma omp threadprivate (RCCE_flags, WORDSIZE, LEFTMOSTBIT)
#endif
#ifndef GORY
#pragma omp threadprivate (RCCE_send_queue, RCCE_recv_queue)
#pragma omp threadprivate (RCCE_sent_flag, RCCE_ready_flag)
#ifdef USE_PROBE_FLAGS
#pragma omp threadprivate (RCCE_probe_flag)
#endif
#ifdef USE_PIPELINE_FLAGS
#pragma omp threadprivate (RCCE_sent_flag_pipe, RCCE_ready_flag_pipe)
#endif
#pragma omp threadprivate (RCCE_buff_ptr, RCCE_chunk)
#pragma omp threadprivate (RCCE_flags_start)
#endif
#endif
#ifdef SHMADD
unsigned int getCOREID();
unsigned int readTILEID();
unsigned int readLUT(unsigned int);
void writeLUT(unsigned int, unsigned int);
#endif
#endif

View file

@ -0,0 +1,255 @@
//***************************************************************************************
// MPB memory allocation routines.
//***************************************************************************************
//
// Author: Rob F. Van der Wijngaart
// Intel Corporation
// Date: 08/30/2010
//
//***************************************************************************************
//
// Copyright 2010 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "RCCE_lib.h"
//......................................................................................
// GLOBAL VARIABLES USED BY THE LIBRARY
//......................................................................................
static RCCE_BLOCK_S RCCE_space; // data structure used for tracking MPB memory blocks
static RCCE_BLOCK_S *RCCE_spacep; // pointer to RCCE_space; only assigned by RCCE_malloc_init in GORY mode
#ifdef _OPENMP
// each emulated UE (OpenMP thread) keeps its own allocator state
#pragma omp threadprivate (RCCE_space, RCCE_spacep)
#endif
// END GLOBAL VARIABLES USED BY THE LIBRARY
//......................................................................................
//--------------------------------------------------------------------------------------
// FUNCTION: RCCE_malloc_init
//--------------------------------------------------------------------------------------
// initialize memory allocator
//--------------------------------------------------------------------------------------
void RCCE_malloc_init(
  t_vcharp mem, // pointer to MPB space that is to be managed by allocator
  size_t size   // size (bytes) of managed space
) {
#ifndef GORY
  // in the simplified API MPB memory allocation merely uses running pointers:
  // flags are carved off the front of the region, the rest is payload space
  RCCE_flags_start = mem;
  RCCE_chunk       = size;
  RCCE_buff_ptr    = mem;
#else
  // create one block containing all memory for truly dynamic memory allocator
  RCCE_BLOCK *first = (RCCE_BLOCK *) malloc(sizeof(RCCE_BLOCK));
  if (!first) {
    // without the initial block the allocator cannot work at all; fail loudly
    // instead of dereferencing a null pointer below
    fprintf(stderr, "ERROR in RCCE_malloc_init(): out of memory!\n");
    exit(-1);
  }
  first->free_size = size;
  first->space     = mem;
  /* make a circular list by connecting tail to itself */
  first->next      = first;
  RCCE_spacep       = &RCCE_space;
  RCCE_spacep->tail = first;
#endif
}
//--------------------------------------------------------------------------------------
// FUNCTION: RCCE_malloc
//--------------------------------------------------------------------------------------
// Allocate memory inside MPB. In restricted mode we only use it to allocate new
// flags prompted by the creation of new communicators. Since communicators are never
// deleted, we do not need to deallocate MPB memory, so we can simply keep running
// pointers of where the next flag will be stored, and where payload data can go. In
// GORY mode we need to support fully dynamic memory allocation and deallocation.
//--------------------------------------------------------------------------------------
t_vcharp RCCE_malloc(
  size_t size // requested space (bytes)
) {
#ifndef GORY
  t_vcharp result;

  // new flag takes exactly one cache line, whether using single bit flags or not
  if (size != RCCE_LINE_SIZE) {
    fprintf(stderr, "ERROR in RCCE_malloc(): size != RCCE_LINE_SIZE!\n");
    exit(-1);
    return(0); // not reached
  }

  // if the payload chunk would shrink to zero (or the unsigned size would wrap
  // around), we have allocated too many flags
  if (RCCE_chunk <= RCCE_LINE_SIZE) {
    fprintf(stderr, "ERROR in RCCE_malloc(): No more MPB space left!\n");
    exit(-1);
    return(0); // not reached
  }

  result = RCCE_flags_start;
  // reduce maximum size of message payload chunk
  RCCE_chunk -= RCCE_LINE_SIZE;
  // move running pointer to next available flags line
  RCCE_flags_start += RCCE_LINE_SIZE;
  // move running pointer to new start of payload data area
  RCCE_buff_ptr += RCCE_LINE_SIZE;
  return(result);
#else
  // simple memory allocator, loosely based on public domain code developed by
  // Michael B. Allen and published on "The Scripts--IT /Developers Network".
  // Approach:
  // - maintain linked list of pointers to memory. A block is either completely
  //   malloced (free_size = 0), or completely free (free_size > 0).
  //   The space field always points to the beginning of the block
  // - malloc: traverse linked list for first block that has enough space
  // - free: Check if pointer exists. If yes, check if the new block should be
  //   merged with neighbors. Could be one or two neighbors.
  RCCE_BLOCK *b1, *b2, *b3; // running pointers for blocks

  // only whole, non-empty cache lines can be handed out
  if (size==0 || size%RCCE_LINE_SIZE!=0) return 0;

  // always first check if the tail block has enough space, because that
  // is the most likely. If it does and it is exactly enough, we still
  // create a new block that will be the new tail, whose free space is
  // zero. This acts as a marker of where free space of predecessor ends
  b1 = RCCE_spacep->tail;
  if (b1->free_size >= size) {
    // need to insert new block; new order is: b1->b2 (= new tail)
    b2 = (RCCE_BLOCK *) malloc(sizeof(RCCE_BLOCK));
    if (!b2) return NULL; // no bookkeeping memory: report failure, list untouched
    b2->next      = b1->next;
    b1->next      = b2;
    b2->free_size = b1->free_size-size;
    b2->space     = b1->space + size;
    b1->free_size = 0;
    // need to update the tail
    RCCE_spacep->tail = b2;
    return(b1->space);
  }

  // tail didn't have enough space; loop over whole list from beginning
  while (b1->next->free_size < size) {
    if (b1->next == RCCE_spacep->tail) {
      return NULL; // we came full circle
    }
    b1 = b1->next;
  }

  b2 = b1->next;
  if (b2->free_size > size) { // split block; new block order: b1->b2->b3
    b3 = (RCCE_BLOCK *) malloc(sizeof(RCCE_BLOCK));
    if (!b3) return NULL;                 // cannot record the remainder; leave b2 free
    b3->next      = b2->next;             // reconnect pointers to add block b3
    b2->next      = b3;                   //     "         "     "  "    "    "
    b3->free_size = b2->free_size - size; // b3 gets remainder free space
    b3->space     = b2->space + size;     // need to shift space pointer
  }
  b2->free_size = 0;                      // block b2 is completely used
  return (b2->space);
#endif
}
// Allocate "size" bytes with RCCE_malloc and return the address of the same
// offset inside the communication buffer of core "CoreID" (0 on failure).
t_vcharp RCCE_palloc(
  size_t size, // requested space
  int CoreID   // location
) {
  t_vcharp local = RCCE_malloc(size);

  // allocation failed: hand the null pointer straight back
  if (!local) return local;

  // keep the offset relative to our own buffer, but rebase it onto CoreID's buffer
  return RCCE_comm_buffer[CoreID] + (local - RCCE_comm_buffer[RCCE_IAM]);
}
//--------------------------------------------------------------------------------------
// FUNCTION: RCCE_free
//--------------------------------------------------------------------------------------
// Deallocate memory in MPB; only used in GORY mode
//--------------------------------------------------------------------------------------
void RCCE_free(
  t_vcharp ptr // pointer to data to be freed
) {
  RCCE_BLOCK *b1, *b2, *b3; // running block pointers
  int j1, j2;               // booleans determining merging of blocks

  // the block list is only created by RCCE_malloc_init in GORY mode; in any
  // other configuration there is nothing to free, so do not touch a null list
  if (!RCCE_spacep) return;

  // loop over whole list from the beginning until we locate space ptr
  b1 = RCCE_spacep->tail;
  while (b1->next->space != ptr && b1->next != RCCE_spacep->tail) {
    b1 = b1->next;
  }

  // b2 is target block whose space must be freed
  b2 = b1->next;
  // tail either has zero free space, or hasn't been malloc'ed
  if (b2 == RCCE_spacep->tail) return;

  // reset free space for target block (entire block): a block extends up to
  // the start of its successor
  b3 = b2->next;
  b2->free_size = b3->space - b2->space;

  // determine with what non-empty blocks the target block can be merged
  j1 = (b1->free_size>0 && b1!=RCCE_spacep->tail); // predecessor block
  j2 = (b3->free_size>0 || b3==RCCE_spacep->tail); // successor block

  if (j1) {
    if (j2) { // splice all three blocks together: (b1,b2,b3) into b1
      b1->next = b3->next;
      b1->free_size += b3->free_size + b2->free_size;
      if (b3==RCCE_spacep->tail) RCCE_spacep->tail = b1;
      free(b3);
    }
    else {    // only merge (b1,b2) into b1
      b1->free_size += b2->free_size;
      b1->next = b3;
    }
    free(b2);
  }
  else {
    if (j2) { // only merge (b2,b3) into b2
      b2->next = b3->next;
      b2->free_size += b3->free_size;
      if (b3==RCCE_spacep->tail) RCCE_spacep->tail = b2;
      free(b3);
    }
  }
}
//--------------------------------------------------------------------------------------
// FUNCTION: RCCE_malloc_request
//--------------------------------------------------------------------------------------
// this function tries to return a (padded) amount of space in the MPB of size
// "size" bytes. If not available, the function keeps halving space until it fits
//--------------------------------------------------------------------------------------
t_vcharp RCCE_malloc_request(
  size_t size,  // requested number of bytes
  size_t *chunk // number of bytes of space returned (0 if nothing could be allocated)
) {
  t_vcharp combuf = 0;
  size_t   smaller;

  *chunk = PAD32byte(size);
  while (!combuf && *chunk >= RCCE_LINE_SIZE) {
    combuf = RCCE_malloc(*chunk);
    if (!combuf) {
      // retry with half the space, padded again
      smaller = PAD32byte(*chunk/2);
      // NOTE(review): PAD32byte presumably rounds up to a whole line, so halving
      // the minimum request yields the same size again; without this check the
      // loop would retry the same failing allocation forever
      if (smaller >= *chunk) {
        *chunk = 0;
        break;
      }
      *chunk = smaller;
    }
  }
  return (combuf);
}

165
hermit/usr/ircce/RCCE_put.c Normal file
View file

@ -0,0 +1,165 @@
//***************************************************************************************
// Put data into communication buffer.
//***************************************************************************************
//
// Author: Rob F. Van der Wijngaart
// Intel Corporation
// Date: 08/30/2010
//
//***************************************************************************************
//
// Copyright 2010 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// [2010-11-03] switched to SCC-optimized memcpy() functions in scc_memcpy.h:
// - memcpy_to_mpb()
// - memcpy_from_mpb()
// by Stefan Lankes, Carsten Clauss, Chair for Operating Systems,
// RWTH Aachen University
//
#include "RCCE_lib.h"
#if defined(COPPERRIDGE) && !defined(__hermit__)
#include "scc_memcpy.h"
#endif
// Externally callable wrapper around the memcpy variant used for writing
// into the MPB: the SCC-optimized copy on COPPERRIDGE builds (except under
// HermitCore), plain memcpy() everywhere else.
void *RCCE_memcpy_put(void *dest, const void *src, size_t count)
{
#if !defined(COPPERRIDGE) || defined(__hermit__)
  return memcpy(dest, src, count);
#else
  return memcpy_to_mpb(dest, src, count);
#endif
}
// From here on, calls to RCCE_memcpy_put() inside this file expand directly to
// the underlying copy routine rather than going through the wrapper function
// defined above.
#if defined(COPPERRIDGE) && !defined(__hermit__)
#define RCCE_memcpy_put(a,b,c) memcpy_to_mpb(a, b, c)
#else
#define RCCE_memcpy_put(a,b,c) memcpy(a, b, c)
#endif
//--------------------------------------------------------------------------------------
// FUNCTION: RCCE_put
//--------------------------------------------------------------------------------------
// copy data from address "source" in the local MPB or the calling UE's private memory
// to address "target" in the remote MPB. We do not test to see if a move from the
// calling UE's private memory stays within allocated memory
//--------------------------------------------------------------------------------------
int RCCE_put(
  t_vcharp target, // target buffer, MPB (address given relative to the local MPB)
  t_vcharp source, // source buffer, MPB or private memory
  int num_bytes,   // number of bytes to copy
  int ID           // UE whose MPB receives the data
) {
#ifdef GORY
  // we only need to do tests in GORY mode; in non-GORY mode this function is never
  // called by the user, but only by the library
  int copy_mode;
  int start_index;

  // check validity of parameters
  if (!target) return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_TARGET));
  if (!source) return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_SOURCE));
  if (ID<0 ||
      ID>=RCCE_NP) return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ID));
  if (num_bytes < 0 || num_bytes%RCCE_LINE_SIZE!=0)
    return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_MESSAGE_LENGTH));

  // determine if target data is in MPB; check using local buffer boundaries
  if (target - RCCE_comm_buffer[RCCE_IAM]>=0 &&
      target+num_bytes - (RCCE_comm_buffer[RCCE_IAM] + RCCE_BUFF_SIZE)<=0)
    // shift target address to point to remote MPB
    target = RCCE_comm_buffer[ID]+(target-RCCE_comm_buffer[RCCE_IAM]);
  else return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_TARGET));

  // source can be either local MPB or private memory
  if (source - RCCE_comm_buffer[RCCE_IAM] >= 0 &&
      source+num_bytes - (RCCE_comm_buffer[RCCE_IAM] + RCCE_BUFF_SIZE)<=0)
    copy_mode = BOTH_IN_COMM_BUFFER;
  else
    copy_mode = SOURCE_IN_PRIVATE_MEMORY;

  // make sure that if the copy is between locations within the same MPB
  // there is no overlap between source and target address ranges: two ranges
  // of equal length overlap when the higher start lies less than num_bytes
  // above the lower one
  if (copy_mode == BOTH_IN_COMM_BUFFER) {
    if ((source > target && source - target < num_bytes) ||
        (target > source && target - source < num_bytes)) {
      return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_DATA_OVERLAP));
    }
  }

  // ascertain that the start of the target buffer is cache line aligned
  // (target was already shifted into the MPB of UE "ID")
  start_index = (int)(target-RCCE_comm_buffer[ID]);
  if (start_index%RCCE_LINE_SIZE!=0)
    return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ALIGNMENT));

  // only verify alignment of the source if it is in the MPB; the source lives
  // in the local MPB, so its offset is taken relative to our own buffer
  if (copy_mode == BOTH_IN_COMM_BUFFER) {
    start_index = (int)(source-RCCE_comm_buffer[RCCE_IAM]);
    if (start_index%RCCE_LINE_SIZE!=0)
      return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ALIGNMENT));
  }
#else
  // in non-GORY mode we only need to retain the MPB target shift; we
  // already know the target is in the MPB, not private memory
  target = RCCE_comm_buffer[ID]+(target-RCCE_comm_buffer[RCCE_IAM]);
#endif

  // make sure that any data that has been put in our MPB by another UE is visible
#ifdef _OPENMP
  #pragma omp flush
#endif

  // do the actual copy
  RC_cache_invalidate();
  RCCE_memcpy_put((void *)target, (void *)source, num_bytes);

  // flush data to make it visible to all threads; cannot use flush list because it
  // concerns malloced space
#ifdef _OPENMP
  #pragma omp flush
#endif

#ifdef USE_FLAG_EXPERIMENTAL
  // pointers must be printed with %p; %X expects an unsigned int
  if(RCCE_debug_synch)
    fprintf(STDERR,"UE %d put data: %d address %p \n", RCCE_IAM,*source,(void *)target);
#endif
  return(RCCE_SUCCESS);
}
#ifdef USE_FLAG_EXPERIMENTAL
//--------------------------------------------------------------------------------------
// FUNCTION: RCCE_put_flag
//--------------------------------------------------------------------------------------
// write one flag value from "source" to the flag buffer of UE "ID", at the same
// offset that "target" has inside the local communication buffer
//--------------------------------------------------------------------------------------
int RCCE_put_flag(
  t_vcharp target, // target flag location, given relative to the local comm buffer
  t_vcharp source, // location of the flag value to write
  int num_bytes,   // not used: exactly one element is written
  int ID           // UE whose flag buffer is written
) {
  // translate the local comm buffer offset into the flag buffer of UE "ID"
  target = RCCE_flag_buffer[ID]+(target-RCCE_comm_buffer[RCCE_IAM]);

  // pointers must be printed with %p; %X expects an unsigned int
  if (RCCE_debug_synch)
    fprintf(STDERR,"UE %d put flag: %x address %p \n", RCCE_IAM,*source,(void *)target);

  *target = *source;
  return(RCCE_SUCCESS);
}
#endif

View file

@ -0,0 +1,131 @@
//***************************************************************************************
// Sorting-related routines
//***************************************************************************************
//
// Author: Rob F. Van der Wijngaart
// Intel Corporation
// Date: 08/30/2010
//
//***************************************************************************************
//
// Copyright 2010 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
//--------------------------------------------------------------------------------------
// FUNCTION: id_compare
//--------------------------------------------------------------------------------------
// comparison function used in routine to sort core IDs
//--------------------------------------------------------------------------------------
// Three-way comparison of two ints reached through untyped pointers, in the
// form expected by qsort-style sort routines: -1, 0 or 1.
int id_compare(
  const void *e1, // first element to be compared
  const void *e2  // second element to be compared
) {
  const int lhs = *(const int *)e1;
  const int rhs = *(const int *)e2;

  if (lhs < rhs) return -1;
  if (lhs > rhs) return 1;
  return 0;
}
// qsort -- qsort interface implemented by faster quicksort.
// J. L. Bentley and M. D. McIlroy, SPE 23 (1993) 1249-1265.
// Copyright 1993, John Wiley.
/*assume sizeof(long) is a power of 2 */
/* SWAPINIT assigns the caller's local variable "swaptype":
     2 if the array address or the element size is not a multiple of sizeof(long),
     1 if both are multiples and an element is larger than a long,
     0 otherwise (an element can be exchanged as a single long). */
#define SWAPINIT(a, es) swaptype = \
(a-(char*)0 | es) % sizeof(long) ? 2 : es > sizeof(long);
/* swapcode exchanges n bytes between parmi and parmj in units of TYPE.
   It modifies n and always performs at least one exchange (do/while), so it
   must not be used with n == 0. */
#define swapcode(TYPE, parmi, parmj, n) { \
register TYPE *pi = (TYPE *) (parmi); \
register TYPE *pj = (TYPE *) (parmj); \
do { \
register TYPE t = *pi; \
*pi++ = *pj; \
*pj++ = t; \
} while ((n -= sizeof(TYPE)) > 0); \
}
#include <stddef.h>
static void swapfunc(char *a, char *b, size_t n, int swaptype)
{ if (swaptype <= 1) swapcode(long, a, b, n)
else swapcode(char, a, b, n)
}
/* swap exchanges the elements at a and b. It relies on the locals "swaptype",
   "t" and "es" of the function it is used in, and expands to an unbraced
   if/else, so it must be used as a complete statement. */
#define swap(a, b) \
if (swaptype == 0) { \
t = *(long*)(a); \
*(long*)(a) = *(long*)(b); \
*(long*)(b) = t; \
} else \
swapfunc(a, b, es, swaptype)
/* PVINIT makes pv point at the partition value: for long-sized elements the
   value is copied into the local "v"; otherwise the chosen element pm is moved
   to the front of the array (local "a") and pv points there. */
#define PVINIT(pv, pm) \
if (swaptype != 0) { pv = a; swap(pv, pm); } \
else { pv = (char*)&v; *(long*)pv = *(long*)pm; }
/* vecswap exchanges n bytes between a and b, skipping the call when n is 0 */
#define vecswap(a, b, n) if (n > 0) swapfunc(a, b, n, swaptype)
/* function-like minimum; arguments may be evaluated twice */
#define min(x, y) ((x)<=(y) ? (x) : (y))
// Return whichever of a, b, c points at the median element according to cmp.
static char *med3(char *a, char *b, char *c, int (*cmp)(const void*, const void*))
{
  if (cmp(a, b) < 0) {
    // a < b: median is b if b < c, otherwise the larger of a and c
    if (cmp(b, c) < 0) return b;
    return (cmp(a, c) < 0) ? c : a;
  }
  // a >= b: median is b if b > c, otherwise the smaller of a and c
  if (cmp(b, c) > 0) return b;
  return (cmp(a, c) > 0) ? c : a;
}
// Sort, in place, the n elements of es bytes each that start at address a,
// using cmp to order them (quicksort with three-way partitioning; arrays of
// fewer than 7 elements are handled by insertion sort).
// NOTE(review): the prototype in the library header declares this function as
// returning int, while this definition returns void; reconcile the two.
void RCCE_qsort(char *a, size_t n, size_t es, int (*cmp)(const void*, const void*))
{
char *pa, *pb, *pc, *pd, *pl, *pm, *pn, *pv;
int r, swaptype;
long t, v;
size_t s;
// choose the element exchange strategy from the alignment of a and the size es
SWAPINIT(a, es);
if (n < 7) { /* Insertion sort on smallest arrays */
for (pm = a + es; pm < a + n*es; pm += es)
for (pl = pm; pl > a && cmp(pl-es, pl) > 0; pl -= es)
swap(pl, pl-es);
return;
}
pm = a + (n/2)*es; /* Small arrays, middle element */
if (n > 7) {
pl = a;
pn = a + (n-1)*es;
if (n > 40) { /* Big arrays, pseudomedian of 9 */
s = (n/8)*es;
pl = med3(pl, pl+s, pl+2*s, cmp);
pm = med3(pm-s, pm, pm+s, cmp);
pn = med3(pn-2*s, pn-s, pn, cmp);
}
pm = med3(pl, pm, pn, cmp); /* Mid-size, med of 3 */
}
PVINIT(pv, pm); /* pv points to partition value */
// pa/pd delimit the runs of pivot-equal elements collected at the left/right
// ends; pb scans upwards and pc scans downwards
pa = pb = a;
pc = pd = a + (n-1)*es;
for (;;) {
while (pb <= pc && (r = cmp(pb, pv)) <= 0) {
if (r == 0) { swap(pa, pb); pa += es; }
pb += es;
}
while (pb <= pc && (r = cmp(pc, pv)) >= 0) {
if (r == 0) { swap(pc, pd); pd -= es; }
pc -= es;
}
if (pb > pc) break;
swap(pb, pc);
pb += es;
pc -= es;
}
// move the pivot-equal runs from both ends into the middle of the array
pn = a + n*es;
s = min(pa-a, pb-pa ); vecswap(a, pb-s, s);
s = min(pd-pc, pn-pd-es); vecswap(pb, pn-s, s);
// recurse on the "less than" and "greater than" parts if they hold more than one element
if ((s = pb-pa) > es) RCCE_qsort(a, s/es, es, cmp);
if ((s = pd-pc) > es) RCCE_qsort(pn-s, s/es, es, cmp);
}

1350
hermit/usr/ircce/RCCE_recv.c Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,179 @@
//***************************************************************************************
// Reduction functions.
//***************************************************************************************
// Since reduction is the only message passing operation that depends on the data type,
// it is carried as a parameter. Also, since only collective operations require
// communication domains, they are the only ones that use communicators. All collectives
// implementations are naive, linear operations. There may not be any overlap between
// target and source.
//
// Author: Rob F. Van der Wijngaart
// Intel Corporation
// Date: 08/30/2010
//
//**************************************************************************************
//
// Copyright 2010 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "RCCE_lib.h"
// function-like minimum/maximum used by the reduction loops; each argument may
// be evaluated twice, so do not pass expressions with side effects
#define MIN(x,y) ( (x) < (y) ? (x) : (y) )
#define MAX(x,y) ( (x) > (y) ? (x) : (y) )
#include <stdlib.h>
#include <string.h>
//--------------------------------------------------------------------------------------
// FUNCTION: RCCE_reduce_general
//--------------------------------------------------------------------------------------
// function used to implement both reduce and allreduce
//--------------------------------------------------------------------------------------
static int RCCE_reduce_general(
  char *inbuf,   // source buffer for reduction data
  char *outbuf,  // target buffer for reduction data
  int num,       // number of data elements to be reduced
  int type,      // type of data elements
  int op,        // reduction operation
  int root,      // root of reduction tree, used for all reductions
  int all,       // if 1, use allreduce, if 0, use reduce
  RCCE_COMM comm // communication domain within which to reduce
) {
#ifdef GORY
  printf("Reduction only implemented for non-gory API\n");
  return(1);
#else
  int ue, i, type_size, ierr;

  // create aliases for source and target buffers to simplify arithmetic operations
  int    *iin = (int *)    inbuf, *iout = (int *)    outbuf;
  long   *lin = (long *)   inbuf, *lout = (long *)   outbuf;
  float  *fin = (float *)  inbuf, *fout = (float *)  outbuf;
  double *din = (double *) inbuf, *dout = (double *) outbuf;

  // reject unknown operations before any communication takes place
  switch (op) {
    case RCCE_SUM:
    case RCCE_MAX:
    case RCCE_MIN:
    case RCCE_PROD: break;
    default: return(RCCE_ERROR_ILLEGAL_OP);
  }

  // determine the element size; reject unknown data types
  switch (type) {
    case RCCE_INT:    type_size = sizeof(int);
                      break;
    case RCCE_LONG:   type_size = sizeof(long);
                      break;
    case RCCE_FLOAT:  type_size = sizeof(float);
                      break;
    case RCCE_DOUBLE: type_size = sizeof(double);
                      break;
    default: return(RCCE_ERROR_ILLEGAL_TYPE);
  }

  if (RCCE_IAM != comm.member[root]) {
    // non-root UEs send their source buffers to the root
    if ((ierr = RCCE_send(inbuf, num*type_size, comm.member[root])))
      return(ierr);
    // in case of allreduce they also receive the reduced buffer
    if (all) {
      if ((ierr = RCCE_recv(outbuf, num*type_size, comm.member[root])))
        return(ierr);
    }
  }
  else {
    // the root can copy directly from source to target buffer
    memcpy(outbuf, inbuf, num*type_size);

    for (ue=0; ue<comm.size; ue++) if (ue != root) {
      // the root's own source buffer is reused as receive buffer, i.e. its
      // contents are overwritten by the contributions of the other UEs
      if ((ierr = RCCE_recv(inbuf, num*type_size, comm.member[ue])))
        return(ierr);

      // use combination of operation and data type to reduce number of switch statements
      switch (op+(RCCE_NUM_OPS)*(type)) {
        case RCCE_SUM_INT:     for (i=0; i<num; i++) iout[i] += iin[i]; break;
        case RCCE_MAX_INT:     for (i=0; i<num; i++) iout[i] = MAX(iout[i],iin[i]); break;
        case RCCE_MIN_INT:     for (i=0; i<num; i++) iout[i] = MIN(iout[i],iin[i]); break;
        case RCCE_PROD_INT:    for (i=0; i<num; i++) iout[i] *= iin[i]; break;

        case RCCE_SUM_LONG:    for (i=0; i<num; i++) lout[i] += lin[i]; break;
        case RCCE_MAX_LONG:    for (i=0; i<num; i++) lout[i] = MAX(lout[i],lin[i]); break;
        case RCCE_MIN_LONG:    for (i=0; i<num; i++) lout[i] = MIN(lout[i],lin[i]); break;
        case RCCE_PROD_LONG:   for (i=0; i<num; i++) lout[i] *= lin[i]; break;

        case RCCE_SUM_FLOAT:   for (i=0; i<num; i++) fout[i] += fin[i]; break;
        case RCCE_MAX_FLOAT:   for (i=0; i<num; i++) fout[i] = MAX(fout[i],fin[i]); break;
        case RCCE_MIN_FLOAT:   for (i=0; i<num; i++) fout[i] = MIN(fout[i],fin[i]); break;
        case RCCE_PROD_FLOAT:  for (i=0; i<num; i++) fout[i] *= fin[i]; break;

        case RCCE_SUM_DOUBLE:  for (i=0; i<num; i++) dout[i] += din[i]; break;
        case RCCE_MAX_DOUBLE:  for (i=0; i<num; i++) dout[i] = MAX(dout[i],din[i]); break;
        case RCCE_MIN_DOUBLE:  for (i=0; i<num; i++) dout[i] = MIN(dout[i],din[i]); break;
        case RCCE_PROD_DOUBLE: for (i=0; i<num; i++) dout[i] *= din[i]; break;
      }
    }

    // in case of allreduce the root sends the reduction results to all non-root UEs
    if (all) {
      for (ue=0; ue<comm.size; ue++) if (ue != root) {
        if ((ierr = RCCE_send(outbuf, num*type_size, comm.member[ue])))
          return(ierr);
      }
    }
  }
  return(RCCE_SUCCESS);
#endif // GORY
}
//---------------------------------------------------------------------------------------
// FUNCTION: RCCE_allreduce
//---------------------------------------------------------------------------------------
// Reduction function which delivers the reduction results to all participating UEs
//---------------------------------------------------------------------------------------
int RCCE_allreduce(
  char *inbuf,   // source buffer for reduction data
  char *outbuf,  // target buffer for reduction data
  int num,       // number of data elements to be reduced
  int type,      // type of data elements
  int op,        // reduction operation
  RCCE_COMM comm // communication domain within which to reduce
){
  // the first member of the communicator acts as root; every UE gets the result
  const int first_member   = 0;
  const int deliver_to_all = 1;
  int status;

  status = RCCE_reduce_general(inbuf, outbuf, num, type, op,
                               first_member, deliver_to_all, comm);
  return(RCCE_error_return(RCCE_debug_comm, status));
}
//---------------------------------------------------------------------------------------
// FUNCTION: RCCE_reduce
//---------------------------------------------------------------------------------------
// Reduction function which delivers the reduction results to UE root
//---------------------------------------------------------------------------------------
int RCCE_reduce(
  char *inbuf,   // source buffer for reduction data
  char *outbuf,  // target buffer for reduction data
  int num,       // number of data elements to be reduced
  int type,      // type of data elements
  int op,        // reduction operation
  int root,      // member of "comm" receiving reduction results
  RCCE_COMM comm // communication domain within which to reduce
){
  int all = 0; // plain reduce: only the root receives the result

  // check to make sure root is member of the communicator
  if (root<0 || root >= comm.size)
    return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ID));

  return(RCCE_error_return(RCCE_debug_comm,
    RCCE_reduce_general(inbuf, outbuf, num, type, op, root, all, comm)));
}

View file

@ -0,0 +1,992 @@
//***************************************************************************************
// Synchronized receive routines.
//***************************************************************************************
//
// Author: Rob F. Van der Wijngaart
// Intel Corporation
// Date: 08/30/2010
//
//***************************************************************************************
//
// Copyright 2010 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// [2010-10-25] added support for non-blocking send/recv operations
// - RCCE_isend(), ..._test(), ..._wait(), ..._push()
// - RCCE_irecv(), ..._test(), ..._wait(), ..._push()
// by Carsten Clauss, Chair for Operating Systems,
// RWTH Aachen University
//
// [2012-09-10] added support for "tagged" flags
// by Carsten Clauss, Chair for Operating Systems,
// RWTH Aachen University
//
#include "RCCE_lib.h"
#if defined(COPPERRIDGE) && !defined(__hermit__)
#include "scc_memcpy.h"
#else
#define memcpy_scc memcpy
#endif
#include <stdlib.h>
#include <string.h>
//--------------------------------------------------------------------------------------
// FUNCTION: RCCE_send_general
//--------------------------------------------------------------------------------------
// Synchronized send function (gory and non-gory mode)
//--------------------------------------------------------------------------------------
static int RCCE_send_general(
char *privbuf, // source buffer in local private memory (send buffer)
t_vcharp combuf, // intermediate buffer in MPB
size_t chunk, // size of MPB available for this message (bytes)
RCCE_FLAG *ready, // flag indicating whether receiver is ready
RCCE_FLAG *sent, // flag indicating whether message has been sent by source
size_t size, // size of message (bytes)
int dest, // UE that will receive the message
int copy, // set to 0 for synchronization only (no copying/sending)
int pipe, // use pipelining?
int mcast, // multicast?
void* tag, // additional tag?
int len, // length of additional tag
RCCE_FLAG *probe // flag for probing for incoming messages
) {
char padline[RCCE_LINE_SIZE]; // copy buffer, used if message not multiple of line size
size_t wsize, // offset within send buffer when putting in "chunk" bytes
remainder, // bytes remaining to be sent
nbytes; // number of bytes to be sent in single RCCE_put call
char *bufptr; // running pointer inside privbuf for current location
#ifdef USE_REMOTE_PUT_LOCAL_GET
if(mcast) return(RCCE_error_return(1, RCCE_ERROR_NO_MULTICAST_SUPPORT));
#endif
if(probe)
#ifdef USE_TAGGED_FLAGS
RCCE_flag_write_tagged(probe, RCCE_FLAG_SET, dest, tag, len);
#else
RCCE_flag_write(probe, RCCE_FLAG_SET, dest);
#endif
#ifdef USE_SYNCH_FOR_ZERO_BYTE
// synchronize even in case of zero byte messages:
if(size == 0) {
#ifdef USE_REMOTE_PUT_LOCAL_GET
RCCE_wait_until(*ready, RCCE_FLAG_SET);
RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
#ifdef USE_TAGGED_FLAGS
if(!probe)
RCCE_flag_write_tagged(sent, RCCE_FLAG_SET, dest, tag, len);
else
#endif
RCCE_flag_write(sent, RCCE_FLAG_SET, dest);
#else // LOCAL PUT / REMOTE GET: (standard)
#ifdef USE_TAGGED_FLAGS
if(!probe)
RCCE_flag_write_tagged(sent, RCCE_FLAG_SET, dest, tag, len);
else
#endif
RCCE_flag_write(sent, RCCE_FLAG_SET, dest);
RCCE_wait_until(*ready, RCCE_FLAG_SET);
RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
#endif // !USE_REMOTE_PUT_LOCAL_GET
return(RCCE_SUCCESS);
}
#endif // USE_SYNCH_FOR_ZERO_BYTE
if(!pipe) {
// send data in units of available chunk size of comm buffer
for (wsize=0; wsize< (size/chunk)*chunk; wsize+=chunk) {
bufptr = privbuf + wsize;
nbytes = chunk;
#ifdef USE_REMOTE_PUT_LOCAL_GET
RCCE_wait_until(*ready, RCCE_FLAG_SET);
RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
// copy private data to remote comm buffer
if(copy) RCCE_put(combuf, (t_vcharp) bufptr, nbytes, dest);
#ifdef USE_TAGGED_FLAGS
if( (wsize == 0) && (!probe) )
RCCE_flag_write_tagged(sent, RCCE_FLAG_SET, dest, tag, len);
else
#endif
RCCE_flag_write(sent, RCCE_FLAG_SET, dest);
#else // LOCAL PUT / REMOTE GET: (standard)
// copy private data to own comm buffer
if(copy) RCCE_put(combuf, (t_vcharp) bufptr, nbytes, RCCE_IAM);
if(!mcast) {
#ifdef USE_TAGGED_FLAGS
if( (wsize == 0) && (!probe) )
RCCE_flag_write_tagged(sent, RCCE_FLAG_SET, dest, tag, len);
else
#endif
RCCE_flag_write(sent, RCCE_FLAG_SET, dest);
// wait for the destination to be ready to receive a message
RCCE_wait_until(*ready, RCCE_FLAG_SET);
RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
}
else {
RCCE_TNS_barrier(&RCCE_COMM_WORLD);
RCCE_TNS_barrier(&RCCE_COMM_WORLD);
}
#endif // !USE_REMOTE_PUT_LOCAL_GET
} // for
}
else // if(!pipe) -> if(pipe)
{
// pipelined version of send/recv:
size_t subchunk1, subchunk2;
for(wsize = 0; wsize < (size/chunk)*chunk; wsize+=chunk) {
if(wsize == 0) {
// allign sub-chunks to cache line granularity:
subchunk1 = ( (chunk / 2) / RCCE_LINE_SIZE ) * RCCE_LINE_SIZE;
subchunk2 = chunk - subchunk1;
}
bufptr = privbuf + wsize;
nbytes = subchunk1;
#ifdef USE_REMOTE_PUT_LOCAL_GET
RCCE_wait_until(*ready, RCCE_FLAG_SET);
RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
// copy private data chunk 1 to remote comm buffer
if(copy) RCCE_put(combuf, (t_vcharp) bufptr, nbytes, dest);
#ifdef USE_TAGGED_FLAGS
if( (wsize == 0) && (!probe) )
RCCE_flag_write_tagged(sent, RCCE_FLAG_SET, dest, tag, len);
else
#endif
RCCE_flag_write(sent, RCCE_FLAG_SET, dest);
#else // LOCAL PUT / REMOTE GET: (standard)
// copy private data chunk 1 to own comm buffer
if(copy) RCCE_put(combuf, (t_vcharp) bufptr, nbytes, RCCE_IAM);
#ifdef USE_TAGGED_FLAGS
if( (wsize == 0) && (!probe) )
RCCE_flag_write_tagged(sent, RCCE_FLAG_SET, dest, tag, len);
else
#endif
RCCE_flag_write(sent, RCCE_FLAG_SET, dest);
RCCE_wait_until(*ready, RCCE_FLAG_SET);
RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
#endif // !USE_REMOTE_PUT_LOCAL_GET
bufptr = privbuf + wsize + subchunk1;
nbytes = subchunk2;
#ifdef USE_REMOTE_PUT_LOCAL_GET
RCCE_wait_until(*ready, RCCE_FLAG_SET);
RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
// copy private data chunk 2 to remote comm buffer
if(copy) RCCE_put(combuf + subchunk1, (t_vcharp) bufptr, nbytes, dest);
RCCE_flag_write(sent, RCCE_FLAG_SET, dest);
#else // LOCAL PUT / REMOTE GET: (standard)
// copy private data chunk 2 to own comm buffer
if(copy) RCCE_put(combuf + subchunk1, (t_vcharp) bufptr, nbytes, RCCE_IAM);
RCCE_flag_write(sent, RCCE_FLAG_SET, dest);
RCCE_wait_until(*ready, RCCE_FLAG_SET);
RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
#endif // !USE_REMOTE_PUT_LOCAL_GET
} //for
} // if(pipe)
remainder = size%chunk;
// if nothing is left over, we are done
if (!remainder) return(RCCE_SUCCESS);
// send remainder of data--whole cache lines
bufptr = privbuf + (size/chunk)*chunk;
nbytes = remainder - remainder%RCCE_LINE_SIZE;
if (nbytes) {
#ifdef USE_REMOTE_PUT_LOCAL_GET
RCCE_wait_until(*ready, RCCE_FLAG_SET);
RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
// copy private data to remote comm buffer
if(copy) RCCE_put(combuf, (t_vcharp) bufptr, nbytes, dest);
#ifdef USE_TAGGED_FLAGS
if( (wsize == 0) && (!probe) )
RCCE_flag_write_tagged(sent, RCCE_FLAG_SET, dest, tag, len);
else
#endif
RCCE_flag_write(sent, RCCE_FLAG_SET, dest);
#else // LOCAL PUT / REMOTE GET: (standard)
// copy private data to own comm buffer
if(copy) RCCE_put(combuf, (t_vcharp)bufptr, nbytes, RCCE_IAM);
if(!mcast) {
#ifdef USE_TAGGED_FLAGS
if( (wsize == 0) && (!probe) )
RCCE_flag_write_tagged(sent, RCCE_FLAG_SET, dest, tag, len);
else
#endif
RCCE_flag_write(sent, RCCE_FLAG_SET, dest);
// wait for the destination to be ready to receive a message
RCCE_wait_until(*ready, RCCE_FLAG_SET);
RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
}
else {
RCCE_TNS_barrier(&RCCE_COMM_WORLD);
RCCE_TNS_barrier(&RCCE_COMM_WORLD);
}
#endif // !USE_REMOTE_PUT_LOCAL_GET
} // if(nbytes)
remainder = remainder%RCCE_LINE_SIZE;
if (!remainder) return(RCCE_SUCCESS);
// remainder is less than a cache line. This must be copied into appropriately sized
// intermediate space before it can be sent to the receiver
bufptr = privbuf + (size/chunk)*chunk + nbytes;
nbytes = RCCE_LINE_SIZE;
if(copy) {
#ifdef COPPERRIDGE
memcpy_scc(padline,bufptr,remainder);
#else
memcpy(padline,bufptr,remainder);
#endif
}
#ifdef USE_REMOTE_PUT_LOCAL_GET
RCCE_wait_until(*ready, RCCE_FLAG_SET);
RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
// copy private data to remote comm buffer
if(copy) RCCE_put(combuf, (t_vcharp) padline, nbytes, dest);
#ifdef USE_TAGGED_FLAGS
if( (wsize == 0) && (!probe) )
RCCE_flag_write_tagged(sent, RCCE_FLAG_SET, dest, tag, len);
else
#endif
RCCE_flag_write(sent, RCCE_FLAG_SET, dest);
#else // LOCAL PUT / REMOTE GET: (standard)
// copy private data to own comm buffer
if(copy) RCCE_put(combuf, (t_vcharp)padline, nbytes, RCCE_IAM);
if(!mcast) {
#ifdef USE_TAGGED_FLAGS
if( (wsize == 0) && (!probe) )
RCCE_flag_write_tagged(sent, RCCE_FLAG_SET, dest, tag, len);
else
#endif
RCCE_flag_write(sent, RCCE_FLAG_SET, dest);
// wait for the destination to be ready to receive a message
RCCE_wait_until(*ready, RCCE_FLAG_SET);
RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
}
else {
RCCE_TNS_barrier(&RCCE_COMM_WORLD);
RCCE_TNS_barrier(&RCCE_COMM_WORLD);
}
#endif // !USE_REMOTE_PUT_LOCAL_GET
return(RCCE_SUCCESS);
}
// Progress function for one non-blocking send request.
//
// The function is resumable: whenever the receiver's "ready" flag is not yet set,
// the current position is stored in request->label (1..4) and RCCE_PENDING is
// returned. The next call jumps straight back to the corresponding labelN via
// the goto dispatch at the top and re-tests the flag. All state that must
// survive between calls (wsize, remainder, nbytes, bufptr) is kept in *request.
//
// Returns RCCE_SUCCESS once the request is complete (request->finished is set to 1),
// or RCCE_PENDING if the send could not be completed yet.
static int RCCE_push_send_request(RCCE_SEND_REQUEST *request) {
char padline[RCCE_LINE_SIZE]; // copy buffer, used if message not multiple of line size
int test; // flag for calling RCCE_test_flag()
// nothing to do for a request that has already completed
if(request->finished) return(RCCE_SUCCESS);
// resume at the point where a previous call returned RCCE_PENDING
if(request->label == 1) goto label1;
if(request->label == 2) goto label2;
if(request->label == 3) goto label3;
if(request->label == 4) goto label4;
// first call for this request (label == 0): raise the probe flag at the destination, if any
if(request->probe)
#ifdef USE_TAGGED_FLAGS
RCCE_flag_write_tagged(request->probe, RCCE_FLAG_SET, request->dest, request->tag, request->len);
#else
RCCE_flag_write(request->probe, RCCE_FLAG_SET, request->dest);
#endif
#ifdef USE_SYNCH_FOR_ZERO_BYTE
// synchronize even in case of zero byte messages:
if(request->size == 0) {
#ifdef USE_REMOTE_PUT_LOCAL_GET
// remote put: wait for "ready" first, then signal "sent"
label1:
RCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test);
if(!test) {
request->label = 1;
return(RCCE_PENDING);
}
RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);
#ifdef USE_TAGGED_FLAGS
// the tag travels with the sent flag only if it was not already attached to the probe flag
if(!request->probe)
RCCE_flag_write_tagged(request->sent, RCCE_FLAG_SET, request->dest, request->tag, request->len);
else
#endif
RCCE_flag_write(request->sent, RCCE_FLAG_SET, request->dest);
#else // LOCAL PUT / REMOTE GET: (standard)
// local put: signal "sent" first, then wait for "ready"
#ifdef USE_TAGGED_FLAGS
if(!request->probe)
RCCE_flag_write_tagged(request->sent, RCCE_FLAG_SET, request->dest, request->tag, request->len);
else
#endif
RCCE_flag_write(request->sent, RCCE_FLAG_SET, request->dest);
label1:
RCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test);
if(!test) {
request->label = 1;
return(RCCE_PENDING);
}
RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);
#endif // !USE_REMOTE_PUT_LOCAL_GET
request->finished = 1;
return(RCCE_SUCCESS);
}
#endif // USE_SYNCH_FOR_ZERO_BYTE
// send data in units of available chunk size of comm buffer
// (the loop has no initializer: request->wsize carries the progress across calls)
for (; request->wsize < (request->size / request->chunk) * request->chunk; request->wsize += request->chunk) {
request->bufptr = request->privbuf + request->wsize;
request->nbytes = request->chunk;
#ifdef USE_REMOTE_PUT_LOCAL_GET
// wait for the destination to be ready to receive a message
label2:
RCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test);
if(!test) {
request->label = 2;
return(RCCE_PENDING);
}
RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);
// copy private data to remote comm buffer
if(request->copy) RCCE_put(request->combuf, (t_vcharp) request->bufptr, request->nbytes, request->dest);
#ifdef USE_TAGGED_FLAGS
// only the very first chunk (wsize == 0) carries the tag
if( (request->wsize == 0) && (!request->probe) )
RCCE_flag_write_tagged(request->sent, RCCE_FLAG_SET, request->dest, request->tag, request->len);
else
#endif
RCCE_flag_write(request->sent, RCCE_FLAG_SET, request->dest);
#else // LOCAL PUT / REMOTE GET: (standard)
// copy private data to own comm buffer
if(request->copy) RCCE_put(request->combuf, (t_vcharp) request->bufptr, request->nbytes, RCCE_IAM);
#ifdef USE_TAGGED_FLAGS
// only the very first chunk (wsize == 0) carries the tag
if( (request->wsize == 0) && (!request->probe) )
RCCE_flag_write_tagged(request->sent, RCCE_FLAG_SET, request->dest, request->tag, request->len);
else
#endif
RCCE_flag_write(request->sent, RCCE_FLAG_SET, request->dest);
// wait for the destination to be ready to receive a message
label2:
RCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test);
if(!test) {
request->label = 2;
return(RCCE_PENDING);
}
RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);
#endif // !USE_REMOTE_PUT_LOCAL_GET
} // for
request->remainder = request->size % request->chunk;
// if nothing is left over, we are done
if (!request->remainder) {
request->finished = 1;
return(RCCE_SUCCESS);
}
// send remainder of data--whole cache lines
request->bufptr = request->privbuf + (request->size / request->chunk) * request->chunk;
request->nbytes = request->remainder - request->remainder % RCCE_LINE_SIZE;
if (request->nbytes) {
#ifdef USE_REMOTE_PUT_LOCAL_GET
// wait for the destination to be ready to receive a message
label3:
RCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test);
if(!test) {
request->label = 3;
return(RCCE_PENDING);
}
RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);
// copy private data to remote comm buffer
if(request->copy) RCCE_put(request->combuf, (t_vcharp) request->bufptr, request->nbytes, request->dest);
#ifdef USE_TAGGED_FLAGS
// wsize is still 0 here only if no full chunk was sent before
if( (request->wsize == 0) && (!request->probe) )
RCCE_flag_write_tagged(request->sent, RCCE_FLAG_SET, request->dest, request->tag, request->len);
else
#endif
RCCE_flag_write(request->sent, RCCE_FLAG_SET, request->dest);
#else // LOCAL PUT / REMOTE GET: (standard)
// copy private data to own comm buffer
if(request->copy) RCCE_put(request->combuf, (t_vcharp)request->bufptr, request->nbytes, RCCE_IAM);
#ifdef USE_TAGGED_FLAGS
// wsize is still 0 here only if no full chunk was sent before
if( (request->wsize == 0) && (!request->probe) )
RCCE_flag_write_tagged(request->sent, RCCE_FLAG_SET, request->dest, request->tag, request->len);
else
#endif
RCCE_flag_write(request->sent, RCCE_FLAG_SET, request->dest);
// wait for the destination to be ready to receive a message
label3:
RCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test);
if(!test) {
request->label = 3;
return(RCCE_PENDING);
}
RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);
#endif // !USE_REMOTE_PUT_LOCAL_GET
} // if(request->nbytes)
// recompute the sub-cache-line tail from size and chunk
request->remainder = request->size % request->chunk;
request->remainder = request->remainder%RCCE_LINE_SIZE;
// if nothing is left over, we are done
if (!request->remainder)
{
request->finished = 1;
return(RCCE_SUCCESS);
}
// remainder is less than a cache line. This must be copied into appropriately sized
// intermediate space before it can be sent to the receiver
// (request->nbytes still holds the size of the whole-cache-line part computed above)
request->bufptr = request->privbuf + (request->size / request->chunk) * request->chunk + request->nbytes;
request->nbytes = RCCE_LINE_SIZE;
#ifdef USE_REMOTE_PUT_LOCAL_GET
// wait for the destination to be ready to receive a message
label4:
RCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test);
if(!test) {
request->label = 4;
return(RCCE_PENDING);
}
RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);
// copy private data to remote comm buffer
// (padline is filled after label4, so it is valid even when this call resumed here)
if(request->copy) {
#ifdef COPPERRIDGE
memcpy_scc(padline,request->bufptr,request->remainder);
#else
memcpy(padline,request->bufptr,request->remainder);
#endif
RCCE_put(request->combuf, (t_vcharp) padline, request->nbytes, request->dest);
}
#ifdef USE_TAGGED_FLAGS
#ifdef USE_PROBE_FLAGS_SHORTCUT
// a request without a private buffer finishes here without writing the sent flag
// NOTE(review): presumably the short-message case set up by RCCE_isend, whose payload
// already travelled with the tagged probe flag -- confirm
if(request->privbuf == NULL)
{
request->finished = 1;
return(RCCE_SUCCESS);
}
#endif
if( (request->wsize == 0) && (!request->probe) )
RCCE_flag_write_tagged(request->sent, RCCE_FLAG_SET, request->dest, request->tag, request->len);
else
#endif
RCCE_flag_write(request->sent, RCCE_FLAG_SET, request->dest);
#else // LOCAL PUT / REMOTE GET: (standard)
// copy private data to own comm buffer
// (padline is filled and consumed before label4, so a resume at label4 never reads it)
if(request->copy) {
#ifdef COPPERRIDGE
memcpy_scc(padline,request->bufptr,request->remainder);
#else
memcpy(padline,request->bufptr,request->remainder);
#endif
RCCE_put(request->combuf, (t_vcharp)padline, request->nbytes, RCCE_IAM);
}
#ifdef USE_TAGGED_FLAGS
if( (request->wsize == 0) && (!request->probe) )
RCCE_flag_write_tagged(request->sent, RCCE_FLAG_SET, request->dest, request->tag, request->len);
else
#endif
RCCE_flag_write(request->sent, RCCE_FLAG_SET, request->dest);
// wait for the destination to be ready to receive a message
label4:
RCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test);
if(!test) {
request->label = 4;
return(RCCE_PENDING);
}
RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);
#endif // !USE_REMOTE_PUT_LOCAL_GET
request->finished = 1;
return(RCCE_SUCCESS);
}
// Fill in a send request descriptor: store the caller-supplied transfer
// parameters and reset all progress bookkeeping so that the request starts
// from the beginning and is not linked into any queue.
static void RCCE_init_send_request(
char *privbuf, // source buffer in local private memory (send buffer)
t_vcharp combuf, // intermediate buffer in MPB
size_t chunk, // size of MPB available for this message (bytes)
RCCE_FLAG *ready, // flag indicating whether receiver is ready
RCCE_FLAG *sent, // flag indicating whether message has been sent by source
size_t size, // size of message (bytes)
int dest, // UE that will receive the message
int copy, // set to 0 for synchronization only (no copying/sending)
void* tag, // additional tag?
int len, // length of additional tag
RCCE_FLAG *probe, // flag for probing for incoming messages
RCCE_SEND_REQUEST *request
) {
  // progress state: nothing sent yet, no resume point, not finished, not queued
  request->finished  = 0;
  request->label     = 0;
  request->wsize     = 0;
  request->remainder = 0;
  request->nbytes    = 0;
  request->bufptr    = NULL;
  request->next      = NULL;

  // buffers and message geometry
  request->privbuf = privbuf;
  request->combuf  = combuf;
  request->chunk   = chunk;
  request->size    = size;
  request->copy    = copy;

  // peer and synchronization flags
  request->dest  = dest;
  request->ready = ready;
  request->sent  = sent;
  request->probe = probe;

  // optional tag
  request->tag = tag;
  request->len = len;
}
#ifndef GORY
// this is the LfBS-customized synchronized message passing API
//--------------------------------------------------------------------------------------
// FUNCTION: RCCE_send
//--------------------------------------------------------------------------------------
// send function for simplified API; use library-maintained variables for synchronization
//--------------------------------------------------------------------------------------
// Blocking send of "size" bytes from "privbuf" to UE "dest".
// Returns RCCE_REJECTED if a non-blocking send is still queued (so that this
// message cannot overtake it), otherwise RCCE_SUCCESS from the short-message
// path or the result of RCCE_send_general().
int RCCE_send(char *privbuf, size_t size, int dest) {
// probe flag of this UE, only used if probing support is compiled in
#ifdef USE_PROBE_FLAGS
RCCE_FLAG* probe = &RCCE_probe_flag[RCCE_IAM];
#else
RCCE_FLAG* probe = NULL;
#endif
// refuse to send while requests are pending in the isend queue
// (the queue is indexed by destination in the remote-put configuration)
#ifndef USE_REMOTE_PUT_LOCAL_GET
if(RCCE_send_queue != NULL)
#else
if(RCCE_send_queue[dest] != NULL)
#endif
return(RCCE_REJECTED);
#ifdef USE_TAGGED_FOR_SHORT
// short messages that fit next to the flag value in one line are passed as the
// "tag" argument of a tagged flag write instead of being copied through the comm buffer
if(size <= (RCCE_LINE_SIZE - sizeof(int)))
{
#ifdef USE_PROBE_FLAGS
// with probe flags the payload is attached to the probe flag ...
RCCE_flag_write_tagged(probe, RCCE_FLAG_SET, dest, privbuf, size);
#endif
#ifdef USE_REMOTE_PUT_LOCAL_GET
// remote put: wait until the receiver is ready before signalling "sent"
RCCE_wait_until(RCCE_ready_flag[dest], RCCE_FLAG_SET);
RCCE_flag_write(&RCCE_ready_flag[dest], RCCE_FLAG_UNSET, RCCE_IAM);
#ifndef USE_PROBE_FLAGS_SHORTCUT
#ifdef USE_PROBE_FLAGS
// ... so the sent flag is written without a tag
RCCE_flag_write(&RCCE_sent_flag[RCCE_IAM], RCCE_FLAG_SET, dest);
#else
RCCE_flag_write_tagged(&RCCE_sent_flag[RCCE_IAM], RCCE_FLAG_SET, dest, privbuf, size);
#endif
#endif
#else // LOCAL PUT / REMOTE GET: (standard)
// local put: signal "sent" first, then wait for the receiver's acknowledgement
#ifdef USE_PROBE_FLAGS
RCCE_flag_write(&RCCE_sent_flag[RCCE_IAM], RCCE_FLAG_SET, dest);
#else
RCCE_flag_write_tagged(&RCCE_sent_flag[RCCE_IAM], RCCE_FLAG_SET, dest, privbuf, size);
#endif
RCCE_wait_until(RCCE_ready_flag[dest], RCCE_FLAG_SET);
RCCE_flag_write(&RCCE_ready_flag[dest], RCCE_FLAG_UNSET, RCCE_IAM);
#endif // !USE_REMOTE_PUT_LOCAL_GET
return(RCCE_SUCCESS);
}
else
#endif
// general case: chunked transfer through the comm buffer, not pipelined, no multicast
return(RCCE_send_general(privbuf, RCCE_buff_ptr, RCCE_chunk,
&RCCE_ready_flag[dest], &RCCE_sent_flag[RCCE_IAM],
size, dest,
1, 0, 0, // copy, pipe, mcast
NULL, 0, probe)); // tag, len
}
int RCCE_send_tagged(char *privbuf, size_t size, int dest, void* tag, int len) {
#ifdef USE_PROBE_FLAGS
RCCE_FLAG* probe = &RCCE_probe_flag[RCCE_IAM];
#else
RCCE_FLAG* probe = NULL;
#endif
#ifndef USE_REMOTE_PUT_LOCAL_GET
if(RCCE_send_queue != NULL)
#else
if(RCCE_send_queue[dest] != NULL)
#endif
return(RCCE_REJECTED);
#ifdef USE_TAGGED_FLAGS
return(RCCE_send_general(privbuf, RCCE_buff_ptr, RCCE_chunk,
&RCCE_ready_flag[dest], &RCCE_sent_flag[RCCE_IAM],
size, dest,
1, 0, 0, // copy, pipe, mcast
tag, len, probe)); // tag, len, probe
#else
RCCE_send_general(tag, RCCE_buff_ptr, RCCE_chunk,
&RCCE_ready_flag[dest], &RCCE_sent_flag[RCCE_IAM],
len, dest,
1, 0, 0, // copy, pipe, mcast
NULL, 0, probe); // tag, len, probe
return(RCCE_send_general(privbuf, RCCE_buff_ptr, RCCE_chunk,
&RCCE_ready_flag[dest], &RCCE_sent_flag[RCCE_IAM],
size, dest,
1, 0, 0, // copy, pipe, mcast
NULL, 0, NULL)); // tag, len, probe
#endif
}
//--------------------------------------------------------------------------------------
// FUNCTION: RCCE_send_pipe
//--------------------------------------------------------------------------------------
// send function for simplified API; use library-maintained variables for synchronization
//--------------------------------------------------------------------------------------
// Blocking, pipelined send of "size" bytes from "privbuf" to UE "dest".
// Uses the dedicated pipeline flags when USE_PIPELINE_FLAGS is defined and the
// regular ready/sent flags otherwise. Returns RCCE_REJECTED while a
// non-blocking send is still queued, else the result of RCCE_send_general().
int RCCE_send_pipe(char *privbuf, size_t size, int dest) {
  RCCE_FLAG *probe; // probe flag of this UE (NULL without probing support)
  RCCE_FLAG *ready; // receiver-is-ready flag used for the handshake
  RCCE_FLAG *sent;  // message-has-been-sent flag used for the handshake

#ifdef USE_PROBE_FLAGS
  probe = &RCCE_probe_flag[RCCE_IAM];
#else
  probe = NULL;
#endif

  // queued non-blocking sends must not be overtaken
#ifndef USE_REMOTE_PUT_LOCAL_GET
  if(RCCE_send_queue != NULL) return(RCCE_REJECTED);
#else
  if(RCCE_send_queue[dest] != NULL) return(RCCE_REJECTED);
#endif

  // pick the flag pair for the handshake
#ifdef USE_PIPELINE_FLAGS
  ready = &RCCE_ready_flag_pipe[dest];
  sent  = &RCCE_sent_flag_pipe[RCCE_IAM];
#else
  ready = &RCCE_ready_flag[dest];
  sent  = &RCCE_sent_flag[RCCE_IAM];
#endif

  return(RCCE_send_general(privbuf, RCCE_buff_ptr, RCCE_chunk,
                           ready, sent,
                           size, dest,
                           1, 1, 0,           // copy, pipe, mcast
                           NULL, 0, probe));  // tag, len, probe
}
// Blocking multicast send of "size" bytes from "privbuf".
// No ready/sent flags and no destination (-1) are handed to
// RCCE_send_general(); the mcast argument is set instead.
// Returns RCCE_REJECTED while RCCE_send_queue is non-NULL, else the result of
// RCCE_send_general().
int RCCE_send_mcast(char *privbuf, size_t size) {
#ifdef USE_PROBE_FLAGS
  RCCE_FLAG* probe = &RCCE_probe_flag[RCCE_IAM];
#else
  RCCE_FLAG* probe = NULL;
#endif

  // the same test is used with and without USE_REMOTE_PUT_LOCAL_GET
  // NOTE(review): the other send functions index RCCE_send_queue by destination
  // in the remote-put configuration -- confirm this comparison is intended there
  if(RCCE_send_queue != NULL) {
    return(RCCE_REJECTED);
  }

  return(RCCE_send_general(privbuf, RCCE_buff_ptr, RCCE_chunk,
                           NULL, NULL,        // ready, sent
                           size, -1,          // size, dest
                           1, 0, 1,           // copy, pipe, mcast
                           NULL, 0, probe));  // tag, len, probe
}
//--------------------------------------------------------------------------------------
// FUNCTION: RCCE_isend
//--------------------------------------------------------------------------------------
// non-blocking send function; returns a handle of type RCCE_SEND_REQUEST
//--------------------------------------------------------------------------------------
// Start a non-blocking send described by "request".
// Returns RCCE_SUCCESS if the message could be sent completely right away,
// RCCE_PENDING if the request was started and is now the head of the send queue,
// or RCCE_RESERVED if it was appended behind requests that are already queued.
int RCCE_isend(char *privbuf, size_t size, int dest, RCCE_SEND_REQUEST *request) {
  RCCE_SEND_REQUEST *head; // current front of the relevant send queue
  RCCE_SEND_REQUEST *tail; // cursor used to locate the last queued request
#ifdef USE_PROBE_FLAGS
  RCCE_FLAG* probe = &RCCE_probe_flag[RCCE_IAM];
#else
  RCCE_FLAG* probe = NULL;
#endif

#ifdef USE_TAGGED_FOR_SHORT
  // short message: no private buffer and no copy; the payload is passed as the tag
  if(size <= (RCCE_LINE_SIZE - sizeof(int)))
    RCCE_init_send_request(NULL, RCCE_buff_ptr, RCCE_chunk,
                           &RCCE_ready_flag[dest], &RCCE_sent_flag[RCCE_IAM],
                           size, dest, 0, privbuf, size, probe, request);
  else
#endif
    RCCE_init_send_request(privbuf, RCCE_buff_ptr, RCCE_chunk,
                           &RCCE_ready_flag[dest], &RCCE_sent_flag[RCCE_IAM],
                           size, dest, 1, NULL, 0, probe, request);

  // one global queue, or one queue per destination in the remote-put configuration
#ifndef USE_REMOTE_PUT_LOCAL_GET
  head = RCCE_send_queue;
#else
  head = RCCE_send_queue[dest];
#endif

  // other requests are waiting: keep the order and append at the very end
  if(head != NULL) {
    for(tail = head; tail->next != NULL; tail = tail->next)
      ;
    tail->next = request;
    return(RCCE_RESERVED);
  }

  // queue is empty: try to send immediately
  if(RCCE_push_send_request(request) == RCCE_SUCCESS)
    return(RCCE_SUCCESS);

  // could not finish yet: the request becomes the head of the queue
#ifndef USE_REMOTE_PUT_LOCAL_GET
  RCCE_send_queue = request;
#else
  RCCE_send_queue[dest] = request;
#endif
  return(RCCE_PENDING);
}
//--------------------------------------------------------------------------------------
// FUNCTION: RCCE_isend_test
//--------------------------------------------------------------------------------------
// test function for completion of the requested non-blocking send operation
//--------------------------------------------------------------------------------------
// Test a non-blocking send for completion and try to make progress on it.
// *test is set to 1 if the request has finished and to 0 otherwise.
// Returns RCCE_SUCCESS (finished), RCCE_PENDING (head of the queue, still in
// progress) or RCCE_RESERVED (not the head of the queue, so not started yet).
int RCCE_isend_test(RCCE_SEND_REQUEST *request, int *test) {
  RCCE_SEND_REQUEST *head; // front of the queue this request belongs to

  if(!request->finished) {
#ifndef USE_REMOTE_PUT_LOCAL_GET
    head = RCCE_send_queue;
#else
    head = RCCE_send_queue[request->dest];
#endif
    // only the head of the queue may be progressed
    if(head != request) {
      *test = 0;
      return(RCCE_RESERVED);
    }

    RCCE_push_send_request(request);
    if(!request->finished) {
      *test = 0;
      return(RCCE_PENDING);
    }

    // completed just now: remove it from the queue
#ifndef USE_REMOTE_PUT_LOCAL_GET
    RCCE_send_queue = request->next;
#else
    RCCE_send_queue[request->dest] = request->next;
#endif
  }

  *test = 1;
  return(RCCE_SUCCESS);
}
//--------------------------------------------------------------------------------------
// FUNCTION: RCCE_isend_push
//--------------------------------------------------------------------------------------
// progress function for pending requests in the isend queue
//--------------------------------------------------------------------------------------
// Try to make progress on the request at the head of the send queue.
// "dest" selects the queue in the remote-put configuration and is not used
// otherwise. Returns RCCE_SUCCESS if there is nothing (left) to do for the
// head request and RCCE_PENDING if it is still in progress.
int RCCE_isend_push(int dest) {
#ifndef USE_REMOTE_PUT_LOCAL_GET
  RCCE_SEND_REQUEST *head = RCCE_send_queue;
#else
  RCCE_SEND_REQUEST *head = RCCE_send_queue[dest];
#endif

  // empty queue, or head already completed
  if(head == NULL || head->finished)
    return(RCCE_SUCCESS);

  RCCE_push_send_request(head);
  if(!head->finished)
    return(RCCE_PENDING);

  // head completed: advance the queue to its successor
#ifndef USE_REMOTE_PUT_LOCAL_GET
  RCCE_send_queue = head->next;
#else
  RCCE_send_queue[head->dest] = head->next;
#endif
  return(RCCE_SUCCESS);
}
//--------------------------------------------------------------------------------------
// FUNCTION: RCCE_isend_wait
//--------------------------------------------------------------------------------------
// just wait for completion of the requested non-blocking send operation
//--------------------------------------------------------------------------------------
// Block until the given non-blocking send has finished. While waiting, the
// send queue is pushed and pending receives (and, in the remote-put
// configuration, the send queues of all UEs) are progressed as well.
// Always returns RCCE_SUCCESS.
int RCCE_isend_wait(RCCE_SEND_REQUEST *request) {
  int ue;

#ifndef USE_REMOTE_PUT_LOCAL_GET
  for(;;) {
    if(request->finished) break;
    RCCE_isend_push(-1);
    if(request->finished) break;
    // not done yet: give the receive queues of all UEs a chance to progress
    for(ue = 0; ue < RCCE_NP; ue++)
      RCCE_irecv_push(ue);
  }
#else
  for(;;) {
    if(request->finished) break;
    RCCE_isend_push(request->dest);
    if(request->finished) break;
    // not done yet: progress receives and the send queues of all UEs
    RCCE_irecv_push(-1);
    for(ue = 0; ue < RCCE_NP; ue++)
      RCCE_isend_push(ue);
  }
#endif

  return(RCCE_SUCCESS);
}
#else
// this is the gory synchronized message passing API
//--------------------------------------------------------------------------------------
// FUNCTION: RCCE_send
//--------------------------------------------------------------------------------------
// send function for simplified API; use user-supplied variables for synchronization
//--------------------------------------------------------------------------------------
// Gory-API send: forwards the user-supplied buffer and flags to
// RCCE_send_general() as a plain copying send without pipelining, multicast,
// tag or probe flag.
int RCCE_send(char *privbuf, t_vcharp combuf, size_t chunk, RCCE_FLAG *ready,
              RCCE_FLAG *sent, size_t size, int dest) {
  const int copy  = 1; // really transfer the data
  const int pipe  = 0; // no pipelining
  const int mcast = 0; // point-to-point

  return(RCCE_send_general(privbuf, combuf, chunk, ready, sent,
                           size, dest,
                           copy, pipe, mcast,
                           NULL, 0, NULL)); // tag, len, probe
}
#endif

View file

@ -0,0 +1,613 @@
///*************************************************************************************
// Synchronization functions.
// Single-bit and whole-cache-line flags are sufficiently different that we provide
// separate implementations of the synchronization routines for each case
//**************************************************************************************
//
// Author: Rob F. Van der Wijngaart
// Intel Corporation
// Date: 008/30/2010
//
//**************************************************************************************
//
// Copyright 2010 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "RCCE_lib.h"
#if defined(COPPERRIDGE) && !defined(__hermit__)
#include "scc_memcpy.h"
#else
#define memcpy_scc memcpy
#endif
#ifdef USE_BYTE_FLAGS
#include "RCCE_byte_synch.c"
#else
#ifdef SINGLEBITFLAGS
//////////////////////////////////////////////////////////////////
// LOCKING SYNCHRONIZATION USING ONE BIT PER FLAG
//////////////////////////////////////////////////////////////////
//--------------------------------------------------------------------------------------
// FUNCTION: RCCE_wait_until
//--------------------------------------------------------------------------------------
// wait until flag in local MPB becomes set or unset. To avoid reading stale data from
// the cache instead of new flag value from the MPB, issue MPB cache invalidation before
// each read, including within the spin cycle
//--------------------------------------------------------------------------------------
// Spin until the bit of "flag" has the value "val".
// With GORY defined the arguments are validated first and an error code is
// returned for an undefined status, an unallocated flag, or a flag outside the
// local comm buffer. Returns RCCE_SUCCESS once the value has been observed.
int RCCE_wait_until(RCCE_FLAG flag, RCCE_FLAG_STATUS val) {
  t_vcharp cflag = flag.line_address;

  // avoid tests if we use the simplified API
#ifdef GORY
  if (val != RCCE_FLAG_UNSET && val != RCCE_FLAG_SET)
    return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_STATUS_UNDEFINED));
  if (!cflag)
    return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_NOT_ALLOCATED));
  // the whole flag line has to lie inside the local comm buffer
  if (cflag - RCCE_comm_buffer[RCCE_IAM] < 0 ||
      cflag+RCCE_LINE_SIZE - (RCCE_comm_buffer[RCCE_IAM] + RCCE_BUFF_SIZE) >= 0)
    return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_NOT_IN_COMM_BUFFER));
#endif

  // flush/invalidate before every read so that a fresh flag value is seen
  for (;;) {
#ifdef _OPENMP
#pragma omp flush
#endif
    RC_cache_invalidate();
    if (RCCE_bit_value(cflag, flag.location) == val) break;
  }
  return(RCCE_SUCCESS);
}
// Non-blocking check of a flag: *result is set to 1 if the bit of "flag"
// currently has the value "val" and to 0 otherwise.
// With GORY defined the arguments are validated first (same checks as in
// RCCE_wait_until). Returns RCCE_SUCCESS after a completed test.
int RCCE_test_flag(RCCE_FLAG flag, RCCE_FLAG_STATUS val, int *result) {
  t_vcharp cflag = flag.line_address;

  // avoid tests if we use the simplified API
#ifdef GORY
  if (val != RCCE_FLAG_UNSET && val != RCCE_FLAG_SET)
    return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_STATUS_UNDEFINED));
  if (!cflag)
    return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_NOT_ALLOCATED));
  // the whole flag line has to lie inside the local comm buffer
  if (cflag - RCCE_comm_buffer[RCCE_IAM] < 0 ||
      cflag+RCCE_LINE_SIZE - (RCCE_comm_buffer[RCCE_IAM] + RCCE_BUFF_SIZE) >= 0)
    return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_NOT_IN_COMM_BUFFER));
#endif

  // flush/invalidate once, then sample the flag a single time (no spinning here)
#ifdef _OPENMP
#pragma omp flush
#endif
  RC_cache_invalidate();
  (*result) = (RCCE_bit_value(cflag, flag.location) == val) ? 1 : 0;
  return(RCCE_SUCCESS);
}
//--------------------------------------------------------------------------------------
// FUNCTION: RCCE_barrier
//--------------------------------------------------------------------------------------
// very simple, linear barrier
//--------------------------------------------------------------------------------------
// Linear barrier over the members of "comm".
// Every UE flips its own gather bit. The first member of the communicator
// polls the gather bits of all other members until they match the new value
// and then writes the release flag of each of them; all other members wait on
// their release flag. Returns RCCE_SUCCESS, or the error reported by
// RCCE_error_return() if one of the RCCE calls fails.
int RCCE_barrier(RCCE_COMM *comm) {
  t_vchar  own_line [RCCE_LINE_SIZE]; // local copy of the line holding our gather bit
  t_vchar  peer_line[RCCE_LINE_SIZE]; // local copy of a peer's gather line
  t_vcharp gatherp = comm->gather.line_address;
  RCCE_FLAG_STATUS cycle;             // value all gather bits must reach in this round
  int arrived, idx, error;

  if (RCCE_debug_synch)
    fprintf(STDERR,"UE %d has checked into barrier\n", RCCE_IAM);

  // flip local barrier variable
  error = RCCE_get(own_line, gatherp, RCCE_LINE_SIZE, RCCE_IAM);
  if (error)
    return(RCCE_error_return(RCCE_debug_synch,error));
  cycle = RCCE_flip_bit_value(own_line, comm->gather.location);
  error = RCCE_put(comm->gather.line_address, own_line, RCCE_LINE_SIZE, RCCE_IAM);
  if (error)
    return(RCCE_error_return(RCCE_debug_synch,error));

  if (RCCE_IAM != comm->member[0]) {
    // non-root: wait to be released by the root
    error = RCCE_wait_until(comm->release, cycle);
    if (error)
      return(RCCE_error_return(RCCE_debug_synch,error));
  }
  else {
    // root: poll the remote gather flags until every member shows "cycle"
    arrived = 0;
    while (arrived != comm->size) {
      // the root itself (member #0) is counted up front and not polled
      arrived = 1;
      for (idx = 1; idx < comm->size; idx++) {
        // copy flag values out of comm buffer
        error = RCCE_get(peer_line, comm->gather.line_address, RCCE_LINE_SIZE,
                         comm->member[idx]);
        if (error)
          return(RCCE_error_return(RCCE_debug_synch,error));
        if (RCCE_bit_value(peer_line, comm->gather.location) == cycle)
          arrived++;
      }
    }
    // set release flags
    for (idx = 1; idx < comm->size; idx++) {
      error = RCCE_flag_write(&(comm->release), cycle, comm->member[idx]);
      if (error)
        return(RCCE_error_return(RCCE_debug_synch,error));
    }
  }

  if (RCCE_debug_synch) fprintf(STDERR,"UE %d has cleared barrier\n", RCCE_IAM);
  return(RCCE_SUCCESS);
}
#else
//////////////////////////////////////////////////////////////////
// LOCKLESS SYNCHRONIZATION USING ONE WHOLE CACHE LINE PER FLAG //
//////////////////////////////////////////////////////////////////
//--------------------------------------------------------------------------------------
// FUNCTION: RCCE_wait_until
//--------------------------------------------------------------------------------------
// wait until flag in local MPB becomes set or unset. To avoid reading stale data from
// the cache instead of new flag value from the MPB, issue MPB cache invalidation before
// each read, including within the spin cycle
//--------------------------------------------------------------------------------------
// Spin until the int that "flag" points to has the value "val".
// With GORY defined the arguments are validated first and an error code is
// returned for an undefined status, a NULL flag, or a flag outside the local
// comm buffer. With USE_REVERTED_FLAGS the value is read from the last int of
// the flag's line instead of the first. Returns RCCE_SUCCESS once the value
// has been observed.
int RCCE_wait_until(RCCE_FLAG flag, RCCE_FLAG_STATUS val) {
#ifdef GORY
  // byte-granular view of the flag address, only needed for the range checks
  t_vcharp cflag = (t_vcharp) flag;

  if (val != RCCE_FLAG_UNSET && val != RCCE_FLAG_SET)
    return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_STATUS_UNDEFINED));
  if (!cflag)
    return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_NOT_ALLOCATED));
  // check to see if flag is properly contained in the local comm buffer
  if (cflag - RCCE_comm_buffer[RCCE_IAM]>=0 &&
      cflag+RCCE_LINE_SIZE - (RCCE_comm_buffer[RCCE_IAM] + RCCE_BUFF_SIZE)<0){}
  else {
    return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_NOT_IN_COMM_BUFFER));
  }
#endif
#ifdef USE_REVERTED_FLAGS
  // the flag value lives in the last int of the line
  flag = flag + RCCE_LINE_SIZE / sizeof(int) - 1;
#endif
  // always flush/invalidate to ensure we read the most recent value of *flag
  // keep reading it until it has the required value. We only need to read a
  // single int of the MPB cache line containing the flag
#ifndef USE_FLAG_EXPERIMENTAL
  do {
#ifdef _OPENMP
#pragma omp flush
#endif
    RC_cache_invalidate();
  } while ((*flag) != val);
#else
  // pointers must be printed with %p (and passed as void *); %X expects an
  // unsigned int and would truncate the address on 64-bit targets
  if (RCCE_debug_synch)
    fprintf(STDERR,"UE %d wait flag: %x from address %p \n", RCCE_IAM,val,(void *)flag);
  // poll the flag at the same offset inside RCCE_flag_buffer, without cache invalidation
  flag = RCCE_flag_buffer[RCCE_IAM]+(flag-RCCE_comm_buffer[RCCE_IAM]);
  while ((*flag) != val);
#endif
  return(RCCE_SUCCESS);
}
#ifdef USE_TAGGED_FLAGS
// Spin until the flag has the value "val", then copy the tag stored in the
// flag's line into "tag".
//
// flag : flag to wait on
// val  : value to wait for
// tag  : destination for the tag bytes, or NULL if the tag is not wanted
// len  : number of tag bytes to copy; clamped to RCCE_LINE_SIZE - sizeof(int),
//        values <= 0 copy nothing
//
// Without USE_REVERTED_FLAGS the flag value is the first int of the line and
// the tag follows it; with USE_REVERTED_FLAGS the flag value is the last int
// and the tag starts at the beginning of the line. Returns RCCE_SUCCESS.
int RCCE_wait_tagged(RCCE_FLAG flag, RCCE_FLAG_STATUS val, void *tag, int len) {
  RCCE_FLAG flag_pos; // address of the int that holds the flag value
#ifndef USE_REVERTED_FLAGS
  flag_pos = flag;
#else
  flag_pos = flag + RCCE_LINE_SIZE / sizeof(int) - 1;
#endif
  // flush/invalidate before every read so that a fresh flag value is seen
  do {
#ifdef _OPENMP
#pragma omp flush
#endif
    RC_cache_invalidate();
  } while ((*flag_pos) != val);
  // len is signed: test it before the unsigned comparison below, otherwise a
  // negative length would be converted to a huge value and clamped to the maximum
  if(tag && len > 0) {
    if( (size_t)len > ( RCCE_LINE_SIZE - sizeof(int) ) ) len = (int)(RCCE_LINE_SIZE - sizeof(int));
#ifndef USE_REVERTED_FLAGS
    memcpy_scc(tag, &((char*)flag)[sizeof(int)], len);
#else
    memcpy_scc(tag, &((char*)flag)[0], len);
#endif
  }
  return(RCCE_SUCCESS);
}
#endif
int RCCE_test_flag(RCCE_FLAG flag, RCCE_FLAG_STATUS val, int *result) {
  // One-shot, non-blocking probe of a flag: the flag is sampled exactly once and
  // *result is set to 1 if it equals "val", to 0 otherwise.
  // Returns RCCE_SUCCESS, or (GORY builds only) the result of RCCE_error_return()
  // when an argument check fails.
#ifdef GORY
  t_vcharp flag_addr = (t_vcharp) flag;

  // only the two defined flag states may be tested for
  if (val != RCCE_FLAG_UNSET && val != RCCE_FLAG_SET)
    return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_STATUS_UNDEFINED));
  if (!flag_addr)
    return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_NOT_ALLOCATED));
  // reject flags whose line is not fully inside the local comm buffer
  if (flag_addr - RCCE_comm_buffer[RCCE_IAM] < 0 ||
      flag_addr+RCCE_LINE_SIZE - (RCCE_comm_buffer[RCCE_IAM] + RCCE_BUFF_SIZE) >= 0)
    return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_NOT_IN_COMM_BUFFER));
#endif

#ifdef USE_REVERTED_FLAGS
  // reverted layout: the status word is looked up further into the line
  flag = flag + RCCE_LINE_SIZE / sizeof(int) - 1;
#endif

  // flush/invalidate so that the single read below does not hit a stale copy
#ifdef _OPENMP
  #pragma omp flush
#endif
#ifndef USE_FLAG_EXPERIMENTAL
  RC_cache_invalidate();
#endif

  (*result) = ((*flag) == val) ? 1 : 0;
  return(RCCE_SUCCESS);
}
#ifdef USE_TAGGED_FLAGS
int RCCE_test_tagged(RCCE_FLAG flag, RCCE_FLAG_STATUS val, int *result, void *tag, int len) {
  // Non-blocking counterpart of RCCE_wait_tagged(): sample the flag word once,
  // report in *result (1 = equals "val", 0 = differs) and, only on a match with
  // a non-NULL "tag", copy at most (RCCE_LINE_SIZE - sizeof(int)) tag bytes out.
  // Always returns RCCE_SUCCESS.
#ifdef USE_REVERTED_FLAGS
  // reverted layout: status word sits at the end of the line, tag bytes at the start
  RCCE_FLAG status_word = flag + RCCE_LINE_SIZE / sizeof(int) -1;
  char *tag_bytes = &((char*)flag)[0];
#else
  // default layout: status word first, tag bytes right behind it
  RCCE_FLAG status_word = flag;
  char *tag_bytes = &((char*)flag)[sizeof(int)];
#endif
  int matched;

  RC_cache_invalidate();
  matched = ((*status_word) == val) ? 1 : 0;
  (*result) = matched;

  // the tag is only delivered together with a successful test
  if (!matched || !tag) return(RCCE_SUCCESS);

  if( len > ( RCCE_LINE_SIZE - sizeof(int) ) ) len = RCCE_LINE_SIZE - sizeof(int);
  memcpy_scc(tag, tag_bytes, len);
  return(RCCE_SUCCESS);
}
#endif
//--------------------------------------------------------------------------------------
// FUNCTION: RCCE_barrier
//--------------------------------------------------------------------------------------
// very simple, linear barrier
//--------------------------------------------------------------------------------------
int RCCE_barrier(RCCE_COMM *comm) {
// Blocking linear barrier over the UEs of "comm".
// Every UE flips the first byte of its gather line; the UE comm->member[0] (ROOT)
// polls until all other members show the flipped value and then writes the release
// flag of every other member, while the non-root members spin on their release flag.
// Returns RCCE_SUCCESS, or the result of RCCE_error_return() as soon as one of the
// underlying get/put/flag calls reports an error.
int counter, i, error;
int ROOT = 0;
// line-sized scratch copies of a gather line; only the first byte is interpreted
volatile unsigned char cyclechar[RCCE_LINE_SIZE];
volatile unsigned char valchar[RCCE_LINE_SIZE];
volatile char *cycle;
volatile char *val;
counter = 0;
cycle = (volatile char *)cyclechar;
val = (volatile char *)valchar;
if (RCCE_debug_synch)
fprintf(STDERR,"UE %d has checked into barrier\n", RCCE_IAM);
#ifdef USE_FAT_BARRIER
// fat barrier: comm->gather is an array of lines indexed per UE
// flip local barrier variable
#ifndef USE_FLAG_EXPERIMENTAL
if ((error = RCCE_get(cyclechar, (t_vcharp)(comm->gather[RCCE_IAM]), RCCE_LINE_SIZE, RCCE_IAM)))
#else
if ((error = RCCE_get_flag(cyclechar, (t_vcharp)(comm->gather[RCCE_IAM]), RCCE_LINE_SIZE, RCCE_IAM)))
#endif
return(RCCE_error_return(RCCE_debug_synch,error));
*cycle = !(*cycle);
#ifndef USE_FLAG_EXPERIMENTAL
if ((error = RCCE_put((t_vcharp)(comm->gather[RCCE_IAM]), cyclechar, RCCE_LINE_SIZE, RCCE_IAM)))
#else
if ((error = RCCE_put_flag((t_vcharp)(comm->gather[RCCE_IAM]), cyclechar, RCCE_LINE_SIZE, RCCE_IAM)))
#endif
return(RCCE_error_return(RCCE_debug_synch,error));
// additionally publish the flipped line in the ROOT UE's buffer, so that ROOT can
// poll all gather lines locally (see the RCCE_IAM target of the reads below)
if ((error = RCCE_put((t_vcharp)(comm->gather[RCCE_IAM]), cyclechar, RCCE_LINE_SIZE, comm->member[ROOT])))
return(RCCE_error_return(RCCE_debug_synch,error));
if (RCCE_IAM==comm->member[ROOT]) {
// read "remote" gather flags; once all equal "cycle" (i.e counter==comm->size),
// we know all UEs have reached the barrier
while (counter != comm->size) {
// skip the first member (#0), because that is the ROOT
// (counter starts at 1 so that ROOT itself is counted as arrived)
for (counter=i=1; i<comm->size; i++) {
/* copy flag values out of comm buffer */
// NOTE(review): the writers above index gather[] by RCCE_IAM while this loop uses
// the communicator-local index i; these only line up if comm->member[i] == i --
// confirm for communicators other than the world communicator
#ifndef USE_FLAG_EXPERIMENTAL
if ((error = RCCE_get(valchar, (t_vcharp)(comm->gather[i]), RCCE_LINE_SIZE, RCCE_IAM)))
#else
if ((error = RCCE_get_flag(valchar, (t_vcharp)(comm->gather[i]), RCCE_LINE_SIZE, RCCE_IAM)))
#endif
return(RCCE_error_return(RCCE_debug_synch,error));
if (*val == *cycle) counter++;
}
}
// set release flags
for (i=1; i<comm->size; i++) {
if ((error = RCCE_flag_write(&(comm->release), *cycle, comm->member[i])))
return(RCCE_error_return(RCCE_debug_synch,error));
}
}
else {
// non-root members block until ROOT has written the current cycle value
if ((error = RCCE_wait_until(comm->release, *cycle)))
return(RCCE_error_return(RCCE_debug_synch,error));
}
#else // !USE_FAT_BARRIER
// plain barrier: comm->gather is a single line; ROOT reads it from every member
// flip local barrier variable
#ifndef USE_FLAG_EXPERIMENTAL
if (error = RCCE_get(cyclechar, (t_vcharp)(comm->gather), RCCE_LINE_SIZE, RCCE_IAM))
#else
if (error = RCCE_get_flag(cyclechar, (t_vcharp)(comm->gather), RCCE_LINE_SIZE, RCCE_IAM))
#endif
return(RCCE_error_return(RCCE_debug_synch,error));
*cycle = !(*cycle);
#ifndef USE_FLAG_EXPERIMENTAL
if (error = RCCE_put((t_vcharp)(comm->gather), cyclechar, RCCE_LINE_SIZE, RCCE_IAM))
#else
if (error = RCCE_put_flag((t_vcharp)(comm->gather), cyclechar, RCCE_LINE_SIZE, RCCE_IAM))
#endif
return(RCCE_error_return(RCCE_debug_synch,error));
if (RCCE_IAM==comm->member[ROOT]) {
// read "remote" gather flags; once all equal "cycle" (i.e counter==comm->size),
// we know all UEs have reached the barrier
while (counter != comm->size) {
// skip the first member (#0), because that is the ROOT
// (counter starts at 1 so that ROOT itself is counted as arrived)
for (counter=i=1; i<comm->size; i++) {
/* copy flag values out of comm buffer */
#ifndef USE_FLAG_EXPERIMENTAL
if (error = RCCE_get(valchar, (t_vcharp)(comm->gather), RCCE_LINE_SIZE,
comm->member[i]))
#else
if (error = RCCE_get_flag(valchar, (t_vcharp)(comm->gather), RCCE_LINE_SIZE,
comm->member[i]))
#endif
return(RCCE_error_return(RCCE_debug_synch,error));
if (*val == *cycle) counter++;
}
}
// set release flags
for (i=1; i<comm->size; i++) {
if (error = RCCE_flag_write(&(comm->release), *cycle, comm->member[i]))
return(RCCE_error_return(RCCE_debug_synch,error));
}
}
else {
// non-root members block until ROOT has written the current cycle value
if (error = RCCE_wait_until(comm->release, *cycle)) {
return(RCCE_error_return(RCCE_debug_synch,error));
}
}
#endif // !USE_FAT_BARRIER
if (RCCE_debug_synch) fprintf(STDERR,"UE %d has cleared barrier\n", RCCE_IAM);
return(RCCE_SUCCESS);
}
//--------------------------------------------------------------------------------------
// FUNCTION: RCCE_nb_barrier
//--------------------------------------------------------------------------------------
// non-blocking version of the linear barrier
//--------------------------------------------------------------------------------------
int RCCE_nb_barrier(RCCE_COMM *comm) {
// Non-blocking variant of RCCE_barrier(): instead of spinning, the function
// returns RCCE_PENDING and has to be called again with the same "comm" until it
// returns RCCE_SUCCESS. Progress is kept in the communicator:
//   comm->label - where to resume (0 = fresh call, 1 = ROOT polling, 2 = waiting for release)
//   comm->count - number of members seen at the barrier by ROOT
//   comm->cycle - cycle value of the barrier in progress
// Errors of the underlying get/put/flag calls are returned via RCCE_error_return().
int i, error;
int ROOT = 0;
volatile unsigned char cyclechar[RCCE_LINE_SIZE];
volatile unsigned char valchar[RCCE_LINE_SIZE];
// the scratch lines are interpreted as char or as int, depending on the build
#ifdef USE_FLAG_EXPERIMENTAL
volatile char *cycle;
volatile char *val;
cycle = (volatile char *)cyclechar;
val = (volatile char *)valchar;
#else
volatile int *cycle;
volatile int *val;
cycle = (volatile int *)cyclechar;
val = (volatile int *)valchar;
#endif
// re-entry: jump straight to the point where the previous call returned RCCE_PENDING;
// cyclechar is not re-read in that case, which is why the code behind the labels
// works with comm->cycle instead of *cycle
if(comm->label == 1) goto label1;
if(comm->label == 2) goto label2;
comm->count = 0;
if (RCCE_debug_synch)
fprintf(STDERR,"UE %d has checked into barrier\n", RCCE_IAM);
#ifdef USE_FAT_BARRIER
// flip local barrier variable
#ifndef USE_FLAG_EXPERIMENTAL
if ((error = RCCE_get(cyclechar, (t_vcharp)(comm->gather[RCCE_IAM]), RCCE_LINE_SIZE, RCCE_IAM)))
#else
if ((error = RCCE_get_flag(cyclechar, (t_vcharp)(comm->gather[RCCE_IAM]), RCCE_LINE_SIZE, RCCE_IAM)))
#endif
return(RCCE_error_return(RCCE_debug_synch,error));
*cycle = !(*cycle);
#ifndef USE_FLAG_EXPERIMENTAL
if ((error = RCCE_put((t_vcharp)(comm->gather[RCCE_IAM]), cyclechar, RCCE_LINE_SIZE, RCCE_IAM)))
#else
if ((error = RCCE_put_flag((t_vcharp)(comm->gather[RCCE_IAM]), cyclechar, RCCE_LINE_SIZE, RCCE_IAM)))
#endif
return(RCCE_error_return(RCCE_debug_synch,error));
// additionally publish the flipped line in the ROOT UE's buffer
if ((error = RCCE_put((t_vcharp)(comm->gather[RCCE_IAM]), cyclechar, RCCE_LINE_SIZE, comm->member[ROOT])))
return(RCCE_error_return(RCCE_debug_synch,error));
if (RCCE_IAM==comm->member[ROOT]) {
// read "remote" gather flags; once all equal "cycle" (i.e counter==comm->size),
// we know all UEs have reached the barrier
comm->cycle = *cycle;
label1:
// the loop body runs at most once per call: either all members have arrived,
// or the function returns RCCE_PENDING and is resumed at label1 later
while (comm->count != comm->size) {
// skip the first member (#0), because that is the ROOT
for (comm->count=i=1; i<comm->size; i++) {
/* copy flag values out of comm buffer */
#ifndef USE_FLAG_EXPERIMENTAL
if ((error = RCCE_get(valchar, (t_vcharp)(comm->gather[i]), RCCE_LINE_SIZE, RCCE_IAM)))
#else
if ((error = RCCE_get_flag(valchar, (t_vcharp)(comm->gather[i]), RCCE_LINE_SIZE, RCCE_IAM)))
#endif
return(RCCE_error_return(RCCE_debug_synch,error));
if (*val == comm->cycle) comm->count++;
}
if(comm->count != comm->size) {
comm->label = 1;
return(RCCE_PENDING);
}
}
// set release flags
for (i=1; i<comm->size; i++) {
if ((error = RCCE_flag_write(&(comm->release), comm->cycle, comm->member[i])))
return(RCCE_error_return(RCCE_debug_synch,error));
}
}
else {
int test;
comm->cycle = *cycle;
label2:
// single probe of the release flag; the return value of RCCE_test_flag() is ignored
RCCE_test_flag(comm->release, comm->cycle, &test);
if(!test) {
comm->label = 2;
return(RCCE_PENDING);
}
}
// barrier completed: the next call starts a fresh barrier
comm->label = 0;
#else // !USE_FAT_BARRIER
// flip local barrier variable
#ifndef USE_FLAG_EXPERIMENTAL
if (error = RCCE_get(cyclechar, (t_vcharp)(comm->gather[0]), RCCE_LINE_SIZE, RCCE_IAM))
#else
if (error = RCCE_get_flag(cyclechar, (t_vcharp)(comm->gather[0]), RCCE_LINE_SIZE, RCCE_IAM))
#endif
return(RCCE_error_return(RCCE_debug_synch,error));
*cycle = !(*cycle);
#ifndef USE_FLAG_EXPERIMENTAL
if (error = RCCE_put((t_vcharp)(comm->gather[0]), cyclechar, RCCE_LINE_SIZE, RCCE_IAM))
#else
if (error = RCCE_put_flag((t_vcharp)(comm->gather[0]), cyclechar, RCCE_LINE_SIZE, RCCE_IAM))
#endif
return(RCCE_error_return(RCCE_debug_synch,error));
if (RCCE_IAM==comm->member[ROOT]) {
// read "remote" gather flags; once all equal "cycle" (i.e counter==comm->size),
// we know all UEs have reached the barrier
comm->cycle = *cycle;
label1:
// the loop body runs at most once per call: either all members have arrived,
// or the function returns RCCE_PENDING and is resumed at label1 later
while (comm->count != comm->size) {
// skip the first member (#0), because that is the ROOT
for (comm->count=i=1; i<comm->size; i++) {
/* copy flag values out of comm buffer */
#ifndef USE_FLAG_EXPERIMENTAL
if (error = RCCE_get(valchar, (t_vcharp)(comm->gather[0]), RCCE_LINE_SIZE,
comm->member[i]))
#else
if (error = RCCE_get_flag(valchar, (t_vcharp)(comm->gather[0]), RCCE_LINE_SIZE,
comm->member[i]))
#endif
return(RCCE_error_return(RCCE_debug_synch,error));
if (*val == comm->cycle) comm->count++;
}
if(comm->count != comm->size) {
comm->label = 1;
return(RCCE_PENDING);
}
}
// set release flags
for (i=1; i<comm->size; i++) {
if (error = RCCE_flag_write(&(comm->release), comm->cycle, comm->member[i]))
return(RCCE_error_return(RCCE_debug_synch,error));
}
}
else {
int test;
comm->cycle = *cycle;
label2:
// single probe of the release flag; the return value of RCCE_test_flag() is ignored
RCCE_test_flag(comm->release, comm->cycle, &test);
if(!test) {
comm->label = 2;
return(RCCE_PENDING);
}
}
// barrier completed: the next call starts a fresh barrier
comm->label = 0;
#endif // !USE_FAT_BARRIER
if (RCCE_debug_synch) fprintf(STDERR,"UE %d has cleared barrier\n", RCCE_IAM);
return(RCCE_SUCCESS);
}
#endif
// Fence placeholder: takes no arguments, performs no operation and returns nothing.
void RCCE_fence() {
}
#endif

290
hermit/usr/ircce/iRCCE.h Normal file
View file

@ -0,0 +1,290 @@
//
// Copyright 2010 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// [2010-10-25] added support for non-blocking send/recv operations
// - iRCCE_isend(), ..._test(), ..._wait(), ..._push()
// - iRCCE_irecv(), ..._test(), ..._wait(), ..._push()
// by Carsten Clauss, Chair for Operating Systems,
// RWTH Aachen University
//
// [2010-11-12] extracted non-blocking code into separate library
// by Carsten Scholtes, University of Bayreuth
//
// [2010-12-09] added functions for a convenient handling of multiple
// pending non-blocking requests
// by Jacek Galowicz, Chair for Operating Systems
// RWTH Aachen University
//
// [2011-04-19] added wildcard mechanism (iRCCE_ANY_SOURCE) for receiving
// a message from an arbitrary remote rank
// by Simon Pickartz, Chair for Operating Systems,
// RWTH Aachen University
//
// [2011-06-16] iRCCE_ANY_LENGTH wildcard mechanism can only be used in
// the SINGLEBITFLAGS=0 case (-> bigflags must be enabled!)
//
// [2011-06-27] merged iRCCE_ANY_SOURCE branch with trunk (iRCCE_ANY_LENGTH)
//
// [2011-11-03] - renamed blocking (pipelined) send/recv functions to
// iRCCE_ssend() / iRCCE_srecv() (strictly synchronous!)
// - added non-blocking but synchronous send/recv functions:
// iRCCE_issend() / iRCCE_isrecv()
// by Carsten Clauss, Chair for Operating Systems,
// RWTH Aachen University
//
// [2012-10-29] - added functions for handling "Tagged Flags"
// iRCCE_flag_read/write_tagged(), iRCCE_test/wait_tagged()
// by Carsten Clauss, Chair for Operating Systems,
// RWTH Aachen University
//
#ifndef IRCCE_H
#define IRCCE_H
#include "RCCE.h"
#define iRCCE_VERSION "2.0"
#define iRCCE_FLAIR
#define iRCCE_SUCCESS RCCE_SUCCESS
#define iRCCE_ERROR -1
#define iRCCE_PENDING -2
#define iRCCE_RESERVED -3
#define iRCCE_NOT_ENQUEUED -4
#if !defined(SINGLEBITFLAGS) && !defined(RCCE_VERSION)
#define _iRCCE_ANY_LENGTH_
extern const int iRCCE_ANY_LENGTH;
#endif
#if !defined(SINGLEBITFLAGS)
#ifdef _OPENMP
#define iRCCE_MAX_TAGGED_LEN (RCCE_LINE_SIZE - 2 * sizeof(int))
#else
#define iRCCE_MAX_TAGGED_LEN (RCCE_LINE_SIZE - sizeof(int))
#endif
#endif
extern const int iRCCE_ANY_SOURCE;
// State of one non-blocking send operation. The label/finished members allow the
// send to be driven forward in several steps (see the comments on those members).
typedef struct _iRCCE_SEND_REQUEST {
char *privbuf; // source buffer in local private memory (send buffer)
t_vcharp combuf; // intermediate buffer in MPB
size_t chunk; // size of MPB available for this message (bytes)
size_t subchunk1; // sub-chunks for the pipelined message transfer
size_t subchunk2; // (second sub-chunk, see subchunk1)
RCCE_FLAG *ready; // flag indicating whether receiver is ready
RCCE_FLAG *sent; // flag indicating whether message has been sent by source
RCCE_FLAG_STATUS flag_set_value; // used for iRCCE_ANY_LENGTH wildcard
size_t size; // size of message (bytes)
int dest; // UE that will receive the message
int sync; // flag indicating whether send is synchronous or not
size_t wsize; // offset within send buffer when putting in "chunk" bytes
size_t remainder; // bytes remaining to be sent
size_t nbytes; // number of bytes to be sent in single RCCE_put call
char *bufptr; // running pointer inside privbuf for current location
int label; // jump/goto label for the reentrance of the respective poll function
int finished; // flag that indicates whether the request has already been finished
struct _iRCCE_SEND_REQUEST *next; // next send request (presumably the link of the send queue -- confirm)
} iRCCE_SEND_REQUEST;
// State of one non-blocking receive operation; mirrors iRCCE_SEND_REQUEST, with
// "source" in place of "dest" and an additional "started" member.
// NOTE(review): several member comments were copied verbatim from the send request;
// they are reworded for the receive side below on the assumption that the members
// play the mirrored role -- confirm against the receive implementation.
typedef struct _iRCCE_RECV_REQUEST {
char *privbuf; // buffer in local private memory (presumably the receive buffer)
t_vcharp combuf; // intermediate buffer in MPB
size_t chunk; // size of MPB available for this message (bytes)
size_t subchunk1; // sub-chunks for the pipelined message transfer
size_t subchunk2; // (second sub-chunk, see subchunk1)
RCCE_FLAG *ready; // flag indicating whether receiver is ready
RCCE_FLAG *sent; // flag indicating whether message has been sent by source
RCCE_FLAG_STATUS flag_set_value; // used for iRCCE_ANY_LENGTH wildcard
size_t size; // size of message (bytes)
int source; // UE that will send the message
int sync; // flag indicating whether recv is synchronous or not
size_t wsize; // offset within the buffer when handling "chunk" bytes
size_t remainder; // bytes remaining to be transferred
size_t nbytes; // number of bytes to be transferred in a single call (presumably RCCE_get)
char *bufptr; // running pointer inside privbuf for current location
int label; // jump/goto label for the reentrance of the respective poll function
int finished; // flag that indicates whether the request has already been finished
int started; // flag that indicates whether message parts have already been received
struct _iRCCE_RECV_REQUEST *next; // next receive request (presumably the link of a receive queue -- confirm)
} iRCCE_RECV_REQUEST;
// Values for iRCCE_WAIT_LISTELEM.type
// (presumably telling whether "req" refers to a receive or a send request -- confirm)
#define iRCCE_WAIT_LIST_RECV_TYPE 0
#define iRCCE_WAIT_LIST_SEND_TYPE 1
// One entry of a wait list
typedef struct _iRCCE_WAIT_LISTELEM {
int type; // one of the iRCCE_WAIT_LIST_*_TYPE values above
struct _iRCCE_WAIT_LISTELEM * next; // following list entry
void * req; // untyped pointer to the request this entry stands for
} iRCCE_WAIT_LISTELEM;
// A wait list, referenced through its first and last entry
typedef struct _iRCCE_WAIT_LIST {
iRCCE_WAIT_LISTELEM * first;
iRCCE_WAIT_LISTELEM * last;
} iRCCE_WAIT_LIST;
#ifdef AIR
// Handle for one Atomic Increment Register (AIR); only declared in AIR builds.
// The whole struct type is volatile-qualified.
typedef volatile struct _iRCCE_AIR {
#ifndef _OPENMP
// non-OpenMP builds: pointers to the two int-sized locations of the register
int * counter;
int * init;
#else
// OpenMP builds: the register is emulated by two plain ints
int counter;
int init;
#endif
} iRCCE_AIR;
#endif
///////////////////////////////////////////////////////////////
//
// THE iRCCE API:
//
// Initialize function:
int iRCCE_init(void);
//
// Non-blocking send/recv functions:
int iRCCE_isend(char *, ssize_t, int, iRCCE_SEND_REQUEST *);
int iRCCE_isend_test(iRCCE_SEND_REQUEST *, int *);
int iRCCE_isend_wait(iRCCE_SEND_REQUEST *);
int iRCCE_isend_push(void);
int iRCCE_irecv(char *, ssize_t, int, iRCCE_RECV_REQUEST *);
int iRCCE_irecv_test(iRCCE_RECV_REQUEST *, int *);
int iRCCE_irecv_wait(iRCCE_RECV_REQUEST *);
int iRCCE_irecv_push(void);
//
// Pipelined send/recv functions: (synchronous and blocking)
int iRCCE_ssend(char *, ssize_t, int);
int iRCCE_srecv(char *, ssize_t, int);
int iRCCE_srecv_test(char *, ssize_t, int, int*);
//
// Non-blocking pipelined send/recv functions:
int iRCCE_issend(char *, ssize_t, int, iRCCE_SEND_REQUEST *);
int iRCCE_isrecv(char *, ssize_t, int, iRCCE_RECV_REQUEST *);
//
// SCC-customized put/get and memcpy functions:
int iRCCE_put(t_vcharp, t_vcharp, int, int);
int iRCCE_get(t_vcharp, t_vcharp, int, int);
void* iRCCE_memcpy_put(void*, const void*, size_t);
void* iRCCE_memcpy_get(void*, const void*, size_t);
t_vcharp iRCCE_malloc(size_t);
#define iRCCE_memcpy iRCCE_memcpy_put
//
// Blocking and non-blocking 'probe' functions for incoming messages:
int iRCCE_probe(int, int*);
int iRCCE_iprobe(int, int*, int*);
//
// Wait/test-all/any functions:
void iRCCE_init_wait_list(iRCCE_WAIT_LIST*);
void iRCCE_add_to_wait_list(iRCCE_WAIT_LIST*, iRCCE_SEND_REQUEST *, iRCCE_RECV_REQUEST *);
int iRCCE_test_all(iRCCE_WAIT_LIST*, int *);
int iRCCE_wait_all(iRCCE_WAIT_LIST*);
int iRCCE_test_any(iRCCE_WAIT_LIST*, iRCCE_SEND_REQUEST **, iRCCE_RECV_REQUEST **);
int iRCCE_wait_any(iRCCE_WAIT_LIST*, iRCCE_SEND_REQUEST **, iRCCE_RECV_REQUEST **);
//
// Query functions for request handle parameters:
int iRCCE_get_dest(iRCCE_SEND_REQUEST*);
int iRCCE_get_source(iRCCE_RECV_REQUEST*);
int iRCCE_get_size(iRCCE_SEND_REQUEST*, iRCCE_RECV_REQUEST*);
int iRCCE_get_length(void);
//
// Cancel functions for not yet started non-blocking requests:
int iRCCE_isend_cancel(iRCCE_SEND_REQUEST *, int *);
int iRCCE_irecv_cancel(iRCCE_RECV_REQUEST *, int *);
//
// Functions for handling tagged flags: (need whole cache line per flag)
#ifndef SINGLEBITFLAGS
int iRCCE_flag_alloc_tagged(RCCE_FLAG *);
int iRCCE_flag_write_tagged(RCCE_FLAG *, RCCE_FLAG_STATUS, int, void *, int);
int iRCCE_flag_read_tagged(RCCE_FLAG, RCCE_FLAG_STATUS *, int, void *, int);
int iRCCE_wait_tagged(RCCE_FLAG, RCCE_FLAG_STATUS, void *, int);
int iRCCE_test_tagged(RCCE_FLAG, RCCE_FLAG_STATUS, int *, void *, int);
int iRCCE_get_max_tagged_len(void);
#endif
//
// Functions for handling Atomic Increment Registers (AIR):
#ifdef AIR
int iRCCE_atomic_alloc(iRCCE_AIR **);
int iRCCE_atomic_inc(iRCCE_AIR*, int*);
int iRCCE_atomic_read(iRCCE_AIR*, int*);
int iRCCE_atomic_write(iRCCE_AIR*, int);
#endif
//
// Improved Collectives:
int iRCCE_barrier(RCCE_COMM*);
int iRCCE_bcast(char *, size_t, int, RCCE_COMM);
int iRCCE_mcast(char *, size_t, int);
int iRCCE_msend(char *, ssize_t);
int iRCCE_mrecv(char *, ssize_t, int);
//
// Functions from the GORY RCCE interface mapped to iRCCE:
t_vcharp iRCCE_malloc(size_t);
int iRCCE_flag_alloc(RCCE_FLAG *);
int iRCCE_flag_write(RCCE_FLAG *, RCCE_FLAG_STATUS, int);
int iRCCE_flag_read(RCCE_FLAG, RCCE_FLAG_STATUS *, int);
int iRCCE_wait_until(RCCE_FLAG, RCCE_FLAG_STATUS);
//
// Please Note: Since we're running in NON-GORY mode, there are no "free()" functions!
//
///////////////////////////////////////////////////////////////
//
// Just for convenience:
#if 1
#define RCCE_isend iRCCE_isend
#define RCCE_isend_test iRCCE_isend_test
#define RCCE_isend_wait iRCCE_isend_wait
#define RCCE_isend_push iRCCE_isend_push
#define RCCE_irecv iRCCE_irecv
#define RCCE_irecv_test iRCCE_irecv_test
#define RCCE_irecv_wait iRCCE_irecv_wait
#define RCCE_irecv_push iRCCE_irecv_push
#define RCCE_SEND_REQUEST iRCCE_SEND_REQUEST
#define RCCE_RECV_REQUEST iRCCE_RECV_REQUEST
#ifdef _iRCCE_TAGGED_FLAGS_
#define RCCE_flag_write_tagged iRCCE_flag_write_tagged
#define RCCE_flag_read_tagged iRCCE_flag_read_tagged
#define RCCE_wait_tagged iRCCE_wait_tagged
#define RCCE_test_tagged iRCCE_test_tagged
#define RCCE_flag_alloc_tagged iRCCE_flag_alloc_tagged
#define RCCE_flag_free_tagged iRCCE_flag_free_tagged
#endif
#endif
//
#if 1
#define iRCCE_send iRCCE_ssend
#define iRCCE_recv iRCCE_srecv
#define iRCCE_recv_test iRCCE_srecv_test
#endif
//
#if 1
#define iRCCE_issend_test iRCCE_isend_test
#define iRCCE_issend_wait iRCCE_isend_wait
#define iRCCE_issend_push iRCCE_isend_push
#define iRCCE_isrecv_test iRCCE_irecv_test
#define iRCCE_isrecv_wait iRCCE_irecv_wait
#define iRCCE_isrecv_push iRCCE_irecv_push
#endif
//
///////////////////////////////////////////////////////////////
#endif

View file

@ -0,0 +1,195 @@
//***************************************************************************************
// Administrative routines.
//***************************************************************************************
//
// Author: Rob F. Van der Wijngaart
// Intel Corporation
// Date: 008/30/2010
//
//***************************************************************************************
//
//
// Copyright 2010 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// [2010-10-25] added support for non-blocking send/recv operations
// - iRCCE_isend(), ..._test(), ..._wait(), ..._push()
// - iRCCE_irecv(), ..._test(), ..._wait(), ..._push()
// by Carsten Clauss, Chair for Operating Systems,
// RWTH Aachen University
//
// [2010-11-12] extracted non-blocking code into separate library
// by Carsten Scholtes
//
// [2011-02-21] added support for multiple incoming queues
// (one recv queue per remote rank)
//
// [2011-04-19] added wildcard mechanism (iRCCE_ANY_SOURCE) for receiving
// a message from an arbitrary remote rank
// by Simon Pickartz, Chair for Operating Systems,
// RWTH Aachen University
//
// [2011-06-27] merged iRCCE_ANY_SOURCE branch with trunk (iRCCE_ANY_LENGTH)
//
#include "RCCE.h"
#if defined(SCC) && !defined(__hermit__)
#include "SCC_API.h"
#endif
#include "iRCCE_lib.h"
// send request queue
iRCCE_SEND_REQUEST* iRCCE_isend_queue;
// recv request queues (one slot per possible rank, RCCE_MAXNP in total)
iRCCE_RECV_REQUEST* iRCCE_irecv_queue[RCCE_MAXNP];
// recv request queue for those with source = iRCCE_ANY_SOURCE
iRCCE_RECV_REQUEST* iRCCE_irecv_any_source_queue;
// global variables for inquiring recent source rank and recent message length
int iRCCE_recent_source = -1;
int iRCCE_recent_length = 0;
#ifdef _iRCCE_ANY_LENGTH_
// NOTE(review): right-shifting a negative int is implementation-defined in C; with an
// arithmetic shift the expression below evaluates to -1, i.e. the same value as
// iRCCE_ANY_SOURCE -- confirm that this is the intended wildcard value
const int iRCCE_ANY_LENGTH = -1 >> 1;
#endif
const int iRCCE_ANY_SOURCE = -1;
#ifdef AIR
// register handles: two sets of RCCE_MAXNP entries each
iRCCE_AIR iRCCE_atomic_inc_regs[2*RCCE_MAXNP];
// number of registers handed out so far by iRCCE_atomic_alloc()
int iRCCE_atomic_alloc_counter = 0;
// the two registers used alternately by iRCCE_barrier()
iRCCE_AIR* iRCCE_atomic_barrier[2];
#endif
//--------------------------------------------------------------------------------------
// FUNCTION: iRCCE_init
//--------------------------------------------------------------------------------------
// initialize the library
//--------------------------------------------------------------------------------------
int iRCCE_init(void)
{
// Reset the request queues, set up the Atomic Increment Registers (AIR builds only)
// and finish with a barrier over RCCE_COMM_WORLD.
// Always returns iRCCE_SUCCESS: the return values of iRCCE_atomic_alloc() and
// RCCE_barrier() are not checked.
int i;
#ifdef AIR
#ifndef _OPENMP
// base of the first register set
int * air_base = (int *) MallocConfigReg(FPGA_BASE + 0xE000);
#endif
#endif
// start with empty receive and send queues
for(i=0; i<RCCE_MAXNP; i++) {
iRCCE_irecv_queue[i] = NULL;
}
iRCCE_isend_queue = NULL;
iRCCE_irecv_any_source_queue = NULL;
#ifdef AIR
#ifndef _OPENMP
// Assign and Initialize First Set of Atomic Increment Registers
// (register i uses two consecutive ints: counter at 2*i, init at 2*i + 1)
for (i = 0; i < RCCE_MAXNP; i++)
{
iRCCE_atomic_inc_regs[i].counter = air_base + 2*i;
iRCCE_atomic_inc_regs[i].init = air_base + 2*i + 1;
// only UE 0 clears the registers
if(RCCE_IAM == 0)
*iRCCE_atomic_inc_regs[i].init = 0;
}
// Assign and Initialize Second Set of Atomic Increment Registers
air_base = (int *) MallocConfigReg(FPGA_BASE + 0xF000);
for (i = 0; i < RCCE_MAXNP; i++)
{
iRCCE_atomic_inc_regs[RCCE_MAXNP+i].counter = air_base + 2*i;
iRCCE_atomic_inc_regs[RCCE_MAXNP+i].init = air_base + 2*i + 1;
// only UE 0 clears the registers
if(RCCE_IAM == 0)
*iRCCE_atomic_inc_regs[RCCE_MAXNP+i].init = 0;
}
#endif
// We need two AIRs for iRCCE_barrier();
iRCCE_atomic_alloc(&iRCCE_atomic_barrier[0]);
iRCCE_atomic_alloc(&iRCCE_atomic_barrier[1]);
#endif
// no UE leaves the initialization before all UEs have reached this point
RCCE_barrier(&RCCE_COMM_WORLD);
return (iRCCE_SUCCESS);
}
//--------------------------------------------------------------------------------------
// Functions from the GORY RCCE interface:
//--------------------------------------------------------------------------------------
// ... (more or less) just wrapped by respective iRCCE functions
//--------------------------------------------------------------------------------------
t_vcharp iRCCE_malloc(size_t size)
{
  // Hand out "size" bytes starting at the current RCCE_flags_start.
  // "size" has to be a multiple of RCCE_LINE_SIZE and must not exceed RCCE_chunk;
  // otherwise NULL is returned and nothing is changed. On success the flag-area
  // and payload pointers advance by "size" and the payload chunk shrinks by the
  // same amount. Returns the start address of the handed-out area.
  t_vcharp block;

  // refuse partial cache lines and requests larger than the remaining chunk
  if ((size % RCCE_LINE_SIZE != 0) || (size > RCCE_chunk)) return NULL;

  block = RCCE_flags_start;
  // next free flag line begins right behind the handed-out area
  RCCE_flags_start += size;
  // payload area now starts "size" bytes later ...
  RCCE_buff_ptr += size;
  // ... and is "size" bytes smaller
  RCCE_chunk -= size;
  return block;
}
int iRCCE_flag_alloc(RCCE_FLAG *flag)
{
  // Allocate a flag: SINGLEBITFLAGS builds forward to RCCE_flag_alloc(), all
  // other builds to iRCCE_flag_alloc_tagged(). The callee's result is returned.
#ifdef SINGLEBITFLAGS
  return RCCE_flag_alloc(flag);
#else
  return iRCCE_flag_alloc_tagged(flag);
#endif
}
int iRCCE_flag_write(RCCE_FLAG *flag, RCCE_FLAG_STATUS val, int ID)
{
  // Write "val" to the flag of UE "ID": SINGLEBITFLAGS builds forward to
  // RCCE_flag_write(), all other builds to iRCCE_flag_write_tagged() without a
  // tag (NULL, length 0). The callee's result is returned.
#ifdef SINGLEBITFLAGS
  return RCCE_flag_write(flag, val, ID);
#else
  return iRCCE_flag_write_tagged(flag, val, ID, NULL, 0);
#endif
}
int iRCCE_flag_read(RCCE_FLAG flag, RCCE_FLAG_STATUS *val, int ID)
{
  // Read the flag of UE "ID" into *val: SINGLEBITFLAGS builds forward to
  // RCCE_flag_read(), all other builds to iRCCE_flag_read_tagged() without a
  // tag (NULL, length 0). The callee's result is returned.
#ifdef SINGLEBITFLAGS
  return RCCE_flag_read(flag, val, ID);
#else
  return iRCCE_flag_read_tagged(flag, val, ID, NULL, 0);
#endif
}
int iRCCE_wait_until(RCCE_FLAG flag, RCCE_FLAG_STATUS val)
{
  // Block until "flag" holds "val". Builds without SINGLEBITFLAGS forward to
  // iRCCE_wait_tagged() without a tag (NULL, length 0); SINGLEBITFLAGS builds
  // forward to the plain RCCE primitive. The callee's result is returned.
#if !defined(SINGLEBITFLAGS)
  return iRCCE_wait_tagged(flag, val, NULL, 0);
#else
  // must be the RCCE_ function, like in the other wrappers of this file:
  // calling iRCCE_wait_until() here would recurse without end
  return RCCE_wait_until(flag, val);
#endif
}

View file

@ -0,0 +1,195 @@
//***************************************************************************************
// Functions for handling Atomic Increment Registers (AIR).
//***************************************************************************************
//
// Copyright 2012, Chair for Operating Systems, RWTH Aachen University
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "iRCCE_lib.h"
#ifdef AIR
//--------------------------------------------------------------------------------------
// FUNCTION: iRCCE_atomic_alloc
//--------------------------------------------------------------------------------------
// Allocates a new AIR register; returns iRCCE_ERROR if all AIRs are already allocated
//--------------------------------------------------------------------------------------
int iRCCE_atomic_alloc(iRCCE_AIR** reg)
{
  // Hand out the next unused Atomic Increment Register.
  //   reg - receives a pointer to the allocated entry of iRCCE_atomic_inc_regs[]
  // Returns iRCCE_SUCCESS after resetting the register to 0, or iRCCE_ERROR once
  // 2 * RCCE_NP registers have been handed out (*reg is left untouched then).
  //
  // Allocations 0 .. RCCE_NP-1 are served from the first register set, allocations
  // RCCE_NP .. 2*RCCE_NP-1 from the second set (offset RCCE_MAXNP). Within a set
  // the register index is looked up in RC_COREID[].
  // NOTE(review): this assumes RC_COREID[] holds one valid entry per participating
  // UE (indices 0 .. RCCE_NP-1) -- confirm. The previous code indexed RC_COREID[]
  // with the raw allocation counter (up to 2*RCCE_NP-1) and switched to the second
  // set only for counter > RCCE_NP, so allocation number RCCE_NP still landed in
  // the first set.
  int slot = iRCCE_atomic_alloc_counter;
  int next_reg;

  if (slot >= 2 * RCCE_NP) return iRCCE_ERROR;

  if (slot < RCCE_NP) next_reg = RC_COREID[slot];
  else next_reg = RC_COREID[slot - RCCE_NP] + RCCE_MAXNP;

  (*reg) = &iRCCE_atomic_inc_regs[next_reg];
#ifdef _OPENMP
  // the counter is advanced once, by the master thread only
  #pragma omp master
  {
    iRCCE_atomic_alloc_counter++;
  }
  #pragma omp barrier
#else
  iRCCE_atomic_alloc_counter++;
#endif
  iRCCE_atomic_write((*reg), 0);
  return iRCCE_SUCCESS;
}
//--------------------------------------------------------------------------------------
// FUNCTION: iRCCE_atomic_inc
//--------------------------------------------------------------------------------------
// Increments an AIR register and returns its previous content
//--------------------------------------------------------------------------------------
int iRCCE_atomic_inc(iRCCE_AIR* reg, int* value)
{
  // Increment the register and report the content it had before.
  //   reg   - register to increment
  //   value - receives the previous content; may be NULL if the caller does not
  //           need it
  // Always returns iRCCE_SUCCESS.
  int discarded;

  // redirect the result into a scratch variable when the caller passed NULL
  // (the former "value = &value" made the pointer point at itself)
  if(value == NULL) value = &discarded;
#ifndef _OPENMP
  // no explicit increment here: the code only reads through the counter pointer
  // (presumably the read itself triggers the increment in hardware -- confirm)
  (*value) = (*reg->counter);
#else
  // emulation: read, increment and mirror the new value into "init" atomically
  #pragma omp critical
  {
    (*value) = reg->counter;
    reg->counter++;
    reg->init = reg->counter;
  }
#endif
  return iRCCE_SUCCESS;
}
//--------------------------------------------------------------------------------------
// FUNCTION: iRCCE_atomic_read
//--------------------------------------------------------------------------------------
// Returns the current value of an AIR register
//--------------------------------------------------------------------------------------
int iRCCE_atomic_read(iRCCE_AIR* reg, int* value)
{
// Store the register's current value in *value and return iRCCE_SUCCESS.
// The value is taken from the "init" member, not from "counter".
// "value" is dereferenced unconditionally, so it must not be NULL.
#ifndef _OPENMP
(*value) = (*reg->init);
#else
#pragma omp critical
{
(*value) =reg->init;
}
#endif
return iRCCE_SUCCESS;
}
//--------------------------------------------------------------------------------------
// FUNCTION: iRCCE_atomic_write
//--------------------------------------------------------------------------------------
// Initializes an AIR register by writing a start value
//--------------------------------------------------------------------------------------
int iRCCE_atomic_write(iRCCE_AIR* reg, int value)
{
// Set the register to "value" and return iRCCE_SUCCESS.
#ifndef _OPENMP
// only the location behind "init" is written in this variant
(*reg->init) = value;
#else
// emulation: both members are set, inside a critical section
#pragma omp critical
{
reg->init = value;
reg->counter = value;
}
#endif
return iRCCE_SUCCESS;
}
//--------------------------------------------------------------------------------------
// FUNCTION: iRCCE_barrier
//--------------------------------------------------------------------------------------
// A barrier version based on the Atomic Increment Registers (AIR); if AIRs are not
// supported, the function makes a fall-back to the common RCCE_barrier().
//--------------------------------------------------------------------------------------
static void RC_wait(int wait) {
  // Busy-wait helper for the backoff in iRCCE_barrier(): executes a nop/loop
  // sequence with "wait" as the loop count. Compiled to an empty function in
  // OpenMP builds.
  // NOTE(review): the x86 "loop" instruction decrements the count register before
  // testing it, so wait == 0 would not mean "no delay" -- callers should pass a
  // positive value.
#ifndef _OPENMP
  // A local numeric label ("1:" / "1b") is used instead of a named one: a named
  // label becomes an assembler symbol and clashes ("symbol already defined") as
  // soon as the compiler emits this asm statement more than once.
  asm volatile( "movl %%eax,%%ecx\n\t"
                "1:nop\n\t"
                "loop 1b"
                : /* no output registers */
                : "a" (wait)
                : "%ecx" );
#endif
  return;
}
// index (0 or 1) of the barrier register to use for the next iRCCE_barrier() call
static int idx = 0;
// state of the pseudo random generator used for the backoff times
static unsigned int rnd = 0;
#ifdef _OPENMP
#pragma omp threadprivate (idx, rnd)
#endif
int iRCCE_barrier(RCCE_COMM *comm)
{
// Barrier over "comm" (NULL selects RCCE_COMM_WORLD).
// For RCCE_COMM_WORLD the barrier is built on an Atomic Increment Register and
// always returns RCCE_SUCCESS; for any other communicator the call is forwarded to
// RCCE_barrier() and its result is returned.
int backoff = BACKOFF_MIN, wait, i = 0;
int counter;
if(comm == NULL) comm = &RCCE_COMM_WORLD;
if (comm == &RCCE_COMM_WORLD) {
// check in: counter receives the register content from before the increment
iRCCE_atomic_inc(iRCCE_atomic_barrier[idx], &counter);
if (counter < (comm->size-1))
{
// not the last UE to arrive: poll the register until it reads 0 again
iRCCE_atomic_read(iRCCE_atomic_barrier[idx], &counter);
while (counter > 0)
{
// randomized backoff: linear congruential step, then a wait time of
// BACKOFF_MIN plus a random share of a window that doubles with every i
rnd = rnd * 1103515245u + 12345u;
// NOTE(review): the modulus is BACKOFF_MIN << i, so BACKOFF_MIN is assumed to be
// non-zero; i is only bounded by the "wait < BACKOFF_MAX" test below -- confirm
// that the shift cannot grow beyond the width of int
wait = BACKOFF_MIN + (rnd % (backoff << i));
RC_wait(wait);
if (wait < BACKOFF_MAX) i++;
iRCCE_atomic_read(iRCCE_atomic_barrier[idx], &counter);
}
}
else
{
// last UE to arrive: resetting the register to 0 releases the polling UEs
iRCCE_atomic_write(iRCCE_atomic_barrier[idx], 0);
}
// alternate between the two barrier registers from one call to the next
idx = !idx;
return(RCCE_SUCCESS);
}
else
{
return RCCE_barrier(comm);
}
}
#else // !AIR
int iRCCE_barrier(RCCE_COMM *comm)
{
  // Build without AIR support: plain forwarding to RCCE_barrier(), whose result
  // is returned. A NULL communicator selects RCCE_COMM_WORLD.
  RCCE_COMM *target = (comm != NULL) ? comm : &RCCE_COMM_WORLD;

  return RCCE_barrier(target);
}
#endif // !AIR

View file

@ -0,0 +1,78 @@
//***************************************************************************************
// Get data from communication buffer.
//***************************************************************************************
//
// Author: Rob F. Van der Wijngaart
// Intel Corporation
// Date: 008/30/2010
//
//***************************************************************************************
//
// Copyright 2010 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// [2010-11-03] switched to SCC-optimized memcpy() functions in scc_memcpy.h:
// - memcpy_to_mpb()
// - memcpy_from_mpb()
// by Stefan Lankes, Carsten Clauss, Chair for Operating Systems,
// RWTH Aachen University
//
#include "iRCCE_lib.h"
#if (defined COPPERRIDGE || defined SCC) && !defined(__hermit__)
#include "scc_memcpy.h"
#endif
// Copy `count` bytes from `src` to `dest` and return the copy routine's
// result. Plain memcpy() is used unless this is a COPPERRIDGE/SCC build that
// does not target HermitCore, in which case memcpy_from_mpb() is used.
void* iRCCE_memcpy_get(void *dest, const void *src, size_t count)
{
#if defined(__hermit__) || !(defined COPPERRIDGE || defined SCC)
	return memcpy(dest, src, count);
#else
	return memcpy_from_mpb(dest, src, count);
#endif
}
//--------------------------------------------------------------------------------------
// FUNCTION: iRCCE_get
//--------------------------------------------------------------------------------------
// copy data from address "source" in the remote MPB to address "target" in either the
// local MPB, or in the calling UE's private memory. We do not test to see if a move
// into the calling UE's private memory stays within allocated memory *
//--------------------------------------------------------------------------------------
// Copy `num_bytes` bytes out of UE `ID`'s communication buffer into `target`.
// `source` is given as an address inside the caller's own communication
// buffer; only its offset is used, applied to the buffer of UE `ID`.
// Always returns iRCCE_SUCCESS.
int iRCCE_get(
	t_vcharp target,  // target buffer, MPB or private memory
	t_vcharp source,  // source buffer, MPB
	int num_bytes,    // number of bytes to copy (must be multiple of cache line size)
	int ID            // rank of source UE
	) {
	// in non-GORY mode only the offset of the source within the MPB has to be
	// retained; the source is known to live in the MPB, not in private memory
	t_vcharp remote = RCCE_comm_buffer[ID] + (source - RCCE_comm_buffer[RCCE_IAM]);

	// make sure fresh data is read
#ifdef _OPENMP
	#pragma omp flush
#endif
	RC_cache_invalidate();

	iRCCE_memcpy_get((void *)target, (void *)remote, num_bytes);

	// flush so that the data is visible to all threads; a flush list cannot be
	// used because malloced space is involved
#ifdef _OPENMP
	#pragma omp flush
#endif
	return(iRCCE_SUCCESS);
}

View file

@ -0,0 +1,709 @@
//***************************************************************************************
// Synchronized receive routines.
//***************************************************************************************
//
// Author: Rob F. Van der Wijngaart
// Intel Corporation
// Date: 08/30/2010
//
//***************************************************************************************
//
// Copyright 2010 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// [2010-10-25] added support for non-blocking send/recv operations
// - iRCCE_isend(), ..._test(), ..._wait(), ..._push()
// - iRCCE_irecv(), ..._test(), ..._wait(), ..._push()
// by Carsten Clauss, Chair for Operating Systems,
// RWTH Aachen University
//
// [2010-11-12] extracted non-blocking code into separate library
// by Carsten Scholtes
//
// [2010-12-09] added cancel functions for non-blocking send/recv requests
// by Carsten Clauss
//
// [2011-02-21] added support for multiple incoming queues
// (one recv queue per remote rank)
//
// [2011-04-19] added wildcard mechanism (iRCCE_ANY_SOURCE) for receiving
// a message from an arbitrary remote rank
// by Simon Pickartz, Chair for Operating Systems,
// RWTH Aachen University
//
// [2011-06-27] merged iRCCE_ANY_SOURCE branch with trunk (iRCCE_ANY_LENGTH)
//
// [2011-08-02] added iRCCE_iprobe() function for probing for incoming messages
//
// [2011-11-03] added non-blocking but synchronous send/recv functions:
// iRCCE_issend() / iRCCE_isrecv()
//
#include "iRCCE_lib.h"
#if (defined COPPERRIDGE || defined SCC) && !defined(__hermit__)
#include "scc_memcpy.h"
#else
#define memcpy_scc memcpy
#endif
#ifdef SINGLEBITFLAGS
#warning iRCCE_ANY_LENGTH: for using this wildcard, SINGLEBITFLAGS must be disabled! (make SINGLEBITFLAGS=0)
#endif
#ifdef RCCE_VERSION
#warning iRCCE_ANY_LENGTH: for using this wildcard, iRCCE must be built against RCCE release V1.0.13!
#endif
// Progress one non-blocking receive request as far as currently possible.
//
// The function is a resumable state machine: whenever the sender's flag does
// not yet show the expected value, the current wait point is stored in
// request->label (1, 2 or 3) and iRCCE_PENDING is returned; the next call
// jumps straight back to that wait point. The message is received in up to
// three phases: full chunks, the whole cache lines of the remainder, and a
// final partial cache line that is staged through `padline`.
//
// Returns iRCCE_SUCCESS once the request is finished (request->finished is
// set and iRCCE_recent_source / iRCCE_recent_length are updated),
// iRCCE_PENDING otherwise. Synchronous requests are handed over to
// iRCCE_push_srecv_request() and its result is returned.
static int iRCCE_push_recv_request(iRCCE_RECV_REQUEST *request) {
char padline[RCCE_LINE_SIZE]; // copy buffer, used if message not multiple of line size
int test; // flag for calling iRCCE_test_flag()
// nothing to do for a request that has already completed
if(request->finished) return(iRCCE_SUCCESS);
// synchronous requests have their own progress routine
if(request->sync) return iRCCE_push_srecv_request(request);
// resume at the wait point at which the previous call returned iRCCE_PENDING
if(request->label == 1) goto label1;
if(request->label == 2) goto label2;
if(request->label == 3) goto label3;
#ifdef _iRCCE_ANY_LENGTH_
// first entry only: the value read from the sent flag is taken as the
// message length; a value of 0 means nothing has been announced yet
RCCE_flag_read(*(request->sent), &(request->flag_set_value), RCCE_IAM);
if(request->flag_set_value == 0) {
return(iRCCE_PENDING);
}
request->size = (size_t)request->flag_set_value;
#endif
// receive data in units of available chunk size of MPB
for (; request->wsize < (request->size / request->chunk) * request->chunk; request->wsize += request->chunk) {
request->bufptr = request->privbuf + request->wsize;
request->nbytes = request->chunk;
label1:
// phase 1 wait point: has the sender flagged the next chunk?
// (presumably `test` becomes non-zero when the flag equals the given value)
iRCCE_test_flag(*(request->sent), request->flag_set_value, &test);
if(!test) {
request->label = 1;
return(iRCCE_PENDING);
}
// from here on data has been consumed; iRCCE_irecv_cancel() checks this field
request->started = 1;
RCCE_flag_write(request->sent, RCCE_FLAG_UNSET, RCCE_IAM);
// copy data from source's MPB space to private memory
iRCCE_get((t_vcharp)request->bufptr, request->combuf, request->nbytes, request->source);
// tell the source I have moved data out of its comm buffer
RCCE_flag_write(request->ready, request->flag_set_value, request->source);
}
request->remainder = request->size % request->chunk;
// if nothing is left over, we are done
if (!request->remainder) {
if(iRCCE_recent_source != request->source) iRCCE_recent_source = request->source;
if(iRCCE_recent_length != request->size) iRCCE_recent_length = request->size;
request->finished = 1;
return(iRCCE_SUCCESS);
}
// receive remainder of data--whole cache lines
request->bufptr = request->privbuf + (request->size / request->chunk) * request->chunk;
request->nbytes = request->remainder - request->remainder % RCCE_LINE_SIZE;
if (request->nbytes) {
label2:
// phase 2 wait point: whole cache lines of the remainder
iRCCE_test_flag(*(request->sent), request->flag_set_value, &test);
if(!test) {
request->label = 2;
return(iRCCE_PENDING);
}
request->started = 1;
RCCE_flag_write(request->sent, RCCE_FLAG_UNSET, RCCE_IAM);
// copy data from source's MPB space to private memory
iRCCE_get((t_vcharp)request->bufptr, request->combuf, request->nbytes, request->source);
// tell the source I have moved data out of its comm buffer
RCCE_flag_write(request->ready, request->flag_set_value, request->source);
}
// recompute from size: request->remainder may be stale when resumed via label2
request->remainder = request->size % request->chunk;
request->remainder = request->remainder % RCCE_LINE_SIZE;
if (!request->remainder) {
if(iRCCE_recent_source != request->source) iRCCE_recent_source = request->source;
if(iRCCE_recent_length != request->size) iRCCE_recent_length = request->size;
request->finished = 1;
return(iRCCE_SUCCESS);
}
// remainder is less than cache line. This must be copied into appropriately sized
// intermediate space before exact number of bytes get copied to the final destination
request->bufptr = request->privbuf + (request->size / request->chunk) * request->chunk + request->nbytes;
request->nbytes = RCCE_LINE_SIZE;
label3:
// phase 3 wait point: final partial cache line
iRCCE_test_flag(*(request->sent), request->flag_set_value, &test);
if(!test) {
request->label = 3;
return(iRCCE_PENDING);
}
request->started = 1;
RCCE_flag_write(request->sent, RCCE_FLAG_UNSET, RCCE_IAM);
// copy data from source's MPB space to private memory
// (a full line is fetched into padline, only `remainder` bytes reach the user buffer)
iRCCE_get((t_vcharp)padline, request->combuf, request->nbytes, request->source);
memcpy_scc(request->bufptr,padline,request->remainder);
// tell the source I have moved data out of its comm buffer
RCCE_flag_write(request->ready, request->flag_set_value, request->source);
if(iRCCE_recent_source != request->source) iRCCE_recent_source = request->source;
if(iRCCE_recent_length != request->size) iRCCE_recent_length = request->size;
request->finished = 1;
return(iRCCE_SUCCESS);
}
// Fill in a receive request descriptor: store the caller's parameters and
// reset all progress state (bytes received, resume label, finished/started
// flags, queue link). Nothing is communicated here.
static void iRCCE_init_recv_request(
	char *privbuf,    // destination buffer in local private memory (receive buffer)
	t_vcharp combuf,  // intermediate buffer in MPB
	size_t chunk,     // size of MPB available for this message (bytes)
	RCCE_FLAG *ready, // flag indicating whether receiver is ready
	RCCE_FLAG *sent,  // flag indicating whether message has been sent by source
	size_t size,      // size of message (bytes)
	int source,       // UE that will send the message
	int sync,         // flag indicating whether recv is synchronous or not
	iRCCE_RECV_REQUEST *request
	) {

	request->privbuf   = privbuf;
	request->combuf    = combuf;
	request->chunk     = chunk;
	request->ready     = ready;
	request->sent      = sent;
	request->size      = size;
	request->source    = source;
	request->sync      = sync;

	// first sub-chunk: half of the chunk rounded down to whole cache lines;
	// the second sub-chunk takes the rest.
	// (A preceding "subchunk1 = chunk / 2" store was dead - it was overwritten
	// immediately - and has been removed, matching iRCCE_init_send_request.)
	request->subchunk1 = ( (chunk / 2) / RCCE_LINE_SIZE ) * RCCE_LINE_SIZE;
	request->subchunk2 = chunk - request->subchunk1;

	// progress state of a fresh request
	request->wsize     = 0;
	request->remainder = 0;
	request->nbytes    = 0;
	request->bufptr    = NULL;
	request->label     = 0;
	request->finished  = 0;
	request->started   = 0;
	request->next      = NULL;

	// value expected in the sent flag: the plain "set" value, or the message
	// length when the any-length wildcard is compiled in
#ifndef _iRCCE_ANY_LENGTH_
	request->flag_set_value = RCCE_FLAG_SET;
#else
	request->flag_set_value = (RCCE_FLAG_STATUS)size;
#endif

	return;
}
// Look for a remote rank that has announced a message and for which no
// receive request is queued yet. The ranks are scanned round-robin, three
// passes at most. Returns the first matching rank, or iRCCE_ANY_SOURCE if
// none was found.
static int iRCCE_irecv_search_source() {
	int probe;

	for (probe = 0; probe < RCCE_NP*3; ++probe) {
		int rank = probe % RCCE_NP;
		int test;

		// never receive from ourselves
		if (rank == RCCE_IAM)
			continue;

		// only take source if recv-queue is empty
		if (iRCCE_irecv_queue[rank])
			continue;

		// the flag is compared against 0; a failed comparison is taken
		// as "this rank has sent something"
		iRCCE_test_flag(RCCE_sent_flag[rank], 0, &test);
		if (!test)
			return rank;
	}

	return iRCCE_ANY_SOURCE;
}
//--------------------------------------------------------------------------------------
// FUNCTION: iRCCE_irecv
//--------------------------------------------------------------------------------------
// non-blocking recv function; returns a handle of type iRCCE_RECV_REQUEST
//--------------------------------------------------------------------------------------
// Request descriptor used when the caller passes request == NULL (blocking use).
static iRCCE_RECV_REQUEST blocking_irecv_request;
// Landing area for the single dummy byte of a zero-length synchronous receive.
// It must have static storage duration: a queued request keeps its buffer
// pointer after iRCCE_irecv_generic() has returned.
static char sync_irecv_dummy;
#ifdef _OPENMP
#pragma omp threadprivate (blocking_irecv_request, sync_irecv_dummy)
#endif

// Common implementation of iRCCE_irecv() (sync == 0) and iRCCE_isrecv()
// (sync != 0).
//
//   privbuf  destination buffer in private memory
//   size     number of bytes to receive; 0 means "nothing to receive" for a
//            plain receive and "just synchronize" for a synchronous one
//   source   sending rank, or iRCCE_ANY_SOURCE
//   request  request handle; NULL makes the call blocking
//   sync     non-zero for a synchronous receive
//
// Returns iRCCE_SUCCESS when the receive is complete, iRCCE_PENDING when the
// request is first in its queue but not finished, iRCCE_RESERVED when it was
// queued behind other requests or is still waiting for a source, and the
// result of RCCE_error_return() for an invalid source rank.
inline static int iRCCE_irecv_generic(char *privbuf, ssize_t size, int source, iRCCE_RECV_REQUEST *request, int sync) {

	if(request == NULL){
		request = &blocking_irecv_request;

		// find source (blocking): spin over all ranks until one of them
		// without queued requests shows a sent flag that differs from 0
		if( source == iRCCE_ANY_SOURCE ){
			int i;

			for( i=0;;i=(i+1)%RCCE_NP ){
				if( (!iRCCE_irecv_queue[i]) && (i != RCCE_IAM) ) {
					int test;
					iRCCE_test_flag(RCCE_sent_flag[i], 0, &test);
					if(!test) {
						source = i;
						break;
					}
				}
			}
		}
	}

	if(size == 0) {
		if(sync) {
			// just synchronize: receive one dummy byte. The byte goes to
			// static storage; pointing the request at the local `size`
			// parameter would leave a dangling pointer in a queued request.
			size = 1;
			privbuf = &sync_irecv_dummy;
		} else
			size = -1;
	}

	if(size <= 0) {
#ifdef _iRCCE_ANY_LENGTH_
		if(size != iRCCE_ANY_LENGTH)
#endif
		{
			// nothing to receive: hand back an already finished request
			iRCCE_init_recv_request(privbuf, RCCE_buff_ptr, RCCE_chunk,
			                        &RCCE_ready_flag[RCCE_IAM], &RCCE_sent_flag[source],
			                        size, source, sync, request);
			request->finished = 1;
			return(iRCCE_SUCCESS);
		}
	}

	if( source == iRCCE_ANY_SOURCE ) {
		source = iRCCE_irecv_search_source(); // first try to find a source

		if( source == iRCCE_ANY_SOURCE ){ // queue request if no source available

			iRCCE_init_recv_request(privbuf, RCCE_buff_ptr, RCCE_chunk,
			                        &RCCE_ready_flag[RCCE_IAM], NULL,
			                        size, iRCCE_ANY_SOURCE, sync, request);

			// put anysource-request at the tail of irecv_any_source_queue
			if( iRCCE_irecv_any_source_queue == NULL ){
				iRCCE_irecv_any_source_queue = request;
			}
			else {
				iRCCE_RECV_REQUEST* run = iRCCE_irecv_any_source_queue;
				while( run->next != NULL ) run = run->next;
				run->next = request;
			}
			return iRCCE_RESERVED;
		}
	}

	if (source<0 || source >= RCCE_NP)
		return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ID));
	else {
		iRCCE_init_recv_request(privbuf, RCCE_buff_ptr, RCCE_chunk,
		                        &RCCE_ready_flag[RCCE_IAM], &RCCE_sent_flag[source],
		                        size, source, sync, request);

		if(iRCCE_irecv_queue[source] == NULL) {

			// no older request for this source: try to make progress at once
			if(iRCCE_push_recv_request(request) == iRCCE_SUCCESS) {
				return(iRCCE_SUCCESS);
			}
			else {
				iRCCE_irecv_queue[source] = request;

				// blocking use: wait here until the request has completed
				if(request == &blocking_irecv_request) {
					iRCCE_irecv_wait(request);
					return(iRCCE_SUCCESS);
				}

				return(iRCCE_PENDING);
			}
		}
		else {
			// append behind the requests already queued for this source
			iRCCE_RECV_REQUEST *run = iRCCE_irecv_queue[source];
			while(run->next != NULL) run = run->next;
			run->next = request;

			if(request == &blocking_irecv_request) {
				iRCCE_irecv_wait(request);
				return(iRCCE_SUCCESS);
			}

			return(iRCCE_RESERVED);
		}
	}
}
// Non-blocking receive: forwards to iRCCE_irecv_generic() with the
// synchronous mode switched off. The `dest` argument is passed on as the
// rank of the sender.
int iRCCE_irecv(char *privbuf, ssize_t size, int dest, iRCCE_RECV_REQUEST *request) {
	const int sync = 0;

	return iRCCE_irecv_generic(privbuf, size, dest, request, sync);
}
// Non-blocking synchronous receive: forwards to iRCCE_irecv_generic() with
// the synchronous mode switched on. The `dest` argument is passed on as the
// rank of the sender.
int iRCCE_isrecv(char *privbuf, ssize_t size, int dest, iRCCE_RECV_REQUEST *request) {
	const int sync = 1;

	return iRCCE_irecv_generic(privbuf, size, dest, request, sync);
}
//--------------------------------------------------------------------------------------
// FUNCTION: iRCCE_iprobe
//--------------------------------------------------------------------------------------
// probe for incoming messages (non-blocking / does not receive)
//--------------------------------------------------------------------------------------
int iRCCE_iprobe(int source, int* test_rank, int* test_flag)
{
// determine source of request if given source = iRCCE_ANY_SOURCE
if( source == iRCCE_ANY_SOURCE ) {
source = iRCCE_irecv_search_source(); // first try to find a source
}
else {
int res;
iRCCE_test_flag(RCCE_sent_flag[source], RCCE_FLAG_SET, &res);
if(!res) source = iRCCE_ANY_SOURCE;
}
if(source != iRCCE_ANY_SOURCE) { // message found:
if (test_rank != NULL) (*test_rank) = source;
if (test_flag != NULL) (*test_flag) = 1;
#ifdef _iRCCE_ANY_LENGTH_
{
int size = iRCCE_ANY_LENGTH;
RCCE_flag_read(RCCE_sent_flag[source], &size, RCCE_IAM);
if(iRCCE_recent_length != size) iRCCE_recent_length = size;
}
#endif
if(iRCCE_recent_source != source) iRCCE_recent_source = source;
}
else {
if (test_rank != NULL) (*test_rank) = iRCCE_ANY_SOURCE;
if (test_flag != NULL) (*test_flag) = 0;
}
return iRCCE_SUCCESS;
}
//--------------------------------------------------------------------------------------
// FUNCTION: iRCCE_irecv_test
//--------------------------------------------------------------------------------------
// test function for completion of the requested non-blocking recv operation
// Just provide NULL instead of the testvar if you don't need it
//--------------------------------------------------------------------------------------
// Test a non-blocking receive request for completion, making progress on it
// where possible.
//
//   request  request to test; NULL tests all queued receive requests at once
//   test     if not NULL: receives 1 when the request is complete, else 0
//
// Returns iRCCE_SUCCESS when complete, iRCCE_PENDING when the request is
// being worked on but is not finished, and iRCCE_RESERVED when it still waits
// for a source or for older requests from the same source.
int iRCCE_irecv_test(iRCCE_RECV_REQUEST *request, int *test) {

	int source;

	if(request == NULL) {

		if(iRCCE_irecv_push() == iRCCE_SUCCESS) {
			if (test) (*test) = 1;
			return(iRCCE_SUCCESS);
		}
		else {
			if (test) (*test) = 0;
			return(iRCCE_PENDING);
		}
	}

	// does request still have no source?
	if( request->source == iRCCE_ANY_SOURCE ) {
		request->source = iRCCE_irecv_search_source();

		if( request->source == iRCCE_ANY_SOURCE ) {
			if (test) (*test) = 0;
			return iRCCE_RESERVED;
		}

		// a source was found: take request out of wait_any_source-list
		if( request == iRCCE_irecv_any_source_queue ) {
			iRCCE_irecv_any_source_queue = iRCCE_irecv_any_source_queue->next;
		}
		else {
			// find predecessor; tolerate a request that is not in the list
			// instead of walking off its end
			iRCCE_RECV_REQUEST* run = iRCCE_irecv_any_source_queue;
			while( (run != NULL) && (run->next != request) ) run = run->next;
			if( run != NULL ) run->next = request->next;
		}

		request->next = NULL;
		request->sent = &RCCE_sent_flag[request->source]; // set senders flag
		source = request->source;

		// queue request in iRCCE_irecv_queue
		if(iRCCE_irecv_queue[source] == NULL) {

			if(iRCCE_push_recv_request(request) == iRCCE_SUCCESS) {
				if (test) (*test) = 1;
				return(iRCCE_SUCCESS);
			}
			else {
				iRCCE_irecv_queue[source] = request;

				if(request == &blocking_irecv_request) {
					iRCCE_irecv_wait(request);
					if (test) (*test) = 1;
					return(iRCCE_SUCCESS);
				}
				if (test) (*test) = 0;
				return(iRCCE_PENDING);
			}
		}
		else {
			// append behind the requests already queued for this source
			iRCCE_RECV_REQUEST *run = iRCCE_irecv_queue[source];
			while(run->next != NULL) run = run->next;
			run->next = request;

			if(request == &blocking_irecv_request) {
				iRCCE_irecv_wait(request);
				if (test) (*test) = 1;
				return(iRCCE_SUCCESS);
			}

			// the request is only queued, not complete: report 0 like every
			// other iRCCE_RESERVED return (this used to report 1)
			if (test) (*test) = 0;
			return(iRCCE_RESERVED);
		}
	}

	// request with a known source
	source = request->source;

	if(request->finished) {
		if (test) (*test) = 1;
		return(iRCCE_SUCCESS);
	}

	// only the head of a source's queue can make progress
	if(iRCCE_irecv_queue[source] != request) {
		if (test) (*test) = 0;
		return(iRCCE_RESERVED);
	}

	iRCCE_push_recv_request(request);

	if(request->finished) {
		iRCCE_irecv_queue[source] = request->next;

		if (test) (*test) = 1;
		return(iRCCE_SUCCESS);
	}

	if (test) (*test) = 0;
	return(iRCCE_PENDING);
}
//--------------------------------------------------------------------------------------
// FUNCTION: iRCCE_irecv_push
//--------------------------------------------------------------------------------------
// progress function for pending requests in the irecv queue
//--------------------------------------------------------------------------------------
// Make progress on the first receive request queued for `source`. A finished
// head that this call completed is removed from the queue. Returns
// iRCCE_SUCCESS when there is nothing (left) to do for the head request,
// iRCCE_PENDING while it is still incomplete.
static int iRCCE_irecv_push_source(int source) {

	iRCCE_RECV_REQUEST *head = iRCCE_irecv_queue[source];

	// empty queue, or a head that is already marked finished
	if (head == NULL || head->finished)
		return(iRCCE_SUCCESS);

	iRCCE_push_recv_request(head);

	if (!head->finished)
		return(iRCCE_PENDING);

	// completed just now: advance the queue
	iRCCE_irecv_queue[source] = head->next;
	return(iRCCE_SUCCESS);
}
int iRCCE_irecv_push(void) {
iRCCE_RECV_REQUEST* help_request;
// first check sourceless requests
if( iRCCE_irecv_any_source_queue != NULL) {
while( iRCCE_irecv_any_source_queue != NULL ) {
iRCCE_irecv_any_source_queue->source = iRCCE_irecv_search_source();
if( iRCCE_irecv_any_source_queue->source == iRCCE_ANY_SOURCE ) {
break;
}
// source found for first request in iRCCE_irecv_any_source_queue
else {
// set senders flag
iRCCE_irecv_any_source_queue->sent = &RCCE_sent_flag[iRCCE_irecv_any_source_queue->source];
// take request out of irecv_any_source_queue
help_request = iRCCE_irecv_any_source_queue;
iRCCE_irecv_any_source_queue = iRCCE_irecv_any_source_queue->next;
help_request->next = NULL;
// put request into irecv_queue
if(iRCCE_irecv_queue[help_request->source] == NULL) {
iRCCE_irecv_queue[help_request->source] = help_request;
}
else {
iRCCE_RECV_REQUEST *run = iRCCE_irecv_queue[help_request->source];
while(run->next != NULL) run = run->next;
run->next = help_request;
}
}
}
}
int i, j;
int retval = iRCCE_SUCCESS;
for(i=0; i<RCCE_NP; i++) {
j = iRCCE_irecv_push_source(i);
if(j != iRCCE_SUCCESS) {
retval = j;
}
}
return (iRCCE_irecv_any_source_queue == NULL)? retval : iRCCE_RESERVED;
}
//--------------------------------------------------------------------------------------
// FUNCTION: iRCCE_irecv_wait
//--------------------------------------------------------------------------------------
// just wait for completion of the requested non-blocking recv operation
//--------------------------------------------------------------------------------------
// Block until `request` has finished or, for request == NULL, until
// iRCCE_irecv_push() reports iRCCE_SUCCESS. Both the receive and the send
// queues are pushed while waiting. Always returns iRCCE_SUCCESS.
int iRCCE_irecv_wait(iRCCE_RECV_REQUEST *request) {

	if (request == NULL) {
		// wait for all receive requests
		for (;;) {
			iRCCE_isend_push();
			if (iRCCE_irecv_push() == iRCCE_SUCCESS)
				break;
		}
		return(iRCCE_SUCCESS);
	}

	// wait for one particular request
	while (!request->finished) {
		iRCCE_irecv_push();
		iRCCE_isend_push();
	}

	return(iRCCE_SUCCESS);
}
//--------------------------------------------------------------------------------------
// FUNCTION: iRCCE_irecv_cancel
//--------------------------------------------------------------------------------------
// try to cancel a pending non-blocking recv request
//--------------------------------------------------------------------------------------
// Try to cancel a pending non-blocking receive request.
//
//   request  request to cancel
//   test     if not NULL: receives 1 when the request was removed from its
//            queue, else 0
//
// Returns iRCCE_SUCCESS when the request was removed, iRCCE_PENDING when it
// is the head of its source queue and has already started receiving, and
// iRCCE_NOT_ENQUEUED when it is NULL, finished, or not found in a queue.
int iRCCE_irecv_cancel(iRCCE_RECV_REQUEST *request, int *test) {

	int source;
	iRCCE_RECV_REQUEST *run;

	if( (request == NULL) || (request->finished) ) {
		if (test) (*test) = 0;
		return iRCCE_NOT_ENQUEUED;
	}

	// does request have any source specified?
	if( request->source == iRCCE_ANY_SOURCE ) {

		// the head of the any-source queue has no predecessor and must be
		// unlinked explicitly (it was previously never found)
		if( iRCCE_irecv_any_source_queue == request ) {
			iRCCE_irecv_any_source_queue = request->next;
			if (test) (*test) = 1;
			return iRCCE_SUCCESS;
		}

		// the queue may be empty, so check `run` itself before run->next
		for( run = iRCCE_irecv_any_source_queue; (run != NULL) && (run->next != NULL); run = run->next ) {
			if( run->next == request ) {
				run->next = run->next->next;

				if (test) (*test) = 1;
				return iRCCE_SUCCESS;
			}
		}

		if (test) (*test) = 0;
		return iRCCE_NOT_ENQUEUED;
	}

	source = request->source;

	if(iRCCE_irecv_queue[source] == NULL) {
		if (test) (*test) = 0;
		return iRCCE_NOT_ENQUEUED;
	}

	if(iRCCE_irecv_queue[source] == request) {

		// have parts of the message already been received?
		if(request->started) {
			if (test) (*test) = 0;
			return iRCCE_PENDING;
		}
		else {
			// no, thus request can be canceled just in time:
			iRCCE_irecv_queue[source] = request->next;
			if (test) (*test) = 1;
			return iRCCE_SUCCESS;
		}
	}

	for(run = iRCCE_irecv_queue[source]; run->next != NULL; run = run->next) {

		// request found --> remove it from recv queue:
		if(run->next == request) {

			run->next = run->next->next;

			if (test) (*test) = 1;
			return iRCCE_SUCCESS;
		}
	}

	if (test) (*test) = 0;
	return iRCCE_NOT_ENQUEUED;
}

View file

@ -0,0 +1,411 @@
//***************************************************************************************
// Non-blocking send routines.
//***************************************************************************************
//
// Author: Rob F. Van der Wijngaart
// Intel Corporation
// Date: 08/30/2010
//
//***************************************************************************************
//
// Copyright 2010 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// [2010-10-25] added support for non-blocking send/recv operations
// - iRCCE_isend(), ..._test(), ..._wait(), ..._push()
// - iRCCE_irecv(), ..._test(), ..._wait(), ..._push()
// by Carsten Clauss, Chair for Operating Systems,
// RWTH Aachen University
//
// [2010-11-12] extracted non-blocking code into separate library
// by Carsten Scholtes
//
// [2010-12-09] added cancel functions for non-blocking send/recv requests
// by Carsten Clauss
//
// [2011-11-03] added non-blocking but synchronous send/recv functions:
// iRCCE_issend() / iRCCE_isrecv()
//
#ifdef GORY
#error iRCCE _cannot_ be built in GORY mode!
#endif
#include "iRCCE_lib.h"
#if (defined COPPERRIDGE || defined SCC) && !defined(__hermit__)
#include "scc_memcpy.h"
#else
#define memcpy_scc memcpy
#endif
// Progress one non-blocking send request as far as currently possible.
//
// The function is a resumable state machine: after a piece of the message has
// been put into the own communication buffer and announced through the sent
// flag, the ready flag is tested; if it does not yet show the expected value
// the wait point is stored in request->label (1, 2 or 3) and iRCCE_PENDING is
// returned, and the next call jumps straight back to that wait point. The
// message is sent in up to three phases: full chunks, the whole cache lines
// of the remainder, and a final partial cache line staged through `padline`.
//
// Returns iRCCE_SUCCESS once the request is finished (request->finished is
// set), iRCCE_PENDING otherwise. Synchronous requests are handed over to
// iRCCE_push_ssend_request() and its result is returned.
static int iRCCE_push_send_request(iRCCE_SEND_REQUEST *request) {
char padline[RCCE_LINE_SIZE]; // copy buffer, used if message not multiple of line size
int test; // flag for calling iRCCE_test_flag()
// nothing to do for a request that has already completed
if(request->finished) return(iRCCE_SUCCESS);
// synchronous requests have their own progress routine
if(request->sync) return iRCCE_push_ssend_request(request);
// resume at the wait point at which the previous call returned iRCCE_PENDING
if(request->label == 1) goto label1;
if(request->label == 2) goto label2;
if(request->label == 3) goto label3;
// send data in units of available chunk size of comm buffer
for (; request->wsize< (request->size / request->chunk) * request->chunk; request->wsize += request->chunk) {
request->bufptr = request->privbuf + request->wsize;
request->nbytes = request->chunk;
// copy private data to own comm buffer
iRCCE_put(request->combuf, (t_vcharp) request->bufptr, request->nbytes, RCCE_IAM);
RCCE_flag_write(request->sent, request->flag_set_value, request->dest);
// wait for the destination to be ready to receive a message
label1:
// phase 1 wait point
// (presumably `test` becomes non-zero when the flag equals the given value)
iRCCE_test_flag(*(request->ready), request->flag_set_value, &test);
if(!test) {
request->label = 1;
return(iRCCE_PENDING);
}
// acknowledgement seen: clear the ready flag for the next piece
RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);
}
request->remainder = request->size % request->chunk;
// if nothing is left over, we are done
if (!request->remainder) {
request->finished = 1;
return(iRCCE_SUCCESS);
}
// send remainder of data--whole cache lines
request->bufptr = request->privbuf + (request->size / request->chunk) * request->chunk;
request->nbytes = request->remainder - request->remainder % RCCE_LINE_SIZE;
if (request->nbytes) {
// copy private data to own comm buffer
iRCCE_put(request->combuf, (t_vcharp)request->bufptr, request->nbytes, RCCE_IAM);
RCCE_flag_write(request->sent, request->flag_set_value, request->dest);
// wait for the destination to be ready to receive a message
label2:
// phase 2 wait point
iRCCE_test_flag(*(request->ready), request->flag_set_value, &test);
if(!test) {
request->label = 2;
return(iRCCE_PENDING);
}
RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);
}
// recompute from size: request->remainder may be stale when resumed via label2
request->remainder = request->size % request->chunk;
request->remainder = request->remainder%RCCE_LINE_SIZE;
// if nothing is left over, we are done
if (!request->remainder)
{
request->finished = 1;
return(iRCCE_SUCCESS);
}
// remainder is less than a cache line. This must be copied into appropriately sized
// intermediate space before it can be sent to the receiver
request->bufptr = request->privbuf + (request->size / request->chunk) * request->chunk + request->nbytes;
request->nbytes = RCCE_LINE_SIZE;
// copy private data to own comm buffer
// NOTE(review): only `remainder` bytes of padline are initialized here, yet a
// full line is put into the comm buffer; the rest is whatever was on the stack
memcpy_scc(padline,request->bufptr,request->remainder);
iRCCE_put(request->combuf, (t_vcharp)padline, request->nbytes, RCCE_IAM);
RCCE_flag_write(request->sent, request->flag_set_value, request->dest);
// wait for the destination to be ready to receive a message
label3:
// phase 3 wait point
iRCCE_test_flag(*(request->ready), request->flag_set_value, &test);
if(!test) {
request->label = 3;
return(iRCCE_PENDING);
}
RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);
request->finished = 1;
return(iRCCE_SUCCESS);
}
// Fill in a send request descriptor: reset all progress state (bytes sent,
// resume label, finished flag, queue link) and store the caller's parameters.
// Nothing is communicated here.
static void iRCCE_init_send_request(
	char *privbuf,    // source buffer in local private memory (send buffer)
	t_vcharp combuf,  // intermediate buffer in MPB
	size_t chunk,     // size of MPB available for this message (bytes)
	RCCE_FLAG *ready, // flag indicating whether receiver is ready
	RCCE_FLAG *sent,  // flag indicating whether message has been sent by source
	size_t size,      // size of message (bytes)
	int dest,         // UE that will receive the message
	int sync,         // flag indicating whether send is synchronous or not
	iRCCE_SEND_REQUEST *request
	) {

	// progress state of a fresh request
	request->next      = NULL;
	request->finished  = 0;
	request->label     = 0;
	request->bufptr    = NULL;
	request->nbytes    = 0;
	request->remainder = 0;
	request->wsize     = 0;

	// message description
	request->privbuf = privbuf;
	request->size    = size;
	request->dest    = dest;
	request->sync    = sync;

	// communication buffer and the flags used for the handshake
	request->combuf = combuf;
	request->ready  = ready;
	request->sent   = sent;

	// the chunk is split in two: the first part is half of the chunk rounded
	// down to whole cache lines, the second part takes the rest
	request->chunk     = chunk;
	request->subchunk1 = ( (chunk / 2) / RCCE_LINE_SIZE ) * RCCE_LINE_SIZE;
	request->subchunk2 = chunk - request->subchunk1;

	// value written to the sent flag: the plain "set" value, or the message
	// length when the any-length wildcard is compiled in
#ifdef _iRCCE_ANY_LENGTH_
	request->flag_set_value = (RCCE_FLAG_STATUS)size;
#else
	request->flag_set_value = RCCE_FLAG_SET;
#endif

	return;
}
//--------------------------------------------------------------------------------------
// FUNCTION: iRCCE_isend
//--------------------------------------------------------------------------------------
// non-blocking send function; returns a handle of type iRCCE_SEND_REQUEST
//--------------------------------------------------------------------------------------
// Request descriptor used when the caller passes request == NULL (blocking use).
static iRCCE_SEND_REQUEST blocking_isend_request;
#ifdef _OPENMP
#pragma omp threadprivate (blocking_isend_request)
#endif
// Payload of a zero-length synchronous send: one dummy byte. It must have
// static storage duration because a queued request keeps its buffer pointer
// after iRCCE_isend_generic() has returned. The value 1 equals the first byte
// of the previously used local `size` variable on little-endian machines. The
// byte is only read, never written.
static char sync_isend_dummy = 1;

// Common implementation of iRCCE_isend() (sync == 0) and iRCCE_issend()
// (sync != 0).
//
//   privbuf  source buffer in private memory
//   size     number of bytes to send; 0 means "nothing to send" for a plain
//            send and "just synchronize" for a synchronous one
//   dest     receiving rank
//   request  request handle; NULL makes the call blocking
//   sync     non-zero for a synchronous send
//
// Returns iRCCE_SUCCESS when the send is complete, iRCCE_PENDING when the
// request is first in the send queue but not finished, iRCCE_RESERVED when it
// was queued behind other requests, and the result of RCCE_error_return() for
// an invalid destination rank.
inline static int iRCCE_isend_generic(char *privbuf, ssize_t size, int dest, iRCCE_SEND_REQUEST *request, int sync) {

	if(request == NULL) request = &blocking_isend_request;

	if(size == 0) {
		if(sync) {
			// just synchronize: send one dummy byte. The byte lives in static
			// storage; pointing the request at the local `size` parameter
			// would leave a dangling pointer in a queued request.
			size = 1;
			privbuf = &sync_isend_dummy;
		} else
			size = -1;
	}

	if(size < 0) {
		// nothing to send: hand back an already finished request
		iRCCE_init_send_request(privbuf, RCCE_buff_ptr, RCCE_chunk,
		                        &RCCE_ready_flag[dest], &RCCE_sent_flag[RCCE_IAM],
		                        size, dest, sync, request);
		request->finished = 1;
		return(iRCCE_SUCCESS);
	}

	if (dest<0 || dest >= RCCE_NP)
		return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ID));
	else {
		iRCCE_init_send_request(privbuf, RCCE_buff_ptr, RCCE_chunk,
		                        &RCCE_ready_flag[dest], &RCCE_sent_flag[RCCE_IAM],
		                        size, dest, sync, request);

		if(iRCCE_isend_queue == NULL) {

			// no older send request: try to make progress at once
			if(iRCCE_push_send_request(request) == iRCCE_SUCCESS) {
				return(iRCCE_SUCCESS);
			}
			else {
				iRCCE_isend_queue = request;

				// blocking use: wait here until the request has completed
				if(request == &blocking_isend_request) {
					iRCCE_isend_wait(request);
					return(iRCCE_SUCCESS);
				}

				return(iRCCE_PENDING);
			}
		}
		else {
			// append behind the requests already queued
			iRCCE_SEND_REQUEST *run = iRCCE_isend_queue;
			while(run->next != NULL) run = run->next;
			run->next = request;

			if(request == &blocking_isend_request) {
				iRCCE_isend_wait(request);
				return(iRCCE_SUCCESS);
			}

			return(iRCCE_RESERVED);
		}
	}
}
// Non-blocking send: forwards to iRCCE_isend_generic() with the synchronous
// mode switched off.
int iRCCE_isend(char *privbuf, ssize_t size, int dest, iRCCE_SEND_REQUEST *request) {
	const int sync = 0;

	return iRCCE_isend_generic(privbuf, size, dest, request, sync);
}
// Non-blocking synchronous send: forwards to iRCCE_isend_generic() with the
// synchronous mode switched on.
int iRCCE_issend(char *privbuf, ssize_t size, int dest, iRCCE_SEND_REQUEST *request) {
	const int sync = 1;

	return iRCCE_isend_generic(privbuf, size, dest, request, sync);
}
//--------------------------------------------------------------------------------------
// FUNCTION: iRCCE_isend_push
//--------------------------------------------------------------------------------------
// progress function for pending requests in the isend queue
//--------------------------------------------------------------------------------------
// Make progress on the first request of the send queue. A head request that
// this call completed is removed from the queue. Returns iRCCE_SUCCESS when
// there is nothing (left) to do for the head request, iRCCE_PENDING while it
// is still incomplete.
int iRCCE_isend_push(void) {

	iRCCE_SEND_REQUEST *head = iRCCE_isend_queue;

	// empty queue, or a head that is already marked finished
	if (head == NULL || head->finished)
		return(iRCCE_SUCCESS);

	iRCCE_push_send_request(head);

	if (!head->finished)
		return(iRCCE_PENDING);

	// completed just now: advance the queue
	iRCCE_isend_queue = head->next;
	return(iRCCE_SUCCESS);
}
//--------------------------------------------------------------------------------------
// FUNCTION: iRCCE_isend_test
//--------------------------------------------------------------------------------------
// test function for completion of the requested non-blocking send operation
// Just provide NULL instead of testvar if you don't need it
//--------------------------------------------------------------------------------------
// Test a non-blocking send request for completion, making progress on the
// send queue where possible.
//
//   request  request to test; NULL tests whether the whole send queue has
//            been drained
//   test     if not NULL: receives 1 when complete, else 0
//
// Returns iRCCE_SUCCESS when complete, iRCCE_PENDING when the request (or the
// queue) is being worked on but is not finished, and iRCCE_RESERVED when the
// request still waits behind older requests.
int iRCCE_isend_test(iRCCE_SEND_REQUEST *request, int *test) {

	int done = 0;
	int status = iRCCE_PENDING;

	if (request == NULL) {
		// whole-queue test: push once, then look whether anything is left
		iRCCE_isend_push();
		if (iRCCE_isend_queue == NULL) {
			done = 1;
			status = iRCCE_SUCCESS;
		}
	}
	else if (request->finished) {
		done = 1;
		status = iRCCE_SUCCESS;
	}
	else {
		// not at the head yet: give the current head a chance to complete
		if (iRCCE_isend_queue != request)
			iRCCE_isend_push();

		if (iRCCE_isend_queue != request) {
			status = iRCCE_RESERVED;
		}
		else {
			iRCCE_push_send_request(request);

			if (request->finished) {
				iRCCE_isend_queue = request->next;
				done = 1;
				status = iRCCE_SUCCESS;
			}
		}
	}

	if (test) (*test) = done;
	return(status);
}
//--------------------------------------------------------------------------------------
// FUNCTION: iRCCE_isend_wait
//--------------------------------------------------------------------------------------
// just wait for completion of the requested non-blocking send operation
//--------------------------------------------------------------------------------------
// Block until `request` has finished or, for request == NULL, until the send
// queue is empty. Both the send and the receive queues are pushed while
// waiting. Always returns iRCCE_SUCCESS.
int iRCCE_isend_wait(iRCCE_SEND_REQUEST *request) {

	for (;;) {
		// what "still busy" means depends on whether a particular request
		// or the whole queue is waited for
		int busy = (request != NULL) ? !request->finished
		                             : (iRCCE_isend_queue != NULL);
		if (!busy)
			break;

		iRCCE_isend_push();
		iRCCE_irecv_push();
	}

	return(iRCCE_SUCCESS);
}
//--------------------------------------------------------------------------------------
// FUNCTION: iRCCE_isend_cancel
//--------------------------------------------------------------------------------------
// Try to cancel a pending non-blocking send request.
//   request: the send request to cancel
//   test:    optional out-parameter; receives 1 if the request was removed from
//            the send queue, 0 otherwise
// Returns iRCCE_SUCCESS if the request was unlinked from the queue, iRCCE_PENDING if
// it is the head of the queue (and is therefore left in place), and
// iRCCE_NOT_ENQUEUED if it is NULL, already finished, or not found in the queue.
//--------------------------------------------------------------------------------------
int iRCCE_isend_cancel(iRCCE_SEND_REQUEST *request, int *test) {
  iRCCE_SEND_REQUEST *prev;
  int removed = 0;
  int status  = iRCCE_NOT_ENQUEUED;

  if ( (request != NULL) && !(request->finished) && (iRCCE_isend_queue != NULL) ) {
    if (iRCCE_isend_queue == request) {
      // the head of the queue is never removed here
      status = iRCCE_PENDING;
    }
    else {
      // look for the element whose successor is the request and bypass it
      prev = iRCCE_isend_queue;
      while (prev->next != NULL) {
        if (prev->next == request) {
          prev->next = prev->next->next;
          removed = 1;
          status  = iRCCE_SUCCESS;
          break;
        }
        prev = prev->next;
      }
    }
  }

  if (test) (*test) = removed;
  return status;
}

View file

@ -0,0 +1,62 @@
//
// Copyright 2010 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// [2010-10-25] added support for non-blocking send/recv operations
// - iRCCE_isend(), ..._test(), ..._wait(), ..._push()
// - iRCCE_irecv(), ..._test(), ..._wait(), ..._push()
// by Carsten Clauss, Chair for Operating Systems,
// RWTH Aachen University
//
// [2010-11-12] extracted non-blocking code into separate library
// by Carsten Scholtes
//
// [2011-04-19] added wildcard mechanism (iRCCE_ANY_SOURCE) for receiving
// a message from an arbitrary remote rank
// by Simon Pickartz, Chair for Operating Systems,
// RWTH Aachen University
//
// [2011-06-27] merged iRCCE_ANY_SOURCE branch with trunk (iRCCE_ANY_LENGTH)
//
// Library-internal declarations shared by the iRCCE source files: global request
// queues, bookkeeping of the most recent receive, and internal helper prototypes.
#ifndef IRCCE_LIB_H
#define IRCCE_LIB_H
#include "RCCE_lib.h"
#include "iRCCE.h"
#ifdef AIR
// Constants and globals that only exist when the library is built with AIR defined.
// NOTE(review): FPGA_BASE looks like a physical base address and BACKOFF_MIN/MAX like
// bounds of a back-off interval -- confirm against the code that uses them.
#define FPGA_BASE 0xf9000000
#define BACKOFF_MIN 8
#define BACKOFF_MAX 256
extern iRCCE_AIR iRCCE_atomic_inc_regs[];
extern int iRCCE_atomic_alloc_counter;
extern iRCCE_AIR* iRCCE_atomic_barrier[2];
#endif
// head of the linked list (via ->next) of pending non-blocking send requests
extern iRCCE_SEND_REQUEST* iRCCE_isend_queue;
// pending non-blocking receive requests, indexed by the rank of the source UE
extern iRCCE_RECV_REQUEST* iRCCE_irecv_queue[RCCE_MAXNP];
// presumably the pending receive requests posted with iRCCE_ANY_SOURCE -- TODO confirm
extern iRCCE_RECV_REQUEST* iRCCE_irecv_any_source_queue;
// source rank and length recorded by the receive/probe functions for the most
// recent message
extern int iRCCE_recent_source;
extern int iRCCE_recent_length;
#ifdef _OPENMP
// with OpenMP, every thread keeps its own copy of the queues and bookkeeping variables
#pragma omp threadprivate (iRCCE_isend_queue, iRCCE_irecv_queue, iRCCE_irecv_any_source_queue, iRCCE_recent_source, iRCCE_recent_length)
#endif
// non-blocking flag test: callers pass a flag, the value to test for, and an int
// pointer that receives the result (non-zero if the test succeeded)
int iRCCE_test_flag(RCCE_FLAG, RCCE_FLAG_STATUS, int *);
// internal push functions for non-blocking synchronous send / receive requests
int iRCCE_push_ssend_request(iRCCE_SEND_REQUEST *request);
int iRCCE_push_srecv_request(iRCCE_RECV_REQUEST *request);
#endif

View file

@ -0,0 +1,289 @@
//***************************************************************************************
//
// Copyright 2010 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// [2010-11-26] added xxx
// by Carsten Clauss, Chair for Operating Systems,
// RWTH Aachen University
//
#include "iRCCE_lib.h"
#include <stdlib.h>
#include <string.h>
#if (defined COPPERRIDGE || defined SCC) && !defined(__hermit__)
#include "scc_memcpy.h"
#else
#define memcpy_scc memcpy
#endif
//--------------------------------------------------------------------------------------
// FUNCTION: iRCCE_msend_general
//--------------------------------------------------------------------------------------
// multicast send function: the message is staged piece by piece in the sender's own
// MPB; after every piece the "sent" flag is written to all other UEs and the sender
// joins a barrier before the buffer is reused.
// Always returns iRCCE_SUCCESS.
//--------------------------------------------------------------------------------------
static int iRCCE_msend_general(
  char *privbuf,    // source buffer in local private memory (send buffer)
  t_vcharp combuf,  // intermediate buffer in MPB
  size_t chunk,     // size of MPB available for this message (bytes)
  RCCE_FLAG *sent,  // flag indicating whether message has been sent by source
  ssize_t size      // size of message (bytes)
  ) {

  char padline[RCCE_LINE_SIZE]; // copy buffer, used if message not multiple of line size
  size_t wsize,      // offset within send buffer when putting in "chunk" bytes
         remainder,  // bytes remaining to be sent
         nbytes;     // number of bytes to be sent in single iRCCE_put call
  char *bufptr;      // running pointer inside privbuf for current location
  int ue;            // rank of the UE that is currently being notified

#ifndef _iRCCE_ANY_LENGTH_
#define FLAG_SET_VALUE RCCE_FLAG_SET
#else
  // with the any-length mechanism the flag value carries the message size
  RCCE_FLAG_STATUS FLAG_SET_VALUE = (RCCE_FLAG_STATUS)size;
#endif

  // send data in units of available chunk size of comm buffer
  // NOTE(review): whole chunks go through RCCE_put while the remainder below uses
  // iRCCE_put -- kept as is; confirm that the difference is intentional.
  for (wsize=0; wsize< (size/chunk)*chunk; wsize+=chunk) {
    bufptr = privbuf + wsize;
    nbytes = chunk;
    // copy private data to own comm buffer
    RCCE_put(combuf, (t_vcharp) bufptr, nbytes, RCCE_IAM);
    // tell every other UE that a new chunk is available
    for(ue=0; ue<RCCE_NP; ue++)
      if(ue!=RCCE_IAM) RCCE_flag_write(sent, FLAG_SET_VALUE, ue);
    iRCCE_barrier(NULL);
  }

  remainder = size%chunk;
  // if nothing is left over, we are done
  if (!remainder) return(iRCCE_SUCCESS);

  // send remainder of data--whole cache lines
  bufptr = privbuf + (size/chunk)*chunk;
  nbytes = remainder - remainder%RCCE_LINE_SIZE;
  if (nbytes) {
    // copy private data to own comm buffer
    iRCCE_put(combuf, (t_vcharp)bufptr, nbytes, RCCE_IAM);
    for(ue=0; ue<RCCE_NP; ue++)
      if(ue!=RCCE_IAM) RCCE_flag_write(sent, FLAG_SET_VALUE, ue);
    iRCCE_barrier(NULL);
  }

  remainder = remainder%RCCE_LINE_SIZE;
  if (!remainder) return(iRCCE_SUCCESS);

  // remainder is less than a cache line. This must be copied into appropriately sized
  // intermediate space before it can be sent to the receiver
  bufptr = privbuf + (size/chunk)*chunk + nbytes;
  nbytes = RCCE_LINE_SIZE;
  // A whole line is put into the MPB although only "remainder" bytes are payload:
  // clear the line first so that no uninitialized stack bytes become visible to
  // the other UEs.
  memset(padline, 0, sizeof(padline));
  // copy private data to own comm buffer
  memcpy_scc(padline, bufptr, remainder);
  iRCCE_put(combuf, (t_vcharp)padline, nbytes, RCCE_IAM);
  for(ue=0; ue<RCCE_NP; ue++)
    if(ue!=RCCE_IAM) RCCE_flag_write(sent, FLAG_SET_VALUE, ue);
  iRCCE_barrier(NULL);

  return(iRCCE_SUCCESS);
}
//--------------------------------------------------------------------------------------
// FUNCTION: iRCCE_msend
//--------------------------------------------------------------------------------------
// multicast send function (blocking and synchronous!)
//   privbuf: message to be sent, located in private memory
//   size:    message length in bytes; nothing is sent for size <= 0
// All pending non-blocking send requests are completed first. Returns iRCCE_SUCCESS
// for an empty message, otherwise the result of iRCCE_msend_general().
//--------------------------------------------------------------------------------------
int iRCCE_msend(char *privbuf, ssize_t size) {
  if (size > 0) {
    // drain the send queue before the MPB is used for the multicast
    for (;;) {
      if (iRCCE_isend_queue == NULL) break;
      iRCCE_isend_push();
      iRCCE_irecv_push();
    }
    return(iRCCE_msend_general(privbuf, RCCE_buff_ptr, RCCE_chunk,
                               &RCCE_sent_flag[RCCE_IAM], size));
  }
  return(iRCCE_SUCCESS);
}
//--------------------------------------------------------------------------------------
// FUNCTION: iRCCE_mrecv_general
//--------------------------------------------------------------------------------------
// multicast receive function: for every piece of the message, wait for the "sent"
// flag, reset it locally, copy the piece out of the source's MPB and join a barrier.
// Records source and size in iRCCE_recent_source / iRCCE_recent_length.
// Always returns iRCCE_SUCCESS.
//--------------------------------------------------------------------------------------
static int iRCCE_mrecv_general(
  char *privbuf,    // destination buffer in local private memory (receive buffer)
  t_vcharp combuf,  // intermediate buffer in MPB
  size_t chunk,     // size of MPB available for this message (bytes)
  RCCE_FLAG *sent,  // flag indicating whether message has been sent by source
  ssize_t size,     // size of message (bytes)
  int source        // UE that sent the message
  ) {

  char padline[RCCE_LINE_SIZE]; // copy buffer, used if message not multiple of line size
  size_t wsize,      // offset within receive buffer when pulling in "chunk" bytes
         remainder,  // bytes remaining to be received
         nbytes;     // number of bytes to be received in single iRCCE_get call
  char *bufptr;      // running pointer inside privbuf for current location

#ifndef _iRCCE_ANY_LENGTH_
#define FLAG_SET_VALUE RCCE_FLAG_SET
#else
  // with the any-length mechanism the flag value carries the message size:
  // spin until the flag is non-zero and adopt its value as the size
  RCCE_FLAG_STATUS FLAG_SET_VALUE;

  while (1) {
    RCCE_flag_read(*sent, &size, RCCE_IAM);
    if(size!=0) break;
  }
  FLAG_SET_VALUE = (RCCE_FLAG_STATUS)size;
#endif

  if(iRCCE_recent_source != source) iRCCE_recent_source = source;
  if(iRCCE_recent_length != size)   iRCCE_recent_length = size;

  // receive data in units of available chunk size of MPB
  for (wsize=0; wsize< (size/chunk)*chunk; wsize+=chunk) {
    bufptr = privbuf + wsize;
    nbytes = chunk;
    // wait for the value the sender actually writes (FLAG_SET_VALUE), exactly like
    // the remainder paths below; without _iRCCE_ANY_LENGTH_ this is RCCE_FLAG_SET
    RCCE_wait_until(*sent, FLAG_SET_VALUE);
    RCCE_flag_write(sent, RCCE_FLAG_UNSET, RCCE_IAM);
    // copy data from source's MPB space to private memory
    RCCE_get((t_vcharp)bufptr, combuf, nbytes, source);
    iRCCE_barrier(NULL);
  }

  remainder = size%chunk;
  // if nothing is left over, we are done
  if (!remainder) return(iRCCE_SUCCESS);

  // receive remainder of data--whole cache lines
  bufptr = privbuf + (size/chunk)*chunk;
  nbytes = remainder - remainder % RCCE_LINE_SIZE;
  if (nbytes) {
    RCCE_wait_until(*sent, FLAG_SET_VALUE);
    RCCE_flag_write(sent, RCCE_FLAG_UNSET, RCCE_IAM);
    // copy data from source's MPB space to private memory
    iRCCE_get((t_vcharp)bufptr, combuf, nbytes, source);
    iRCCE_barrier(NULL);
  }

  remainder = remainder % RCCE_LINE_SIZE;
  if (!remainder) return(iRCCE_SUCCESS);

  // remainder is less than cache line. This must be copied into appropriately sized
  // intermediate space before exact number of bytes get copied to the final destination
  bufptr = privbuf + (size/chunk)*chunk + nbytes;
  nbytes = RCCE_LINE_SIZE;
  RCCE_wait_until(*sent, FLAG_SET_VALUE);
  RCCE_flag_write(sent, RCCE_FLAG_UNSET, RCCE_IAM);
  // copy a whole line from source's MPB space, then only the payload bytes
  iRCCE_get((t_vcharp)padline, combuf, nbytes, source);
  memcpy_scc(bufptr, padline, remainder);
  iRCCE_barrier(NULL);

  return(iRCCE_SUCCESS);
}
//--------------------------------------------------------------------------------------
// FUNCTION: iRCCE_mrecv
//--------------------------------------------------------------------------------------
// multicast recv function (blocking!)
//   privbuf: destination buffer in private memory
//   size:    message length in bytes; for size <= 0 nothing is received (unless it
//            is iRCCE_ANY_LENGTH in a build with _iRCCE_ANY_LENGTH_)
//   source:  rank of the sending UE, or iRCCE_ANY_SOURCE
// Returns iRCCE_SUCCESS for an empty message, the value of RCCE_error_return() for
// an invalid source rank, otherwise the result of iRCCE_mrecv_general().
//--------------------------------------------------------------------------------------
int iRCCE_mrecv(char *privbuf, ssize_t size, int source) {

  if(size <= 0) {
#ifdef _iRCCE_ANY_LENGTH_
    if (size != iRCCE_ANY_LENGTH)
#endif
    {
      return(iRCCE_SUCCESS);
    }
  }

  // determine source of request if given source = iRCCE_ANY_SOURCE
  if (source == iRCCE_ANY_SOURCE) {
    int i, res;

    // wait for completion of _all_ pending non-blocking requests:
    iRCCE_irecv_wait(NULL);

    // poll the sent flags round-robin until another UE has signalled a message
    for( i=0;;i=(i+1)%RCCE_NP ){
      iRCCE_test_flag(RCCE_sent_flag[i], RCCE_FLAG_SET, &res);
      if ( (i != RCCE_IAM) && (res) ) {
        source = i;
        break;
      }
    }
  }

  // validate the rank _before_ it is used as an index into the receive queues
  // and the flag array
  if (source<0 || source >= RCCE_NP)
    return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ID));

  // wait for completion of pending (and source-related) non-blocking requests:
  while(iRCCE_irecv_queue[source] != NULL) {
    iRCCE_irecv_push();
    iRCCE_isend_push();
  }

  return(iRCCE_mrecv_general(privbuf, RCCE_buff_ptr, RCCE_chunk,
                             &RCCE_sent_flag[source], size, source));
}
//--------------------------------------------------------------------------------------
// FUNCTION: iRCCE_mcast
//--------------------------------------------------------------------------------------
// multicast based on msend() and mrecv(): the UE whose rank equals "root" sends the
// buffer via iRCCE_msend(), every other caller receives it via iRCCE_mrecv().
// Returns the result of the function that was called.
//--------------------------------------------------------------------------------------
int iRCCE_mcast(char *buf, size_t size, int root)
{
  // the root is the sender ...
  if (RCCE_IAM == root)
    return iRCCE_msend(buf, size);

  // ... everybody else receives from the root
  return iRCCE_mrecv(buf, size, root);
}
//--------------------------------------------------------------------------------------
// FUNCTION: iRCCE_bcast
//--------------------------------------------------------------------------------------
// wrapper function for using iRCCE's multicast feature
//   buf:  buffer holding the data (root) or receiving the data (all other UEs)
//   size: number of bytes to broadcast
//   root: rank of the broadcasting UE
//   comm: communicator the broadcast is performed on
// The multicast path notifies _all_ UEs (0..RCCE_NP-1) and takes no communicator,
// so it is only used when comm is RCCE_COMM_WORLD; any other communicator is
// handled by the regular RCCE_bcast().
// NOTE(review): the original code had the two branches the other way round, which
// sent sub-communicator broadcasts to every UE -- confirm against the callers.
//--------------------------------------------------------------------------------------
int iRCCE_bcast(char *buf, size_t size, int root, RCCE_COMM comm)
{
  if(memcmp(&comm, &RCCE_COMM_WORLD, sizeof(RCCE_COMM)) != 0) {
    // sub-communicator: fall back to the communicator-aware broadcast
    return RCCE_bcast(buf, size, root, comm);
  } else {
    // world communicator: use the multicast to all UEs
    return iRCCE_mcast(buf, size, root);
  }
}

View file

@ -0,0 +1,82 @@
//***************************************************************************************
// Put data into communication buffer.
//***************************************************************************************
//
// Author: Rob F. Van der Wijngaart
// Intel Corporation
// Date: 008/30/2010
//
//***************************************************************************************
//
// Copyright 2010 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// [2010-11-03] switched to SCC-optimized memcpy() functions in scc_memcpy.h:
// - memcpy_to_mpb()
// - memcpy_from_mpb()
// by Stefan Lankes, Carsten Clauss, Chair for Operating Systems,
// RWTH Aachen University
//
#include "iRCCE_lib.h"
#if (defined COPPERRIDGE || defined SCC) && !defined(__hermit__)
#include "scc_memcpy.h"
#else
#define memcpy_to_mpb memcpy
#endif
// Copy "count" bytes from "src" to "dest" and return the value returned by the
// underlying copy routine: memcpy_to_mpb() when COPPERRIDGE or SCC is defined,
// plain memcpy() otherwise.
void* iRCCE_memcpy_put(void *dest, const void *src, size_t count)
{
  void *copied;

#if defined COPPERRIDGE || defined SCC
  copied = memcpy_to_mpb(dest, src, count);
#else
  copied = memcpy(dest, src, count);
#endif

  return copied;
}
//--------------------------------------------------------------------------------------
// FUNCTION: iRCCE_put
//--------------------------------------------------------------------------------------
// copy data from address "source" in the local MPB or the calling UE's private memory
// to address "target" in the MPB of UE "ID". We do not test to see if a move from the
// calling UE's private memory stays within allocated memory.
// "target" is given relative to the calling UE's own comm buffer and is rebased onto
// the comm buffer of UE "ID" before the copy. Always returns iRCCE_SUCCESS.
//--------------------------------------------------------------------------------------
int iRCCE_put(
t_vcharp target, // target buffer, MPB
t_vcharp source, // source buffer, MPB or private memory
int num_bytes, // number of bytes to copy
int ID // rank of the UE whose comm buffer receives the data
) {
// in non-GORY mode we only need to retain the MPB target shift; we
// already know the target is in the MPB, not private memory
// (offset of target inside our own comm buffer, applied to the comm buffer of UE ID)
target = RCCE_comm_buffer[ID]+(target-RCCE_comm_buffer[RCCE_IAM]);
// make sure that any data that has been put in our MPB by another UE is visible
#ifdef _OPENMP
#pragma omp flush
#endif
// do the actual copy
// NOTE(review): RC_cache_invalidate() is defined elsewhere; presumably it invalidates
// cached MPB lines before the buffer is written -- confirm
RC_cache_invalidate();
iRCCE_memcpy_put((void *)target, (void *)source, num_bytes);
// flush data to make it visible to all threads; cannot use flush list because it
// concerns malloced space
#ifdef _OPENMP
#pragma omp flush
#endif
return(iRCCE_SUCCESS);
}

View file

@ -0,0 +1,497 @@
//***************************************************************************************
// Non-blocking receive routines.
//***************************************************************************************
//
// Author: Rob F. Van der Wijngaart
// Intel Corporation
// Date: 008/30/2010
//
//***************************************************************************************
//
// Copyright 2010 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// [2010-11-26] added a _pipelined_ version of blocking send/recv
// by Carsten Clauss, Chair for Operating Systems,
// RWTH Aachen University
//
// [2011-04-19] added wildcard mechanism (iRCCE_ANY_SOURCE) for receiving
// a message from an arbitrary remote rank
// by Simon Pickartz, Chair for Operating Systems,
// RWTH Aachen University
//
// [2011-05-31] added iRCCE_ANY_LENGTH wildcard mechanism
// by Carsten Clauss
//
// [2011-06-27] merged iRCCE_ANY_SOURCE branch with trunk (iRCCE_ANY_LENGTH)
//
// [2011-08-02] added iRCCE_iprobe() function for probing for incoming messages
//
// [2011-11-03] added internal push function for non-blocking synchronous send
// iRCCE_push_srecv_request() (called by iRCCE_push_recv_request)
//
#include "iRCCE_lib.h"
#include <stdlib.h>
#include <string.h>
#if (defined COPPERRIDGE || defined SCC) && !defined(__hermit__)
#include "scc_memcpy.h"
#else
#define memcpy_scc memcpy
#endif
//--------------------------------------------------------------------------------------
// FUNCTION: iRCCE_srecv_general
//--------------------------------------------------------------------------------------
// pipelined receive function: every full chunk is received as two sub-chunks, each
// with its own sent/ready flag handshake; the remainder is received as a run of whole
// cache lines followed by one padded line.
// Returns iRCCE_PENDING if called in test mode (*test != 0) and the first piece of the
// message has not been signalled yet, iRCCE_SUCCESS once the message has been received.
// Once the first test has succeeded, all further waits are blocking.
//--------------------------------------------------------------------------------------
static int iRCCE_srecv_general(
char *privbuf, // destination buffer in local private memory (receive buffer)
t_vcharp combuf, // intermediate buffer in MPB
size_t chunk, // size of MPB available for this message (bytes)
RCCE_FLAG *ready, // flag indicating whether receiver is ready
RCCE_FLAG *sent, // flag indicating whether message has been sent by source
ssize_t size, // size of message (bytes)
int source, // UE that sent the message
int *test // if 1 upon entry, do nonblocking receive; if message available
// set to 1, otherwise to 0
) {
char padline[RCCE_LINE_SIZE]; // copy buffer, used if message not multiple of line size
size_t wsize, // offset within receive buffer when pulling in "chunk" bytes
remainder, // bytes remaining to be received
nbytes; // number of bytes to be received in single iRCCE_get call
int first_test; // only use first chunk to determine if message has been received yet
char *bufptr; // running pointer inside privbuf for current location
size_t subchunk1, subchunk2; // sub-chunks for the pipelined message transfer
#ifndef _iRCCE_ANY_LENGTH_
#define FLAG_SET_VALUE RCCE_FLAG_SET
#else
// with the any-length mechanism the flag value carries the message size: spin until
// the flag is non-zero and adopt its value as the size (this wait is blocking, even
// in test mode)
RCCE_FLAG_STATUS FLAG_SET_VALUE;
while (1) {
RCCE_flag_read(*sent, &size, RCCE_IAM);
if(size!=0) break;
}
FLAG_SET_VALUE = (RCCE_FLAG_STATUS)size;
#endif
// remember source and length of the most recent receive
if(iRCCE_recent_source != source) iRCCE_recent_source = source;
if(iRCCE_recent_length != size) iRCCE_recent_length = size;
first_test = 1;
// receive data in units of available chunk size of MPB
for (wsize=0; wsize < (size/chunk)*chunk; wsize+=chunk) {
// if function is called in test mode, check if first chunk has been sent already.
// If so, proceed as usual. If not, exit immediately
if (*test && first_test) {
first_test = 0;
iRCCE_test_flag(*sent, RCCE_FLAG_SET, test);
if (!(*test)) return(iRCCE_PENDING);
}
if(wsize == 0) {
// align sub-chunks to cache line granularity (computed once, in the first iteration):
subchunk1 = ( (chunk / 2) / RCCE_LINE_SIZE ) * RCCE_LINE_SIZE;
subchunk2 = chunk - subchunk1;
}
// first sub-chunk: wait for it, acknowledge it, then copy it out
bufptr = privbuf + wsize;
nbytes = subchunk1;
RCCE_wait_until(*sent, FLAG_SET_VALUE);
RCCE_flag_write(sent, RCCE_FLAG_UNSET, RCCE_IAM);
RCCE_flag_write(ready, RCCE_FLAG_SET, source);
iRCCE_get((t_vcharp)bufptr, combuf, nbytes, source);
// second sub-chunk: same handshake, data located behind the first sub-chunk
bufptr = privbuf + wsize + subchunk1;
nbytes = subchunk2;
RCCE_wait_until(*sent, FLAG_SET_VALUE);
RCCE_flag_write(sent, RCCE_FLAG_UNSET, RCCE_IAM);
RCCE_flag_write(ready, RCCE_FLAG_SET, source);
iRCCE_get((t_vcharp)bufptr, combuf + subchunk1, nbytes, source);
}
remainder = size%chunk;
// if nothing is left over, we are done
if (!remainder) return(iRCCE_SUCCESS);
// receive remainder of data--whole cache lines
bufptr = privbuf + (size/chunk)*chunk;
nbytes = remainder - remainder % RCCE_LINE_SIZE;
if (nbytes) {
// if function is called in test mode, check if first chunk has been sent already.
// If so, proceed as usual. If not, exit immediately
if (*test && first_test) {
first_test = 0;
iRCCE_test_flag(*sent, RCCE_FLAG_SET, test);
if (!(*test)) return(iRCCE_PENDING);
}
RCCE_wait_until(*sent, FLAG_SET_VALUE);
RCCE_flag_write(sent, RCCE_FLAG_UNSET, RCCE_IAM);
// copy data from source's MPB space to private memory
iRCCE_get((t_vcharp)bufptr, combuf, nbytes, source);
// tell the source I have moved data out of its comm buffer
RCCE_flag_write(ready, RCCE_FLAG_SET, source);
}
remainder = remainder % RCCE_LINE_SIZE;
if (!remainder) return(iRCCE_SUCCESS);
// remainder is less than cache line. This must be copied into appropriately sized
// intermediate space before exact number of bytes get copied to the final destination
bufptr = privbuf + (size/chunk)*chunk + nbytes;
nbytes = RCCE_LINE_SIZE;
// if function is called in test mode, check if first chunk has been sent already.
// If so, proceed as usual. If not, exit immediately
if (*test && first_test) {
first_test = 0;
iRCCE_test_flag(*sent, RCCE_FLAG_SET, test);
if (!(*test)) return(iRCCE_PENDING);
}
RCCE_wait_until(*sent, FLAG_SET_VALUE);
RCCE_flag_write(sent, RCCE_FLAG_UNSET, RCCE_IAM);
// copy a whole line from source's MPB space, then only the payload bytes
iRCCE_get((t_vcharp)padline, combuf, nbytes, source);
memcpy_scc(bufptr, padline, remainder);
// tell the source I have moved data out of its comm buffer
RCCE_flag_write(ready, RCCE_FLAG_SET, source);
return(iRCCE_SUCCESS);
}
//--------------------------------------------------------------------------------------
// FUNCTION: iRCCE_srecv
//--------------------------------------------------------------------------------------
// pipelined recv function (blocking!)
//   privbuf: destination buffer in private memory
//   size:    message length in bytes; 0 means "just synchronize" (one dummy byte is
//            received), negative sizes are ignored unless the value is
//            iRCCE_ANY_LENGTH in a build with _iRCCE_ANY_LENGTH_
//   source:  rank of the sending UE, or iRCCE_ANY_SOURCE
// Returns iRCCE_SUCCESS / RCCE_SUCCESS on completion and the value of
// RCCE_error_return() for an invalid source rank.
//--------------------------------------------------------------------------------------
int iRCCE_srecv(char *privbuf, ssize_t size, int source) {
  int ignore = 0;  // "test" argument for iRCCE_srecv_general: 0 selects blocking mode

  if(size < 0) {
#ifdef _iRCCE_ANY_LENGTH_
    if (size != iRCCE_ANY_LENGTH)
#endif
    {
      return(iRCCE_SUCCESS);
    }
  }

  if(size == 0) {
    // just synchronize: receive one dummy byte into the local size variable
    size = 1;
    privbuf = (char*)&size;
  }

  // determine source of request if given source = iRCCE_ANY_SOURCE
  if (source == iRCCE_ANY_SOURCE) {
    int i, res;

    // wait for completion of _all_ pending non-blocking requests:
    iRCCE_irecv_wait(NULL);

    // poll the sent flags round-robin until another UE has signalled a message
    for( i=0;;i=(i+1)%RCCE_NP ){
      iRCCE_test_flag(RCCE_sent_flag[i], RCCE_FLAG_SET, &res);
      if ( (i != RCCE_IAM) && (res) ) {
        source = i;
        break;
      }
    }
  }

  // validate the rank _before_ it is used as an index into the receive queues
  // and the flag arrays
  if (source<0 || source >= RCCE_NP)
    return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ID));

  // wait for completion of pending (and source-related) non-blocking requests:
  while(iRCCE_irecv_queue[source] != NULL) {
    iRCCE_irecv_push();
    iRCCE_isend_push();
  }

#if !defined(SINGLEBITFLAGS) && !defined(RCCE_VERSION)
  // short messages travel as payload of the tagged sent flag
  if(size <= iRCCE_MAX_TAGGED_LEN) {

#ifndef _iRCCE_ANY_LENGTH_
#define FLAG_SET_VALUE RCCE_FLAG_SET
#else
    RCCE_FLAG_STATUS FLAG_SET_VALUE;

    if(size == iRCCE_ANY_LENGTH) {
      // spin until the flag is non-zero; its value is the message size
      while (1) {
        RCCE_flag_read(RCCE_sent_flag[source], &size, RCCE_IAM);
        if(size!=0) break;
      }
    }
    FLAG_SET_VALUE = (RCCE_FLAG_STATUS)size;
#endif

    // re-check: size may just have been replaced by the real message length
    if(size <= iRCCE_MAX_TAGGED_LEN) {
      // just wait and then read the tagged flag with payload:
      iRCCE_wait_tagged(RCCE_sent_flag[source], FLAG_SET_VALUE, privbuf, size);
      RCCE_flag_write(&RCCE_sent_flag[source], RCCE_FLAG_UNSET, RCCE_IAM);
      RCCE_flag_write(&RCCE_ready_flag[RCCE_IAM], RCCE_FLAG_SET, source);
      return(RCCE_SUCCESS);
    }
  }
#endif

  return(iRCCE_srecv_general(privbuf, RCCE_buff_ptr, RCCE_chunk,
                             &RCCE_ready_flag[RCCE_IAM], &RCCE_sent_flag[source],
                             size, source, &ignore));
}
//--------------------------------------------------------------------------------------
// FUNCTION: iRCCE_probe
//--------------------------------------------------------------------------------------
// probe for incoming messages (blocking / does not receive)
//   source:    rank to probe, or iRCCE_ANY_SOURCE to wait for a message from any
//              other UE
//   test_rank: optional out-parameter receiving the rank whose sent flag was found set
// Records the rank (and, with _iRCCE_ANY_LENGTH_, the flag value as length) in
// iRCCE_recent_source / iRCCE_recent_length. Always returns iRCCE_SUCCESS.
// NOTE(review): an explicit source is used as an array index without a range check
// here -- confirm that callers only pass valid ranks.
//--------------------------------------------------------------------------------------
int iRCCE_probe(int source, int* test_rank)
{
  int flag_is_set;

  if (source != iRCCE_ANY_SOURCE) {
    // explicit source: spin until its sent flag is set
    for (;;) {
      iRCCE_test_flag(RCCE_sent_flag[source], RCCE_FLAG_SET, &flag_is_set);
      if (flag_is_set) break;
    }
  }
  else {
    int rank = 0;

    // wait for completion of _all_ pending non-blocking requests:
    iRCCE_irecv_wait(NULL);

    // cycle through all ranks until another UE has signalled a message
    while (1) {
      iRCCE_test_flag(RCCE_sent_flag[rank], RCCE_FLAG_SET, &flag_is_set);
      if ( flag_is_set && (rank != RCCE_IAM) ) {
        source = rank;
        break;
      }
      rank = (rank+1)%RCCE_NP;
    }
  }

  if (test_rank != NULL) {
    (*test_rank) = source;
  }

#ifdef _iRCCE_ANY_LENGTH_
  {
    // the flag value carries the length of the announced message
    int size;
    RCCE_flag_read(RCCE_sent_flag[source], &size, RCCE_IAM);
    if(iRCCE_recent_length != size) iRCCE_recent_length = size;
  }
#endif

  if(iRCCE_recent_source != source) iRCCE_recent_source = source;

  return iRCCE_SUCCESS;
}
//--------------------------------------------------------------------------------------
// FUNCTION: iRCCE_srecv_test
//--------------------------------------------------------------------------------------
// pipelined recv function (non-blocking / analogous to RCCE_recv_test function)
//   privbuf: destination buffer in private memory
//   size:    message length in bytes; for size <= 0 nothing is received (unless it
//            is iRCCE_ANY_LENGTH in a build with _iRCCE_ANY_LENGTH_)
//   source:  rank of the sending UE, or iRCCE_ANY_SOURCE
//   test:    out-parameter: 1 if a message was available and has been received,
//            0 otherwise; with test == NULL the call falls back to iRCCE_recv()
// Returns iRCCE_PENDING if no message is available yet or older requests are still
// pending, the value of RCCE_error_return() for an invalid source rank, otherwise
// the result of iRCCE_srecv_general().
// NOTE(review): the NULL fallback calls iRCCE_recv(), not iRCCE_srecv() -- confirm
// that this is intended.
//--------------------------------------------------------------------------------------
int iRCCE_srecv_test(char *privbuf, ssize_t size, int source, int *test) {

  if(test == NULL) return iRCCE_recv(privbuf, size, source);

  if(size <= 0) {
#ifdef _iRCCE_ANY_LENGTH_
    if(size != iRCCE_ANY_LENGTH)
#endif
    {
      (*test) = 1;
      return(iRCCE_SUCCESS);
    }
  }

  // determine source of request if given source = iRCCE_ANY_SOURCE
  if (source == iRCCE_ANY_SOURCE) {
    int i, res;

    // check whether there are still pending non-blocking receive requests:
    if(iRCCE_irecv_push() != iRCCE_SUCCESS) {
      (*test) = 0;
      return(iRCCE_PENDING);
    }

    // a single pass over all ranks: take the first other UE with a set sent flag
    for( i=0; i<RCCE_NP; i++){
      iRCCE_test_flag(RCCE_sent_flag[i], RCCE_FLAG_SET, &res);
      if ( (i != RCCE_IAM) && (res) ) {
        source = i;
        break;
      }
    }
  }

  if (source == iRCCE_ANY_SOURCE) {
    // currently, there is no message available (from any source):
    (*test) = 0;
    return (iRCCE_PENDING);
  }

  // validate the rank _before_ it is used as an index into the receive queues
  // and the flag array
  if (source<0 || source >= RCCE_NP)
    return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ID));

  if(iRCCE_irecv_queue[source] != NULL) {
    // push pending non-blocking requests
    iRCCE_irecv_push();
    iRCCE_isend_push();
    if(iRCCE_irecv_queue[source] != NULL) {
      (*test) = 0;
      return (iRCCE_PENDING);
    }
  }

  // *test == 1 on entry selects the test mode of iRCCE_srecv_general
  (*test) = 1;
  return(iRCCE_srecv_general(privbuf, RCCE_buff_ptr, RCCE_chunk,
                             &RCCE_ready_flag[RCCE_IAM], &RCCE_sent_flag[source],
                             size, source, test));
}
//--------------------------------------------------------------------------------------
// FUNCTION: iRCCE_push_srecv_request
//--------------------------------------------------------------------------------------
// pipelined push for recv function (non-blocking and strictly synchronous!)
// Progresses the receive request as far as possible without blocking. Whenever the
// expected "sent" flag is not set yet, the current position is stored in
// request->label and iRCCE_PENDING is returned; the next call jumps straight back to
// that position. Returns iRCCE_SUCCESS once the whole message has been received
// (request->finished is then set, and source and size are recorded in
// iRCCE_recent_source / iRCCE_recent_length).
//--------------------------------------------------------------------------------------
int iRCCE_push_srecv_request(iRCCE_RECV_REQUEST *request) {
char padline[RCCE_LINE_SIZE]; // copy buffer, used if message not multiple of line size
int test; // flag for calling iRCCE_test_flag()
if(request->finished) return(iRCCE_SUCCESS);
// resume at the point where the previous call had to give up
if(request->label == 1) goto label1;
if(request->label == 2) goto label2;
if(request->label == 3) goto label3;
if(request->label == 4) goto label4;
#ifdef _iRCCE_ANY_LENGTH_
// the flag value carries the message size; a value of 0 means nothing was sent yet
RCCE_flag_read(*(request->sent), &(request->flag_set_value), RCCE_IAM);
if(request->flag_set_value == 0) {
return(iRCCE_PENDING);
}
request->size = (size_t)request->flag_set_value;
#endif
// receive data in units of available chunk size of MPB
for (; request->wsize < (request->size / request->chunk) * request->chunk; request->wsize += request->chunk) {
// first sub-chunk of the current chunk
request->bufptr = request->privbuf + request->wsize;
request->nbytes = request->subchunk1;
label1:
iRCCE_test_flag(*(request->sent), request->flag_set_value, &test);
if(!test) {
request->label = 1;
return(iRCCE_PENDING);
}
request->started = 1;
RCCE_flag_write(request->sent, RCCE_FLAG_UNSET, RCCE_IAM);
RCCE_flag_write(request->ready, RCCE_FLAG_SET, request->source);
iRCCE_get((t_vcharp)request->bufptr, request->combuf, request->nbytes, request->source);
// second sub-chunk, located behind the first one in the MPB
request->bufptr = request->privbuf + request->wsize + request->subchunk1;
request->nbytes = request->subchunk2;
label2:
iRCCE_test_flag(*(request->sent), request->flag_set_value, &test);
if(!test) {
request->label = 2;
return(iRCCE_PENDING);
}
RCCE_flag_write(request->sent, RCCE_FLAG_UNSET, RCCE_IAM);
RCCE_flag_write(request->ready, RCCE_FLAG_SET, request->source);
iRCCE_get((t_vcharp)request->bufptr, request->combuf + request->subchunk1, request->nbytes, request->source);
}
request->remainder = request->size % request->chunk;
// if nothing is left over, we are done
if (!request->remainder) {
if(iRCCE_recent_source != request->source) iRCCE_recent_source = request->source;
if(iRCCE_recent_length != request->size) iRCCE_recent_length = request->size;
request->finished = 1;
return(iRCCE_SUCCESS);
}
// receive remainder of data--whole cache lines
request->bufptr = request->privbuf + (request->size / request->chunk) * request->chunk;
request->nbytes = request->remainder - request->remainder % RCCE_LINE_SIZE;
if (request->nbytes) {
label3:
iRCCE_test_flag(*(request->sent), request->flag_set_value, &test);
if(!test) {
request->label = 3;
return(iRCCE_PENDING);
}
request->started = 1;
RCCE_flag_write(request->sent, RCCE_FLAG_UNSET, RCCE_IAM);
// copy data from source's MPB space to private memory
iRCCE_get((t_vcharp)request->bufptr, request->combuf, request->nbytes, request->source);
// tell the source I have moved data out of its comm buffer
RCCE_flag_write(request->ready, RCCE_FLAG_SET, request->source);
}
// recomputed from size and chunk rather than taken from the value stored above
// (the code can be entered at label3 by a resumed call)
request->remainder = request->size % request->chunk;
request->remainder = request->remainder % RCCE_LINE_SIZE;
if (!request->remainder) {
if(iRCCE_recent_source != request->source) iRCCE_recent_source = request->source;
if(iRCCE_recent_length != request->size) iRCCE_recent_length = request->size;
request->finished = 1;
return(iRCCE_SUCCESS);
}
// remainder is less than cache line. This must be copied into appropriately sized
// intermediate space before exact number of bytes get copied to the final destination
request->bufptr = request->privbuf + (request->size / request->chunk) * request->chunk + request->nbytes;
request->nbytes = RCCE_LINE_SIZE;
label4:
iRCCE_test_flag(*(request->sent), request->flag_set_value, &test);
if(!test) {
request->label = 4;
return(iRCCE_PENDING);
}
request->started = 1;
RCCE_flag_write(request->sent, RCCE_FLAG_UNSET, RCCE_IAM);
// copy a whole line from source's MPB space, then only the payload bytes
iRCCE_get((t_vcharp)padline, request->combuf, request->nbytes, request->source);
memcpy_scc(request->bufptr,padline,request->remainder);
// tell the source I have moved data out of its comm buffer
RCCE_flag_write(request->ready, RCCE_FLAG_SET, request->source);
if(iRCCE_recent_source != request->source) iRCCE_recent_source = request->source;
if(iRCCE_recent_length != request->size) iRCCE_recent_length = request->size;
request->finished = 1;
return(iRCCE_SUCCESS);
}

View file

@ -0,0 +1,282 @@
//***************************************************************************************
// Synchronized send routines.
//***************************************************************************************
//
// Author: Rob F. Van der Wijngaart
// Intel Corporation
// Date: 008/30/2010
//
//***************************************************************************************
//
// Copyright 2010 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// [2010-11-26] added a _pipelined_ version of blocking send/recv
// by Carsten Clauss, Chair for Operating Systems,
// RWTH Aachen University
//
// [2011-05-31] added iRCCE_ANY_LENGTH wildcard mechanism
// by Carsten Clauss
//
// [2011-11-03] added internal push function for non-blocking synchronous send
// iRCCE_push_ssend_request() (called by iRCCE_push_send_request)
//
#include "iRCCE_lib.h"
#include <stdlib.h>
#include <string.h>
// Select the copy routine used for private-memory copies in this file:
// when COPPERRIDGE or SCC is defined and __hermit__ is not, scc_memcpy.h is
// included (presumably it provides memcpy_scc -- verify); in every other
// configuration memcpy_scc is just another name for the standard memcpy.
#if (defined COPPERRIDGE || defined SCC) && !defined(__hermit__)
#include "scc_memcpy.h"
#else
#define memcpy_scc memcpy
#endif
//--------------------------------------------------------------------------------------
// FUNCTION: iRCCE_ssend_general
//--------------------------------------------------------------------------------------
// pipelined send function
//--------------------------------------------------------------------------------------
// Blocking, pipelined transfer of `size` bytes from privbuf to UE `dest`.
// The message is pushed through the local comm buffer `combuf` in three phases:
//   1. full chunks of `chunk` bytes, each split into two sub-chunks,
//   2. the remaining whole cache lines,
//   3. a final partial cache line, staged through a line-sized pad buffer.
// After every put the `sent` flag is written at `dest` and the function spins
// until the local `ready` flag becomes RCCE_FLAG_SET, then clears it again.
// Returns iRCCE_SUCCESS.
static int iRCCE_ssend_general(
  char *privbuf,    // source buffer in local private memory (send buffer)
  t_vcharp combuf,  // intermediate buffer in MPB
  size_t chunk,     // size of MPB available for this message (bytes)
  RCCE_FLAG *ready, // flag indicating whether receiver is ready
  RCCE_FLAG *sent,  // flag indicating whether message has been sent by source
  ssize_t size,     // size of message (bytes)
  int dest          // UE that will receive the message
) {
  char padline[RCCE_LINE_SIZE]; // copy buffer, used if message not multiple of line size
  size_t wsize,      // offset within send buffer when putting in "chunk" bytes
         remainder,  // bytes remaining to be sent
         nbytes,     // number of bytes to be sent in single iRCCE_put call
         full_bytes; // number of bytes covered by complete chunks
  char *bufptr;      // running pointer inside privbuf for current location
  size_t subchunk1, subchunk2; // sub-chunks for the pipelined message transfer
#ifndef _iRCCE_ANY_LENGTH_
#define FLAG_SET_VALUE RCCE_FLAG_SET
#else
  // with the any-length mechanism the 'sent' flag carries the message size
  RCCE_FLAG_STATUS FLAG_SET_VALUE = (RCCE_FLAG_STATUS)size;
#endif

  // loop-invariant values, computed once up front
  full_bytes = (size/chunk)*chunk;
  // align sub-chunks to cache line granularity:
  subchunk1 = ( (chunk / 2) / RCCE_LINE_SIZE ) * RCCE_LINE_SIZE;
  subchunk2 = chunk - subchunk1;

  for (wsize = 0; wsize < full_bytes; wsize += chunk) {
    // first half of the chunk
    bufptr = privbuf + wsize;
    nbytes = subchunk1;
    iRCCE_put(combuf, (t_vcharp) bufptr, nbytes, RCCE_IAM);
    RCCE_flag_write(sent, FLAG_SET_VALUE, dest);
    RCCE_wait_until(*ready, RCCE_FLAG_SET);
    RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);

    // second half of the chunk, placed behind the first in the comm buffer
    bufptr = privbuf + wsize + subchunk1;
    nbytes = subchunk2;
    iRCCE_put(combuf + subchunk1, (t_vcharp) bufptr, nbytes, RCCE_IAM);
    RCCE_flag_write(sent, FLAG_SET_VALUE, dest);
    RCCE_wait_until(*ready, RCCE_FLAG_SET);
    RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
  }

  remainder = size%chunk;
  // if nothing is left over, we are done
  if (!remainder) return(iRCCE_SUCCESS);

  // send remainder of data--whole cache lines
  bufptr = privbuf + full_bytes;
  nbytes = remainder - remainder%RCCE_LINE_SIZE;
  if (nbytes) {
    // copy private data to own comm buffer
    iRCCE_put(combuf, (t_vcharp)bufptr, nbytes, RCCE_IAM);
    RCCE_flag_write(sent, FLAG_SET_VALUE, dest);
    // wait for the destination to be ready to receive a message
    RCCE_wait_until(*ready, RCCE_FLAG_SET);
    RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
  }

  remainder = remainder%RCCE_LINE_SIZE;
  if (!remainder) return(iRCCE_SUCCESS);

  // remainder is less than a cache line. This must be copied into appropriately sized
  // intermediate space before it can be sent to the receiver
  bufptr = privbuf + full_bytes + nbytes;
  nbytes = RCCE_LINE_SIZE;
  // copy private data to own comm buffer; the unused tail of the pad line is
  // cleared so that no uninitialised stack bytes end up in the comm buffer
  memcpy_scc(padline, bufptr, remainder);
  memset(padline + remainder, 0, RCCE_LINE_SIZE - remainder);
  iRCCE_put(combuf, (t_vcharp)padline, nbytes, RCCE_IAM);
  RCCE_flag_write(sent, FLAG_SET_VALUE, dest);
  // wait for the destination to be ready to receive a message
  RCCE_wait_until(*ready, RCCE_FLAG_SET);
  RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
  return(iRCCE_SUCCESS);
}
//--------------------------------------------------------------------------------------
// FUNCTION: iRCCE_ssend
//--------------------------------------------------------------------------------------
// pipelined send function (blocking and synchronous!)
//--------------------------------------------------------------------------------------
// Blocking synchronous send of `size` bytes from privbuf to UE `dest`.
//   size < 0  : nothing is sent, iRCCE_SUCCESS is returned.
//   size == 0 : a single byte is sent so that the call still synchronizes.
// Pending non-blocking requests are pushed until the isend queue is empty
// before the transfer starts. An out-of-range `dest` yields the value of
// RCCE_error_return(RCCE_debug_comm, RCCE_ERROR_ID).
int iRCCE_ssend(char *privbuf, ssize_t size, int dest) {
  if(size < 0) return(iRCCE_SUCCESS);
  if(size == 0) {
    // just synchronize: send one byte taken from the local 'size' variable
    size = 1;
    privbuf = (char*)&size;
  }

  while(iRCCE_isend_queue != NULL) {
    // wait for completion of pending non-blocking requests
    iRCCE_isend_push();
    iRCCE_irecv_push();
  }

  // Validate the destination before any per-UE flag array is indexed with it;
  // the tagged fast path below reads RCCE_ready_flag[dest].
  if (dest<0 || dest >= RCCE_NP)
    return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ID));

#if !defined(SINGLEBITFLAGS) && !defined(RCCE_VERSION)
  if(size <= iRCCE_MAX_TAGGED_LEN) {
    // just write the tagged 'sent' flag (with payload) and wait for 'ready' flag:
    iRCCE_flag_write_tagged(&RCCE_sent_flag[RCCE_IAM], (RCCE_FLAG_STATUS)size, dest, privbuf, size);
    RCCE_wait_until(RCCE_ready_flag[dest], RCCE_FLAG_SET);
    RCCE_flag_write(&RCCE_ready_flag[dest], RCCE_FLAG_UNSET, RCCE_IAM);
    return(RCCE_SUCCESS);
  }
#endif

  return(iRCCE_ssend_general(privbuf, RCCE_buff_ptr, RCCE_chunk,
                             &RCCE_ready_flag[dest], &RCCE_sent_flag[RCCE_IAM],
                             size, dest));
}
//--------------------------------------------------------------------------------------
// FUNCTION: iRCCE_push_ssend_request
//--------------------------------------------------------------------------------------
// pipelined push for send function (non-blocking and strictly synchronous!)
//--------------------------------------------------------------------------------------
// Advance a non-blocking synchronous send as far as possible without waiting.
// The transfer follows the phases "full chunks (two sub-chunks each)",
// "remaining whole cache lines" and "final partial cache line". Whenever the
// receiver's 'ready' flag is not yet set, the current position is stored in
// request->label and iRCCE_PENDING is returned; the next call jumps straight
// back to that wait point. Returns iRCCE_SUCCESS (and sets request->finished)
// once everything has been handed over.
int iRCCE_push_ssend_request(iRCCE_SEND_REQUEST *request) {
  char padline[RCCE_LINE_SIZE]; // copy buffer, used if message not multiple of line size
  int test;                     // flag for calling iRCCE_test_flag()

  if(request->finished) return(iRCCE_SUCCESS);

  // resume at the wait point where the previous call had to give up
  if(request->label == 1) goto label1;
  if(request->label == 2) goto label2;
  if(request->label == 3) goto label3;
  if(request->label == 4) goto label4;

  // send data in units of available chunk size of comm buffer
  for (request->wsize = 0; request->wsize < (request->size / request->chunk) * request->chunk; request->wsize += request->chunk) {
    // first sub-chunk
    request->bufptr = request->privbuf + request->wsize;
    request->nbytes = request->subchunk1;
    iRCCE_put(request->combuf, (t_vcharp) request->bufptr, request->nbytes, RCCE_IAM);
    RCCE_flag_write(request->sent, request->flag_set_value, request->dest);
label1:
    iRCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test);
    if(!test) {
      request->label = 1;
      return(iRCCE_PENDING);
    }
    RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);

    // second sub-chunk, placed behind the first in the comm buffer
    request->bufptr = request->privbuf + request->wsize + request->subchunk1;
    request->nbytes = request->subchunk2;
    iRCCE_put(request->combuf + request->subchunk1, (t_vcharp) request->bufptr, request->nbytes, RCCE_IAM);
    RCCE_flag_write(request->sent, request->flag_set_value, request->dest);
label2:
    iRCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test);
    if(!test) {
      request->label = 2;
      return(iRCCE_PENDING);
    }
    RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);
  }

  request->remainder = request->size % request->chunk;
  // if nothing is left over, we are done
  if (!request->remainder) {
    request->finished = 1;
    return(iRCCE_SUCCESS);
  }

  // send remainder of data--whole cache lines
  request->bufptr = request->privbuf + (request->size / request->chunk) * request->chunk;
  request->nbytes = request->remainder - request->remainder % RCCE_LINE_SIZE;
  if (request->nbytes) {
    // copy private data to own comm buffer
    iRCCE_put(request->combuf, (t_vcharp)request->bufptr, request->nbytes, RCCE_IAM);
    RCCE_flag_write(request->sent, request->flag_set_value, request->dest);
    // wait for the destination to be ready to receive a message
label3:
    iRCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test);
    if(!test) {
      request->label = 3;
      return(iRCCE_PENDING);
    }
    RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);
  }

  // recomputed from size/chunk, so this point is also correct when reached via label3
  request->remainder = request->size % request->chunk;
  request->remainder = request->remainder%RCCE_LINE_SIZE;
  // if nothing is left over, we are done
  if (!request->remainder)
  {
    request->finished = 1;
    return(iRCCE_SUCCESS);
  }

  // remainder is less than a cache line. This must be copied into appropriately sized
  // intermediate space before it can be sent to the receiver
  request->bufptr = request->privbuf + (request->size / request->chunk) * request->chunk + request->nbytes;
  request->nbytes = RCCE_LINE_SIZE;
  // copy private data to own comm buffer (memcpy_scc, as in iRCCE_ssend_general);
  // the unused tail of the pad line is cleared so that no uninitialised stack
  // bytes end up in the comm buffer
  memcpy_scc(padline,request->bufptr,request->remainder);
  memset(padline + request->remainder, 0, RCCE_LINE_SIZE - request->remainder);
  iRCCE_put(request->combuf, (t_vcharp)padline, request->nbytes, RCCE_IAM);
  RCCE_flag_write(request->sent, request->flag_set_value, request->dest);
  // wait for the destination to be ready to receive a message
label4:
  iRCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test);
  if(!test) {
    request->label = 4;
    return(iRCCE_PENDING);
  }
  RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);
  request->finished = 1;
  return(iRCCE_SUCCESS);
}

View file

@ -0,0 +1,279 @@
///*************************************************************************************
// Synchronization functions.
// Single-bit and whole-cache-line flags are sufficiently different that we provide
// separate implementations of the synchronization routines for each case
//**************************************************************************************
//
// Author: Rob F. Van der Wijngaart
// Intel Corporation
// Date: 08/30/2010
//
//**************************************************************************************
//
// Copyright 2010 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// [2010-10-25] added support for non-blocking send/recv operations
// - iRCCE_isend(), ..._test(), ..._wait(), ..._push()
// - iRCCE_irecv(), ..._test(), ..._wait(), ..._push()
// by Carsten Clauss, Chair for Operating Systems,
// RWTH Aachen University
//
// [2010-11-12] extracted non-blocking code into separate library
// by Carsten Scholtes
//
// [2011-01-21] updated the datatype of RCCE_FLAG according to the
// recent version of RCCE
//
// [2011-04-12] added macro test for rcce version
//
// [2012-11-06] add barrier implementation as described in:
// USENIX HotPar'12 Eval. Hardw. Synch. Supp. SCC
// by Pablo Reble
//
#include "iRCCE_lib.h"
#ifdef SINGLEBITFLAGS
#warning iRCCE_TAGGED_FLAGS: for using this feature, SINGLEBITFLAGS must be disabled! (make SINGLEBITFLAGS=0)
#endif
#ifdef SINGLEBITFLAGS
// Single, non-blocking probe of a single-bit flag.
// Stores 1 in *result if the flag currently equals val and 0 otherwise;
// always returns iRCCE_SUCCESS.
int iRCCE_test_flag(RCCE_FLAG flag, RCCE_FLAG_STATUS val, int *result) {
#ifdef RCCE_VERSION
  // this is a newer version than V1.0.13: the bit is addressed through flag_addr
  t_vcharp flag_byte = flag.flag_addr;
#else
  t_vcharp flag_line = flag.line_address;
#endif
  int matches;

  // always flush/invalidate to ensure we read the most recent value of the flag
#ifdef _OPENMP
#pragma omp flush
#endif
  RC_cache_invalidate();

#ifdef RCCE_VERSION
  matches = (RCCE_bit_value(flag_byte, (flag.location)%RCCE_FLAGS_PER_BYTE) == val);
#else
  matches = (RCCE_bit_value(flag_line, flag.location) == val);
#endif

  (*result) = matches ? 1 : 0;
  return(iRCCE_SUCCESS);
}
#else
//////////////////////////////////////////////////////////////////
// LOCKLESS SYNCHRONIZATION USING ONE WHOLE CACHE LINE PER FLAG //
//////////////////////////////////////////////////////////////////
// Single, non-blocking probe of a whole-cache-line flag.
// Stores 1 in *result if the flag currently equals val and 0 otherwise;
// always returns iRCCE_SUCCESS.
int iRCCE_test_flag(RCCE_FLAG flag, RCCE_FLAG_STATUS val, int *result) {
#ifndef RCCE_VERSION
  RCCE_FLAG probe = flag;
#endif

  // flush/invalidate first so that the comparison sees the current flag value
#ifdef _OPENMP
#pragma omp flush
#endif
  RC_cache_invalidate();

#ifdef RCCE_VERSION
  (*result) = ((RCCE_FLAG_STATUS)(*flag.flag_addr) == val) ? 1 : 0;
#else
  (*result) = ((*probe) == val) ? 1 : 0;
#endif
  return(iRCCE_SUCCESS);
}
//////////////////////////////////////////////////////////////////////////
// FUNCTIONS FOR HANDLING TAGGED FLAGS (NEED WHOLE CACHE LINE PER FLAG) //
//////////////////////////////////////////////////////////////////////////
// Allocate a flag that can carry a tag.
// With RCCE_VERSION defined, RCCE_LINE_SIZE bytes are obtained from
// RCCE_malloc and stored in flag->flag_addr; a failed allocation is reported
// through RCCE_error_return(..., RCCE_ERROR_FLAG_UNDEFINED). Without
// RCCE_VERSION the call is forwarded to RCCE_flag_alloc.
int iRCCE_flag_alloc_tagged(RCCE_FLAG *flag)
{
#ifndef RCCE_VERSION
  return RCCE_flag_alloc(flag);
#else
  // this is a newer version than V1.0.13
  flag->flag_addr = RCCE_malloc(RCCE_LINE_SIZE);
  if (flag->flag_addr)
    return(RCCE_SUCCESS);
  return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_UNDEFINED));
#endif
}
// Write flag value `val` together with an optional payload (`tag`, `len`
// bytes) as one RCCE_LINE_SIZE-sized line to the flag of UE `ID`.
// Line layout: the first int holds val, the payload follows at offset
// sizeof(int); with _OPENMP the last int of the line is set to val as well.
// `len` is clamped to the range [0, iRCCE_MAX_TAGGED_LEN]. Returns the result
// of iRCCE_put passed through RCCE_error_return.
int iRCCE_flag_write_tagged(RCCE_FLAG *flag, RCCE_FLAG_STATUS val, int ID, void *tag, int len) {
  // zero-filled for any RCCE_LINE_SIZE, not just a 32-byte line
  unsigned char val_array[RCCE_LINE_SIZE] = {0};
  int error;

  *(int *) val_array = val;
#ifdef _OPENMP
  *(int *) &val_array[RCCE_LINE_SIZE-sizeof(int)] = val;
#endif

  if(tag)
  {
    if(len > iRCCE_MAX_TAGGED_LEN) len = iRCCE_MAX_TAGGED_LEN;
    else if(len < 0) len = 0; // never hand a negative length to the copy routine
    iRCCE_memcpy_put(&val_array[sizeof(int)], tag, len);
  }

#ifdef RCCE_VERSION
  error = iRCCE_put(flag->flag_addr, val_array, RCCE_LINE_SIZE, ID);
#else
  error = iRCCE_put((t_vcharp)(*flag), val_array, RCCE_LINE_SIZE, ID);
#endif
  return(RCCE_error_return(RCCE_debug_synch,error));
}
// Fetch the flag line of UE `ID` and decode it.
// If `val` is non-NULL it receives the flag value (first int of the line, or
// the last int of the line when built with _OPENMP). The payload is copied to
// `tag` only when `val` is non-NULL, the value read is non-zero and `tag` is
// non-NULL; `len` is clamped to [0, iRCCE_MAX_TAGGED_LEN].
// Returns RCCE_SUCCESS, or the iRCCE_get error passed through RCCE_error_return.
int iRCCE_flag_read_tagged(RCCE_FLAG flag, RCCE_FLAG_STATUS *val, int ID, void *tag, int len) {
  unsigned char val_array[RCCE_LINE_SIZE];
  int error;

#ifdef RCCE_VERSION
  error = iRCCE_get(val_array, flag.flag_addr, RCCE_LINE_SIZE, ID);
#else
  error = iRCCE_get(val_array, (t_vcharp)flag, RCCE_LINE_SIZE, ID);
#endif
  if(error != 0)
    return(RCCE_error_return(RCCE_debug_synch,error));

  if(val) *val = *(int *)val_array;
#ifdef _OPENMP
  if(val) *val = *(int *)&val_array[RCCE_LINE_SIZE-sizeof(int)];
#endif

  if( (val) && (*val) && (tag) ) {
    if(len > iRCCE_MAX_TAGGED_LEN) len = iRCCE_MAX_TAGGED_LEN;
    else if(len < 0) len = 0; // never hand a negative length to the copy routine
    iRCCE_memcpy_put(tag, &val_array[sizeof(int)], len);
  }
  return(RCCE_SUCCESS);
}
// Spin until the flag equals `val`, then copy up to `len` payload bytes
// (clamped to [0, iRCCE_MAX_TAGGED_LEN]) from offset sizeof(int) of the flag
// line into `tag` if `tag` is non-NULL. With _OPENMP the value polled is the
// last int of the line instead of its start. Always returns RCCE_SUCCESS.
int iRCCE_wait_tagged(RCCE_FLAG flag, RCCE_FLAG_STATUS val, void *tag, int len) {
#ifndef RCCE_VERSION
  RCCE_FLAG flag_pos = flag;
#ifdef _OPENMP
  flag_pos = flag + RCCE_LINE_SIZE / sizeof(int) - 1;
#endif
#endif

  do {
    // flush/invalidate before every read so that a fresh value is observed
#ifdef _OPENMP
#pragma omp flush
#endif
    RC_cache_invalidate();
#ifdef RCCE_VERSION
    // this is a newer version than V1.0.13
#ifdef _OPENMP
  } while ((RCCE_FLAG_STATUS)(*( ((int*)flag.flag_addr) + RCCE_LINE_SIZE / sizeof(int) - 1)) != val);
#else
  } while ((RCCE_FLAG_STATUS)(*flag.flag_addr) != val);
#endif
#else
  } while ((*flag_pos) != val);
#endif

  if(tag) {
    if(len > iRCCE_MAX_TAGGED_LEN) len = iRCCE_MAX_TAGGED_LEN;
    else if(len < 0) len = 0; // never hand a negative length to the copy routine
#ifdef RCCE_VERSION
    iRCCE_memcpy_put(tag, &((char*)flag.flag_addr)[sizeof(int)], len);
#else
    iRCCE_memcpy_put(tag, &((char*)flag)[sizeof(int)], len);
#endif
  }
  return(RCCE_SUCCESS);
}
// Single, non-blocking probe of a tagged flag.
// *result is set to 1 if the flag equals `val`, else 0. On a match, up to
// `len` payload bytes (clamped to [0, iRCCE_MAX_TAGGED_LEN]) are copied from
// offset sizeof(int) of the flag line into `tag` if `tag` is non-NULL.
// The location that is compared is the same one iRCCE_wait_tagged polls:
// with _OPENMP the last int of the line, otherwise the start of the line.
// Always returns RCCE_SUCCESS.
int iRCCE_test_tagged(RCCE_FLAG flag, RCCE_FLAG_STATUS val, int *result, void *tag, int len) {
  int matched;
#ifndef RCCE_VERSION
  RCCE_FLAG flag_pos = flag;
#ifdef _OPENMP
  flag_pos = flag + RCCE_LINE_SIZE / sizeof(int) - 1;
#endif
#endif

  // flush/invalidate first so that the comparison sees the current flag value
#ifdef _OPENMP
#pragma omp flush
#endif
  RC_cache_invalidate();

#ifdef RCCE_VERSION
#ifdef _OPENMP
  // same trailing-int location as in iRCCE_wait_tagged
  matched = ((RCCE_FLAG_STATUS)(*( ((int*)flag.flag_addr) + RCCE_LINE_SIZE / sizeof(int) - 1)) == val);
#else
  matched = ((RCCE_FLAG_STATUS)(*flag.flag_addr) == val);
#endif
#else
  matched = ((*flag_pos) == val);
#endif
  (*result) = matched ? 1 : 0;

  if((*result) && tag) {
    if(len > iRCCE_MAX_TAGGED_LEN) len = iRCCE_MAX_TAGGED_LEN;
    else if(len < 0) len = 0; // never hand a negative length to the copy routine
#ifdef RCCE_VERSION
    iRCCE_memcpy_put(tag, &((char*)flag.flag_addr)[sizeof(int)], len);
#else
    iRCCE_memcpy_put(tag, &((char*)flag)[sizeof(int)], len);
#endif
  }
  return(RCCE_SUCCESS);
}
// Returns iRCCE_MAX_TAGGED_LEN, the limit to which the tagged-flag functions
// in this file clamp their `len` argument.
int iRCCE_get_max_tagged_len(void)
{
return iRCCE_MAX_TAGGED_LEN;
}
#endif

View file

@ -0,0 +1,324 @@
/****************************************************************************************
* Functions for a convenient handling of multiple outstanding non-blocking requests
****************************************************************************************
*
* Authors: Jacek Galowicz, Carsten Clauss
* Chair for Operating Systems, RWTH Aachen University
* Date: 2010-12-09
*
****************************************************************************************
*
* Copyright 2010 Jacek Galowicz, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
#include "iRCCE_lib.h"
// Put a wait list into the empty state (neither head nor tail element).
void iRCCE_init_wait_list(iRCCE_WAIT_LIST *list)
{
  list->first = list->last = NULL;
}
// Append `elem` at the tail of `list`. The element's own `next` pointer is
// not touched here; the visible callers in this file set it to NULL beforehand.
static void iRCCE_add_wait_list_generic(iRCCE_WAIT_LIST *list, iRCCE_WAIT_LISTELEM * elem)
{
  if (list->first != NULL) {
    // non-empty list: hook the element behind the current tail
    list->last->next = elem;
  }
  else {
    // empty list: the element becomes the head as well
    list->first = elem;
  }
  list->last = elem;
}
//--------------------------------------------------------------------------------------
// FUNCTION: iRCCE_add_send_to_wait_list
//--------------------------------------------------------------------------------------
// Function for adding Send requests to the waitall-queue
//--------------------------------------------------------------------------------------
// Wrap send request `req` in a freshly allocated list element of type
// iRCCE_WAIT_LIST_SEND_TYPE and append it to `list`.
// NOTE(review): the malloc result is used unchecked; the void return type
// leaves no way to report an allocation failure to the caller.
void iRCCE_add_send_to_wait_list(iRCCE_WAIT_LIST *list, iRCCE_SEND_REQUEST * req)
{
  iRCCE_WAIT_LISTELEM *node = (iRCCE_WAIT_LISTELEM*)malloc(sizeof(iRCCE_WAIT_LISTELEM));

  node->req = (void*)req;
  node->next = NULL;
  node->type = iRCCE_WAIT_LIST_SEND_TYPE;

  iRCCE_add_wait_list_generic(list, node);
}
//--------------------------------------------------------------------------------------
// FUNCTION: iRCCE_add_recv_to_wait_list
//--------------------------------------------------------------------------------------
// Function for adding Recv requests to the waitall-queue
//--------------------------------------------------------------------------------------
// Wrap receive request `req` in a freshly allocated list element of type
// iRCCE_WAIT_LIST_RECV_TYPE and append it to `list`.
// NOTE(review): the malloc result is used unchecked; the void return type
// leaves no way to report an allocation failure to the caller.
void iRCCE_add_recv_to_wait_list(iRCCE_WAIT_LIST *list, iRCCE_RECV_REQUEST * req)
{
  iRCCE_WAIT_LISTELEM *node = (iRCCE_WAIT_LISTELEM*)malloc(sizeof(iRCCE_WAIT_LISTELEM));

  node->req = (void*)req;
  node->next = NULL;
  node->type = iRCCE_WAIT_LIST_RECV_TYPE;

  iRCCE_add_wait_list_generic(list, node);
}
//--------------------------------------------------------------------------------------
// FUNCTION: iRCCE_add_to_wait_list
//--------------------------------------------------------------------------------------
// Function for adding Send and/or Recv requests to the waitall-queue
//--------------------------------------------------------------------------------------
// Convenience wrapper: append whichever of the two requests is non-NULL.
// When both are given, the send request is appended before the receive request.
void iRCCE_add_to_wait_list(iRCCE_WAIT_LIST *list, iRCCE_SEND_REQUEST * send_req, iRCCE_RECV_REQUEST * recv_req)
{
  if (send_req) {
    iRCCE_add_send_to_wait_list(list, send_req);
  }
  if (recv_req) {
    iRCCE_add_recv_to_wait_list(list, recv_req);
  }
}
//--------------------------------------------------------------------------------------
// FUNCTION: iRCCE_wait_all
//--------------------------------------------------------------------------------------
// Blocking wait for completion of all enqueued send and recv calls
//--------------------------------------------------------------------------------------
// Busy-wait: call iRCCE_test_all repeatedly until it reports iRCCE_SUCCESS.
// Always returns iRCCE_SUCCESS.
int iRCCE_wait_all(iRCCE_WAIT_LIST *list)
{
  int state;

  do {
    state = iRCCE_test_all(list, NULL);
  } while (state != iRCCE_SUCCESS);

  return iRCCE_SUCCESS;
}
//--------------------------------------------------------------------------------------
// FUNCTION: iRCCE_test_all
//--------------------------------------------------------------------------------------
// Nonblocking test for completion of all enqueued send and recv calls
// Just provide NULL instead of testvar if you don't need it
//--------------------------------------------------------------------------------------
// Test every request on `list` once. A request whose test call returns
// iRCCE_SUCCESS is unlinked from the list and its list element is freed;
// all other requests stay enqueued.
// Returns iRCCE_SUCCESS if no request is left pending, else iRCCE_PENDING.
// If `test` is non-NULL it receives 1 for iRCCE_SUCCESS and 0 otherwise.
int iRCCE_test_all(iRCCE_WAIT_LIST *list, int *test)
{
  int retval = iRCCE_SUCCESS;
  int req_state;
  iRCCE_WAIT_LISTELEM *pElem = list->first;
  iRCCE_WAIT_LISTELEM *pPrev = NULL;  // closest preceding element that stayed in the list
  iRCCE_WAIT_LISTELEM *pNext;

  while (pElem != NULL) {
    pNext = pElem->next;

    if (pElem->type == iRCCE_WAIT_LIST_SEND_TYPE)
      req_state = iRCCE_isend_test((iRCCE_SEND_REQUEST*)pElem->req, NULL);
    else
      req_state = iRCCE_irecv_test((iRCCE_RECV_REQUEST*)pElem->req, NULL);

    if (req_state == iRCCE_SUCCESS) {
      // Remove this element from the list
      if (pPrev == NULL)
        list->first = pNext;
      else
        pPrev->next = pNext;
      // keep the tail pointer valid; it becomes NULL when the list runs empty
      // instead of being left pointing at the freed element
      if (pElem == list->last)
        list->last = pPrev;
      free(pElem);
    }
    else {
      retval = iRCCE_PENDING;
      pPrev = pElem;
    }
    pElem = pNext;
  }

  if (test) {
    (*test) = (retval == iRCCE_SUCCESS) ? 1 : 0;
  }
  return retval;
}
//--------------------------------------------------------------------------------------
// FUNCTION: iRCCE_wait_any
//--------------------------------------------------------------------------------------
// Blocking wait for completion of any enqueued send and recv request
//--------------------------------------------------------------------------------------
// Busy-wait: call iRCCE_test_any repeatedly until it reports iRCCE_SUCCESS.
// The out-parameters are passed through to iRCCE_test_any unchanged.
// Always returns iRCCE_SUCCESS.
int iRCCE_wait_any(iRCCE_WAIT_LIST *list, iRCCE_SEND_REQUEST ** send_request, iRCCE_RECV_REQUEST ** recv_request)
{
  int state;

  do {
    state = iRCCE_test_any(list, send_request, recv_request);
  } while (state != iRCCE_SUCCESS);

  return iRCCE_SUCCESS;
}
//--------------------------------------------------------------------------------------
// FUNCTION: iRCCE_test_any
//--------------------------------------------------------------------------------------
// Nonblocking test for completion of any enqueued send or recv request
//--------------------------------------------------------------------------------------
// Walk `list` from the head and test each request until one test call returns
// iRCCE_SUCCESS. That request is unlinked, its list element is freed and the
// request is reported through the matching out-parameter (the other one is
// set to NULL); the function then returns iRCCE_SUCCESS immediately.
// If no request has completed, both out-parameters are set to NULL and
// iRCCE_PENDING is returned. Either out-parameter may itself be NULL.
int iRCCE_test_any(iRCCE_WAIT_LIST *list, iRCCE_SEND_REQUEST ** send_request, iRCCE_RECV_REQUEST ** recv_request)
{
  int req_state;
  iRCCE_WAIT_LISTELEM *pElem;
  iRCCE_WAIT_LISTELEM *pPrev = NULL;  // element directly in front of pElem

  for (pElem = list->first; pElem != NULL; pPrev = pElem, pElem = pElem->next) {
    if (pElem->type == iRCCE_WAIT_LIST_SEND_TYPE)
      req_state = iRCCE_isend_test((iRCCE_SEND_REQUEST*)pElem->req, NULL);
    else
      req_state = iRCCE_irecv_test((iRCCE_RECV_REQUEST*)pElem->req, NULL);

    if (req_state != iRCCE_SUCCESS)
      continue;

    // Remove this element from the list
    if (pPrev == NULL)
      list->first = pElem->next;
    else
      pPrev->next = pElem->next;
    // keep the tail pointer valid; it becomes NULL when the list runs empty
    // instead of being left pointing at the freed element
    if (pElem == list->last)
      list->last = pPrev;

    // report the completed request through the matching out-parameter
    if (pElem->type == iRCCE_WAIT_LIST_SEND_TYPE) {
      if(send_request) (*send_request) = (iRCCE_SEND_REQUEST*)pElem->req;
      if(recv_request) (*recv_request) = NULL;
    }
    else {
      if(send_request) (*send_request) = NULL;
      if(recv_request) (*recv_request) = (iRCCE_RECV_REQUEST*)pElem->req;
    }

    free(pElem);
    return iRCCE_SUCCESS;
  }

  // nothing has completed
  if(send_request) {
    (*send_request) = NULL;
  }
  if(recv_request) {
    (*recv_request) = NULL;
  }
  return iRCCE_PENDING;
}
//--------------------------------------------------------------------------------------
// FUNCTIONS: iRCCE_get_dest, iRCCE_get_source, iRCCE_get_size, iRCCE_get_length, iRCCE_get_status
//--------------------------------------------------------------------------------------
// Functions to determine the respective sender/receiver after test_any() / wait_any()
// (Can also be used after receiving a message via wildcard mechanism!)
//--------------------------------------------------------------------------------------
// Destination UE stored in a send request; iRCCE_ERROR if `request` is NULL.
int iRCCE_get_dest(iRCCE_SEND_REQUEST *request)
{
  if (request == NULL) {
    return iRCCE_ERROR;
  }
  return request->dest;
}
//--------------------------------------------------------------------------------------
// Source UE stored in a receive request; when `request` is NULL the value of
// iRCCE_recent_source is returned instead.
int iRCCE_get_source(iRCCE_RECV_REQUEST *request)
{
  if (request == NULL) {
    return iRCCE_recent_source;
  }
  return request->source;
}
//--------------------------------------------------------------------------------------
// Message size of a request. The send request takes precedence if both are
// given; with neither, the value of iRCCE_recent_length is returned.
int iRCCE_get_size(iRCCE_SEND_REQUEST * send_req, iRCCE_RECV_REQUEST * recv_req)
{
  if (send_req != NULL) {
    return send_req->size;
  }
  else if (recv_req != NULL) {
    return recv_req->size;
  }
  else {
    return iRCCE_recent_length;
  }
}
//--------------------------------------------------------------------------------------
// Returns the current value of iRCCE_recent_length.
int iRCCE_get_length(void)
{
return iRCCE_recent_length;
}
//--------------------------------------------------------------------------------------
// Report the state of a request; the send request is examined first and the
// receive request only if `send_req` is NULL.
//   iRCCE_SUCCESS  - the request's `finished` field is set
//   iRCCE_PENDING  - unfinished, and the request is the one the corresponding
//                    queue variable (iRCCE_isend_queue, or
//                    iRCCE_irecv_queue[source]) currently points to
//   iRCCE_RESERVED - unfinished, and the queue variable points elsewhere
//   iRCCE_ERROR    - both arguments are NULL
int iRCCE_get_status(iRCCE_SEND_REQUEST * send_req, iRCCE_RECV_REQUEST * recv_req)
{
  if(send_req != NULL) {
    if(send_req->finished)
      return(iRCCE_SUCCESS);
    return (iRCCE_isend_queue == send_req) ? (iRCCE_PENDING) : (iRCCE_RESERVED);
  }

  if(recv_req != NULL) {
    if(recv_req->finished)
      return(iRCCE_SUCCESS);
    return (iRCCE_irecv_queue[recv_req->source] == recv_req) ? (iRCCE_PENDING) : (iRCCE_RESERVED);
  }

  return iRCCE_ERROR;
}

110
hermit/usr/ircce/syscall.h Normal file
View file

@ -0,0 +1,110 @@
/*
* Copyright (c) 2011, Stefan Lankes, RWTH Aachen University
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __SYSCALL_H__
#define __SYSCALL_H__
#ifdef __cplusplus
extern "C" {
#endif
/*
 * System call numbers, intended as the NR argument of the SYSCALLn() macros
 * defined further down in this header.
 * NOTE(review): presumably these values have to match the kernel's system
 * call table -- verify against the kernel sources before changing any of them.
 */
/* process and file handling */
#define __NR_exit 0
#define __NR_write 1
#define __NR_open 2
#define __NR_close 3
#define __NR_read 4
#define __NR_lseek 5
#define __NR_unlink 6
#define __NR_getpid 7
#define __NR_kill 8
#define __NR_fstat 9
#define __NR_sbrk 10
#define __NR_fork 11
#define __NR_wait 12
#define __NR_execve 13
#define __NR_times 14
/* sockets */
#define __NR_accept 15
#define __NR_bind 16
#define __NR_closesocket 17
#define __NR_connect 18
#define __NR_listen 19
#define __NR_recv 20
#define __NR_send 21
#define __NR_socket 22
#define __NR_getsockopt 23
#define __NR_setsockopt 24
#define __NR_gethostbyname 25
#define __NR_sendto 26
#define __NR_recvfrom 27
#define __NR_select 28
/* further file handling */
#define __NR_stat 29
#define __NR_dup 30
#define __NR_dup2 31
/* scheduling, semaphores and timing */
#define __NR_msleep 32
#define __NR_yield 33
#define __NR_sem_init 34
#define __NR_sem_destroy 35
#define __NR_sem_wait 36
#define __NR_sem_post 37
#define __NR_sem_timedwait 38
#define __NR_getprio 39
#define __NR_setprio 40
#define __NR_clone 41
#define __NR_sem_cancelablewait 42
#define __NR_get_ticks 43
/* RCCE support */
#define __NR_rcce_init 44
#define __NR_rcce_fini 45
#define __NR_rcce_malloc 46
/*
 * Issue the x86-64 `syscall` instruction.
 *
 * nr          system call number, passed in the "a" register (rax)
 * arg0..arg2  arguments, passed in the "D", "S" and "d" registers
 *             (rdi, rsi, rdx)
 *
 * Returns the value found in the "a" register after the instruction.
 *
 * __asm__ is used instead of the plain asm keyword so that this header also
 * compiles in strict ISO C modes (e.g. -std=c99), where asm is not a keyword.
 */
static inline long
syscall(int nr, unsigned long arg0, unsigned long arg1, unsigned long arg2)
{
  long res;

  // note: syscall stores the return address in rcx and rflags in r11,
  // hence both registers are listed as clobbered
  __asm__ volatile ("syscall"
                    : "=a" (res)
                    : "a" (nr), "D" (arg0), "S" (arg1), "d" (arg2)
                    : "memory", "%rcx", "%r11");

  return res;
}
/*
 * Convenience wrappers around syscall() for 0 to 3 arguments; unused argument
 * slots are passed as 0. Every macro parameter is parenthesised so that an
 * expression argument (e.g. `buf + off`) is cast as a whole rather than only
 * its first operand.
 */
#define SYSCALL0(NR) \
  syscall((NR), 0, 0, 0)
#define SYSCALL1(NR, ARG0) \
  syscall((NR), (unsigned long)(ARG0), 0, 0)
#define SYSCALL2(NR, ARG0, ARG1) \
  syscall((NR), (unsigned long)(ARG0), (unsigned long)(ARG1), 0)
#define SYSCALL3(NR, ARG0, ARG1, ARG2) \
  syscall((NR), (unsigned long)(ARG0), (unsigned long)(ARG1), (unsigned long)(ARG2))
#ifdef __cplusplus
}
#endif
#endif