mirror of
https://github.com/hermitcore/libhermit.git
synced 2025-03-09 00:00:03 +01:00
add iRCCE as communication library between isles
- iRCCE was designed for the SCC - see http://www.lfbs.rwth-aachen.de/publications/files/iRCCE.pdf - HermitCore creates virtual message passing buffers to emulate the SCC
This commit is contained in:
parent
3f1c5c1d77
commit
e339311d05
32 changed files with 10724 additions and 0 deletions
|
@ -55,6 +55,9 @@ libs:
|
|||
$Q$(MAKE) TARGET=$(TARGET) CC_FOR_TARGET=$(CC_FOR_TARGET) AR_FOR_TARGET=$(AR_FOR_TARGET) CFLAGS_FOR_TARGET+="-I. -Iplatform/hermit -Iplatform/helper -Wall" -C pte
|
||||
$Q$(MAKE) TARGET=$(TARGET) CC_FOR_TARGET=$(CC_FOR_TARGET) AR_FOR_TARGET=$(AR_FOR_TARGET) CFLAGS_FOR_TARGET+="-I. -Wall -pthread" -C libgomp depend
|
||||
$Q$(MAKE) TARGET=$(TARGET) CC_FOR_TARGET=$(CC_FOR_TARGET) AR_FOR_TARGET=$(AR_FOR_TARGET) CFLAGS_FOR_TARGET+="-I. -Wall -pthread" -C libgomp
|
||||
$Q$(MAKE) TARGET=$(TARGET) CC_FOR_TARGET=$(CC_FOR_TARGET) AR_FOR_TARGET=$(AR_FOR_TARGET) CFLAGS_FOR_TARGET+="-I. -Wall" -C ircce depend
|
||||
$Q$(MAKE) TARGET=$(TARGET) CC_FOR_TARGET=$(CC_FOR_TARGET) AR_FOR_TARGET=$(AR_FOR_TARGET) CFLAGS_FOR_TARGET+="-I. -Wall" -C ircce
|
||||
|
||||
|
||||
$(TMP)/gcc:
|
||||
@echo Build final gcc
|
||||
|
@ -73,6 +76,8 @@ veryclean:
|
|||
@echo Propper cleaning of the toolchain
|
||||
$Q$(MAKE) -C pte veryclean
|
||||
$Q$(MAKE) -C libgomp veryclean
|
||||
$Q$(MAKE) -C ircce veryclean
|
||||
$Q$(MAKE) -C tests veryclean
|
||||
$Q$(MAKE) -C benchmarks veryclean
|
||||
$Q$(RM) $(TOPDIR)/$(ARCH)
|
||||
$Q$(RM) $(TMP)
|
||||
|
|
43
hermit/usr/ircce/Makefile
Normal file
43
hermit/usr/ircce/Makefile
Normal file
|
@ -0,0 +1,43 @@
|
|||
# Builds libircce.a (the iRCCE/RCCE communication library) from every *.c file
# in this directory and installs the archive and the public headers into the
# newlib tree of the HermitCore toolchain.
#
# CC_FOR_TARGET, AR_FOR_TARGET and CFLAGS_FOR_TARGET are expected to be passed
# in by the parent Makefile on the command line.

# Root of the target's newlib installation (receives lib/ and include/).
NEWLIB := ../x86/x86_64-hermit
# NOTE: $(MAKE) is intentionally NOT redefined here; overriding it with a
# literal "make" would discard the make binary chosen by the parent.
ARFLAGS_FOR_TARGET := rsv
CP := cp
# Expanded once at parse time instead of on every reference.
C_source := $(wildcard *.c)
NAME := libircce.a
OBJS := $(C_source:.c=.o)

#
# Prettify output
# Run with V=1 to see the full command lines.
V = 0
ifeq ($(V),0)
Q = @
P = > /dev/null
endif

# These goals are commands, not files; declaring them phony keeps them working
# even if a file with the same name appears in this directory.
.PHONY: default all clean veryclean depend

# other implicit rules
%.o : %.c
	@echo [CC] $@
	$Q$(CC_FOR_TARGET) -c $(CFLAGS_FOR_TARGET) -o $@ $<

default: all

all: $(NAME)

# Create the archive, then install it together with all headers.
$(NAME): $(OBJS)
	$Q$(AR_FOR_TARGET) $(ARFLAGS_FOR_TARGET) $@ $(OBJS)
	$Q$(CP) $@ $(NEWLIB)/lib
	$Q$(CP) *.h $(NEWLIB)/include

clean:
	@echo Cleaning examples
	$Q$(RM) $(NAME) *.o *~

veryclean:
	@echo Propper cleaning examples
	$Q$(RM) $(NAME) *.o *~

# Regenerate the header dependencies consumed by the -include below.
depend:
	$Q$(CC_FOR_TARGET) -MM $(CFLAGS_FOR_TARGET) *.c > Makefile.dep

# The leading '-' tolerates a missing Makefile.dep on a fresh checkout.
-include Makefile.dep
# DO NOT DELETE
|
374
hermit/usr/ircce/RCCE.h
Normal file
374
hermit/usr/ircce/RCCE.h
Normal file
|
@ -0,0 +1,374 @@
|
|||
//
|
||||
// Copyright 2010 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// [2010-10-25] added support for non-blocking send/recv operations
|
||||
// - RCCE_isend(), ..._test(), ..._wait(), ..._push()
|
||||
// - RCCE_irecv(), ..._test(), ..._wait(), ..._push()
|
||||
// by Carsten Clauss, Chair for Operating Systems,
|
||||
// RWTH Aachen University
|
||||
//
|
||||
// [2012-09-10] added support for "tagged" flags
|
||||
// - RCCE_send_tagged(), RCCE_recv_tagged(), RCCE_recv_probe_tagged()
|
||||
// by Carsten Clauss, Chair for Operating Systems,
|
||||
// RWTH Aachen University
|
||||
//
|
||||
#ifndef RCCE_H
|
||||
#define RCCE_H
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef __hermit__
|
||||
#define SCC
|
||||
#define COPPERRIDGE
|
||||
#define USE_REMOTE_PUT_LOCAL_GET
|
||||
#undef SHMADD
|
||||
#endif
|
||||
|
||||
#define _RCCE "1.0.13 release"
|
||||
// #define USE_BYTE_FLAGS
|
||||
// #define USE_FLAG_EXPERIMENTAL
|
||||
// little trick to allow the application to be called "RCCE_APP" under
|
||||
// OpenMP, and "main" otherwise
|
||||
|
||||
// Absolute value of x. The argument and the whole expansion are parenthesized
// so that compound arguments such as ABS(a-b) expand correctly. Note that x is
// evaluated twice, so do not pass expressions with side effects.
#define ABS(x) (((x) > 0) ? (x) : -(x))
|
||||
|
||||
#ifndef _OPENMP
|
||||
#define RCCE_APP main
|
||||
#endif
|
||||
|
||||
// modify next line for Intel BareMetal, which supports stdout, but not stderr
|
||||
#define STDERR stdout
|
||||
|
||||
#ifdef __hermit__
|
||||
#define LOG2_LINE_SIZE 6
|
||||
#else
|
||||
#define LOG2_LINE_SIZE 5
|
||||
#endif
|
||||
#define RCCE_LINE_SIZE (1<<LOG2_LINE_SIZE)
|
||||
// RCCE_BUFF_SIZE_MAX is space per UE, which is half of the space per tile
|
||||
#define RCCE_BUFF_SIZE_MAX (1<<13)
|
||||
|
||||
#ifdef SHMADD
|
||||
//64MB
|
||||
//#define RCCE_SHM_SIZE_MAX 0x4000000
|
||||
// 128MB
|
||||
//#define RCCE_SHM_SIZE_MAX 0x8000000
|
||||
// 256MB
|
||||
//#define RCCE_SHM_SIZE_MAX 0x10000000
|
||||
// 512MB
|
||||
#define RCCE_SHM_SIZE_MAX 0x20000000
|
||||
// 960MB
|
||||
//#define RCCE_SHM_SIZE_MAX 0x3C000000
|
||||
#else
|
||||
#ifndef SCC_COUPLED_SYSTEMS
|
||||
// 64MB
|
||||
#define RCCE_SHM_SIZE_MAX (1<<26)
|
||||
#else
|
||||
// In Coupled Mode only 4MB
|
||||
#define RCCE_SHM_SIZE_MAX (1<<22)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef __hermit__
|
||||
#define RCCE_MAX_BOARDS 1
|
||||
#define RCCE_MAXNP_PER_BOARD 8
|
||||
#else
|
||||
#define RCCE_MAX_BOARDS 2 /* allow up to 2 SCC boards for now */
|
||||
#define RCCE_MAXNP_PER_BOARD 48
|
||||
#endif
|
||||
#define RCCE_MAXNP (RCCE_MAX_BOARDS * RCCE_MAXNP_PER_BOARD)
|
||||
#define RCCE_SUCCESS 0
|
||||
#define RCCE_PENDING -1
|
||||
#define RCCE_RESERVED -2
|
||||
#define RCCE_REJECTED -3
|
||||
#define RCCE_ERROR_BASE 1234321
|
||||
#define RCCE_ERROR_TARGET (RCCE_ERROR_BASE + 1)
|
||||
#define RCCE_ERROR_SOURCE (RCCE_ERROR_BASE + 2)
|
||||
#define RCCE_ERROR_ID (RCCE_ERROR_BASE + 3)
|
||||
#define RCCE_ERROR_MESSAGE_LENGTH (RCCE_ERROR_BASE + 4)
|
||||
#define RCCE_ERROR_FLAG_UNDEFINED (RCCE_ERROR_BASE + 5)
|
||||
#define RCCE_ERROR_NUM_UES (RCCE_ERROR_BASE + 6)
|
||||
#define RCCE_ERROR_DATA_OVERLAP (RCCE_ERROR_BASE + 7)
|
||||
#define RCCE_ERROR_ALIGNMENT (RCCE_ERROR_BASE + 8)
|
||||
#define RCCE_ERROR_DEBUG_FLAG (RCCE_ERROR_BASE + 9)
|
||||
#define RCCE_ERROR_FLAG_NOT_IN_COMM_BUFFER (RCCE_ERROR_BASE + 10)
|
||||
#define RCCE_ERROR_FLAG_STATUS_UNDEFINED (RCCE_ERROR_BASE + 11)
|
||||
#define RCCE_ERROR_FLAG_NOT_ALLOCATED (RCCE_ERROR_BASE + 12)
|
||||
#define RCCE_ERROR_VAL_UNDEFINED (RCCE_ERROR_BASE + 13)
|
||||
#define RCCE_ERROR_INVALID_ERROR_CODE (RCCE_ERROR_BASE + 14)
|
||||
#define RCCE_ERROR_RPC_NOT_ALLOCATED (RCCE_ERROR_BASE + 15)
|
||||
#define RCCE_ERROR_RPC_INTERNAL (RCCE_ERROR_BASE + 16)
|
||||
#define RCCE_ERROR_MULTIPLE_RPC_REQUESTS (RCCE_ERROR_BASE + 17)
|
||||
#define RCCE_ERROR_FDIVIDER (RCCE_ERROR_BASE + 18)
|
||||
#define RCCE_ERROR_FREQUENCY_EXCEEDED (RCCE_ERROR_BASE + 19)
|
||||
#define RCCE_ERROR_NO_ACTIVE_RPC_REQUEST (RCCE_ERROR_BASE + 20)
|
||||
#define RCCE_ERROR_STALE_RPC_REQUEST (RCCE_ERROR_BASE + 21)
|
||||
#define RCCE_ERROR_COMM_UNDEFINED (RCCE_ERROR_BASE + 22)
|
||||
#define RCCE_ERROR_ILLEGAL_OP (RCCE_ERROR_BASE + 23)
|
||||
#define RCCE_ERROR_ILLEGAL_TYPE (RCCE_ERROR_BASE + 24)
|
||||
#define RCCE_ERROR_MALLOC (RCCE_ERROR_BASE + 25)
|
||||
#define RCCE_ERROR_COMM_INITIALIZED (RCCE_ERROR_BASE + 26)
|
||||
#define RCCE_ERROR_CORE_NOT_IN_HOSTFILE (RCCE_ERROR_BASE + 27)
|
||||
#define RCCE_ERROR_NO_MULTICAST_SUPPORT (RCCE_ERROR_BASE + 28)
|
||||
#define RCCE_MAX_ERROR_STRING 45
|
||||
|
||||
#define RCCE_DEBUG_ALL 111111
|
||||
#define RCCE_DEBUG_SYNCH 111444
|
||||
#define RCCE_DEBUG_COMM 111555
|
||||
#define RCCE_DEBUG_RPC 111666
|
||||
#define RCCE_DEBUG_DEBUG 111888
|
||||
|
||||
#define RCCE_FLAG_SET 1
|
||||
#define RCCE_FLAG_UNSET 0
|
||||
|
||||
#define RCCE_NUM_OPS 4
|
||||
#define RCCE_OP_BASE 23232323
|
||||
#define RCCE_SUM (RCCE_OP_BASE)
|
||||
#define RCCE_MIN (RCCE_OP_BASE+1)
|
||||
#define RCCE_MAX (RCCE_OP_BASE+2)
|
||||
#define RCCE_PROD (RCCE_OP_BASE+3)
|
||||
|
||||
#define RCCE_TYPE_BASE 63636363
|
||||
#define RCCE_INT (RCCE_TYPE_BASE)
|
||||
#define RCCE_LONG (RCCE_TYPE_BASE+1)
|
||||
#define RCCE_FLOAT (RCCE_TYPE_BASE+2)
|
||||
#define RCCE_DOUBLE (RCCE_TYPE_BASE+3)
|
||||
|
||||
// MPB pointer type
|
||||
typedef volatile unsigned char* t_vcharp;
|
||||
|
||||
#if (defined(SINGLEBITFLAGS) || defined(USE_BYTE_FLAGS)) && !defined(USE_FLAG_EXPERIMENTAL)
|
||||
typedef struct {
|
||||
int location; /* location of bit within line (0-255) */
|
||||
t_vcharp flag_addr; /* address of byte containing flag inside cache line */
|
||||
t_vcharp line_address; /* start of cache line containing flag */
|
||||
} RCCE_FLAG;
|
||||
#else
|
||||
#ifdef USE_FLAG_EXPERIMENTAL
|
||||
typedef volatile unsigned char *RCCE_FLAG;
|
||||
#else
|
||||
typedef volatile int *RCCE_FLAG;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef USE_FLAG_EXPERIMENTAL
|
||||
typedef unsigned char RCCE_FLAG_STATUS;
|
||||
#else
|
||||
typedef int RCCE_FLAG_STATUS;
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
int size;
|
||||
int my_rank;
|
||||
int initialized;
|
||||
int member[RCCE_MAXNP];
|
||||
#ifdef USE_FAT_BARRIER
|
||||
RCCE_FLAG gather[RCCE_MAXNP];
|
||||
#else
|
||||
RCCE_FLAG gather;
|
||||
#endif
|
||||
RCCE_FLAG release;
|
||||
volatile int cycle;
|
||||
volatile int count;
|
||||
int step;
|
||||
int label;
|
||||
} RCCE_COMM;
|
||||
|
||||
typedef struct _RCCE_SEND_REQUEST {
|
||||
char *privbuf; // source buffer in local private memory (send buffer)
|
||||
t_vcharp combuf; // intermediate buffer in MPB
|
||||
size_t chunk; // size of MPB available for this message (bytes)
|
||||
RCCE_FLAG *ready; // flag indicating whether receiver is ready
|
||||
RCCE_FLAG *sent; // flag indicating whether message has been sent by source
|
||||
size_t size; // size of message (bytes)
|
||||
int dest; // UE that will receive the message
|
||||
|
||||
int copy; // set to 0 for synchronization only (no copying/sending)
|
||||
void* tag; // additional tag?
|
||||
int len; // length of additional tag
|
||||
RCCE_FLAG *probe; // flag for probing for incoming messages
|
||||
|
||||
size_t wsize; // offset within send buffer when putting in "chunk" bytes
|
||||
size_t remainder; // bytes remaining to be sent
|
||||
size_t nbytes; // number of bytes to be sent in single RCCE_put call
|
||||
char *bufptr; // running pointer inside privbuf for current location
|
||||
|
||||
int label; // jump/goto label for the reentrance of the respective poll function
|
||||
int finished; // flag that indicates whether the request has already been finished
|
||||
|
||||
struct _RCCE_SEND_REQUEST *next;
|
||||
} RCCE_SEND_REQUEST;
|
||||
|
||||
// Bookkeeping record for one receive request; records can be chained into a
// singly linked list through "next".
// NOTE(review): several field comments were originally copied verbatim from
// RCCE_SEND_REQUEST; the receive-side wording below is inferred from the field
// names and should be confirmed against the implementation.
typedef struct _RCCE_RECV_REQUEST {
|
||||
char *privbuf; // buffer in local private memory (presumably the receive/destination buffer -- TODO confirm)
|
||||
t_vcharp combuf; // intermediate buffer in MPB
|
||||
size_t chunk; // size of MPB available for this message (bytes)
|
||||
RCCE_FLAG *ready; // flag indicating whether receiver is ready
|
||||
RCCE_FLAG *sent; // flag indicating whether message has been sent by source
|
||||
size_t size; // size of message (bytes)
|
||||
int source; // UE that will send the message
|
||||
|
||||
int copy; // set to 0 for cancel function
|
||||
void* tag; // additional tag?
|
||||
int len; // length of additional tag
|
||||
RCCE_FLAG *probe; // flag for probing for incoming messages
|
||||
|
||||
size_t wsize; // offset within privbuf when processing "chunk" bytes at a time (presumably the receive buffer -- TODO confirm)
|
||||
size_t remainder; // bytes remaining to be transferred (presumably still to be received -- TODO confirm)
|
||||
size_t nbytes; // number of bytes moved in a single transfer call (presumably RCCE_get on the receive side -- TODO confirm)
|
||||
char *bufptr; // running pointer inside privbuf for current location
|
||||
|
||||
int label; // jump/goto label for the reentrance of the respective poll function
|
||||
int finished; // flag that indicates whether the request has already been finished
|
||||
|
||||
struct _RCCE_RECV_REQUEST *next; // next request in the list
|
||||
} RCCE_RECV_REQUEST;
|
||||
|
||||
typedef struct tree_s {
|
||||
int parent; // UE of parent
|
||||
int num_children;
|
||||
int child[RCCE_MAXNP]; // UEs of children
|
||||
} tree_t;
|
||||
|
||||
#ifdef RC_POWER_MANAGEMENT
|
||||
typedef struct{
|
||||
int release;
|
||||
int old_voltage_level;
|
||||
int new_voltage_level;
|
||||
int old_frequency_divider;
|
||||
int new_frequency_divider;
|
||||
long long start_cycle;
|
||||
} RCCE_REQUEST;
|
||||
int RCCE_power_domain(void);
|
||||
int RCCE_iset_power(int, RCCE_REQUEST *, int *, int *);
|
||||
int RCCE_wait_power(RCCE_REQUEST *);
|
||||
int RCCE_set_frequency_divider(int, int *);
|
||||
int RCCE_power_domain_master(void);
|
||||
int RCCE_power_domain_size(void);
|
||||
#endif
|
||||
|
||||
int RCCE_init(int *, char***);
|
||||
int RCCE_finalize(void);
|
||||
double RCCE_wtime(void);
|
||||
int RCCE_ue(void);
|
||||
int RCCE_num_ues(void);
|
||||
#ifdef SCC_COUPLED_SYSTEMS
|
||||
int RCCE_dev(void);
|
||||
int RCCE_dev_ue(void);
|
||||
int RCCE_num_dev(void);
|
||||
int RCCE_num_ues_dev(int);
|
||||
int RCCE_ue_to_dev(int);
|
||||
#endif
|
||||
#ifdef GORY
|
||||
t_vcharp RCCE_malloc(size_t);
|
||||
t_vcharp RCCE_malloc_request(size_t, size_t *);
|
||||
t_vcharp RCCE_palloc(size_t,int);
|
||||
void RCCE_free(t_vcharp);
|
||||
int RCCE_put(t_vcharp, t_vcharp, int, int);
|
||||
int RCCE_get(t_vcharp, t_vcharp, int, int);
|
||||
int RCCE_wait_until(RCCE_FLAG, RCCE_FLAG_STATUS);
|
||||
int RCCE_test_flag(RCCE_FLAG, RCCE_FLAG_STATUS, int *);
|
||||
int RCCE_flag_alloc(RCCE_FLAG *);
|
||||
int RCCE_flag_free(RCCE_FLAG *);
|
||||
int RCCE_flag_write(RCCE_FLAG *, RCCE_FLAG_STATUS, int);
|
||||
int RCCE_flag_read(RCCE_FLAG, RCCE_FLAG_STATUS *, int);
|
||||
int RCCE_flag_write_tagged(RCCE_FLAG *, RCCE_FLAG_STATUS, int, char*, int);
|
||||
int RCCE_flag_read_tagged(RCCE_FLAG, RCCE_FLAG_STATUS *, int, char*, int);
|
||||
int RCCE_send(char *, t_vcharp, size_t, RCCE_FLAG *, RCCE_FLAG *, size_t, int);
|
||||
int RCCE_recv(char *, t_vcharp, size_t, RCCE_FLAG *, RCCE_FLAG *, size_t, int, RCCE_FLAG *);
|
||||
int RCCE_recv_test(char *, t_vcharp, size_t, RCCE_FLAG *, RCCE_FLAG *, size_t, int, int *, RCCE_FLAG *);
|
||||
#ifdef USE_FLAG_EXPERIMENTAL
|
||||
int RCCE_put_flag(t_vcharp, t_vcharp, int, int);
|
||||
int RCCE_get_flag(t_vcharp, t_vcharp, int, int);
|
||||
#endif
|
||||
#else
|
||||
// standard non-gory functions:
|
||||
|
||||
t_vcharp RCCE_malloc(size_t);
|
||||
|
||||
int RCCE_flag_write(RCCE_FLAG *, RCCE_FLAG_STATUS, int);
|
||||
int RCCE_flag_read(RCCE_FLAG, RCCE_FLAG_STATUS *, int);
|
||||
|
||||
int RCCE_send(char *, size_t, int);
|
||||
int RCCE_recv(char *, size_t, int);
|
||||
int RCCE_recv_test(char *, size_t, int, int *);
|
||||
int RCCE_send_pipe(char *, size_t, int);
|
||||
int RCCE_recv_pipe(char *, size_t, int);
|
||||
int RCCE_send_mcast(char *, size_t);
|
||||
int RCCE_recv_mcast(char *, size_t, int);
|
||||
int RCCE_send_tagged(char *, size_t, int, void *, int);
|
||||
int RCCE_recv_tagged(char *, size_t, int, void *, int);
|
||||
int RCCE_recv_probe_tagged(int, int *, t_vcharp *, void *, int);
|
||||
int RCCE_allreduce(char *, char *, int, int, int, RCCE_COMM);
|
||||
int RCCE_reduce(char *, char *, int, int, int, int, RCCE_COMM);
|
||||
int RCCE_bcast(char *, size_t, int, RCCE_COMM);
|
||||
int RCCE_recv_probe(int, int *, t_vcharp *);
|
||||
int RCCE_recv_cancel(size_t, int);
|
||||
int RCCE_isend(char *, size_t, int, RCCE_SEND_REQUEST *);
|
||||
int RCCE_isend_test(RCCE_SEND_REQUEST *, int *);
|
||||
int RCCE_isend_wait(RCCE_SEND_REQUEST *);
|
||||
int RCCE_isend_push(int);
|
||||
int RCCE_irecv(char *, size_t, int, RCCE_RECV_REQUEST *);
|
||||
int RCCE_irecv_test(RCCE_RECV_REQUEST *, int *);
|
||||
int RCCE_irecv_wait(RCCE_RECV_REQUEST *);
|
||||
int RCCE_irecv_push(int);
|
||||
|
||||
#endif
|
||||
t_vcharp RCCE_shmalloc(size_t);
|
||||
void RCCE_shfree(t_vcharp);
|
||||
void RCCE_shflush(void);
|
||||
t_vcharp RCCE_shrealloc(t_vcharp, size_t);
|
||||
|
||||
// LfBS-customized functions:
|
||||
void* RCCE_memcpy_get(void *, const void *, size_t);
|
||||
void* RCCE_memcpy_put(void *, const void *, size_t);
|
||||
#define RCCE_memcpy(a,b,c) RCCE_memcpy_put(a,b,c)
|
||||
|
||||
int RCCE_comm_split(int (*)(int, void *), void *, RCCE_COMM *);
|
||||
int RCCE_comm_free(RCCE_COMM *);
|
||||
int RCCE_comm_size(RCCE_COMM, int *);
|
||||
int RCCE_comm_rank(RCCE_COMM, int *);
|
||||
void RCCE_fence(void);
|
||||
int RCCE_barrier(RCCE_COMM *);
|
||||
int RCCE_tree_init(RCCE_COMM *, tree_t *, int);
|
||||
int RCCE_tree_barrier(RCCE_COMM *, tree_t *);
|
||||
int RCCE_tournament_barrier(RCCE_COMM *);
|
||||
int RCCE_tournament_fixed_barrier(RCCE_COMM *);
|
||||
int RCCE_dissemination_barrier(RCCE_COMM *);
|
||||
int RCCE_TNS_barrier(RCCE_COMM *);
|
||||
int RCCE_AIR_barrier(RCCE_COMM *);
|
||||
int RCCE_AIR_barrier2(RCCE_COMM *);
|
||||
int RCCE_nb_barrier(RCCE_COMM *);
|
||||
int RCCE_nb_TNS_barrier(RCCE_COMM *);
|
||||
int RCCE_nb_AIR_barrier(RCCE_COMM *);
|
||||
int RCCE_error_string(int, char *, int *);
|
||||
int RCCE_debug_set(int);
|
||||
int RCCE_debug_unset(int);
|
||||
|
||||
extern RCCE_COMM RCCE_COMM_WORLD;
|
||||
#ifdef RC_POWER_MANAGEMENT
|
||||
extern RCCE_COMM RCCE_P_COMM;
|
||||
#define RCCE_POWER_DEFAULT -99999
|
||||
#endif
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp threadprivate (RCCE_COMM_WORLD)
|
||||
#ifdef RC_POWER_MANAGEMENT
|
||||
#pragma omp threadprivate (RCCE_P_COMM)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif
|
1354
hermit/usr/ircce/RCCE_admin.c
Normal file
1354
hermit/usr/ircce/RCCE_admin.c
Normal file
File diff suppressed because it is too large
Load diff
73
hermit/usr/ircce/RCCE_bcast.c
Normal file
73
hermit/usr/ircce/RCCE_bcast.c
Normal file
|
@ -0,0 +1,73 @@
|
|||
//***************************************************************************************
|
||||
// Broadcast functions.
|
||||
//***************************************************************************************
|
||||
// Since only collective operations require communication domains, they are the only ones
|
||||
// that use communicators. All collectives implementations are naive, linear operations.
|
||||
// There may not be any overlap between target and source.
|
||||
//
|
||||
// Author: Rob F. Van der Wijngaart
|
||||
// Intel Corporation
|
||||
// Date: 008/30/2010
|
||||
//
|
||||
//**************************************************************************************
|
||||
//
|
||||
// Copyright 2010 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
#include "RCCE_lib.h"
|
||||
|
||||
#ifdef USE_RCCE_COMM
|
||||
#ifndef GORY
|
||||
#include "RCCE_comm/RCCE_bcast.c"
|
||||
#endif
|
||||
#else
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// RCCE_bcast
|
||||
//--------------------------------------------------------------------------------------
|
||||
// function that sends data from UE root to all other UEs in the communicator
|
||||
//--------------------------------------------------------------------------------------
|
||||
int RCCE_bcast(
|
||||
char *buf, // private memory, used for sending (root) and receiving (other UEs)
|
||||
size_t num, // number of bytes to be sent
|
||||
int root, // source within "comm" of broadcast data
|
||||
RCCE_COMM comm // communication domain
|
||||
) {
|
||||
|
||||
int ue, ierr;
|
||||
#ifdef GORY
|
||||
printf("Collectives only implemented for simplified API\n");
|
||||
return(1);
|
||||
#else
|
||||
// check to make sure root is member of the communicator
|
||||
if (root<0 || root >= comm.size)
|
||||
return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ID));
|
||||
|
||||
if (RCCE_IAM == comm.member[root]) {
|
||||
for (ue=0; ue<comm.size; ue++) if (ue != root)
|
||||
if(ierr=RCCE_send(buf, num, comm.member[ue]))
|
||||
return(RCCE_error_return(RCCE_debug_comm,ierr));
|
||||
}
|
||||
else if(ierr=RCCE_recv(buf, num, comm.member[root]))
|
||||
return(RCCE_error_return(RCCE_debug_comm,ierr));
|
||||
|
||||
return(RCCE_SUCCESS);
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif
|
159
hermit/usr/ircce/RCCE_comm.c
Normal file
159
hermit/usr/ircce/RCCE_comm.c
Normal file
|
@ -0,0 +1,159 @@
|
|||
//***************************************************************************************
|
||||
// Communicator manipulation and accessor routines.
|
||||
//***************************************************************************************
|
||||
//
|
||||
// Author: Rob F. Van der Wijngaart
|
||||
// Intel Corporation
|
||||
// Date: 008/30/2010
|
||||
//
|
||||
//***************************************************************************************
|
||||
//
|
||||
// Copyright 2010 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "RCCE_lib.h"
|
||||
|
||||
#if defined(COPPERRIDGE) && !defined(__hermit__)
|
||||
#define RCCE_memcpy_put(a,b,c) memcpy_to_mpb(a, b, c)
|
||||
#include "scc_memcpy.h"
|
||||
#else
|
||||
#define RCCE_memcpy_put(a,b,c) memcpy(a, b, c)
|
||||
#endif
|
||||
|
||||
#ifdef USE_RCCE_COMM
|
||||
#ifndef GORY
|
||||
#include "RCCE_comm/RCCE_scatter.c"
|
||||
#include "RCCE_comm/RCCE_gather.c"
|
||||
#include "RCCE_comm/RCCE_allgather.c"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: RCCE_comm_split
|
||||
// RCCE_comm_split works like MPI_Comm_split, but:
|
||||
// 1. Always uses the default global communicator as the basis, not an
|
||||
// arbitrary communicator
|
||||
// 2. Uses the rank of the UE in the global communicator as the key
|
||||
// 3. Uses a function, operating on UE's global rank, to compute color
|
||||
//--------------------------------------------------------------------------------------
|
||||
int RCCE_comm_split(
|
||||
int (*color)(int, void *), // function returning a color value for given ue and aux
|
||||
void *aux, // optional user-supplied data structure
|
||||
RCCE_COMM *comm // new communicator
|
||||
) {
|
||||
|
||||
int i, my_color, error;
|
||||
|
||||
if (!comm) return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_COMM_UNDEFINED));
|
||||
|
||||
// start with a barrier to make sure all UEs are participating, unless we are still
|
||||
// defining the global communicator; there is no danger in skipping the barrier in
|
||||
// that case, because the global communicator is defined in RCCE_init, which must be
|
||||
// called by all cores before any other RCCE calls
|
||||
if (comm != &RCCE_COMM_WORLD) RCCE_barrier(&RCCE_COMM_WORLD);
|
||||
|
||||
// determine the size of the communicator
|
||||
my_color = color(RCCE_IAM, aux);
|
||||
|
||||
comm->size = 0;
|
||||
for (i=0; i<RCCE_NP; i++) {
|
||||
if (color(i, aux) == my_color) {
|
||||
if (i == RCCE_IAM) comm->my_rank = comm->size;
|
||||
comm->member[comm->size++] = i;
|
||||
}
|
||||
}
|
||||
|
||||
// note: we only need to allocate new synch flags if the communicator has not yet been
|
||||
// initialized. It is legal to overwrite an initialized communcator, in which case the
|
||||
// membership may change, but the same synchronization flags can be used
|
||||
if (comm->initialized == RCCE_COMM_INITIALIZED) return(RCCE_SUCCESS);
|
||||
|
||||
#ifndef USE_FAT_BARRIER
|
||||
if((error=RCCE_flag_alloc(&(comm->gather))))
|
||||
return(RCCE_error_return(RCCE_debug_comm,error));
|
||||
#else
|
||||
for (i=0; i<RCCE_NP; i++) {
|
||||
if((error=RCCE_flag_alloc(&(comm->gather[i]))))
|
||||
return(RCCE_error_return(RCCE_debug_comm,error));
|
||||
}
|
||||
#endif
|
||||
|
||||
if(error=RCCE_flag_alloc(&(comm->release)))
|
||||
return(RCCE_error_return(RCCE_debug_comm,error));
|
||||
|
||||
comm->label = 0;
|
||||
|
||||
comm->initialized = RCCE_COMM_INITIALIZED;
|
||||
|
||||
return(RCCE_SUCCESS);
|
||||
}
|
||||
|
||||
// DO NOT USE THIS FUNCTION IN NON-GORY MODE UNTIL MALLOC_FREE HAS BEEN IMPLEMENTED
|
||||
int RCCE_comm_free(RCCE_COMM *comm) {
|
||||
printf("DO NOT USE IN NON-GORY MODE UNTIL MALLOC_FREE HAS BEEN IMPLEMENTED\n");
|
||||
if (comm->initialized != RCCE_COMM_INITIALIZED)
|
||||
return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_COMM_INITIALIZED));
|
||||
|
||||
#ifndef USE_FAT_BARRIER
|
||||
RCCE_flag_free(&(comm->gather));
|
||||
#else
|
||||
{ int i;
|
||||
for (i=0; i<RCCE_NP; i++)
|
||||
RCCE_flag_free(&(comm->gather[i]));
|
||||
}
|
||||
#endif
|
||||
|
||||
RCCE_flag_free(&(comm->release));
|
||||
comm->initialized = RCCE_COMM_NOT_INITIALIZED;
|
||||
|
||||
return(RCCE_SUCCESS);
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: RCCE_comm_size
|
||||
// returns the number of UEs inside the communicator
|
||||
//--------------------------------------------------------------------------------------
|
||||
int RCCE_comm_size(
|
||||
RCCE_COMM comm, // communicator
|
||||
int *size // return value (size)
|
||||
) {
|
||||
|
||||
if (comm.initialized == RCCE_COMM_INITIALIZED) {
|
||||
*size = comm.size;
|
||||
return(RCCE_SUCCESS);
|
||||
}
|
||||
else return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_COMM_INITIALIZED));
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: RCCE_comm_rank
|
||||
// returns the rank of the calling UE inside the communicator
|
||||
//--------------------------------------------------------------------------------------
|
||||
int RCCE_comm_rank(
|
||||
RCCE_COMM comm, // communicator
|
||||
int *rank // return value (rank)
|
||||
) {
|
||||
|
||||
if (comm.initialized == RCCE_COMM_INITIALIZED) {
|
||||
*rank = comm.my_rank;
|
||||
return(RCCE_SUCCESS);
|
||||
}
|
||||
else return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_COMM_INITIALIZED));
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: RCCE_global_color
|
||||
// use this trivial color function to define global communicator
|
||||
//--------------------------------------------------------------------------------------
|
||||
int RCCE_global_color(int rank, void *nothing) {
  // both arguments are ignored: every UE receives the same color
  (void)rank;
  (void)nothing;
  return 1;
}
|
163
hermit/usr/ircce/RCCE_debug.c
Normal file
163
hermit/usr/ircce/RCCE_debug.c
Normal file
|
@ -0,0 +1,163 @@
|
|||
//***************************************************************************************
|
||||
// Diagnostic routines.
|
||||
//***************************************************************************************
|
||||
//
|
||||
// Author: Rob F. Van der Wijngaart
|
||||
// Intel Corporation
|
||||
// Date: 008/30/2010
|
||||
//
|
||||
//***************************************************************************************
|
||||
//
|
||||
// Copyright 2010 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "RCCE_lib.h"
|
||||
#include "RCCE_debug.h"
|
||||
|
||||
#define MAX_ERROR_NUMBER 26
|
||||
|
||||
//......................................................................................
|
||||
// GLOBAL VARIABLES USED BY THE LIBRARY
|
||||
//......................................................................................
|
||||
const char *RCCE_estrings[] = {
|
||||
/* 0 */ "Success",
|
||||
/* 1 */ "Invalid target buffer",
|
||||
/* 2 */ "Invalid source buffer",
|
||||
/* 3 */ "Invalid UE ID",
|
||||
/* 4 */ "Invalid message length",
|
||||
/* 5 */ "Flag variable undefined",
|
||||
/* 6 */ "Emulated NUEs do not match requested NUEs",
|
||||
/* 7 */ "Message buffers overlap in comm buffer",
|
||||
/* 8 */ "Data buffer misalignment",
|
||||
/* 9 */ "Debug flag not defined",
|
||||
/* 10 */ "RCCE_flag variable not inside comm buffer",
|
||||
/* 11 */ "Flag status not defined",
|
||||
/* 12 */ "Flag not allocated",
|
||||
/* 13 */ "Value not defined",
|
||||
/* 14 */ "Invalid error code",
|
||||
/* 15 */ "RPC data structure not allocated",
|
||||
/* 16 */ "RPC internal error",
|
||||
/* 17 */ "Multiple outstanding RPC requests",
|
||||
/* 18 */ "Invalid power step",
|
||||
/* 19 */ "Maximum allowable frequency exceeded",
|
||||
/* 20 */ "No active RPC request",
|
||||
/* 21 */ "Stale RPC request",
|
||||
/* 22 */ "Communicator undefined",
|
||||
/* 23 */ "Illegal reduction operator",
|
||||
/* 24 */ "Illegal data type",
|
||||
/* 25 */ "Memory allocation error",
|
||||
/* 26 */ "Communicator initialization error",
|
||||
/* 27 */ "Multicast is not supported in remote-put/local-get mode"
|
||||
};
|
||||
// GLOBAL VARIABLES USED BY THE LIBRARY
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: RCCE_error_string
|
||||
//--------------------------------------------------------------------------------------
|
||||
// RCCE_error_string returns a descriptive error string
|
||||
//--------------------------------------------------------------------------------------
|
||||
int RCCE_error_string(
|
||||
int err_no, // number of error to be described
|
||||
char *error_string, // copy of error string
|
||||
int *string_length // length of error string
|
||||
) {
|
||||
|
||||
if (err_no != RCCE_SUCCESS) {
|
||||
err_no -= RCCE_ERROR_BASE;
|
||||
if (err_no < 1 || err_no > MAX_ERROR_NUMBER) {
|
||||
strcpy(error_string,"");
|
||||
*string_length=0;
|
||||
return(RCCE_error_return(RCCE_debug_debug,RCCE_ERROR_INVALID_ERROR_CODE));
|
||||
}
|
||||
}
|
||||
strcpy(error_string,RCCE_estrings[err_no]);
|
||||
*string_length = strlen(RCCE_estrings[err_no]);
|
||||
return(RCCE_SUCCESS);
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: RCCE_error_return
|
||||
//--------------------------------------------------------------------------------------
|
||||
// prints diagnostic error string, governed by input flag, also returns the error code
|
||||
//--------------------------------------------------------------------------------------
|
||||
int RCCE_error_return(
|
||||
int debug_flag, // flag that controls diagnostic printing
|
||||
int err_no // number of error to be printed
|
||||
) {
|
||||
char error_string[RCCE_MAX_ERROR_STRING];
|
||||
int string_length;
|
||||
|
||||
if (debug_flag && err_no) {
|
||||
RCCE_error_string(err_no, error_string, &string_length);
|
||||
fprintf(STDERR,"Error on UE %d: %s\n", RCCE_IAM, error_string); fflush(NULL);
|
||||
}
|
||||
return(err_no);
|
||||
}
|
||||
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: RCCE_debug_set
|
||||
//--------------------------------------------------------------------------------------
|
||||
// turns on debugging of a certain library feature
|
||||
//--------------------------------------------------------------------------------------
|
||||
int RCCE_debug_set(
|
||||
int flag // flag that controls which library feaure is instrumented
|
||||
){
|
||||
|
||||
switch(flag) {
|
||||
case(RCCE_DEBUG_ALL): RCCE_debug_synch=1;
|
||||
RCCE_debug_comm=1;
|
||||
RCCE_debug_debug=1;
|
||||
RCCE_debug_RPC=1;
|
||||
return(RCCE_SUCCESS);
|
||||
case(RCCE_DEBUG_SYNCH): RCCE_debug_synch=1;
|
||||
return(RCCE_SUCCESS);
|
||||
case(RCCE_DEBUG_COMM): RCCE_debug_comm=1;
|
||||
return(RCCE_SUCCESS);
|
||||
case(RCCE_DEBUG_DEBUG): RCCE_debug_debug=1;
|
||||
return(RCCE_SUCCESS);
|
||||
case(RCCE_DEBUG_RPC): RCCE_debug_RPC=1;
|
||||
return(RCCE_SUCCESS);
|
||||
default: return(RCCE_error_return(RCCE_debug_debug,
|
||||
RCCE_ERROR_DEBUG_FLAG));
|
||||
}
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: RCCE_debug_unset
|
||||
//--------------------------------------------------------------------------------------
|
||||
// turns off debugging of a certain library feature
|
||||
//--------------------------------------------------------------------------------------
|
||||
int RCCE_debug_unset(
|
||||
int flag // flag that controls which library feaure is uninstrumented
|
||||
){
|
||||
|
||||
switch(flag) {
|
||||
case(RCCE_DEBUG_ALL): RCCE_debug_synch=0;
|
||||
RCCE_debug_comm=0;
|
||||
RCCE_debug_debug=0;
|
||||
RCCE_debug_RPC=0;
|
||||
return(RCCE_SUCCESS);
|
||||
case(RCCE_DEBUG_SYNCH): RCCE_debug_synch=0;
|
||||
return(RCCE_SUCCESS);
|
||||
case(RCCE_DEBUG_COMM): RCCE_debug_comm=0;
|
||||
return(RCCE_SUCCESS);
|
||||
case(RCCE_DEBUG_DEBUG): RCCE_debug_debug=0;
|
||||
return(RCCE_SUCCESS);
|
||||
case(RCCE_DEBUG_RPC): RCCE_debug_RPC=0;
|
||||
return(RCCE_SUCCESS);
|
||||
default: return(RCCE_error_return(RCCE_debug_debug,
|
||||
RCCE_ERROR_DEBUG_FLAG));
|
||||
}
|
||||
}
|
26
hermit/usr/ircce/RCCE_debug.h
Normal file
26
hermit/usr/ircce/RCCE_debug.h
Normal file
|
@ -0,0 +1,26 @@
|
|||
/**************************************************************
|
||||
* Change the RCCE_debug_xxx values to get debug info. *
|
||||
* Change RCCE_comm_init_val to 1 to see what happens if *
|
||||
* the comm buffers are not properly initialized at startup . *
|
||||
**************************************************************/
|
||||
|
||||
// Debug switches; each is passed as the "debug_flag" argument of
// RCCE_error_return by the corresponding part of the library.
int RCCE_debug_synch   = 0;   // synchronization / flag routines
int RCCE_debug_comm    = 0;   // communication (put/get) parameter checks
int RCCE_debug_debug   = 0;   // the debug/error-string routines themselves
int RCCE_debug_RPC     = 0;   // RPC feature (toggled via RCCE_DEBUG_RPC)
// Start-up value for the comm buffers; see the banner comment of this header.
int RCCE_comm_init_val = 0;
|
||||
//
|
||||
// Copyright 2010 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
488
hermit/usr/ircce/RCCE_flags.c
Normal file
488
hermit/usr/ircce/RCCE_flags.c
Normal file
|
@ -0,0 +1,488 @@
|
|||
//**************************************************************************************
|
||||
// Flag manipulation and access functions.
|
||||
// Single-bit and whole-cache-line flags are sufficiently different that we provide
|
||||
// separate implementations of all the flag routines for each case
|
||||
//**************************************************************************************
|
||||
//
|
||||
// Author: Rob F. Van der Wijngaart
|
||||
// Intel Corporation
|
||||
// Date: 008/30/2010
|
||||
//
|
||||
//**************************************************************************************
|
||||
//
|
||||
// Copyright 2010 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// [2012-09-07] added support for "tagged" flags
|
||||
// by Carsten Clauss, Chair for Operating Systems,
|
||||
// RWTH Aachen University
|
||||
//
|
||||
#include "RCCE_lib.h"
|
||||
#if defined(COPPERRIDGE) && !defined(__hermit__)
|
||||
#include "scc_memcpy.h"
|
||||
#else
|
||||
#define memcpy_scc memcpy
|
||||
#endif
|
||||
|
||||
#ifdef USE_BYTE_FLAGS
|
||||
#include "RCCE_byte_flags.c"
|
||||
#else
|
||||
|
||||
#ifdef SINGLEBITFLAGS
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// LOCKING SYNCHRONIZATION USING ONE BIT PER FLAG
|
||||
//////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
//......................................................................................
|
||||
// GLOBAL VARIABLES USED BY THE LIBRARY
|
||||
//......................................................................................
|
||||
// single bit flags are accessed with the granularity of integers. Compute the
|
||||
// number of flags per integer
|
||||
// Bits per int and the index of its most significant bit.
// NOTE(review): within this file these appear to be referenced only by the
// "#if 0" word-granular variants of the bit helpers -- confirm before removing.
int WORDSIZE    = 8 * sizeof(int);
int LEFTMOSTBIT = 8 * sizeof(int) - 1;
|
||||
//......................................................................................
|
||||
// END GLOBAL VARIABLES USED BY THE LIBRARY
|
||||
//......................................................................................
|
||||
|
||||
// Head of the linked list of flag lines: occupancy bitmap all zero (no flag
// in use), both pointer members NULL and the member count 0.
RCCE_FLAG_LINE RCCE_flags = {{0}, NULL, 0, NULL};
|
||||
|
||||
// next three utility functions are only used by the library, not the user. We assume
|
||||
// there will never be errors, so we do not return any error code. "location" of a
|
||||
// flag bit inside a cache line is reckoned from the most significant (leftmost)
|
||||
// bit. Within a word, flag zero is also in the leftmost bit
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: RCCE_bit_value
|
||||
//--------------------------------------------------------------------------------------
|
||||
// return status of single bit flag at a specific location within cache line
|
||||
//--------------------------------------------------------------------------------------
|
||||
#if 0
|
||||
// BUGGY VERSION (by Intel):
|
||||
RCCE_FLAG_STATUS RCCE_bit_value(t_vcharp line_address, int location) {
|
||||
t_vintp character = (t_vintp) (line_address + location/WORDSIZE);
|
||||
int bit_position = (LEFTMOSTBIT-(location%WORDSIZE));
|
||||
unsigned int mask = 1<<bit_position;
|
||||
return (((*character) & mask)>>bit_position);
|
||||
}
|
||||
#else
|
||||
// FIXED VERSION (by LfBS):
|
||||
RCCE_FLAG_STATUS RCCE_bit_value(t_vcharp line_address, int location) {
  // Bits are addressed byte-wise: bit 0 of the line is the most significant
  // bit of byte 0, bit 8 the most significant bit of byte 1, and so on.
  const int byte_index = location / 8;
  const int shift      = 7 - location % 8;
  const unsigned char mask = 1 << shift;

  // isolate the requested bit and move it down to position 0
  return (line_address[byte_index] & mask) >> shift;
}
|
||||
#endif
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: RCCE_flip_bit_value
|
||||
//--------------------------------------------------------------------------------------
|
||||
// flip single bit in cache line and return value of changed bit. The location is that
|
||||
// of the bit inside the line. To find the byte it is in, divide by 8.
|
||||
//--------------------------------------------------------------------------------------
|
||||
#if 0
|
||||
// BUGGY VERSION (by Intel):
|
||||
RCCE_FLAG_STATUS RCCE_flip_bit_value(t_vcharp line_address, int location) {
|
||||
t_vintp character = (t_vintp) (line_address + location/WORDSIZE);
|
||||
int bit_position = (LEFTMOSTBIT-(location%WORDSIZE));
|
||||
unsigned int mask = 1<<bit_position;
|
||||
(*character) ^= mask;
|
||||
return ((mask & (*character))>>bit_position);
|
||||
}
|
||||
#else
|
||||
// FIXED VERSION (by LfBS):
|
||||
RCCE_FLAG_STATUS RCCE_flip_bit_value(t_vcharp line_address, int location) {
  // same byte-wise, MSB-first addressing as RCCE_bit_value
  const int byte_index = location / 8;
  const int shift      = 7 - location % 8;
  const unsigned char mask = 1 << shift;

  // toggle the bit in place, then re-read the byte to report its new value
  line_address[byte_index] ^= mask;
  return (mask & line_address[byte_index]) >> shift;
}
|
||||
#endif
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: RCCE_write_bit_value
|
||||
//--------------------------------------------------------------------------------------
|
||||
// write single bit in cache line and return value of changed bit. The location is that
|
||||
// of the bit inside the line. To find the byte it is in, divide by 8.
|
||||
//--------------------------------------------------------------------------------------
|
||||
#if 0
|
||||
// BUGGY VERSION (by Intel):
|
||||
int RCCE_write_bit_value(t_vcharp line_address, int location, RCCE_FLAG_STATUS val) {
|
||||
t_vintp character = (t_vintp)(line_address + location/WORDSIZE);
|
||||
int bit_position = (LEFTMOSTBIT-(location%WORDSIZE));
|
||||
unsigned int mask;
|
||||
switch (val) {
|
||||
case RCCE_FLAG_UNSET: mask = ~(1<<bit_position);
|
||||
(*character) &= mask;
|
||||
break;
|
||||
case RCCE_FLAG_SET: mask = 1<<bit_position;
|
||||
(*character) |= mask;
|
||||
break;
|
||||
}
|
||||
return (RCCE_SUCCESS);
|
||||
}
|
||||
#else
|
||||
// FIXED VERSION (by LfBS):
|
||||
int RCCE_write_bit_value(t_vcharp line_address, int location, RCCE_FLAG_STATUS val) {
  // same byte-wise, MSB-first addressing as RCCE_bit_value
  const int byte_index = location / 8;
  const int shift      = 7 - location % 8;
  unsigned char mask;

  if (val == RCCE_FLAG_UNSET) {
    // clear the bit, leave its neighbours alone
    mask = ~(1 << shift);
    line_address[byte_index] &= mask;
  }
  else if (val == RCCE_FLAG_SET) {
    // raise the bit, leave its neighbours alone
    mask = 1 << shift;
    line_address[byte_index] |= mask;
  }
  // any other value leaves the line untouched; success is reported regardless
  return RCCE_SUCCESS;
}
|
||||
#endif
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: RCCE_flag_alloc
|
||||
//--------------------------------------------------------------------------------------
|
||||
// allocate space for single bit flag. Since 256 fit on a single cache line, we only
|
||||
// need to allocate new MPB space when all existing lines are completely filled. A
|
||||
// flag line is a data structure that contains an array of size RCCE_LINE_SIZE
|
||||
// characters called "flag." Each bit in field "flag" corresponds to a flag being in use
|
||||
// (bit is 1) or not (bit is 0). The actual value of the flag is stored in the MPB
|
||||
// line pointed to by the field "line_address," at the corresponding bit location as in
|
||||
// field "flag."
|
||||
//--------------------------------------------------------------------------------------
|
||||
int RCCE_flag_alloc(RCCE_FLAG *flag) {
  // Allocates one single-bit flag and stores its MPB line address and bit
  // location in *flag. Returns RCCE_SUCCESS, or RCCE_ERROR_FLAG_NOT_ALLOCATED
  // (via RCCE_error_return) when no memory or no slot is available.
  RCCE_FLAG_LINE *flagp;
  RCCE_FLAG_LINE *new_line;
  int c, loc;

  // walk to the first flag line that still has room, or to the last line
  flagp = &RCCE_flags;
  while (flagp->members == 256 && flagp->next) {
    flagp = flagp->next;
  }

  // if this line has no MPB storage yet, allocate it now
  if (!flagp->line_address) flagp->line_address = RCCE_malloc(RCCE_LINE_SIZE);
  if (!flagp->line_address) return(RCCE_error_return(RCCE_debug_synch,
                                   RCCE_ERROR_FLAG_NOT_ALLOCATED));

  if (flagp->members < 256) {
    // there is space in this line for a new flag; claim the first open slot
    for (loc=0; loc<RCCE_LINE_SIZE*8; loc++) {
      if (!RCCE_bit_value((t_vcharp)(flagp->flag),loc)) {
        RCCE_flip_bit_value((t_vcharp)(flagp->flag),loc);
        flagp->members++;
        flag->location = loc;
        flag->line_address = flagp->line_address;
        return(RCCE_SUCCESS);
      }
    }
    // member count and occupancy bitmap disagree: do not report success
    // without having handed out a flag
    return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_NOT_ALLOCATED));
  }

  // every existing line is full: build a new one. It is linked into the list
  // only after it is completely initialized, so a failed allocation never
  // leaves a half-built node (garbage "members"/"next") reachable from the list.
  new_line = (RCCE_FLAG_LINE *) malloc(sizeof(RCCE_FLAG_LINE));
  if (!new_line) return(RCCE_error_return(RCCE_debug_synch,
                        RCCE_ERROR_FLAG_NOT_ALLOCATED));
  new_line->line_address = RCCE_malloc(RCCE_LINE_SIZE);
  if (!new_line->line_address) {
    free(new_line);
    return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_NOT_ALLOCATED));
  }

  // initialize the flag line: empty bitmap, then mark slot 0 as in use
  new_line->members = 1;
  new_line->next    = NULL;
  for (c=0; c<RCCE_LINE_SIZE; c++) new_line->flag[c] = 0;
  RCCE_flip_bit_value((t_vcharp)(new_line->flag),0);

  flagp->next = new_line;
  flag->location = 0;
  flag->line_address = new_line->line_address;
  return(RCCE_SUCCESS);
}
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: RCCE_flag_free
|
||||
//--------------------------------------------------------------------------------------
|
||||
// free space for single bit flag. Since 256 fit on a single cache line, we only
|
||||
// need to free claimed MPB space when all existing lines are completely emptied.
|
||||
//--------------------------------------------------------------------------------------
|
||||
int RCCE_flag_free(RCCE_FLAG *flag) {
  // Releases a single-bit flag: clears its slot in the owning flag line and,
  // if that empties a line other than the list head, frees the line as well.
  RCCE_FLAG_LINE *line     = &RCCE_flags;
  RCCE_FLAG_LINE *previous = NULL;

  // the flag must exist and carry a valid (non-negative) location
  if (flag == NULL || flag->location < 0)
    return RCCE_error_return(RCCE_debug_synch, RCCE_ERROR_FLAG_UNDEFINED);

  // locate the flag line that owns this flag's MPB line
  for (; line->next != NULL && line->line_address != flag->line_address;
         line = line->next)
    previous = line;
  if (line->line_address != flag->line_address)
    return RCCE_error_return(RCCE_debug_synch, RCCE_ERROR_FLAG_UNDEFINED);

  // give the slot back
  line->members--;
  RCCE_flip_bit_value((t_vcharp)(line->flag), flag->location);

  // an emptied line with a predecessor is spliced out of the list and freed;
  // the list head (no predecessor) is kept even when empty
  if (line->members == 0 && previous != NULL) {
    RCCE_free(line->line_address);
    previous->next = line->next;
    free(line);
  }

  // invalidate the handle so it cannot be freed a second time by mistake
  flag->location = -1;
  flag->line_address = NULL;

  return RCCE_SUCCESS;
}
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: RCCE_flag_write
|
||||
//--------------------------------------------------------------------------------------
|
||||
// This is the core flag manipulation routine. It requires locking to guarantee atomic
|
||||
// access while updating one of a line of flags.
|
||||
//--------------------------------------------------------------------------------------
|
||||
int RCCE_flag_write(RCCE_FLAG *flag, RCCE_FLAG_STATUS val, int ID) {
  // Sets or clears one single-bit flag on UE "ID" with a locked
  // read-modify-write of the MPB line holding it. Returns the RCCE_get or
  // RCCE_put result, passed through RCCE_error_return.
  t_vchar val_array[RCCE_LINE_SIZE];
  int error;

#ifdef GORY
  // check input parameters
  if (!flag || flag->location < 0 || flag->location > 255)
    return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_UNDEFINED));
  if (val != RCCE_FLAG_UNSET && val != RCCE_FLAG_SET)
    return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_STATUS_UNDEFINED));
#endif

  // acquire lock to make sure nobody else fiddles with the flags on the target core
  RCCE_acquire_lock(ID);

  // copy entire MPB cache line containing flag to local space
  error = RCCE_get(val_array, flag->line_address, RCCE_LINE_SIZE, ID);
  if (!error) {
    // overwrite single bit within local copy of cache line
    RCCE_write_bit_value(val_array, flag->location, val);
    // write copy back to the MPB
    error = RCCE_put(flag->line_address, val_array, RCCE_LINE_SIZE, ID);
  }

  // release the lock on every path; returning early after a failed RCCE_get
  // would leave the target core's lock held
  RCCE_release_lock(ID);
  return(RCCE_error_return(RCCE_debug_synch,error));
}
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: RCCE_flag_read
|
||||
//--------------------------------------------------------------------------------------
|
||||
// This routine is rarely needed. We typically only read a flag when we're waiting for
|
||||
// it to change value (function RCCE_wait_until). Reading does not require locking. The
|
||||
// moment the target flag we're trying to read changes value, it is OK to read and
|
||||
// return that value
|
||||
//--------------------------------------------------------------------------------------
|
||||
int RCCE_flag_read(RCCE_FLAG flag, RCCE_FLAG_STATUS *val, int ID) {
  // Reads one single-bit flag from UE "ID" into *val; no lock is taken.
  volatile unsigned char line_copy[RCCE_LINE_SIZE];
  int error;

#ifdef GORY
  // check input parameters
  if (flag.location < 0 || flag.location > 255)
    return RCCE_error_return(RCCE_debug_synch, RCCE_ERROR_FLAG_UNDEFINED);
  if (val == NULL)
    return RCCE_error_return(RCCE_debug_synch, RCCE_ERROR_VAL_UNDEFINED);
#endif

  // Should be able to read the bit straight from the MPB as RCCE_wait_until
  // does; copying the whole line out first is fine here because this routine
  // is not time critical.
  error = RCCE_get(line_copy, flag.line_address, RCCE_LINE_SIZE, ID);
  if (error)
    return RCCE_error_return(RCCE_debug_synch, error);

  *val = RCCE_bit_value(line_copy, flag.location);
  return RCCE_SUCCESS;
}
|
||||
|
||||
#else
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// LOCKLESS SYNCHRONIZATION USING ONE WHOLE CACHE LINE PER FLAG //
|
||||
//////////////////////////////////////////////////////////////////
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: RCCE_flag_alloc
|
||||
//--------------------------------------------------------------------------------------
|
||||
// there is no internal structure to whole-cache-line flags; a new flag simply means a
|
||||
// newly allocated line in the MPB
|
||||
//--------------------------------------------------------------------------------------
|
||||
int RCCE_flag_alloc(RCCE_FLAG *flag) {
  // a whole-cache-line flag is simply one freshly allocated MPB line
  *flag = (RCCE_FLAG) RCCE_malloc(RCCE_LINE_SIZE);
  if (*flag)
    return RCCE_SUCCESS;
  return RCCE_error_return(RCCE_debug_synch, RCCE_ERROR_FLAG_UNDEFINED);
}
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: RCCE_flag_free
|
||||
//--------------------------------------------------------------------------------------
|
||||
// there is no internal structure to whole-cache-line flags; deleting a flag simply
|
||||
// means deallocating line in the MPB
|
||||
//--------------------------------------------------------------------------------------
|
||||
int RCCE_flag_free(RCCE_FLAG *flag) {
  // reject a missing handle; otherwise return the flag's MPB line
  if (flag == NULL)
    return RCCE_error_return(RCCE_debug_synch, RCCE_ERROR_FLAG_UNDEFINED);

  RCCE_free((t_vcharp)(*flag));
  return RCCE_SUCCESS;
}
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: RCCE_flag_write
|
||||
//--------------------------------------------------------------------------------------
|
||||
// This is the core flag manipulation routine. No locking required. We simply write the
|
||||
// flag value into the first word of a local (private) buffer of the size of a cache
|
||||
// line and copy it to the corresponding location in the MPB
|
||||
// access while updating one of a line of flags.
|
||||
//--------------------------------------------------------------------------------------
|
||||
int RCCE_flag_write(RCCE_FLAG *flag, RCCE_FLAG_STATUS val, int ID) {
  // Writes a whole-cache-line flag on UE "ID"; no locking is involved.
  int error;
#ifdef USE_FLAG_EXPERIMENTAL
  // experimental path: the flag is a single byte pushed with RCCE_put_flag
  volatile unsigned char value = val;

  error = RCCE_put_flag(*flag, &value, 1, ID);
#else
  // private, zero-filled copy of the line that will carry the flag value
  volatile unsigned char val_array[RCCE_LINE_SIZE] = {0};

#ifdef GORY
  // check input parameters
  if (!flag || !(*flag))
    return RCCE_error_return(RCCE_debug_synch, RCCE_ERROR_FLAG_UNDEFINED);
  error = (val != RCCE_FLAG_UNSET && val != RCCE_FLAG_SET);
  if (error)
    return RCCE_error_return(RCCE_debug_synch, RCCE_ERROR_FLAG_STATUS_UNDEFINED);
#endif

  // the value lives in the first int of the line, or in the last int when
  // USE_REVERTED_FLAGS is defined
#ifdef USE_REVERTED_FLAGS
  *(int *) &val_array[RCCE_LINE_SIZE-sizeof(int)] = val;
#else
  *(int *) val_array = val;
#endif

  error = RCCE_put((t_vcharp)(*flag), val_array, RCCE_LINE_SIZE, ID);
#endif

  return RCCE_error_return(RCCE_debug_synch, error);
}
|
||||
|
||||
#ifdef USE_TAGGED_FLAGS
|
||||
int RCCE_flag_write_tagged(RCCE_FLAG *flag, RCCE_FLAG_STATUS val, int ID, void* tag, int len) {
  // Writes a whole-cache-line flag on UE "ID" and packs up to
  // RCCE_LINE_SIZE - sizeof(int) bytes of caller data ("tag") into the rest of
  // the line. A NULL tag or a non-positive len sends the flag value alone.
  // Returns the RCCE_put result, passed through RCCE_error_return.
  unsigned char val_array[RCCE_LINE_SIZE] = {0};
  // computed as int so that a negative len is not promoted to a huge unsigned
  // value by the comparison below (which used to turn it into a full-size copy)
  const int max_tag_len = (int)(RCCE_LINE_SIZE - sizeof(int));
  int error;

  // flag value sits in the first int of the line (last int if reverted)
#ifndef USE_REVERTED_FLAGS
  *(int *) val_array = val;
#else
  *(int *) &val_array[RCCE_LINE_SIZE-sizeof(int)] = val;
#endif

  if (tag && len > 0) {
    if (len > max_tag_len) len = max_tag_len;
    // the tag occupies the bytes not used by the flag value
#ifndef USE_REVERTED_FLAGS
    memcpy_scc(&val_array[sizeof(int)], tag, len);
#else
    memcpy_scc(&val_array[0], tag, len);
#endif
  }

  error = RCCE_put((t_vcharp)(*flag), val_array, RCCE_LINE_SIZE, ID);

  return(RCCE_error_return(RCCE_debug_synch,error));
}
|
||||
#endif
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: RCCE_flag_read
|
||||
//--------------------------------------------------------------------------------------
|
||||
// This routine is rarely needed. We typically only read a flag when we're waiting for
|
||||
// it to change value (function RCCE_wait_until). Reading requires copying the whole
|
||||
// MPB cache line containing the flag to a private buffer and returning the first int.
|
||||
//--------------------------------------------------------------------------------------
|
||||
int RCCE_flag_read(RCCE_FLAG flag, RCCE_FLAG_STATUS *val, int ID) {
  // Reads a whole-cache-line flag from UE "ID"; the value is stored through
  // val only when val is non-NULL.
  int error;
#ifdef USE_FLAG_EXPERIMENTAL
  // experimental path: the flag is a single byte fetched with RCCE_get_flag
  volatile unsigned char value;

  error = RCCE_get_flag(&value, (t_vcharp)flag, 1, ID);
  if (error)
    return RCCE_error_return(RCCE_debug_synch, error);

  if (val) *val = value;
#else
  volatile unsigned char val_array[RCCE_LINE_SIZE];

#ifdef GORY
  // check input parameters
  if (!flag) return RCCE_error_return(RCCE_debug_synch, RCCE_ERROR_FLAG_UNDEFINED);
  if (!val)  return RCCE_error_return(RCCE_debug_synch, RCCE_ERROR_VAL_UNDEFINED);
#endif

  // copy the flag's MPB line into private memory
  error = RCCE_get(val_array, (t_vcharp)flag, RCCE_LINE_SIZE, ID);
  if (error)
    return RCCE_error_return(RCCE_debug_synch, error);

  // the value is the first int of the line (last int if reverted)
#ifdef USE_REVERTED_FLAGS
  if (val) *val = *(int *)&val_array[RCCE_LINE_SIZE-sizeof(int)];
#else
  if (val) *val = *(int *)val_array;
#endif
#endif

  return RCCE_SUCCESS;
}
|
||||
#ifdef USE_TAGGED_FLAGS
|
||||
int RCCE_flag_read_tagged(RCCE_FLAG flag, RCCE_FLAG_STATUS *val, int ID, void *tag, int len) {
  // Reads a whole-cache-line flag from UE "ID" and, when the flag value is
  // nonzero, copies up to RCCE_LINE_SIZE - sizeof(int) bytes of attached tag
  // data into "tag". A NULL val, NULL tag or non-positive len skips the copy.
  unsigned char val_array[RCCE_LINE_SIZE];
  // computed as int so that a negative len is not promoted to a huge unsigned
  // value by the comparison below (which used to overflow the caller's buffer
  // with a full-size copy)
  const int max_tag_len = (int)(RCCE_LINE_SIZE - sizeof(int));
  int error;

  error = RCCE_get(val_array, (t_vcharp)flag, RCCE_LINE_SIZE, ID);
  if (error)
    return(RCCE_error_return(RCCE_debug_synch,error));

  // flag value sits in the first int of the line (last int if reverted)
#ifndef USE_REVERTED_FLAGS
  if(val) *val = *(int *)val_array;
#else
  if(val) *val = *(int *)&val_array[RCCE_LINE_SIZE-sizeof(int)];
#endif

  if( (val) && (*val) && (tag) && (len > 0) ) {
    if (len > max_tag_len) len = max_tag_len;
    // the tag occupies the bytes not used by the flag value
#ifndef USE_REVERTED_FLAGS
    memcpy_scc(tag, &val_array[sizeof(int)], len);
#else
    memcpy_scc(tag, &val_array[0], len);
#endif
  }

  return(RCCE_SUCCESS);
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif
|
169
hermit/usr/ircce/RCCE_get.c
Normal file
169
hermit/usr/ircce/RCCE_get.c
Normal file
|
@ -0,0 +1,169 @@
|
|||
//***************************************************************************************
|
||||
// Get data from communication buffer.
|
||||
//***************************************************************************************
|
||||
//
|
||||
// Author: Rob F. Van der Wijngaart
|
||||
// Intel Corporation
|
||||
// Date: 008/30/2010
|
||||
//
|
||||
//***************************************************************************************
|
||||
//
|
||||
// Copyright 2010 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// [2010-11-03] switched to SCC-optimized memcpy() functions in scc_memcpy.h:
|
||||
// - memcpy_to_mpb()
|
||||
// - memcpy_from_mpb()
|
||||
// by Stefan Lankes, Carsten Clauss, Chair for Operating Systems,
|
||||
// RWTH Aachen University
|
||||
//
|
||||
#include "RCCE_lib.h"
|
||||
|
||||
#ifdef COPPERRIDGE
|
||||
#ifdef __hermit__
|
||||
#define memcpy_from_mpb memcpy
|
||||
#else
|
||||
#include "scc_memcpy.h"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// Out-of-line wrapper so code outside this file can use the same copy routine
// that RCCE_get uses internally.
void *RCCE_memcpy_get(void *dest, const void *src, size_t count)
{
#ifndef COPPERRIDGE
  return memcpy(dest, src, count);
#else
  return memcpy_from_mpb(dest, src, count);
#endif
}

// From here to the end of this file the name expands directly to the
// underlying copy routine instead of calling the wrapper above.
#ifndef COPPERRIDGE
#define RCCE_memcpy_get(a,b,c) memcpy(a,b,c)
#else
#define RCCE_memcpy_get(a,b,c) memcpy_from_mpb(a,b,c)
#endif
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: RCCE_get
|
||||
//--------------------------------------------------------------------------------------
|
||||
// copy data from address "source" in the remote MPB to address "target" in either the
|
||||
// local MPB, or in the calling UE's private memory. We do not test to see if a move
|
||||
// into the calling UE's private memory stays within allocated memory *
|
||||
//--------------------------------------------------------------------------------------
|
||||
int RCCE_get(
|
||||
t_vcharp target, // target buffer, MPB or private memory
|
||||
t_vcharp source, // source buffer, MPB
|
||||
int num_bytes, // number of bytes to copy (must be multiple of cache line size
|
||||
int ID // rank of source UE
|
||||
) {
|
||||
|
||||
// printf("UE %d at top of RCCE_get\n", RCCE_IAM); fflush(NULL);
|
||||
|
||||
#ifdef GORY
|
||||
// we only need to do tests in GORY mode; in non-GORY mode ths function is never
|
||||
// called by the user, but only be the library
|
||||
int copy_mode;
|
||||
|
||||
// check validity of parameters
|
||||
if (!target) return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_TARGET));
|
||||
if (!source) return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_SOURCE));
|
||||
|
||||
if (ID<0 || ID>=RCCE_NP) return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ID));
|
||||
|
||||
if (num_bytes <0 || num_bytes%RCCE_LINE_SIZE!=0)
|
||||
return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_MESSAGE_LENGTH));
|
||||
|
||||
// determine if source data is in MPB; check using local buffer boundaries
|
||||
if (source - RCCE_comm_buffer[RCCE_IAM] >=0 &&
|
||||
source+num_bytes - (RCCE_comm_buffer[RCCE_IAM] + RCCE_BUFF_SIZE)<=0)
|
||||
// shift source address to point to remote MPB
|
||||
source = RCCE_comm_buffer[ID]+(source-RCCE_comm_buffer[RCCE_IAM]);
|
||||
else return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_SOURCE));
|
||||
|
||||
// target can be either local MPB or private memory
|
||||
if (target -RCCE_comm_buffer[RCCE_IAM] >= 0 &&
|
||||
target+num_bytes - (RCCE_comm_buffer[RCCE_IAM] + RCCE_BUFF_SIZE)<=0)
|
||||
copy_mode = BOTH_IN_COMM_BUFFER;
|
||||
else
|
||||
copy_mode = TARGET_IN_PRIVATE_MEMORY;
|
||||
|
||||
// make sure that if the copy is between locations within the same MPB
|
||||
// there is no overlap between source and target address ranges
|
||||
if ( copy_mode == BOTH_IN_COMM_BUFFER) {
|
||||
if (((source-target)>0 && (source+num_bytes-target)<0) ||
|
||||
((target-source)>0 && (target+num_bytes-source)<0)) {
|
||||
return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_DATA_OVERLAP));
|
||||
}
|
||||
}
|
||||
|
||||
// ascertain that the start of the buffer is cache line aligned
|
||||
int start_index = source-RCCE_comm_buffer[ID];
|
||||
if (start_index%RCCE_LINE_SIZE!=0)
|
||||
return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ALIGNMENT));
|
||||
|
||||
// only verify alignment of the target if it is in the MPB
|
||||
if (copy_mode == BOTH_IN_COMM_BUFFER) {
|
||||
start_index = target-RCCE_comm_buffer[ID];
|
||||
if (start_index%RCCE_LINE_SIZE!=0)
|
||||
return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ALIGNMENT));
|
||||
}
|
||||
#else
|
||||
// in non-GORY mode we only need to retain the MPB source shift; we
|
||||
// already know the source is in the MPB, not private memory
|
||||
source = RCCE_comm_buffer[ID]+(source-RCCE_comm_buffer[RCCE_IAM]);
|
||||
#endif
|
||||
|
||||
// printf("UE %d; target = %x, source = %x, nbytes= %d\n", RCCE_IAM, target, source, num_bytes);
|
||||
fflush(NULL);
|
||||
|
||||
// do the actual copy, making sure we copy fresh data
|
||||
#ifdef _OPENMP
|
||||
#pragma omp flush
|
||||
#endif
|
||||
RC_cache_invalidate();
|
||||
|
||||
RCCE_memcpy_get((void *)target, (void *)source, num_bytes);
|
||||
|
||||
if (RCCE_debug_synch)
|
||||
fprintf(STDERR,"UE %d get data: %d from address %X \n", RCCE_IAM,*target,source);
|
||||
|
||||
// printf("UE %d finished the memcopy\n", RCCE_IAM);
|
||||
|
||||
// flush data to make sure it is visible to all threads; cannot use a flush list
|
||||
// because it concerns malloced space
|
||||
#ifdef _OPENMP
|
||||
#pragma omp flush
|
||||
#endif
|
||||
return(RCCE_SUCCESS);
|
||||
}
|
||||
|
||||
#ifdef USE_FLAG_EXPERIMENTAL
|
||||
int RCCE_get_flag(
|
||||
t_vcharp target, // target buffer, private memory
|
||||
t_vcharp source, // source buffer, MPB ncm mapped
|
||||
int num_bytes, // number of bytes to copy (must be multiple of cache line size
|
||||
int ID // rank of source UE
|
||||
) {
|
||||
|
||||
source = RCCE_flag_buffer[ID]+(source-RCCE_comm_buffer[RCCE_IAM]);
|
||||
|
||||
//memcpy((void*)target, (void*)source, num_bytes);
|
||||
|
||||
*target = *source;
|
||||
|
||||
if (RCCE_debug_synch)
|
||||
fprintf(STDERR,"UE %d get flag: %x from address %X \n", RCCE_IAM,*target,source);
|
||||
|
||||
return(RCCE_SUCCESS);
|
||||
}
|
||||
#endif
|
382
hermit/usr/ircce/RCCE_lib.h
Normal file
382
hermit/usr/ircce/RCCE_lib.h
Normal file
|
@ -0,0 +1,382 @@
|
|||
//
|
||||
// Copyright 2010 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef RCCE_LIB_H
|
||||
#define RCCE_LIB_H
|
||||
#include "RCCE.h"
|
||||
#ifdef _OPENMP
|
||||
#include <omp.h>
|
||||
#endif
|
||||
#include <string.h>
|
||||
|
||||
//#define AIR
|
||||
|
||||
#undef USE_FLAG_EXPERIMENTAL
|
||||
#undef USE_RCCE_COMM
|
||||
#undef USE_FAT_BARRIER
|
||||
#undef USE_PIPELINE_FLAGS
|
||||
#undef USE_PROBE_FLAGS
|
||||
#undef USE_TAGGED_FLAGS
|
||||
#undef USE_TAGGED_FOR_SHORT
|
||||
#undef USE_REVERTED_FLAGS
|
||||
#undef USE_REMOTE_PUT_LOCAL_GET
|
||||
#undef USE_PROBE_FLAGS_SHORTCUT
|
||||
#define USE_SYNCH_FOR_ZERO_BYTE
|
||||
|
||||
// override certain settings for SCC-MPICH:
|
||||
//#include "scc-mpich-defs.h"
|
||||
|
||||
// adjust settings automatically?
|
||||
#undef AUTO_ADJUST_SETTINGS
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#ifdef AUTO_ADJUST_SETTINGS
|
||||
|
||||
#ifdef SINGLEBITFLAGS
|
||||
#ifdef USE_TAGGED_FLAGS
|
||||
#warning TAGGED FLAGS CANNOT BE USED WITH SINGLEBITFLAGS! (#undef USE_TAGGED_FLAGS)
|
||||
#undef USE_TAGGED_FLAGS
|
||||
#undef USE_TAGGED_FOR_SHORT
|
||||
#undef USE_PROBE_FLAGS_SHORTCUT
|
||||
#endif
|
||||
#ifdef USE_FAT_BARRIER
|
||||
#warning FAT BARRIER CANNOT BE USED WITH SINGLEBITFLAGS! (#undef USE_FAT_BARRIER)
|
||||
#undef USE_FAT_BARRIER
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef USE_PROBE_FLAGS_SHORTCUT
|
||||
#ifndef USE_PROBE_FLAGS
|
||||
#warning THE PROBE FLAGS SHORTCUT REQUIRES PROBE FLAGS! (#define USE_PROBE_FLAGS)
|
||||
#define USE_PROBE_FLAGS
|
||||
#endif
|
||||
#ifndef USE_TAGGED_FOR_SHORT
|
||||
#warning THE PROBE FLAGS SHORTCUT REQUIRES TAGGED FLAGS! (#define USE_TAGGED_FLAGS)
|
||||
#define USE_TAGGED_FLAGS
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef USE_TAGGED_FOR_SHORT
|
||||
#ifndef USE_TAGGED_FLAGS
|
||||
#warning TAGGED SHORT MESSAGES REQUIRE TAGGED FLAGS! (#define USE_TAGGED_FLAGS)
|
||||
#define USE_TAGGED_FLAGS
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef USE_REMOTE_PUT_LOCAL_GET
|
||||
#ifndef USE_PROBE_FLAGS
|
||||
#warning PROBING FOR MESSAGES IN REMOTE-PUT/LOCAL-GET NEEDS ADDITIONAL PROBE FLAGS! (#define USE_PROBE_FLAGS)
|
||||
#define USE_PROBE_FLAGS
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef SCC_COUPLED_SYSTEMS
|
||||
#ifndef USE_REVERTED_FLAGS
|
||||
#ifdef USE_TAGGED_FLAGS
|
||||
#warning COUPLED SYSTEMS REQUIRE REVERTED FLAGS WHEN USING TAGGED FLAGS! (#define USE_REVERTED_FLAGS)
|
||||
#define USE_REVERTED_FLAGS
|
||||
#endif
|
||||
#endif
|
||||
#ifndef USE_REMOTE_PUT_LOCAL_GET
|
||||
#warning COUPLED SYSTEMS SHOULD USE REMOTE-PUT/LOCAL-GET! (#define USE_REMOTE_PUT_LOCAL_GET)
|
||||
#define USE_REMOTE_PUT_LOCAL_GET
|
||||
#endif
|
||||
#else
|
||||
#ifdef USE_PROBE_FLAGS
|
||||
#warning NON-COUPLED SYSTEMS SHOULD NOT USE ADDITIONAL PROBE FLAGS! (#undef USE_PROBE_FLAGS)
|
||||
#undef USE_PROBE_FLAGS
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef USE_PROBE_FLAGS
|
||||
#ifdef USE_FAT_BARRIER
|
||||
#warning PROBABLY TOO LITTLE MPB SPACE FOR USING FAT BARRIER WITH PROBE FLAGS ENABLED! (#undef USE_FAT_BARRIER)
|
||||
#undef USE_FAT_BARRIER
|
||||
#endif
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#else // !AUTO_ADJUST_SETTINGS
|
||||
|
||||
#ifdef SINGLEBITFLAGS
|
||||
#ifdef USE_TAGGED_FLAGS
|
||||
#error TAGGED FLAGS CANNOT BE USED WITH SINGLEBITFLAGS! (#undef USE_TAGGED_FLAGS)
|
||||
#endif
|
||||
#undef USE_TAGGED_FLAGS
|
||||
#undef USE_TAGGED_FOR_SHORT
|
||||
#undef USE_PROBE_FLAGS_SHORTCUT
|
||||
#ifdef USE_FAT_BARRIER
|
||||
#error FAT BARRIER CANNOT BE USED WITH SINGLEBITFLAGS! (#undef USE_FAT_BARRIER)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef USE_PROBE_FLAGS_SHORTCUT
|
||||
#ifndef USE_PROBE_FLAGS
|
||||
#error THE PROBE FLAGS SHORTCUT REQUIRES PROBE FLAGS! (#define USE_PROBE_FLAGS)
|
||||
#endif
|
||||
#ifndef USE_TAGGED_FOR_SHORT
|
||||
#error THE PROBE FLAGS SHORTCUT REQUIRES TAGGED FLAGS! (#define USE_TAGGED_FLAGS)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef USE_TAGGED_FOR_SHORT
|
||||
#ifndef USE_TAGGED_FLAGS
|
||||
#error TAGGED SHORT MESSAGES REQUIRE TAGGED FLAGS! (#define USE_TAGGED_FLAGS)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef USE_REMOTE_PUT_LOCAL_GET
|
||||
#ifndef USE_PROBE_FLAGS
|
||||
#warning PROBING FOR MESSAGES IN REMOTE-PUT/LOCAL-GET NEEDS ADDITIONAL PROBE FLAGS! (#define USE_PROBE_FLAGS)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef SCC_COUPLED_SYSTEMS
|
||||
#ifdef USE_TAGGED_FLAGS
|
||||
#ifndef USE_REVERTED_FLAGS
|
||||
#error COUPLED SYSTEMS REQUIRE REVERTED FLAGS WHEN USING TAGGED FLAGS! (#define USE_REVERTED_FLAGS)
|
||||
#endif
|
||||
#endif
|
||||
#ifndef USE_REMOTE_PUT_LOCAL_GET
|
||||
#warning COUPLED SYSTEMS SHOULD USE REMOTE-PUT/LOCAL-GET! (#define USE_REMOTE_PUT_LOCAL_GET)
|
||||
#endif
|
||||
#else
|
||||
#ifdef USE_PROBE_FLAGS
|
||||
#warning NON-COUPLED SYSTEMS SHOULD NOT USE ADDITIONAL PROBE FLAGS! (#undef USE_PROBE_FLAGS)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef USE_PROBE_FLAGS
|
||||
#ifdef USE_FAT_BARRIER
|
||||
#warning PROBABLY TOO LITTLE MPB SPACE FOR USING FAT BARRIER WITH PROBE FLAGS ENABLED! (#undef USE_FAT_BARRIER)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#endif // !AUTO_ADJUST_SETTINGS
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
/* PAD32byte is used to compute a cacheline padded length of n (input) bytes */
|
||||
#define PAD32byte(n) ((n)%32==0 ? (n) : (n) + 32 - (n)%32)
|
||||
|
||||
//#define BITSPERCHAR 8
|
||||
|
||||
#define BOTH_IN_COMM_BUFFER 12
|
||||
#define SOURCE_IN_PRIVATE_MEMORY 34
|
||||
#define TARGET_IN_PRIVATE_MEMORY 56
|
||||
|
||||
#ifdef SINGLEBITFLAGS
|
||||
#define RCCE_FLAGS_PER_BYTE 8
|
||||
#else
|
||||
#define RCCE_FLAGS_PER_BYTE 1
|
||||
#endif
|
||||
#define RCCE_FLAGS_PER_LINE (RCCE_LINE_SIZE*RCCE_FLAGS_PER_BYTE)
|
||||
|
||||
#define RCCE_SUM_INT (RCCE_SUM+(RCCE_NUM_OPS)*(RCCE_INT))
|
||||
#define RCCE_SUM_LONG (RCCE_SUM+(RCCE_NUM_OPS)*(RCCE_LONG))
|
||||
#define RCCE_SUM_FLOAT (RCCE_SUM+(RCCE_NUM_OPS)*(RCCE_FLOAT))
|
||||
#define RCCE_SUM_DOUBLE (RCCE_SUM+(RCCE_NUM_OPS)*(RCCE_DOUBLE))
|
||||
#define RCCE_MAX_INT (RCCE_MAX+(RCCE_NUM_OPS)*(RCCE_INT))
|
||||
#define RCCE_MAX_LONG (RCCE_MAX+(RCCE_NUM_OPS)*(RCCE_LONG))
|
||||
#define RCCE_MAX_FLOAT (RCCE_MAX+(RCCE_NUM_OPS)*(RCCE_FLOAT))
|
||||
#define RCCE_MAX_DOUBLE (RCCE_MAX+(RCCE_NUM_OPS)*(RCCE_DOUBLE))
|
||||
#define RCCE_MIN_INT (RCCE_MIN+(RCCE_NUM_OPS)*(RCCE_INT))
|
||||
#define RCCE_MIN_LONG (RCCE_MIN+(RCCE_NUM_OPS)*(RCCE_LONG))
|
||||
#define RCCE_MIN_FLOAT (RCCE_MIN+(RCCE_NUM_OPS)*(RCCE_FLOAT))
|
||||
#define RCCE_MIN_DOUBLE (RCCE_MIN+(RCCE_NUM_OPS)*(RCCE_DOUBLE))
|
||||
#define RCCE_PROD_INT (RCCE_PROD+(RCCE_NUM_OPS)*(RCCE_INT))
|
||||
#define RCCE_PROD_LONG (RCCE_PROD+(RCCE_NUM_OPS)*(RCCE_LONG))
|
||||
#define RCCE_PROD_FLOAT (RCCE_PROD+(RCCE_NUM_OPS)*(RCCE_FLOAT))
|
||||
#define RCCE_PROD_DOUBLE (RCCE_PROD+(RCCE_NUM_OPS)*(RCCE_DOUBLE))
|
||||
|
||||
#define RCCE_COMM_INITIALIZED 45328976
|
||||
#define RCCE_COMM_NOT_INITIALIZED -45328976
|
||||
|
||||
// auxiliary MPB pointer type
|
||||
typedef volatile unsigned int* t_vintp;
|
||||
// Also need dereferenced types
|
||||
typedef volatile unsigned char t_vchar;
|
||||
typedef volatile unsigned int t_vint;
|
||||
|
||||
// Node of the circular linked list the GORY-mode MPB allocator uses to track memory.
// A block is either completely handed out (free_size == 0) or completely free
// (free_size > 0); "space" always points at the beginning of the block.
typedef struct rcce_block {
|
||||
t_vcharp space; // pointer to space for data in block
|
||||
size_t free_size; // actual free space in block (0 or whole block)
|
||||
size_t size; // size of an allocated block; NOTE(review): RCCE_malloc.c never writes this field -- confirm whether anything reads it
|
||||
struct rcce_block *next; // pointer to next block in circular linked list
|
||||
} RCCE_BLOCK;
|
||||
|
||||
#if defined(SINGLEBITFLAGS) || defined(USE_BYTE_FLAGS)
|
||||
// Bookkeeping record for one line of flags, chained into a singly linked list via
// "next". Only declared when SINGLEBITFLAGS or USE_BYTE_FLAGS is defined.
typedef struct rcce_flag_line {
|
||||
char flag[RCCE_FLAGS_PER_LINE]; // one entry per flag slot in the line
|
||||
t_vcharp line_address; // address associated with this line -- presumably the MPB cache line holding the flags; TODO confirm
|
||||
int members; // presumably the number of flag slots currently in use; TODO confirm against the flag allocator
|
||||
struct rcce_flag_line *next; // next flag line record in the list
|
||||
} RCCE_FLAG_LINE;
|
||||
#endif
|
||||
|
||||
|
||||
// Handle for the allocator's circular block list. Only the tail is stored; the
// allocator reaches the first block through tail->next.
typedef struct {
|
||||
RCCE_BLOCK *tail; // "last" block in linked list of blocks
|
||||
} RCCE_BLOCK_S;
|
||||
|
||||
#ifdef AIR
|
||||
#define FPGA_BASE 0xf9000000
|
||||
#define BACKOFF_MIN 8
|
||||
#define BACKOFF_MAX 256
|
||||
// Pair of pointers describing one AIR unit; only declared when AIR is defined.
// NOTE(review): presumably these point at registers located relative to FPGA_BASE
// (atomic increment counter and its initialization register) -- confirm with the
// code that fills this structure in.
typedef volatile struct _RCCE_AIR {
|
||||
int * counter;
|
||||
int * init;
|
||||
} RCCE_AIR;
|
||||
#endif
|
||||
|
||||
#ifndef GORY
|
||||
extern RCCE_FLAG RCCE_sent_flag[RCCE_MAXNP];
|
||||
extern RCCE_FLAG RCCE_ready_flag[RCCE_MAXNP];
|
||||
#ifdef USE_PIPELINE_FLAGS
|
||||
extern RCCE_FLAG RCCE_sent_flag_pipe[RCCE_MAXNP];
|
||||
extern RCCE_FLAG RCCE_ready_flag_pipe[RCCE_MAXNP];
|
||||
#endif
|
||||
#ifdef USE_PROBE_FLAGS
|
||||
extern RCCE_FLAG RCCE_probe_flag[RCCE_MAXNP];
|
||||
#endif
|
||||
extern t_vcharp RCCE_buff_ptr;
|
||||
extern size_t RCCE_chunk;
|
||||
extern t_vcharp RCCE_flags_start;
|
||||
#ifndef USE_REMOTE_PUT_LOCAL_GET
|
||||
extern RCCE_SEND_REQUEST* RCCE_send_queue;
|
||||
extern RCCE_RECV_REQUEST* RCCE_recv_queue[RCCE_MAXNP];
|
||||
#else
|
||||
extern RCCE_SEND_REQUEST* RCCE_send_queue[RCCE_MAXNP];
|
||||
extern RCCE_RECV_REQUEST* RCCE_recv_queue;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
//#ifdef USE_FLAG_EXPERIMENTAL
|
||||
extern t_vcharp RCCE_flag_buffer[RCCE_MAXNP];
|
||||
//#endif
|
||||
|
||||
#ifndef __hermit__
|
||||
extern t_vcharp RCCE_fool_write_combine_buffer;
|
||||
#endif
|
||||
extern t_vcharp RCCE_comm_buffer[RCCE_MAXNP];
|
||||
extern int RCCE_NP;
|
||||
extern int RCCE_BUFF_SIZE;
|
||||
#ifndef COPPERRIDGE
|
||||
extern omp_lock_t RCCE_corelock[RCCE_MAXNP];
|
||||
extern t_vchar RC_comm_buffer[RCCE_MAXNP*RCCE_BUFF_SIZE_MAX];
|
||||
extern t_vchar RC_shm_buffer[RCCE_SHM_SIZE_MAX];
|
||||
#endif
|
||||
extern int RC_MY_COREID;
|
||||
extern int RC_COREID[RCCE_MAXNP];
|
||||
extern double RC_REFCLOCKGHZ;
|
||||
extern int RCCE_IAM;
|
||||
extern int RCCE_debug_synch;
|
||||
extern int RCCE_debug_comm;
|
||||
extern int RCCE_debug_debug;
|
||||
extern int RCCE_debug_RPC;
|
||||
#ifdef SINGLEBITFLAGS
|
||||
extern RCCE_FLAG_LINE RCCE_flags;
|
||||
extern int WORDSIZE;
|
||||
extern int LEFTMOSTBIT;
|
||||
RCCE_FLAG_STATUS RCCE_bit_value(t_vcharp, int);
|
||||
RCCE_FLAG_STATUS RCCE_flip_bit_value(t_vcharp, int);
|
||||
int RCCE_write_bit_value(t_vcharp, int, RCCE_FLAG_STATUS);
|
||||
#endif
|
||||
|
||||
extern int RCCE_comm_init_val;
|
||||
|
||||
void RCCE_malloc_init(t_vcharp, size_t);
|
||||
void RCCE_shmalloc_init(t_vcharp, size_t);
|
||||
int RCCE_qsort(char *, size_t, size_t, int (*)(const void*, const void*));
|
||||
int id_compare(const void *, const void *);
|
||||
#if 0
|
||||
int RCCE_probe(RCCE_FLAG);
|
||||
#endif
|
||||
int RCCE_error_return(int, int);
|
||||
#ifdef __hermit__
|
||||
#define RC_cache_invalidate() {}
|
||||
#else
|
||||
void RC_cache_invalidate(void);
|
||||
#endif
|
||||
int RCCE_acquire_treelock(RCCE_COMM*);
|
||||
int RCCE_release_treelock(RCCE_COMM*);
|
||||
int RCCE_TNS_barrier(RCCE_COMM*);
|
||||
int RCCE_acquire_lock(int);
|
||||
int RCCE_try_lock(int);
|
||||
int RCCE_backoff_lock(int);
|
||||
int RCCE_release_lock(int);
|
||||
int RCCE_global_color(int, void *);
|
||||
t_vcharp RC_COMM_BUFFER_START(int);
|
||||
//#ifdef USE_FLAG_EXPERIMENTAL
|
||||
t_vcharp RC_FLAG_BUFFER_START(int);
|
||||
//#endif
|
||||
|
||||
#ifndef GORY
|
||||
t_vcharp RCCE_malloc(size_t);
|
||||
t_vcharp RCCE_malloc_request(size_t, size_t *);
|
||||
t_vcharp RCCE_palloc(size_t, int);
|
||||
void RCCE_free(t_vcharp);
|
||||
int RCCE_put(t_vcharp, t_vcharp, int, int);
|
||||
int RCCE_get(t_vcharp, t_vcharp, int, int);
|
||||
int RCCE_wait_until(RCCE_FLAG, RCCE_FLAG_STATUS);
|
||||
int RCCE_test_flag(RCCE_FLAG, RCCE_FLAG_STATUS, int *);
|
||||
int RCCE_flag_alloc(RCCE_FLAG *);
|
||||
int RCCE_flag_free(RCCE_FLAG *);
|
||||
int RCCE_flag_write(RCCE_FLAG *, RCCE_FLAG_STATUS, int);
|
||||
int RCCE_flag_read(RCCE_FLAG, RCCE_FLAG_STATUS *, int);
|
||||
#ifdef USE_FLAG_EXPERIMENTAL
|
||||
int RCCE_put_flag(t_vcharp, t_vcharp, int, int);
|
||||
int RCCE_get_flag(t_vcharp, t_vcharp, int, int);
|
||||
#endif
|
||||
#ifdef USE_TAGGED_FLAGS
|
||||
int RCCE_flag_write_tagged(RCCE_FLAG *, RCCE_FLAG_STATUS, int, void*, int);
|
||||
int RCCE_flag_read_tagged(RCCE_FLAG, RCCE_FLAG_STATUS *, int, void*, int);
|
||||
int RCCE_wait_tagged(RCCE_FLAG, RCCE_FLAG_STATUS, void *, int);
|
||||
int RCCE_test_tagged(RCCE_FLAG, RCCE_FLAG_STATUS, int *, void *, int);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp threadprivate (RC_COREID, RC_MY_COREID, RC_REFCLOCKGHZ)
|
||||
#pragma omp threadprivate (RCCE_comm_buffer)
|
||||
#pragma omp threadprivate (RCCE_BUFF_SIZE)
|
||||
#pragma omp threadprivate (RCCE_IAM, RCCE_NP)
|
||||
#pragma omp threadprivate (RCCE_debug_synch, RCCE_debug_comm, RCCE_debug_debug)
|
||||
#ifdef SINGLEBITFLAGS
|
||||
#pragma omp threadprivate (RCCE_flags, WORDSIZE, LEFTMOSTBIT)
|
||||
#endif
|
||||
#ifndef GORY
|
||||
#pragma omp threadprivate (RCCE_send_queue, RCCE_recv_queue)
|
||||
#pragma omp threadprivate (RCCE_sent_flag, RCCE_ready_flag)
|
||||
#ifdef USE_PROBE_FLAGS
|
||||
#pragma omp threadprivate (RCCE_probe_flag)
|
||||
#endif
|
||||
#ifdef USE_PIPELINE_FLAGS
|
||||
#pragma omp threadprivate (RCCE_sent_flag_pipe, RCCE_ready_flag_pipe)
|
||||
#endif
|
||||
#pragma omp threadprivate (RCCE_buff_ptr, RCCE_chunk)
|
||||
#pragma omp threadprivate (RCCE_flags_start)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef SHMADD
|
||||
unsigned int getCOREID();
|
||||
unsigned int readTILEID();
|
||||
unsigned int readLUT(unsigned int);
|
||||
void writeLUT(unsigned int, unsigned int);
|
||||
#endif
|
||||
|
||||
#endif
|
255
hermit/usr/ircce/RCCE_malloc.c
Normal file
255
hermit/usr/ircce/RCCE_malloc.c
Normal file
|
@ -0,0 +1,255 @@
|
|||
//***************************************************************************************
|
||||
// MPB memory allocation routines.
|
||||
//***************************************************************************************
|
||||
//
|
||||
// Author: Rob F. Van der Wijngaart
|
||||
// Intel Corporation
|
||||
// Date: 008/30/2010
|
||||
//
|
||||
//***************************************************************************************
|
||||
//
|
||||
// Copyright 2010 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "RCCE_lib.h"
|
||||
|
||||
//......................................................................................
|
||||
// GLOBAL VARIABLES USED BY THE LIBRARY
|
||||
//......................................................................................
|
||||
static RCCE_BLOCK_S RCCE_space; // data structure used for tracking MPB memory blocks
|
||||
static RCCE_BLOCK_S *RCCE_spacep; // pointer to RCCE_space
|
||||
#ifdef _OPENMP
|
||||
#pragma omp threadprivate (RCCE_space, RCCE_spacep)
|
||||
#endif
|
||||
|
||||
// END GLOBAL VARIABLES USED BY THE LIBRARY
|
||||
//......................................................................................
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: RCCE_malloc_init
|
||||
//--------------------------------------------------------------------------------------
|
||||
// initialize memory allocator
|
||||
//--------------------------------------------------------------------------------------
|
||||
void RCCE_malloc_init(
|
||||
t_vcharp mem, // pointer to MPB space that is to be managed by allocator
|
||||
size_t size // size (bytes) of managed space
|
||||
) {
|
||||
|
||||
#ifndef GORY
|
||||
|
||||
// in the simplified API MPB memory allocation merely uses running pointers
|
||||
RCCE_flags_start = mem;
|
||||
RCCE_chunk = size;
|
||||
RCCE_buff_ptr = mem;
|
||||
|
||||
#else
|
||||
|
||||
// create one block containing all memory for truly dynamic memory allocator
|
||||
RCCE_spacep = &RCCE_space;
|
||||
RCCE_spacep->tail = (RCCE_BLOCK *) malloc(sizeof(RCCE_BLOCK));
|
||||
RCCE_spacep->tail->free_size = size;
|
||||
RCCE_spacep->tail->space = mem;
|
||||
/* make a circular list by connecting tail to itself */
|
||||
RCCE_spacep->tail->next = RCCE_spacep->tail;
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: RCCE_malloc
|
||||
//--------------------------------------------------------------------------------------
|
||||
// Allocate memory inside MPB. In restricted mode we only use it to allocate new
|
||||
// flags prompted by the creation of new communicators. Since communicators are never
|
||||
// deleted, we do not need to deallocate MPB memory, so we can simply keep running
|
||||
// pointers of where the next flag will be stored, and where payload data can go. In
|
||||
// GORY mode we need to support fully dynamic memory allocation and deallocation.
|
||||
//--------------------------------------------------------------------------------------
|
||||
t_vcharp RCCE_malloc(
|
||||
size_t size // requested space
|
||||
) {
|
||||
|
||||
t_vcharp result;
|
||||
|
||||
#ifndef GORY
|
||||
|
||||
// new flag takes exactly one cache line, whether using single bit flags are not
|
||||
if (size != RCCE_LINE_SIZE) {
|
||||
fprintf(stderr, "ERROR in RCCE_malloc(): size != RCCE_LINE_SIZE!\n");
|
||||
exit(-1);
|
||||
return(0);
|
||||
}
|
||||
|
||||
// if chunk size becomes zero, we have allocated too many flags
|
||||
if (!(RCCE_chunk-RCCE_LINE_SIZE)) {
|
||||
fprintf(stderr, "ERROR in RCCE_malloc(): No more MPB space left!\n");
|
||||
exit(-1);
|
||||
return(0);
|
||||
}
|
||||
|
||||
result = RCCE_flags_start;
|
||||
|
||||
// reduce maximum size of message payload chunk
|
||||
RCCE_chunk -= RCCE_LINE_SIZE;
|
||||
|
||||
// move running pointer to next available flags line
|
||||
RCCE_flags_start += RCCE_LINE_SIZE;
|
||||
|
||||
// move running pointer to new start of payload data area
|
||||
RCCE_buff_ptr += RCCE_LINE_SIZE;
|
||||
return(result);
|
||||
|
||||
#else
|
||||
|
||||
// simple memory allocator, loosely based on public domain code developed by
|
||||
// Michael B. Allen and published on "The Scripts--IT /Developers Network".
|
||||
// Approach:
|
||||
// - maintain linked list of pointers to memory. A block is either completely
|
||||
// malloced (free_size = 0), or completely free (free_size > 0).
|
||||
// The space field always points to the beginning of the block
|
||||
// - malloc: traverse linked list for first block that has enough space
|
||||
// - free: Check if pointer exists. If yes, check if the new block should be
|
||||
// merged with neighbors. Could be one or two neighbors.
|
||||
|
||||
RCCE_BLOCK *b1, *b2, *b3; // running pointers for blocks
|
||||
|
||||
if (size==0 || size%RCCE_LINE_SIZE!=0) return 0;
|
||||
|
||||
// always first check if the tail block has enough space, because that
|
||||
// is the most likely. If it does and it is exactly enough, we still
|
||||
// create a new block that will be the new tail, whose free space is
|
||||
// zero. This acts as a marker of where free space of predecessor ends
|
||||
b1 = RCCE_spacep->tail;
|
||||
if (b1->free_size >= size) {
|
||||
// need to insert new block; new order is: b1->b2 (= new tail)
|
||||
b2 = (RCCE_BLOCK *) malloc(sizeof(RCCE_BLOCK));
|
||||
b2->next = b1->next;
|
||||
b1->next = b2;
|
||||
b2->free_size = b1->free_size-size;
|
||||
b2->space = b1->space + size;
|
||||
b1->free_size = 0;
|
||||
// need to update the tail
|
||||
RCCE_spacep->tail = b2;
|
||||
return(b1->space);
|
||||
}
|
||||
|
||||
// tail didn't have enough space; loop over whole list from beginning
|
||||
while (b1->next->free_size < size) {
|
||||
if (b1->next == RCCE_spacep->tail) {
|
||||
return NULL; // we came full circle
|
||||
}
|
||||
b1 = b1->next;
|
||||
}
|
||||
|
||||
b2 = b1->next;
|
||||
if (b2->free_size > size) { // split block; new block order: b1->b2->b3
|
||||
b3 = (RCCE_BLOCK *) malloc(sizeof(RCCE_BLOCK));
|
||||
b3->next = b2->next; // reconnect pointers to add block b3
|
||||
b2->next = b3; // " " " " " "
|
||||
b3->free_size = b2->free_size - size; // b3 gets remainder free space
|
||||
b3->space = b2->space + size; // need to shift space pointer
|
||||
}
|
||||
b2->free_size = 0; // block b2 is completely used
|
||||
return (b2->space);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
t_vcharp RCCE_palloc(
|
||||
size_t size, // requested space
|
||||
int CoreID // location
|
||||
) {
|
||||
|
||||
t_vcharp result = RCCE_malloc(size);
|
||||
|
||||
if (result)
|
||||
result = RCCE_comm_buffer[CoreID]+(result-RCCE_comm_buffer[RCCE_IAM]);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: RCCE_free
|
||||
//--------------------------------------------------------------------------------------
|
||||
// Deallocate memory in MPB; only used in GORY mode
|
||||
//--------------------------------------------------------------------------------------
|
||||
void RCCE_free(
|
||||
t_vcharp ptr // pointer to data to be freed
|
||||
) {
|
||||
|
||||
RCCE_BLOCK *b1, *b2, *b3; // running block pointers
|
||||
int j1, j2; // booleans determining merging of blocks
|
||||
|
||||
// loop over whole list from the beginning until we locate space ptr
|
||||
b1 = RCCE_spacep->tail;
|
||||
while (b1->next->space != ptr && b1->next != RCCE_spacep->tail) {
|
||||
b1 = b1->next;
|
||||
}
|
||||
|
||||
// b2 is target block whose space must be freed
|
||||
b2 = b1->next;
|
||||
// tail either has zero free space, or hasn't been malloc'ed
|
||||
if (b2 == RCCE_spacep->tail) return;
|
||||
|
||||
// reset free space for target block (entire block)
|
||||
b3 = b2->next;
|
||||
b2->free_size = b3->space - b2->space;
|
||||
|
||||
// determine with what non-empty blocks the target block can be merged
|
||||
j1 = (b1->free_size>0 && b1!=RCCE_spacep->tail); // predecessor block
|
||||
j2 = (b3->free_size>0 || b3==RCCE_spacep->tail); // successor block
|
||||
|
||||
if (j1) {
|
||||
if (j2) { // splice all three blocks together: (b1,b2,b3) into b1
|
||||
b1->next = b3->next;
|
||||
b1->free_size += b3->free_size + b2->free_size;
|
||||
if (b3==RCCE_spacep->tail) RCCE_spacep->tail = b1;
|
||||
free(b3);
|
||||
}
|
||||
else { // only merge (b1,b2) into b1
|
||||
b1->free_size += b2->free_size;
|
||||
b1->next = b3;
|
||||
}
|
||||
free(b2);
|
||||
}
|
||||
else {
|
||||
if (j2) { // only merge (b2,b3) into b2
|
||||
b2->next = b3->next;
|
||||
b2->free_size += b3->free_size;
|
||||
if (b3==RCCE_spacep->tail) RCCE_spacep->tail = b2;
|
||||
free(b3);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: RCCE_malloc_request
|
||||
//--------------------------------------------------------------------------------------
|
||||
// this function tries to return a (padded) amount of space in the MPB of size
|
||||
// "size" bytes. If not available, the function keeps halving space until it fits
|
||||
//--------------------------------------------------------------------------------------
|
||||
t_vcharp RCCE_malloc_request(
|
||||
size_t size, // requested number of bytes
|
||||
size_t *chunk // number of bytes of space returned
|
||||
) {
|
||||
|
||||
t_vcharp combuf;
|
||||
|
||||
combuf = 0;
|
||||
*chunk = PAD32byte(size);
|
||||
while (!combuf && *chunk >= RCCE_LINE_SIZE) {
|
||||
combuf = RCCE_malloc(*chunk);
|
||||
if (!combuf) *chunk = PAD32byte(*chunk/2);
|
||||
}
|
||||
return (combuf);
|
||||
}
|
165
hermit/usr/ircce/RCCE_put.c
Normal file
165
hermit/usr/ircce/RCCE_put.c
Normal file
|
@ -0,0 +1,165 @@
|
|||
//***************************************************************************************
|
||||
// Put data into communication buffer.
|
||||
//***************************************************************************************
|
||||
//
|
||||
// Author: Rob F. Van der Wijngaart
|
||||
// Intel Corporation
|
||||
// Date: 008/30/2010
|
||||
//
|
||||
//***************************************************************************************
|
||||
//
|
||||
// Copyright 2010 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// [2010-11-03] switched to SCC-optimized memcpy() functions in scc_memcpy.h:
|
||||
// - memcpy_to_mpb()
|
||||
// - memcpy_from_mpb()
|
||||
// by Stefan Lankes, Carsten Clauss, Chair for Operating Systems,
|
||||
// RWTH Aachen University
|
||||
//
|
||||
#include "RCCE_lib.h"
|
||||
|
||||
#if defined(COPPERRIDGE) && !defined(__hermit__)
|
||||
#include "scc_memcpy.h"
|
||||
#endif
|
||||
|
||||
// Out-of-line entry point for the copy routine used when writing into the MPB, so
// that code outside this file can call it as a real function. Forwards to
// memcpy_to_mpb() on COPPERRIDGE builds that are not HermitCore builds and to the
// standard memcpy() everywhere else; returns whatever the selected routine returns.
void *RCCE_memcpy_put(void *dest, const void *src, size_t count)
{
#if !defined(COPPERRIDGE) || defined(__hermit__)
  return memcpy(dest, src, count);
#else
  return memcpy_to_mpb(dest, src, count);
#endif
}
|
||||
|
||||
#if defined(COPPERRIDGE) && !defined(__hermit__)
|
||||
#define RCCE_memcpy_put(a,b,c) memcpy_to_mpb(a, b, c)
|
||||
#else
|
||||
#define RCCE_memcpy_put(a,b,c) memcpy(a, b, c)
|
||||
#endif
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: RCCE_put
|
||||
//--------------------------------------------------------------------------------------
|
||||
// copy data from address "source" in the local MPB or the calling UE's private memory
|
||||
// to address "target" in the remote MPB. We do not test to see if a move from the
|
||||
// calling UE's private memory stays within allocated memory
|
||||
//--------------------------------------------------------------------------------------
|
||||
int RCCE_put(
|
||||
t_vcharp target, // target buffer, MPB
|
||||
t_vcharp source, // source buffer, MPB or private memory
|
||||
int num_bytes,
|
||||
int ID
|
||||
) {
|
||||
|
||||
#ifdef GORY
|
||||
// we only need to do tests in GORY mode; in non-GORY mode ths function is never
|
||||
// called by the user, but only be the library
|
||||
int copy_mode;
|
||||
|
||||
// check validity of parameters
|
||||
if (!target) return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_TARGET));
|
||||
if (!source) return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_SOURCE));
|
||||
if (ID<0 ||
|
||||
ID>=RCCE_NP) return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ID));
|
||||
if (num_bytes < 0 || num_bytes%RCCE_LINE_SIZE!=0)
|
||||
return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_MESSAGE_LENGTH));
|
||||
// determine if target data is in MPB; check using local buffer boundaries
|
||||
if (target - RCCE_comm_buffer[RCCE_IAM]>=0 &&
|
||||
target+num_bytes - (RCCE_comm_buffer[RCCE_IAM] + RCCE_BUFF_SIZE)<=0)
|
||||
// shift target address to point to remote MPB
|
||||
target = RCCE_comm_buffer[ID]+(target-RCCE_comm_buffer[RCCE_IAM]);
|
||||
else return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_TARGET));
|
||||
|
||||
// source can be either local MPB or private memory
|
||||
if (source - RCCE_comm_buffer[RCCE_IAM] >= 0 &&
|
||||
source+num_bytes - (RCCE_comm_buffer[RCCE_IAM] + RCCE_BUFF_SIZE)<=0)
|
||||
copy_mode = BOTH_IN_COMM_BUFFER;
|
||||
else
|
||||
copy_mode = SOURCE_IN_PRIVATE_MEMORY;
|
||||
|
||||
// make sure that if the copy is between locations within the same MPB
|
||||
// there is no overlap between source and target address ranges
|
||||
if ( copy_mode == BOTH_IN_COMM_BUFFER) {
|
||||
if (((source-target)>0 && (source+num_bytes-target)<0) ||
|
||||
((target-source)>0 && (target+num_bytes-source)<0)) {
|
||||
return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_DATA_OVERLAP));
|
||||
}
|
||||
}
|
||||
|
||||
// ascertain that the start of the buffer is cache line aligned
|
||||
int start_index = target-RCCE_comm_buffer[ID];
|
||||
if (start_index%RCCE_LINE_SIZE!=0)
|
||||
return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ALIGNMENT));
|
||||
|
||||
// only verify alignment of the target if it is in the MPB
|
||||
if (copy_mode == BOTH_IN_COMM_BUFFER) {
|
||||
start_index = source-RCCE_comm_buffer[ID];
|
||||
if (start_index%RCCE_LINE_SIZE!=0)
|
||||
return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ALIGNMENT));
|
||||
}
|
||||
#else
|
||||
// in non-GORY mode we only need to retain the MPB target shift; we
|
||||
// already know the target is in the MPB, not private memory
|
||||
target = RCCE_comm_buffer[ID]+(target-RCCE_comm_buffer[RCCE_IAM]);
|
||||
#endif
|
||||
|
||||
// make sure that any data that has been put in our MPB by another UE is visible
|
||||
#ifdef _OPENMP
|
||||
#pragma omp flush
|
||||
#endif
|
||||
|
||||
// do the actual copy
|
||||
RC_cache_invalidate();
|
||||
|
||||
RCCE_memcpy_put((void *)target, (void *)source, num_bytes);
|
||||
|
||||
// flush data to make it visible to all threads; cannot use flush list because it
|
||||
// concerns malloced space
|
||||
#ifdef _OPENMP
|
||||
#pragma omp flush
|
||||
#endif
|
||||
|
||||
#ifdef USE_FLAG_EXPERIMENTAL
|
||||
if(RCCE_debug_synch)
|
||||
fprintf(STDERR,"UE %d put data: %d address %X \n", RCCE_IAM,*source,target);
|
||||
#endif
|
||||
|
||||
return(RCCE_SUCCESS);
|
||||
}
|
||||
|
||||
#ifdef USE_FLAG_EXPERIMENTAL
|
||||
int RCCE_put_flag(
|
||||
t_vcharp target, // target buffer, MPB
|
||||
t_vcharp source, // source buffer, MPB or private memory
|
||||
int num_bytes,
|
||||
int ID
|
||||
) {
|
||||
|
||||
target = RCCE_flag_buffer[ID]+(target-RCCE_comm_buffer[RCCE_IAM]);
|
||||
|
||||
if (RCCE_debug_synch)
|
||||
fprintf(STDERR,"UE %d put flag: %x address %X \n", RCCE_IAM,*source,target);
|
||||
|
||||
//if( num_bytes == 1 ) {
|
||||
*target = *source;
|
||||
return(RCCE_SUCCESS);
|
||||
//}
|
||||
|
||||
//RCCE_memcpy_put((void *)target, (void *)source, 1);
|
||||
|
||||
//*RCCE_fool_write_combine_buffer = 1;
|
||||
//return(RCCE_ERROR_DEBUG_FLAG);
|
||||
}
|
||||
#endif
|
131
hermit/usr/ircce/RCCE_qsort.c
Normal file
131
hermit/usr/ircce/RCCE_qsort.c
Normal file
|
@ -0,0 +1,131 @@
|
|||
//***************************************************************************************
|
||||
// Sorting-related routines
|
||||
//***************************************************************************************
|
||||
//
|
||||
// Author: Rob F. Van der Wijngaart
|
||||
// Intel Corporation
|
||||
// Date: 08/30/2010
|
||||
//
|
||||
//***************************************************************************************
|
||||
//
|
||||
// Copyright 2010 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: id_compare
|
||||
//--------------------------------------------------------------------------------------
|
||||
// comparison function used in routine to sort core IDs
|
||||
//--------------------------------------------------------------------------------------
|
||||
int id_compare(
  const void *e1, // first element to be compared
  const void *e2  // second element to be compared
  ) {
  // three-way comparison of two ints, in the form expected by qsort-style
  // sort routines: -1 if *e1 < *e2, 1 if *e1 > *e2, 0 if they are equal
  const int lhs = *(const int *)e1;
  const int rhs = *(const int *)e2;

  if (lhs < rhs) return(-1);
  if (lhs > rhs) return(1);
  return(0);
}
|
||||
|
||||
// qsort -- qsort interface implemented by faster quicksort.
|
||||
// J. L. Bentley and M. D. McIlroy, SPE 23 (1993) 1249-1265.
|
||||
// Copyright 1993, John Wiley.
|
||||
|
||||
/*assume sizeof(long) is a power of 2 */
|
||||
// Chooses the swap strategy and stores it in the caller's local "swaptype":
//   0 - address of a and es are both multiples of sizeof(long) and es is not
//       larger than sizeof(long): elements are swapped as a single long
//   1 - as above, but es is larger than sizeof(long): swap long by long
//   2 - anything else: swap byte by byte
// The address is converted with an integer cast; the former "a-(char*)0" is
// arithmetic on a null pointer, which is undefined behaviour in C.
#define SWAPINIT(a, es) swaptype = \
  (((size_t)(a) | (es)) % sizeof(long)) ? 2 : ((es) > sizeof(long));
|
||||
// Exchanges n bytes between the regions at parmi and parmj, one TYPE-sized
// unit per iteration. The loop body always runs at least once and n is
// decremented in place, so n must be a modifiable lvalue.
#define swapcode(TYPE, parmi, parmj, n) {   \
    TYPE *lhs = (TYPE *) (parmi);           \
    TYPE *rhs = (TYPE *) (parmj);           \
    do {                                    \
      const TYPE held = *lhs;               \
      *lhs++ = *rhs;                        \
      *rhs++ = held;                        \
      n -= sizeof(TYPE);                    \
    } while (n > 0);                        \
  }
#include <stddef.h>

// Swaps the n-byte regions at a and b. swaptype 0 or 1 moves the data as
// longs, any other value moves it byte by byte. n is unsigned and is counted
// down by the unit size, so it has to be a non-zero multiple of that size.
static void swapfunc(char *a, char *b, size_t n, int swaptype)
{
  if (swaptype > 1) {
    swapcode(char, a, b, n)
  } else {
    swapcode(long, a, b, n)
  }
}
|
||||
// The statement macros below are wrapped in do { ... } while (0) so that each
// expands to exactly one statement; as bare if/else they could capture a
// following "else" of the caller. They rely on locals of the calling function:
// "swaptype" and "es" (swap, vecswap), a scratch long "t" (swap), and the
// array base "a" plus a long "v" (PVINIT).

// Exchange the elements at a and b: directly as one long when swaptype is 0,
// otherwise through swapfunc() over es bytes.
#define swap(a, b) \
  do { \
    if (swaptype == 0) { \
      t = *(long*)(a); \
      *(long*)(a) = *(long*)(b); \
      *(long*)(b) = t; \
    } else { \
      swapfunc((a), (b), es, swaptype); \
    } \
  } while (0)

// Make pv point at the partition value found at pm: for swaptype != 0 the
// value is swapped to the front of the array and pv points there, otherwise
// it is copied into the local long "v" and pv points at that copy.
#define PVINIT(pv, pm) \
  do { \
    if (swaptype != 0) { pv = a; swap(pv, pm); } \
    else { pv = (char*)&v; *(long*)pv = *(long*)pm; } \
  } while (0)

// Exchange n bytes between a and b; does nothing when n is 0.
#define vecswap(a, b, n) \
  do { if ((n) > 0) swapfunc((a), (b), (n), swaptype); } while (0)

// Smaller of x and y. Each argument may be evaluated twice.
#define min(x, y) ((x)<=(y) ? (x) : (y))
|
||||
|
||||
// Returns whichever of a, b, c points at the median of the three elements
// according to cmp. cmp is called two or three times per invocation.
static char *med3(char *a, char *b, char *c, int (*cmp)(const void*, const void*))
{
  char *median;

  if (cmp(a, b) < 0) {
    // a sorts before b
    if (cmp(b, c) < 0)      median = b;
    else if (cmp(a, c) < 0) median = c;
    else                    median = a;
  } else {
    // a does not sort before b
    if (cmp(b, c) > 0)      median = b;
    else if (cmp(a, c) > 0) median = c;
    else                    median = a;
  }
  return median;
}
|
||||
|
||||
void RCCE_qsort(char *a, size_t n, size_t es, int (*cmp)(const void*, const void*))
|
||||
{
|
||||
// Sorts, in place, the n elements of es bytes each that start at a. cmp is
// called with pointers to two elements and its result is tested for
// negative / zero / positive. The algorithm is a recursive quicksort with a
// three-way partition: elements equal to the pivot are parked at both ends
// of the array while partitioning and moved to the middle afterwards.
// Locals: pa..pd are partition cursors, pl/pm/pn pivot candidates, pv the
// pivot value; t and v are scratch storage used by the swap and PVINIT macros.
char *pa, *pb, *pc, *pd, *pl, *pm, *pn, *pv;
|
||||
int r, swaptype;
|
||||
long t, v;
|
||||
size_t s;
|
||||
|
||||
// pick the swap strategy (sets the local "swaptype") from a and es
SWAPINIT(a, es);
|
||||
if (n < 7) { /* Insertion sort on smallest arrays */
|
||||
for (pm = a + es; pm < a + n*es; pm += es)
|
||||
for (pl = pm; pl > a && cmp(pl-es, pl) > 0; pl -= es)
|
||||
swap(pl, pl-es);
|
||||
return;
|
||||
}
|
||||
// pivot selection: the middle element for n == 7, the median of first,
// middle and last element for larger n, and for n > 40 the median of three
// such medians taken s bytes apart
pm = a + (n/2)*es; /* Small arrays, middle element */
|
||||
if (n > 7) {
|
||||
pl = a;
|
||||
pn = a + (n-1)*es;
|
||||
if (n > 40) { /* Big arrays, pseudomedian of 9 */
|
||||
s = (n/8)*es;
|
||||
pl = med3(pl, pl+s, pl+2*s, cmp);
|
||||
pm = med3(pm-s, pm, pm+s, cmp);
|
||||
pn = med3(pn-2*s, pn-s, pn, cmp);
|
||||
}
|
||||
pm = med3(pl, pm, pn, cmp); /* Mid-size, med of 3 */
|
||||
}
|
||||
PVINIT(pv, pm); /* pv points to partition value */
|
||||
// pb scans upwards and pc downwards; elements equal to the pivot are
// swapped out to the growing runs that end at pa (left) and start after
// pd (right)
pa = pb = a;
|
||||
pc = pd = a + (n-1)*es;
|
||||
for (;;) {
|
||||
while (pb <= pc && (r = cmp(pb, pv)) <= 0) {
|
||||
if (r == 0) { swap(pa, pb); pa += es; }
|
||||
pb += es;
|
||||
}
|
||||
while (pb <= pc && (r = cmp(pc, pv)) >= 0) {
|
||||
if (r == 0) { swap(pc, pd); pd -= es; }
|
||||
pc -= es;
|
||||
}
|
||||
if (pb > pc) break;
|
||||
swap(pb, pc);
|
||||
pb += es;
|
||||
pc -= es;
|
||||
}
|
||||
// pn is one past the last element; the two vecswap calls move the
// pivot-equal runs from both ends of the array into the middle
pn = a + n*es;
|
||||
s = min(pa-a, pb-pa ); vecswap(a, pb-s, s);
|
||||
s = min(pd-pc, pn-pd-es); vecswap(pb, pn-s, s);
|
||||
// recurse on the pb-pa bytes at the start and on the pd-pc bytes at the
// end of the array; parts of at most one element are skipped
if ((s = pb-pa) > es) RCCE_qsort(a, s/es, es, cmp);
|
||||
if ((s = pd-pc) > es) RCCE_qsort(pn-s, s/es, es, cmp);
|
||||
}
|
||||
|
1350
hermit/usr/ircce/RCCE_recv.c
Normal file
1350
hermit/usr/ircce/RCCE_recv.c
Normal file
File diff suppressed because it is too large
Load diff
179
hermit/usr/ircce/RCCE_reduce.c
Normal file
179
hermit/usr/ircce/RCCE_reduce.c
Normal file
|
@ -0,0 +1,179 @@
|
|||
//***************************************************************************************
|
||||
// Reduction functions.
|
||||
//***************************************************************************************
|
||||
// Since reduction is the only message passing operation that depends on the data type,
|
||||
// it is carried as a parameter. Also, since only collective operations require
|
||||
// communication domains, they are the only ones that use communicators. All collectives
|
||||
// implementations are naive, linear operations. There may not be any overlap between
|
||||
// target and source.
|
||||
//
|
||||
// Author: Rob F. Van der Wijngaart
|
||||
// Intel Corporation
|
||||
// Date: 08/30/2010
|
||||
//
|
||||
//**************************************************************************************
|
||||
//
|
||||
// Copyright 2010 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "RCCE_lib.h"
|
||||
#define MIN(x,y) ( (x) < (y) ? (x) : (y) )
|
||||
#define MAX(x,y) ( (x) > (y) ? (x) : (y) )
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: RCCE_reduce_general
|
||||
//--------------------------------------------------------------------------------------
|
||||
// function used to implement both reduce and allreduce
|
||||
//--------------------------------------------------------------------------------------
|
||||
static int RCCE_reduce_general(
|
||||
char *inbuf, // source buffer for reduction datan
|
||||
char *outbuf, // target buffer for reduction data
|
||||
int num, // number of data elements to be reduced
|
||||
int type, // type of data elements
|
||||
int op, // reduction operation
|
||||
int root, // root of reduction tree, used for all reductions
|
||||
int all, // if 1, use allreduce, if 0, use reduce
|
||||
RCCE_COMM comm // communication domain within which to reduce
|
||||
) {
|
||||
|
||||
int ue, i, type_size, ierr;
|
||||
int *iin, *iout;
|
||||
long *lin, *lout;
|
||||
float *fin, *fout;
|
||||
double *din, *dout;
|
||||
// create aliases for source and target buffers to simplify arithmetic operations
|
||||
iin = (int *) inbuf; iout = (int *) outbuf;
|
||||
lin = (long *) inbuf; lout = (long *) outbuf;
|
||||
fin = (float *) inbuf; fout = (float *) outbuf;
|
||||
din = (double *) inbuf; dout = (double *) outbuf;
|
||||
|
||||
#ifdef GORY
|
||||
printf("Reduction only implemented for non-gory API\n");
|
||||
return(1);
|
||||
#else
|
||||
switch (op) {
|
||||
case RCCE_SUM:
|
||||
case RCCE_MAX:
|
||||
case RCCE_MIN:
|
||||
case RCCE_PROD: break;
|
||||
default: return(RCCE_ERROR_ILLEGAL_OP);
|
||||
}
|
||||
|
||||
switch (type) {
|
||||
case RCCE_INT: type_size = sizeof(int);
|
||||
break;
|
||||
case RCCE_LONG: type_size = sizeof(long);
|
||||
break;
|
||||
case RCCE_FLOAT: type_size = sizeof(float);
|
||||
break;
|
||||
case RCCE_DOUBLE: type_size = sizeof(double);
|
||||
break;
|
||||
default: return(RCCE_ERROR_ILLEGAL_TYPE);
|
||||
}
|
||||
|
||||
if (RCCE_IAM != comm.member[root]) {
|
||||
// non-root UEs send their source buffers to the root
|
||||
if (ierr=RCCE_send(inbuf, num*type_size, comm.member[root]))
|
||||
return(ierr);
|
||||
// in case of allreduce they also receive the reduced buffer
|
||||
if (all) if (ierr=RCCE_recv(outbuf, num*type_size, comm.member[root]))
|
||||
return(ierr);
|
||||
}
|
||||
else {
|
||||
// the root can copy directly from source to target buffer
|
||||
memcpy(outbuf, inbuf, num*type_size);
|
||||
for (ue=0; ue<comm.size; ue++) if (ue != root) {
|
||||
if (ierr=RCCE_recv(inbuf, num*type_size, comm.member[ue]))
|
||||
return(ierr);
|
||||
|
||||
// use combination of operation and data type to reduce number of switch statements
|
||||
switch (op+(RCCE_NUM_OPS)*(type)) {
|
||||
|
||||
case RCCE_SUM_INT: for (i=0; i<num; i++) iout[i] += iin[i]; break;
|
||||
case RCCE_MAX_INT: for (i=0; i<num; i++) iout[i] = MAX(iout[i],iin[i]); break;
|
||||
case RCCE_MIN_INT: for (i=0; i<num; i++) iout[i] = MIN(iout[i],iin[i]); break;
|
||||
case RCCE_PROD_INT: for (i=0; i<num; i++) iout[i] *= iin[i]; break;
|
||||
|
||||
case RCCE_SUM_LONG: for (i=0; i<num; i++) lout[i] += lin[i]; break;
|
||||
case RCCE_MAX_LONG: for (i=0; i<num; i++) lout[i] = MAX(lout[i],lin[i]); break;
|
||||
case RCCE_MIN_LONG: for (i=0; i<num; i++) lout[i] = MIN(lout[i],lin[i]); break;
|
||||
case RCCE_PROD_LONG: for (i=0; i<num; i++) lout[i] *= lin[i]; break;
|
||||
|
||||
case RCCE_SUM_FLOAT: for (i=0; i<num; i++) fout[i] += fin[i]; break;
|
||||
case RCCE_MAX_FLOAT: for (i=0; i<num; i++) fout[i] = MAX(fout[i],fin[i]); break;
|
||||
case RCCE_MIN_FLOAT: for (i=0; i<num; i++) fout[i] = MIN(fout[i],fin[i]); break;
|
||||
case RCCE_PROD_FLOAT: for (i=0; i<num; i++) fout[i] *= fin[i]; break;
|
||||
|
||||
case RCCE_SUM_DOUBLE: for (i=0; i<num; i++) dout[i] += din[i]; break;
|
||||
case RCCE_MAX_DOUBLE: for (i=0; i<num; i++) dout[i] = MAX(dout[i],din[i]); break;
|
||||
case RCCE_MIN_DOUBLE: for (i=0; i<num; i++) dout[i] = MIN(dout[i],din[i]); break;
|
||||
case RCCE_PROD_DOUBLE: for (i=0; i<num; i++) dout[i] *= din[i]; break;
|
||||
}
|
||||
}
|
||||
|
||||
// in case of allreduce the root sends the reduction results to all non-root UEs
|
||||
if (all) for (ue=0; ue<comm.size; ue++) if (ue != root)
|
||||
if(ierr=RCCE_send(outbuf, num*type_size, comm.member[ue]))
|
||||
return(ierr);
|
||||
}
|
||||
return(RCCE_SUCCESS);
|
||||
#endif GORY
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------
|
||||
// FUNCTION: RCCE_allreduce
|
||||
//---------------------------------------------------------------------------------------
|
||||
// Reduction function which delivers the reduction results to all participating UEs
|
||||
//---------------------------------------------------------------------------------------
|
||||
int RCCE_allreduce(
|
||||
char *inbuf, // source buffer for reduction datan
|
||||
char *outbuf, // target buffer for reduction data
|
||||
int num, // number of data elements to be reduced
|
||||
int type, // type of data elements
|
||||
int op, // reduction operation
|
||||
RCCE_COMM comm // communication domain within which to reduce
|
||||
){
|
||||
|
||||
int root = 0, all = 1;
|
||||
return(RCCE_error_return(RCCE_debug_comm,
|
||||
RCCE_reduce_general(inbuf, outbuf, num, type, op, root, all, comm)));
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------
|
||||
// FUNCTION: RCCE_reduce
|
||||
//---------------------------------------------------------------------------------------
|
||||
// Reduction function which delivers the reduction results to UE root
|
||||
//---------------------------------------------------------------------------------------
|
||||
int RCCE_reduce(
|
||||
char *inbuf, // source buffer for reduction datan
|
||||
char *outbuf, // target buffer for reduction data
|
||||
int num, // number of data elements to be reduced
|
||||
int type, // type of data elements
|
||||
int op, // reduction operation
|
||||
int root, // member of "comm" receiving reduction results
|
||||
RCCE_COMM comm // communication domain within which to reduce
|
||||
){
|
||||
|
||||
int ue, all = 0;
|
||||
// check to make sure root is member of the communicator
|
||||
if (root<0 || root >= comm.size)
|
||||
return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ID));
|
||||
|
||||
return(RCCE_error_return(RCCE_debug_comm,
|
||||
RCCE_reduce_general(inbuf, outbuf, num, type, op, root, all, comm)));
|
||||
}
|
||||
|
992
hermit/usr/ircce/RCCE_send.c
Normal file
992
hermit/usr/ircce/RCCE_send.c
Normal file
|
@ -0,0 +1,992 @@
|
|||
//***************************************************************************************
|
||||
// Synchronized send routines.
|
||||
//***************************************************************************************
|
||||
//
|
||||
// Author: Rob F. Van der Wijngaart
|
||||
// Intel Corporation
|
||||
// Date: 08/30/2010
|
||||
//
|
||||
//***************************************************************************************
|
||||
//
|
||||
// Copyright 2010 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// [2010-10-25] added support for non-blocking send/recv operations
|
||||
// - RCCE_isend(), ..._test(), ..._wait(), ..._push()
|
||||
// - RCCE_irecv(), ..._test(), ..._wait(), ..._push()
|
||||
// by Carsten Clauss, Chair for Operating Systems,
|
||||
// RWTH Aachen University
|
||||
//
|
||||
// [2012-09-10] added support for "tagged" flags
|
||||
// by Carsten Clauss, Chair for Operating Systems,
|
||||
// RWTH Aachen University
|
||||
//
|
||||
#include "RCCE_lib.h"
|
||||
#if defined(COPPERRIDGE) && !defined(__hermit__)
|
||||
#include "scc_memcpy.h"
|
||||
#else
|
||||
#define memcpy_scc memcpy
|
||||
#endif
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: RCCE_send_general
|
||||
//--------------------------------------------------------------------------------------
|
||||
// Synchronized send function (gory and non-gory mode)
|
||||
//--------------------------------------------------------------------------------------
|
||||
static int RCCE_send_general(
|
||||
char *privbuf, // source buffer in local private memory (send buffer)
|
||||
t_vcharp combuf, // intermediate buffer in MPB
|
||||
size_t chunk, // size of MPB available for this message (bytes)
|
||||
RCCE_FLAG *ready, // flag indicating whether receiver is ready
|
||||
RCCE_FLAG *sent, // flag indicating whether message has been sent by source
|
||||
size_t size, // size of message (bytes)
|
||||
int dest, // UE that will receive the message
|
||||
int copy, // set to 0 for synchronization only (no copying/sending)
|
||||
int pipe, // use pipelining?
|
||||
int mcast, // multicast?
|
||||
void* tag, // additional tag?
|
||||
int len, // length of additional tag
|
||||
RCCE_FLAG *probe // flag for probing for incoming messages
|
||||
) {
|
||||
|
||||
// Overview: the message is moved through the comm buffer in three stages:
// (1) as many full "chunk"-sized pieces as fit into "size",
// (2) the whole cache lines of what is left,
// (3) a final partial cache line, padded to RCCE_LINE_SIZE via "padline".
// Each piece is handed over with a "sent"/"ready" flag handshake. With
// USE_REMOTE_PUT_LOCAL_GET the sender waits for "ready" and puts into the
// comm buffer of "dest"; otherwise it puts into its own comm buffer, sets
// "sent" at "dest" and then waits for "ready". Returns RCCE_SUCCESS, or an
// error for multicast in remote-put mode. Return values of RCCE_put and of
// the flag routines are not checked.
char padline[RCCE_LINE_SIZE]; // copy buffer, used if message not multiple of line size
|
||||
size_t wsize, // offset within send buffer when putting in "chunk" bytes
|
||||
remainder, // bytes remaining to be sent
|
||||
nbytes; // number of bytes to be sent in single RCCE_put call
|
||||
char *bufptr; // running pointer inside privbuf for current location
|
||||
|
||||
#ifdef USE_REMOTE_PUT_LOCAL_GET
|
||||
if(mcast) return(RCCE_error_return(1, RCCE_ERROR_NO_MULTICAST_SUPPORT));
|
||||
#endif
|
||||
|
||||
// announce the message on the receiver's probe flag, if one was supplied
if(probe)
|
||||
#ifdef USE_TAGGED_FLAGS
|
||||
RCCE_flag_write_tagged(probe, RCCE_FLAG_SET, dest, tag, len);
|
||||
#else
|
||||
RCCE_flag_write(probe, RCCE_FLAG_SET, dest);
|
||||
#endif
|
||||
|
||||
#ifdef USE_SYNCH_FOR_ZERO_BYTE
|
||||
// synchronize even in case of zero byte messages:
|
||||
if(size == 0) {
|
||||
#ifdef USE_REMOTE_PUT_LOCAL_GET
|
||||
RCCE_wait_until(*ready, RCCE_FLAG_SET);
|
||||
RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
|
||||
#ifdef USE_TAGGED_FLAGS
|
||||
if(!probe)
|
||||
RCCE_flag_write_tagged(sent, RCCE_FLAG_SET, dest, tag, len);
|
||||
else
|
||||
#endif
|
||||
RCCE_flag_write(sent, RCCE_FLAG_SET, dest);
|
||||
#else // LOCAL PUT / REMOTE GET: (standard)
|
||||
#ifdef USE_TAGGED_FLAGS
|
||||
if(!probe)
|
||||
RCCE_flag_write_tagged(sent, RCCE_FLAG_SET, dest, tag, len);
|
||||
else
|
||||
#endif
|
||||
RCCE_flag_write(sent, RCCE_FLAG_SET, dest);
|
||||
RCCE_wait_until(*ready, RCCE_FLAG_SET);
|
||||
RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
|
||||
#endif // !USE_REMOTE_PUT_LOCAL_GET
|
||||
return(RCCE_SUCCESS);
|
||||
}
|
||||
#endif // USE_SYNCH_FOR_ZERO_BYTE
|
||||
|
||||
// stage 1: full chunks, either one put per chunk or (pipe) two puts per chunk
if(!pipe) {
|
||||
// send data in units of available chunk size of comm buffer
|
||||
for (wsize=0; wsize< (size/chunk)*chunk; wsize+=chunk) {
|
||||
bufptr = privbuf + wsize;
|
||||
nbytes = chunk;
|
||||
|
||||
#ifdef USE_REMOTE_PUT_LOCAL_GET
|
||||
|
||||
RCCE_wait_until(*ready, RCCE_FLAG_SET);
|
||||
RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
|
||||
|
||||
// copy private data to remote comm buffer
|
||||
if(copy) RCCE_put(combuf, (t_vcharp) bufptr, nbytes, dest);
|
||||
|
||||
// with tagged flags, the tag travels only with the first "sent" flag of
// the message (wsize == 0), and only when no probe flag carried it already
#ifdef USE_TAGGED_FLAGS
|
||||
if( (wsize == 0) && (!probe) )
|
||||
RCCE_flag_write_tagged(sent, RCCE_FLAG_SET, dest, tag, len);
|
||||
else
|
||||
#endif
|
||||
RCCE_flag_write(sent, RCCE_FLAG_SET, dest);
|
||||
|
||||
#else // LOCAL PUT / REMOTE GET: (standard)
|
||||
|
||||
// copy private data to own comm buffer
|
||||
if(copy) RCCE_put(combuf, (t_vcharp) bufptr, nbytes, RCCE_IAM);
|
||||
|
||||
if(!mcast) {
|
||||
#ifdef USE_TAGGED_FLAGS
|
||||
if( (wsize == 0) && (!probe) )
|
||||
RCCE_flag_write_tagged(sent, RCCE_FLAG_SET, dest, tag, len);
|
||||
else
|
||||
#endif
|
||||
RCCE_flag_write(sent, RCCE_FLAG_SET, dest);
|
||||
|
||||
// wait for the destination to be ready to receive a message
|
||||
RCCE_wait_until(*ready, RCCE_FLAG_SET);
|
||||
RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
|
||||
}
|
||||
else {
|
||||
// multicast: the flag handshake is replaced by two barriers over
// RCCE_COMM_WORLD
RCCE_TNS_barrier(&RCCE_COMM_WORLD);
|
||||
RCCE_TNS_barrier(&RCCE_COMM_WORLD);
|
||||
}
|
||||
#endif // !USE_REMOTE_PUT_LOCAL_GET
|
||||
|
||||
} // for
|
||||
}
|
||||
else // if(!pipe) -> if(pipe)
|
||||
{
|
||||
// pipelined version of send/recv:
|
||||
// NOTE(review): "mcast" is not consulted anywhere in this branch - confirm
// that pipelining and multicast are never requested together
size_t subchunk1, subchunk2;
|
||||
|
||||
for(wsize = 0; wsize < (size/chunk)*chunk; wsize+=chunk) {
|
||||
|
||||
if(wsize == 0) {
|
||||
// align sub-chunks to cache line granularity:
|
||||
subchunk1 = ( (chunk / 2) / RCCE_LINE_SIZE ) * RCCE_LINE_SIZE;
|
||||
subchunk2 = chunk - subchunk1;
|
||||
}
|
||||
|
||||
bufptr = privbuf + wsize;
|
||||
nbytes = subchunk1;
|
||||
|
||||
#ifdef USE_REMOTE_PUT_LOCAL_GET
|
||||
|
||||
RCCE_wait_until(*ready, RCCE_FLAG_SET);
|
||||
RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
|
||||
|
||||
// copy private data chunk 1 to remote comm buffer
|
||||
if(copy) RCCE_put(combuf, (t_vcharp) bufptr, nbytes, dest);
|
||||
|
||||
#ifdef USE_TAGGED_FLAGS
|
||||
if( (wsize == 0) && (!probe) )
|
||||
RCCE_flag_write_tagged(sent, RCCE_FLAG_SET, dest, tag, len);
|
||||
else
|
||||
#endif
|
||||
RCCE_flag_write(sent, RCCE_FLAG_SET, dest);
|
||||
|
||||
#else // LOCAL PUT / REMOTE GET: (standard)
|
||||
|
||||
// copy private data chunk 1 to own comm buffer
|
||||
if(copy) RCCE_put(combuf, (t_vcharp) bufptr, nbytes, RCCE_IAM);
|
||||
|
||||
#ifdef USE_TAGGED_FLAGS
|
||||
if( (wsize == 0) && (!probe) )
|
||||
RCCE_flag_write_tagged(sent, RCCE_FLAG_SET, dest, tag, len);
|
||||
else
|
||||
#endif
|
||||
RCCE_flag_write(sent, RCCE_FLAG_SET, dest);
|
||||
|
||||
RCCE_wait_until(*ready, RCCE_FLAG_SET);
|
||||
RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
|
||||
|
||||
#endif // !USE_REMOTE_PUT_LOCAL_GET
|
||||
|
||||
// second half of the chunk goes to the upper part of the comm buffer
bufptr = privbuf + wsize + subchunk1;
|
||||
nbytes = subchunk2;
|
||||
|
||||
#ifdef USE_REMOTE_PUT_LOCAL_GET
|
||||
|
||||
RCCE_wait_until(*ready, RCCE_FLAG_SET);
|
||||
RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
|
||||
|
||||
// copy private data chunk 2 to remote comm buffer
|
||||
if(copy) RCCE_put(combuf + subchunk1, (t_vcharp) bufptr, nbytes, dest);
|
||||
|
||||
RCCE_flag_write(sent, RCCE_FLAG_SET, dest);
|
||||
|
||||
#else // LOCAL PUT / REMOTE GET: (standard)
|
||||
|
||||
// copy private data chunk 2 to own comm buffer
|
||||
if(copy) RCCE_put(combuf + subchunk1, (t_vcharp) bufptr, nbytes, RCCE_IAM);
|
||||
|
||||
RCCE_flag_write(sent, RCCE_FLAG_SET, dest);
|
||||
|
||||
RCCE_wait_until(*ready, RCCE_FLAG_SET);
|
||||
RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
|
||||
|
||||
#endif // !USE_REMOTE_PUT_LOCAL_GET
|
||||
|
||||
} //for
|
||||
|
||||
} // if(pipe)
|
||||
|
||||
// stage 2: what is left after the full chunks. At this point wsize equals
// (size/chunk)*chunk, so the "wsize == 0" tests below are true only when no
// full chunk was sent.
remainder = size%chunk;
|
||||
// if nothing is left over, we are done
|
||||
if (!remainder) return(RCCE_SUCCESS);
|
||||
|
||||
// send remainder of data--whole cache lines
|
||||
bufptr = privbuf + (size/chunk)*chunk;
|
||||
nbytes = remainder - remainder%RCCE_LINE_SIZE;
|
||||
|
||||
if (nbytes) {
|
||||
|
||||
#ifdef USE_REMOTE_PUT_LOCAL_GET
|
||||
|
||||
RCCE_wait_until(*ready, RCCE_FLAG_SET);
|
||||
RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
|
||||
|
||||
// copy private data to remote comm buffer
|
||||
if(copy) RCCE_put(combuf, (t_vcharp) bufptr, nbytes, dest);
|
||||
|
||||
#ifdef USE_TAGGED_FLAGS
|
||||
if( (wsize == 0) && (!probe) )
|
||||
RCCE_flag_write_tagged(sent, RCCE_FLAG_SET, dest, tag, len);
|
||||
else
|
||||
#endif
|
||||
RCCE_flag_write(sent, RCCE_FLAG_SET, dest);
|
||||
|
||||
#else // LOCAL PUT / REMOTE GET: (standard)
|
||||
|
||||
// copy private data to own comm buffer
|
||||
if(copy) RCCE_put(combuf, (t_vcharp)bufptr, nbytes, RCCE_IAM);
|
||||
|
||||
if(!mcast) {
|
||||
#ifdef USE_TAGGED_FLAGS
|
||||
if( (wsize == 0) && (!probe) )
|
||||
RCCE_flag_write_tagged(sent, RCCE_FLAG_SET, dest, tag, len);
|
||||
else
|
||||
#endif
|
||||
RCCE_flag_write(sent, RCCE_FLAG_SET, dest);
|
||||
|
||||
// wait for the destination to be ready to receive a message
|
||||
RCCE_wait_until(*ready, RCCE_FLAG_SET);
|
||||
RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
|
||||
}
|
||||
else {
|
||||
RCCE_TNS_barrier(&RCCE_COMM_WORLD);
|
||||
RCCE_TNS_barrier(&RCCE_COMM_WORLD);
|
||||
}
|
||||
#endif // !USE_REMOTE_PUT_LOCAL_GET
|
||||
|
||||
} // if(nbytes)
|
||||
|
||||
// stage 3: the last, partial cache line
remainder = remainder%RCCE_LINE_SIZE;
|
||||
if (!remainder) return(RCCE_SUCCESS);
|
||||
|
||||
// remainder is less than a cache line. This must be copied into appropriately sized
|
||||
// intermediate space before it can be sent to the receiver
|
||||
bufptr = privbuf + (size/chunk)*chunk + nbytes;
|
||||
nbytes = RCCE_LINE_SIZE;
|
||||
|
||||
// NOTE(review): only "remainder" bytes of padline are filled, but a full
// line of RCCE_LINE_SIZE bytes is put below; the trailing bytes of padline
// are uninitialized
if(copy) {
|
||||
#ifdef COPPERRIDGE
|
||||
memcpy_scc(padline,bufptr,remainder);
|
||||
#else
|
||||
memcpy(padline,bufptr,remainder);
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef USE_REMOTE_PUT_LOCAL_GET
|
||||
|
||||
RCCE_wait_until(*ready, RCCE_FLAG_SET);
|
||||
RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
|
||||
|
||||
// copy private data to remote comm buffer
|
||||
if(copy) RCCE_put(combuf, (t_vcharp) padline, nbytes, dest);
|
||||
|
||||
#ifdef USE_TAGGED_FLAGS
|
||||
if( (wsize == 0) && (!probe) )
|
||||
RCCE_flag_write_tagged(sent, RCCE_FLAG_SET, dest, tag, len);
|
||||
else
|
||||
#endif
|
||||
RCCE_flag_write(sent, RCCE_FLAG_SET, dest);
|
||||
|
||||
#else // LOCAL PUT / REMOTE GET: (standard)
|
||||
|
||||
// copy private data to own comm buffer
|
||||
if(copy) RCCE_put(combuf, (t_vcharp)padline, nbytes, RCCE_IAM);
|
||||
|
||||
if(!mcast) {
|
||||
#ifdef USE_TAGGED_FLAGS
|
||||
if( (wsize == 0) && (!probe) )
|
||||
RCCE_flag_write_tagged(sent, RCCE_FLAG_SET, dest, tag, len);
|
||||
else
|
||||
#endif
|
||||
RCCE_flag_write(sent, RCCE_FLAG_SET, dest);
|
||||
|
||||
// wait for the destination to be ready to receive a message
|
||||
RCCE_wait_until(*ready, RCCE_FLAG_SET);
|
||||
RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
|
||||
}
|
||||
else {
|
||||
RCCE_TNS_barrier(&RCCE_COMM_WORLD);
|
||||
RCCE_TNS_barrier(&RCCE_COMM_WORLD);
|
||||
}
|
||||
|
||||
#endif // !USE_REMOTE_PUT_LOCAL_GET
|
||||
|
||||
return(RCCE_SUCCESS);
|
||||
}
|
||||
|
||||
static int RCCE_push_send_request(RCCE_SEND_REQUEST *request) {
|
||||
|
||||
char padline[RCCE_LINE_SIZE]; // copy buffer, used if message not multiple of line size
|
||||
int test; // flag for calling RCCE_test_flag()
|
||||
|
||||
if(request->finished) return(RCCE_SUCCESS);
|
||||
|
||||
if(request->label == 1) goto label1;
|
||||
if(request->label == 2) goto label2;
|
||||
if(request->label == 3) goto label3;
|
||||
if(request->label == 4) goto label4;
|
||||
|
||||
if(request->probe)
|
||||
#ifdef USE_TAGGED_FLAGS
|
||||
RCCE_flag_write_tagged(request->probe, RCCE_FLAG_SET, request->dest, request->tag, request->len);
|
||||
#else
|
||||
RCCE_flag_write(request->probe, RCCE_FLAG_SET, request->dest);
|
||||
#endif
|
||||
|
||||
#ifdef USE_SYNCH_FOR_ZERO_BYTE
|
||||
// synchronize even in case of zero byte messages:
|
||||
if(request->size == 0) {
|
||||
#ifdef USE_REMOTE_PUT_LOCAL_GET
|
||||
label1:
|
||||
RCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test);
|
||||
if(!test) {
|
||||
request->label = 1;
|
||||
return(RCCE_PENDING);
|
||||
}
|
||||
RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);
|
||||
#ifdef USE_TAGGED_FLAGS
|
||||
if(!request->probe)
|
||||
RCCE_flag_write_tagged(request->sent, RCCE_FLAG_SET, request->dest, request->tag, request->len);
|
||||
else
|
||||
#endif
|
||||
RCCE_flag_write(request->sent, RCCE_FLAG_SET, request->dest);
|
||||
#else // LOCAL PUT / REMOTE GET: (standard)
|
||||
#ifdef USE_TAGGED_FLAGS
|
||||
if(!request->probe)
|
||||
RCCE_flag_write_tagged(request->sent, RCCE_FLAG_SET, request->dest, request->tag, request->len);
|
||||
else
|
||||
#endif
|
||||
RCCE_flag_write(request->sent, RCCE_FLAG_SET, request->dest);
|
||||
label1:
|
||||
RCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test);
|
||||
if(!test) {
|
||||
request->label = 1;
|
||||
return(RCCE_PENDING);
|
||||
}
|
||||
RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);
|
||||
#endif // !USE_REMOTE_PUT_LOCAL_GET
|
||||
request->finished = 1;
|
||||
return(RCCE_SUCCESS);
|
||||
}
|
||||
#endif // USE_SYNCH_FOR_ZERO_BYTE
|
||||
|
||||
// send data in units of available chunk size of comm buffer
|
||||
for (; request->wsize < (request->size / request->chunk) * request->chunk; request->wsize += request->chunk) {
|
||||
request->bufptr = request->privbuf + request->wsize;
|
||||
request->nbytes = request->chunk;
|
||||
|
||||
#ifdef USE_REMOTE_PUT_LOCAL_GET
|
||||
|
||||
// wait for the destination to be ready to receive a message
|
||||
label2:
|
||||
RCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test);
|
||||
if(!test) {
|
||||
request->label = 2;
|
||||
return(RCCE_PENDING);
|
||||
}
|
||||
RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);
|
||||
|
||||
// copy private data to remote comm buffer
|
||||
if(request->copy) RCCE_put(request->combuf, (t_vcharp) request->bufptr, request->nbytes, request->dest);
|
||||
|
||||
#ifdef USE_TAGGED_FLAGS
|
||||
if( (request->wsize == 0) && (!request->probe) )
|
||||
RCCE_flag_write_tagged(request->sent, RCCE_FLAG_SET, request->dest, request->tag, request->len);
|
||||
else
|
||||
#endif
|
||||
RCCE_flag_write(request->sent, RCCE_FLAG_SET, request->dest);
|
||||
|
||||
#else // LOCAL PUT / REMOTE GET: (standard)
|
||||
|
||||
// copy private data to own comm buffer
|
||||
if(request->copy) RCCE_put(request->combuf, (t_vcharp) request->bufptr, request->nbytes, RCCE_IAM);
|
||||
|
||||
#ifdef USE_TAGGED_FLAGS
|
||||
if( (request->wsize == 0) && (!request->probe) )
|
||||
RCCE_flag_write_tagged(request->sent, RCCE_FLAG_SET, request->dest, request->tag, request->len);
|
||||
else
|
||||
#endif
|
||||
RCCE_flag_write(request->sent, RCCE_FLAG_SET, request->dest);
|
||||
|
||||
// wait for the destination to be ready to receive a message
|
||||
label2:
|
||||
RCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test);
|
||||
if(!test) {
|
||||
request->label = 2;
|
||||
return(RCCE_PENDING);
|
||||
}
|
||||
RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);
|
||||
|
||||
#endif // !USE_REMOTE_PUT_LOCAL_GET
|
||||
|
||||
} // for
|
||||
|
||||
request->remainder = request->size % request->chunk;
|
||||
// if nothing is left over, we are done
|
||||
if (!request->remainder) {
|
||||
request->finished = 1;
|
||||
return(RCCE_SUCCESS);
|
||||
}
|
||||
|
||||
// send remainder of data--whole cache lines
|
||||
request->bufptr = request->privbuf + (request->size / request->chunk) * request->chunk;
|
||||
request->nbytes = request->remainder - request->remainder % RCCE_LINE_SIZE;
|
||||
|
||||
if (request->nbytes) {
|
||||
|
||||
#ifdef USE_REMOTE_PUT_LOCAL_GET
|
||||
|
||||
// wait for the destination to be ready to receive a message
|
||||
label3:
|
||||
RCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test);
|
||||
if(!test) {
|
||||
request->label = 3;
|
||||
return(RCCE_PENDING);
|
||||
}
|
||||
RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);
|
||||
|
||||
// copy private data to remote comm buffer
|
||||
if(request->copy) RCCE_put(request->combuf, (t_vcharp) request->bufptr, request->nbytes, request->dest);
|
||||
|
||||
#ifdef USE_TAGGED_FLAGS
|
||||
if( (request->wsize == 0) && (!request->probe) )
|
||||
RCCE_flag_write_tagged(request->sent, RCCE_FLAG_SET, request->dest, request->tag, request->len);
|
||||
else
|
||||
#endif
|
||||
RCCE_flag_write(request->sent, RCCE_FLAG_SET, request->dest);
|
||||
|
||||
#else // LOCAL PUT / REMOTE GET: (standard)
|
||||
|
||||
// copy private data to own comm buffer
|
||||
if(request->copy) RCCE_put(request->combuf, (t_vcharp)request->bufptr, request->nbytes, RCCE_IAM);
|
||||
|
||||
#ifdef USE_TAGGED_FLAGS
|
||||
if( (request->wsize == 0) && (!request->probe) )
|
||||
RCCE_flag_write_tagged(request->sent, RCCE_FLAG_SET, request->dest, request->tag, request->len);
|
||||
else
|
||||
#endif
|
||||
RCCE_flag_write(request->sent, RCCE_FLAG_SET, request->dest);
|
||||
|
||||
// wait for the destination to be ready to receive a message
|
||||
label3:
|
||||
RCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test);
|
||||
if(!test) {
|
||||
request->label = 3;
|
||||
return(RCCE_PENDING);
|
||||
}
|
||||
RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);
|
||||
|
||||
#endif // !USE_REMOTE_PUT_LOCAL_GET
|
||||
|
||||
} // if(request->nbytes)
|
||||
|
||||
request->remainder = request->size % request->chunk;
|
||||
request->remainder = request->remainder%RCCE_LINE_SIZE;
|
||||
|
||||
// if nothing is left over, we are done
|
||||
if (!request->remainder)
|
||||
{
|
||||
request->finished = 1;
|
||||
return(RCCE_SUCCESS);
|
||||
}
|
||||
|
||||
// remainder is less than a cache line. This must be copied into appropriately sized
|
||||
// intermediate space before it can be sent to the receiver
|
||||
request->bufptr = request->privbuf + (request->size / request->chunk) * request->chunk + request->nbytes;
|
||||
request->nbytes = RCCE_LINE_SIZE;
|
||||
|
||||
#ifdef USE_REMOTE_PUT_LOCAL_GET
|
||||
|
||||
// wait for the destination to be ready to receive a message
|
||||
label4:
|
||||
RCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test);
|
||||
if(!test) {
|
||||
request->label = 4;
|
||||
return(RCCE_PENDING);
|
||||
}
|
||||
RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);
|
||||
|
||||
// copy private data to remote comm buffer
|
||||
if(request->copy) {
|
||||
#ifdef COPPERRIDGE
|
||||
memcpy_scc(padline,request->bufptr,request->remainder);
|
||||
#else
|
||||
memcpy(padline,request->bufptr,request->remainder);
|
||||
#endif
|
||||
RCCE_put(request->combuf, (t_vcharp) padline, request->nbytes, request->dest);
|
||||
}
|
||||
|
||||
#ifdef USE_TAGGED_FLAGS
|
||||
#ifdef USE_PROBE_FLAGS_SHORTCUT
|
||||
if(request->privbuf == NULL)
|
||||
{
|
||||
request->finished = 1;
|
||||
return(RCCE_SUCCESS);
|
||||
}
|
||||
#endif
|
||||
if( (request->wsize == 0) && (!request->probe) )
|
||||
RCCE_flag_write_tagged(request->sent, RCCE_FLAG_SET, request->dest, request->tag, request->len);
|
||||
else
|
||||
#endif
|
||||
RCCE_flag_write(request->sent, RCCE_FLAG_SET, request->dest);
|
||||
|
||||
#else // LOCAL PUT / REMOTE GET: (standard)
|
||||
|
||||
// copy private data to own comm buffer
|
||||
if(request->copy) {
|
||||
#ifdef COPPERRIDGE
|
||||
memcpy_scc(padline,request->bufptr,request->remainder);
|
||||
#else
|
||||
memcpy(padline,request->bufptr,request->remainder);
|
||||
#endif
|
||||
RCCE_put(request->combuf, (t_vcharp)padline, request->nbytes, RCCE_IAM);
|
||||
}
|
||||
|
||||
#ifdef USE_TAGGED_FLAGS
|
||||
if( (request->wsize == 0) && (!request->probe) )
|
||||
RCCE_flag_write_tagged(request->sent, RCCE_FLAG_SET, request->dest, request->tag, request->len);
|
||||
else
|
||||
#endif
|
||||
RCCE_flag_write(request->sent, RCCE_FLAG_SET, request->dest);
|
||||
|
||||
// wait for the destination to be ready to receive a message
|
||||
label4:
|
||||
RCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test);
|
||||
if(!test) {
|
||||
request->label = 4;
|
||||
return(RCCE_PENDING);
|
||||
}
|
||||
RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);
|
||||
|
||||
#endif // !USE_REMOTE_PUT_LOCAL_GET
|
||||
|
||||
request->finished = 1;
|
||||
return(RCCE_SUCCESS);
|
||||
}
|
||||
|
||||
static void RCCE_init_send_request(
|
||||
char *privbuf, // source buffer in local private memory (send buffer)
|
||||
t_vcharp combuf, // intermediate buffer in MPB
|
||||
size_t chunk, // size of MPB available for this message (bytes)
|
||||
RCCE_FLAG *ready, // flag indicating whether receiver is ready
|
||||
RCCE_FLAG *sent, // flag indicating whether message has been sent by source
|
||||
size_t size, // size of message (bytes)
|
||||
int dest, // UE that will receive the message
|
||||
int copy, // set to 0 for synchronization only (no copying/sending)
|
||||
void* tag, // additional tag?
|
||||
int len, // length of additional tag
|
||||
RCCE_FLAG *probe, // flag for probing for incoming messages
|
||||
RCCE_SEND_REQUEST *request
|
||||
) {
|
||||
|
||||
request->privbuf = privbuf;
|
||||
request->combuf = combuf;
|
||||
request->chunk = chunk;
|
||||
request->ready = ready;
|
||||
request->sent = sent;
|
||||
request->size = size;
|
||||
request->dest = dest;
|
||||
|
||||
request->copy = copy;
|
||||
request->tag = tag;
|
||||
request->len = len;
|
||||
request->probe = probe;
|
||||
|
||||
request->wsize = 0;
|
||||
request->remainder = 0;
|
||||
request->nbytes = 0;
|
||||
request->bufptr = NULL;
|
||||
|
||||
request->label = 0;
|
||||
|
||||
request->finished = 0;
|
||||
|
||||
request->next = NULL;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
#ifndef GORY
|
||||
// this is the LfBS-customized synchronized message passing API
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: RCCE_send
|
||||
//--------------------------------------------------------------------------------------
|
||||
// send function for simplified API; use library-maintained variables for synchronization
|
||||
//--------------------------------------------------------------------------------------
|
||||
int RCCE_send(char *privbuf, size_t size, int dest) {
|
||||
|
||||
#ifdef USE_PROBE_FLAGS
|
||||
RCCE_FLAG* probe = &RCCE_probe_flag[RCCE_IAM];
|
||||
#else
|
||||
RCCE_FLAG* probe = NULL;
|
||||
#endif
|
||||
|
||||
#ifndef USE_REMOTE_PUT_LOCAL_GET
|
||||
if(RCCE_send_queue != NULL)
|
||||
#else
|
||||
if(RCCE_send_queue[dest] != NULL)
|
||||
#endif
|
||||
return(RCCE_REJECTED);
|
||||
|
||||
#ifdef USE_TAGGED_FOR_SHORT
|
||||
if(size <= (RCCE_LINE_SIZE - sizeof(int)))
|
||||
{
|
||||
#ifdef USE_PROBE_FLAGS
|
||||
RCCE_flag_write_tagged(probe, RCCE_FLAG_SET, dest, privbuf, size);
|
||||
#endif
|
||||
|
||||
#ifdef USE_REMOTE_PUT_LOCAL_GET
|
||||
|
||||
RCCE_wait_until(RCCE_ready_flag[dest], RCCE_FLAG_SET);
|
||||
RCCE_flag_write(&RCCE_ready_flag[dest], RCCE_FLAG_UNSET, RCCE_IAM);
|
||||
|
||||
#ifndef USE_PROBE_FLAGS_SHORTCUT
|
||||
#ifdef USE_PROBE_FLAGS
|
||||
RCCE_flag_write(&RCCE_sent_flag[RCCE_IAM], RCCE_FLAG_SET, dest);
|
||||
#else
|
||||
RCCE_flag_write_tagged(&RCCE_sent_flag[RCCE_IAM], RCCE_FLAG_SET, dest, privbuf, size);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#else // LOCAL PUT / REMOTE GET: (standard)
|
||||
|
||||
#ifdef USE_PROBE_FLAGS
|
||||
RCCE_flag_write(&RCCE_sent_flag[RCCE_IAM], RCCE_FLAG_SET, dest);
|
||||
#else
|
||||
RCCE_flag_write_tagged(&RCCE_sent_flag[RCCE_IAM], RCCE_FLAG_SET, dest, privbuf, size);
|
||||
#endif
|
||||
|
||||
RCCE_wait_until(RCCE_ready_flag[dest], RCCE_FLAG_SET);
|
||||
RCCE_flag_write(&RCCE_ready_flag[dest], RCCE_FLAG_UNSET, RCCE_IAM);
|
||||
|
||||
#endif // !USE_REMOTE_PUT_LOCAL_GET
|
||||
|
||||
return(RCCE_SUCCESS);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
|
||||
return(RCCE_send_general(privbuf, RCCE_buff_ptr, RCCE_chunk,
|
||||
&RCCE_ready_flag[dest], &RCCE_sent_flag[RCCE_IAM],
|
||||
size, dest,
|
||||
1, 0, 0, // copy, pipe, mcast
|
||||
NULL, 0, probe)); // tag, len
|
||||
}
|
||||
|
||||
int RCCE_send_tagged(char *privbuf, size_t size, int dest, void* tag, int len) {
|
||||
|
||||
#ifdef USE_PROBE_FLAGS
|
||||
RCCE_FLAG* probe = &RCCE_probe_flag[RCCE_IAM];
|
||||
#else
|
||||
RCCE_FLAG* probe = NULL;
|
||||
#endif
|
||||
|
||||
#ifndef USE_REMOTE_PUT_LOCAL_GET
|
||||
if(RCCE_send_queue != NULL)
|
||||
#else
|
||||
if(RCCE_send_queue[dest] != NULL)
|
||||
#endif
|
||||
return(RCCE_REJECTED);
|
||||
|
||||
#ifdef USE_TAGGED_FLAGS
|
||||
return(RCCE_send_general(privbuf, RCCE_buff_ptr, RCCE_chunk,
|
||||
&RCCE_ready_flag[dest], &RCCE_sent_flag[RCCE_IAM],
|
||||
size, dest,
|
||||
1, 0, 0, // copy, pipe, mcast
|
||||
tag, len, probe)); // tag, len, probe
|
||||
#else
|
||||
|
||||
RCCE_send_general(tag, RCCE_buff_ptr, RCCE_chunk,
|
||||
&RCCE_ready_flag[dest], &RCCE_sent_flag[RCCE_IAM],
|
||||
len, dest,
|
||||
1, 0, 0, // copy, pipe, mcast
|
||||
NULL, 0, probe); // tag, len, probe
|
||||
|
||||
return(RCCE_send_general(privbuf, RCCE_buff_ptr, RCCE_chunk,
|
||||
&RCCE_ready_flag[dest], &RCCE_sent_flag[RCCE_IAM],
|
||||
size, dest,
|
||||
1, 0, 0, // copy, pipe, mcast
|
||||
NULL, 0, NULL)); // tag, len, probe
|
||||
#endif
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: RCCE_send_pipe
|
||||
//--------------------------------------------------------------------------------------
|
||||
// send function for simplified API; use library-maintained variables for synchronization
|
||||
//--------------------------------------------------------------------------------------
|
||||
int RCCE_send_pipe(char *privbuf, size_t size, int dest) {
|
||||
|
||||
#ifdef USE_PROBE_FLAGS
|
||||
RCCE_FLAG* probe = &RCCE_probe_flag[RCCE_IAM];
|
||||
#else
|
||||
RCCE_FLAG* probe = NULL;
|
||||
#endif
|
||||
|
||||
#ifndef USE_REMOTE_PUT_LOCAL_GET
|
||||
if(RCCE_send_queue != NULL)
|
||||
#else
|
||||
if(RCCE_send_queue[dest] != NULL)
|
||||
#endif
|
||||
return(RCCE_REJECTED);
|
||||
|
||||
#ifdef USE_PIPELINE_FLAGS
|
||||
return(RCCE_send_general(privbuf, RCCE_buff_ptr, RCCE_chunk,
|
||||
&RCCE_ready_flag_pipe[dest], &RCCE_sent_flag_pipe[RCCE_IAM],
|
||||
size, dest,
|
||||
1, 1, 0, // copy, pipe, mcast
|
||||
NULL, 0, probe)); // tag, len, probe
|
||||
#else
|
||||
return(RCCE_send_general(privbuf, RCCE_buff_ptr, RCCE_chunk,
|
||||
&RCCE_ready_flag[dest], &RCCE_sent_flag[RCCE_IAM],
|
||||
size, dest,
|
||||
1, 1, 0, // copy, pipe, mcast
|
||||
NULL, 0, probe)); // tag, len, probe
|
||||
#endif
|
||||
}
|
||||
|
||||
int RCCE_send_mcast(char *privbuf, size_t size) {
|
||||
|
||||
#ifdef USE_PROBE_FLAGS
|
||||
RCCE_FLAG* probe = &RCCE_probe_flag[RCCE_IAM];
|
||||
#else
|
||||
RCCE_FLAG* probe = NULL;
|
||||
#endif
|
||||
|
||||
#ifndef USE_REMOTE_PUT_LOCAL_GET
|
||||
if(RCCE_send_queue != NULL)
|
||||
#else
|
||||
if(RCCE_send_queue != NULL)
|
||||
#endif
|
||||
return(RCCE_REJECTED);
|
||||
|
||||
return(RCCE_send_general(privbuf, RCCE_buff_ptr, RCCE_chunk,
|
||||
NULL, NULL,
|
||||
size, -1,
|
||||
1, 0, 1, // copy, pipe, mcast
|
||||
NULL, 0, probe)); // tag, len
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: RCCE_isend
|
||||
//--------------------------------------------------------------------------------------
|
||||
// non-blocking send function; returns a handle of type RCCE_SEND_REQUEST
|
||||
//--------------------------------------------------------------------------------------
|
||||
// Start a non-blocking send described by `request`.
// Returns
//   RCCE_SUCCESS  - queue was empty and the transfer completed immediately,
//   RCCE_PENDING  - queue was empty, transfer started; request is now the head,
//   RCCE_RESERVED - other requests are queued; request was appended at the tail.
int RCCE_isend(char *privbuf, size_t size, int dest, RCCE_SEND_REQUEST *request) {

  RCCE_SEND_REQUEST *tail;
#ifdef USE_PROBE_FLAGS
  RCCE_FLAG *probe = &RCCE_probe_flag[RCCE_IAM];
#else
  RCCE_FLAG *probe = NULL;
#endif
  // head of the queue this request belongs to
#ifndef USE_REMOTE_PUT_LOCAL_GET
  RCCE_SEND_REQUEST **head = &RCCE_send_queue;
#else
  RCCE_SEND_REQUEST **head = &RCCE_send_queue[dest];
#endif

#ifdef USE_TAGGED_FOR_SHORT
  if (size <= (RCCE_LINE_SIZE - sizeof(int))) {
    // short message: no private buffer, no copy; payload is passed as the tag
    RCCE_init_send_request(NULL, RCCE_buff_ptr, RCCE_chunk,
                           &RCCE_ready_flag[dest], &RCCE_sent_flag[RCCE_IAM],
                           size, dest, 0, privbuf, size, probe, request);
  }
  else
#endif
  {
    RCCE_init_send_request(privbuf, RCCE_buff_ptr, RCCE_chunk,
                           &RCCE_ready_flag[dest], &RCCE_sent_flag[RCCE_IAM],
                           size, dest, 1, NULL, 0, probe, request);
  }

  if (*head == NULL) {
    // nothing queued: try to make progress right away
    if (RCCE_push_send_request(request) == RCCE_SUCCESS) {
      return(RCCE_SUCCESS);
    }
    *head = request;
    return(RCCE_PENDING);
  }

  // queue is busy: append behind the last queued request
  tail = *head;
  while (tail->next != NULL) {
    tail = tail->next;
  }
  tail->next = request;

  return(RCCE_RESERVED);
}
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: RCCE_isend_test
|
||||
//--------------------------------------------------------------------------------------
|
||||
// test function for completion of the requested non-blocking send operation
|
||||
//--------------------------------------------------------------------------------------
|
||||
// Test whether `request` has completed, progressing it when it is at the
// head of its queue. *test is set to 1 on completion and 0 otherwise.
// Returns RCCE_SUCCESS (finished), RCCE_RESERVED (not at the head of the
// queue, so nothing was attempted) or RCCE_PENDING (progressed, not done).
int RCCE_isend_test(RCCE_SEND_REQUEST *request, int *test) {

  if (!request->finished) {

    // only the request at the head of the queue may be progressed
#ifndef USE_REMOTE_PUT_LOCAL_GET
    if (RCCE_send_queue != request)
#else
    if (RCCE_send_queue[request->dest] != request)
#endif
    {
      *test = 0;
      return(RCCE_RESERVED);
    }

    RCCE_push_send_request(request);

    if (!request->finished) {
      *test = 0;
      return(RCCE_PENDING);
    }

    // completed just now: unlink it from the queue
#ifndef USE_REMOTE_PUT_LOCAL_GET
    RCCE_send_queue = request->next;
#else
    RCCE_send_queue[request->dest] = request->next;
#endif
  }

  *test = 1;
  return(RCCE_SUCCESS);
}
|
||||
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: RCCE_isend_push
|
||||
//--------------------------------------------------------------------------------------
|
||||
// progress function for pending requests in the isend queue
|
||||
//--------------------------------------------------------------------------------------
|
||||
int RCCE_isend_push(int dest) {
|
||||
|
||||
#ifndef USE_REMOTE_PUT_LOCAL_GET
|
||||
RCCE_SEND_REQUEST *request = RCCE_send_queue;
|
||||
#else
|
||||
RCCE_SEND_REQUEST *request = RCCE_send_queue[dest];
|
||||
#endif
|
||||
|
||||
if(request == NULL) {
|
||||
return(RCCE_SUCCESS);
|
||||
}
|
||||
|
||||
if(request->finished) {
|
||||
return(RCCE_SUCCESS);
|
||||
}
|
||||
|
||||
RCCE_push_send_request(request);
|
||||
|
||||
if(request->finished) {
|
||||
#ifndef USE_REMOTE_PUT_LOCAL_GET
|
||||
RCCE_send_queue = request->next;
|
||||
#else
|
||||
RCCE_send_queue[request->dest] = request->next;
|
||||
#endif
|
||||
return(RCCE_SUCCESS);
|
||||
}
|
||||
|
||||
return(RCCE_PENDING);
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: RCCE_isend_wait
|
||||
//--------------------------------------------------------------------------------------
|
||||
// just wait for completion of the requested non-blocking send operation
|
||||
//--------------------------------------------------------------------------------------
|
||||
// Block until `request` has finished. While waiting, the send queue is
// pushed and, if the request is still unfinished, the other queues are
// pushed as well so that outstanding requests keep progressing.
// Always returns RCCE_SUCCESS.
int RCCE_isend_wait(RCCE_SEND_REQUEST *request) {

  int ue;

  while (!request->finished) {

#ifndef USE_REMOTE_PUT_LOCAL_GET

    RCCE_isend_push(-1);
    if (request->finished) break;

    // still waiting: progress the receive queue of every UE
    for (ue = 0; ue < RCCE_NP; ue++) {
      RCCE_irecv_push(ue);
    }

#else

    RCCE_isend_push(request->dest);
    if (request->finished) break;

    // still waiting: progress receives, then the send queue of every UE
    RCCE_irecv_push(-1);
    for (ue = 0; ue < RCCE_NP; ue++) {
      RCCE_isend_push(ue);
    }

#endif
  }

  return(RCCE_SUCCESS);
}
|
||||
|
||||
#else
|
||||
// this is the gory synchronized message passing API
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: RCCE_send
|
||||
//--------------------------------------------------------------------------------------
|
||||
// send function for simplified API; use user-supplied variables for synchronization
|
||||
//--------------------------------------------------------------------------------------
|
||||
// Gory-API send: forwards the user-supplied buffer, chunk size and
// synchronization flags to RCCE_send_general with copying enabled and
// no pipelining, multicast, tag or probe flag.
int RCCE_send(char *privbuf, t_vcharp combuf, size_t chunk, RCCE_FLAG *ready,
              RCCE_FLAG *sent, size_t size, int dest) {
  int status;

  status = RCCE_send_general(privbuf, combuf, chunk, ready, sent,
                             size, dest,
                             1, 0, 0,         // copy, pipe, mcast
                             NULL, 0, NULL);  // tag, len, probe
  return(status);
}
|
||||
#endif
|
613
hermit/usr/ircce/RCCE_synch.c
Normal file
613
hermit/usr/ircce/RCCE_synch.c
Normal file
|
@ -0,0 +1,613 @@
|
|||
///*************************************************************************************
|
||||
// Synchronization functions.
|
||||
// Single-bit and whole-cache-line flags are sufficiently different that we provide
|
||||
// separate implementations of the synchronization routines for each case
|
||||
//**************************************************************************************
|
||||
//
|
||||
// Author: Rob F. Van der Wijngaart
|
||||
// Intel Corporation
|
||||
// Date: 008/30/2010
|
||||
//
|
||||
//**************************************************************************************
|
||||
//
|
||||
// Copyright 2010 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "RCCE_lib.h"
|
||||
#if defined(COPPERRIDGE) && !defined(__hermit__)
|
||||
#include "scc_memcpy.h"
|
||||
#else
|
||||
#define memcpy_scc memcpy
|
||||
#endif
|
||||
|
||||
#ifdef USE_BYTE_FLAGS
|
||||
#include "RCCE_byte_synch.c"
|
||||
#else
|
||||
|
||||
#ifdef SINGLEBITFLAGS
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// LOCKING SYNCHRONIZATION USING ONE BIT PER FLAG
|
||||
//////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: RCCE_wait_until
|
||||
//--------------------------------------------------------------------------------------
|
||||
// wait until flag in local MPB becomes set or unset. To avoid reading stale data from
|
||||
// the cache instead of new flag value from the MPB, issue MPB cache invalidation before
|
||||
// each read, including within the spin cycle
|
||||
//--------------------------------------------------------------------------------------
|
||||
// Single-bit flag variant: spin until the bit addressed by `flag` equals
// `val`. Returns RCCE_SUCCESS, or (GORY builds only) an error code when the
// arguments fail validation.
int RCCE_wait_until(RCCE_FLAG flag, RCCE_FLAG_STATUS val) {
  t_vcharp line = flag.line_address;

  // argument checks are compiled in for the gory API only
#ifdef GORY
  if (val != RCCE_FLAG_UNSET && val != RCCE_FLAG_SET)
    return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_STATUS_UNDEFINED));
  if (!line)
    return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_NOT_ALLOCATED));
  // the flag's line must lie inside the local comm buffer
  if (line - RCCE_comm_buffer[RCCE_IAM] < 0 ||
      line+RCCE_LINE_SIZE - (RCCE_comm_buffer[RCCE_IAM] + RCCE_BUFF_SIZE) >= 0) {
    return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_NOT_IN_COMM_BUFFER));
  }
#endif

  // flush/invalidate before every read so a stale cached copy of the
  // flag line is never tested
  for (;;) {
#ifdef _OPENMP
    #pragma omp flush
#endif
    RC_cache_invalidate();
    if (RCCE_bit_value(line, flag.location) == val) break;
  }

  return(RCCE_SUCCESS);
}
|
||||
|
||||
// Single-bit flag variant: test once (no spinning) whether the bit addressed
// by `flag` equals `val`; *result is set to 1 on a match and 0 otherwise.
// Returns RCCE_SUCCESS, or (GORY builds only) an error code when the
// arguments fail validation.
int RCCE_test_flag(RCCE_FLAG flag, RCCE_FLAG_STATUS val, int *result) {
  t_vcharp line = flag.line_address;

  // argument checks are compiled in for the gory API only
#ifdef GORY
  if (val != RCCE_FLAG_UNSET && val != RCCE_FLAG_SET)
    return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_STATUS_UNDEFINED));
  if (!line)
    return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_NOT_ALLOCATED));
  // the flag's line must lie inside the local comm buffer
  if (line - RCCE_comm_buffer[RCCE_IAM] < 0 ||
      line+RCCE_LINE_SIZE - (RCCE_comm_buffer[RCCE_IAM] + RCCE_BUFF_SIZE) >= 0) {
    return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_NOT_IN_COMM_BUFFER));
  }
#endif

  // flush/invalidate so the single read below sees the current flag value
#ifdef _OPENMP
  #pragma omp flush
#endif
  RC_cache_invalidate();

  if (RCCE_bit_value(line, flag.location) == val) {
    (*result) = 1;
  }
  else {
    (*result) = 0;
  }

  return(RCCE_SUCCESS);
}
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: RCCE_barrier
|
||||
//--------------------------------------------------------------------------------------
|
||||
// very simple, linear barrier
|
||||
//--------------------------------------------------------------------------------------
|
||||
// Linear barrier for single-bit flags. Every UE flips its own gather bit;
// the first member of the communicator polls the gather bits of all other
// members until they match the new value and then writes the release flag
// of each of them, while the other members wait on their release flag.
// Returns RCCE_SUCCESS or the error reported by a failing RCCE call.
int RCCE_barrier(RCCE_COMM *comm) {

  const int root = 0;                     // index of the coordinating member
  int arrived = 0;                        // members seen at the barrier
  int m, error;
  t_vchar cyclechar[RCCE_LINE_SIZE];      // copy of the local gather line
  t_vchar valchar [RCCE_LINE_SIZE];       // copy of a remote gather line
  t_vcharp gatherp = comm->gather.line_address;
  RCCE_FLAG_STATUS cycle;

  if (RCCE_debug_synch)
    fprintf(STDERR,"UE %d has checked into barrier\n", RCCE_IAM);

  // flip local barrier variable
  error = RCCE_get(cyclechar, gatherp, RCCE_LINE_SIZE, RCCE_IAM);
  if (error)
    return(RCCE_error_return(RCCE_debug_synch,error));
  cycle = RCCE_flip_bit_value(cyclechar, comm->gather.location);
  error = RCCE_put(gatherp, cyclechar, RCCE_LINE_SIZE, RCCE_IAM);
  if (error)
    return(RCCE_error_return(RCCE_debug_synch,error));

  if (RCCE_IAM != comm->member[root]) {
    // non-root members just wait to be released
    error = RCCE_wait_until(comm->release, cycle);
    if (error)
      return(RCCE_error_return(RCCE_debug_synch,error));
  }
  else {
    // read "remote" gather flags; once all equal "cycle"
    // (i.e. arrived==comm->size), all UEs have reached the barrier
    while (arrived != comm->size) {
      // the root counts itself and skips member #0 in the scan
      arrived = 1;
      for (m = 1; m < comm->size; m++) {
        // copy flag values out of comm buffer
        error = RCCE_get(valchar, gatherp, RCCE_LINE_SIZE, comm->member[m]);
        if (error)
          return(RCCE_error_return(RCCE_debug_synch,error));
        if (RCCE_bit_value(valchar, comm->gather.location) == cycle) arrived++;
      }
    }
    // set release flags
    for (m = 1; m < comm->size; m++) {
      error = RCCE_flag_write(&(comm->release), cycle, comm->member[m]);
      if (error)
        return(RCCE_error_return(RCCE_debug_synch,error));
    }
  }

  if (RCCE_debug_synch) fprintf(STDERR,"UE %d has cleared barrier\n", RCCE_IAM);
  return(RCCE_SUCCESS);
}
|
||||
|
||||
#else
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// LOCKLESS SYNCHRONIZATION USING ONE WHOLE CACHE LINE PER FLAG //
|
||||
//////////////////////////////////////////////////////////////////
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: RCCE_wait_until
|
||||
//--------------------------------------------------------------------------------------
|
||||
// wait until flag in local MPB becomes set or unset. To avoid reading stale data from
|
||||
// the cache instead of new flag value from the MPB, issue MPB cache invalidation before
|
||||
// each read, including within the spin cycle
|
||||
//--------------------------------------------------------------------------------------
|
||||
// Whole-cache-line flag variant: spin until the int read through `flag`
// equals `val`. With USE_REVERTED_FLAGS the value is read from the last int
// of the flag's line instead of the first. Returns RCCE_SUCCESS, or (GORY
// builds only) an error code when the arguments fail validation.
int RCCE_wait_until(RCCE_FLAG flag, RCCE_FLAG_STATUS val) {

#ifdef GORY
  // byte-granular view of the flag address, needed only for the checks below
  t_vcharp cflag = (t_vcharp) flag;

  if (val != RCCE_FLAG_UNSET && val != RCCE_FLAG_SET)
    return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_STATUS_UNDEFINED));
  if (!cflag)
    return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_NOT_ALLOCATED));
  // check to see if flag is properly contained in the local comm buffer
  if (cflag - RCCE_comm_buffer[RCCE_IAM]>=0 &&
      cflag+RCCE_LINE_SIZE - (RCCE_comm_buffer[RCCE_IAM] + RCCE_BUFF_SIZE)<0){}
  else {
    return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_NOT_IN_COMM_BUFFER));
  }
#endif

#ifdef USE_REVERTED_FLAGS
  // reverted layout: the flag value sits in the last int of the line
  flag = flag + RCCE_LINE_SIZE / sizeof(int) - 1;
#endif

#ifndef USE_FLAG_EXPERIMENTAL
  // flush/invalidate before every read so a stale cached copy of the
  // flag is never tested; keep reading until it has the required value
  do {
#ifdef _OPENMP
    #pragma omp flush
#endif
    RC_cache_invalidate();
  } while ((*flag) != val);
#else
  // `flag` is a pointer, so it must be printed with %p (as void *);
  // %X expects an unsigned int and mismatches the argument
  if (RCCE_debug_synch)
    fprintf(STDERR,"UE %d wait flag: %x from address %p \n", RCCE_IAM,val,(void *)flag);
  // redirect the address from the comm buffer into the flag buffer,
  // keeping the same offset, and spin on that location
  flag = RCCE_flag_buffer[RCCE_IAM]+(flag-RCCE_comm_buffer[RCCE_IAM]);
  while ((*flag) != val);
#endif
  return(RCCE_SUCCESS);
}
|
||||
|
||||
#ifdef USE_TAGGED_FLAGS
|
||||
// Spin until the flag equals `val`, then (if `tag` is non-NULL) copy up to
// `len` bytes of tag data out of the flag's cache line into `tag`.
// The copy is clamped to RCCE_LINE_SIZE - sizeof(int) bytes; a `len` of zero
// or less copies nothing. Always returns RCCE_SUCCESS.
int RCCE_wait_tagged(RCCE_FLAG flag, RCCE_FLAG_STATUS val, void *tag, int len) {

  RCCE_FLAG flag_pos;
  // room left for tag data in a line once the flag int is accounted for
  const size_t max_tag = RCCE_LINE_SIZE - sizeof(int);

  // location of the flag value within its line
#ifndef USE_REVERTED_FLAGS
  flag_pos = flag;
#else
  flag_pos = flag + RCCE_LINE_SIZE / sizeof(int) - 1;
#endif

  // flush/invalidate before every read so a stale cached copy is never tested
  do {
#ifdef _OPENMP
    #pragma omp flush
#endif
    RC_cache_invalidate();
  } while ((*flag_pos) != val);

  // Compare in the unsigned domain only after ruling out negative values:
  // a negative int converted to size_t would look like a huge length and
  // be clamped to a maximal copy instead of being rejected.
  if (tag && len > 0) {
    size_t nbytes = (size_t) len;
    if (nbytes > max_tag) nbytes = max_tag;
#ifndef USE_REVERTED_FLAGS
    // tag data follows the flag int
    memcpy_scc(tag, &((char*)flag)[sizeof(int)], nbytes);
#else
    // tag data starts at the beginning of the line
    memcpy_scc(tag, &((char*)flag)[0], nbytes);
#endif
  }

  return(RCCE_SUCCESS);
}
|
||||
#endif
|
||||
|
||||
// Whole-cache-line flag variant: test once (no spinning) whether the int
// read through `flag` equals `val`; *result is set to 1 on a match and 0
// otherwise. With USE_REVERTED_FLAGS the value is read from the last int of
// the flag's line. Returns RCCE_SUCCESS, or (GORY builds only) an error
// code when the arguments fail validation.
int RCCE_test_flag(RCCE_FLAG flag, RCCE_FLAG_STATUS val, int *result) {
  t_vcharp line;

  line = (t_vcharp) flag;
#ifdef GORY
  if (val != RCCE_FLAG_UNSET && val != RCCE_FLAG_SET)
    return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_STATUS_UNDEFINED));
  if (!line)
    return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_NOT_ALLOCATED));
  // the flag's line must lie inside the local comm buffer
  if (line - RCCE_comm_buffer[RCCE_IAM] < 0 ||
      line+RCCE_LINE_SIZE - (RCCE_comm_buffer[RCCE_IAM] + RCCE_BUFF_SIZE) >= 0) {
    return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_NOT_IN_COMM_BUFFER));
  }
#endif

#ifdef USE_REVERTED_FLAGS
  // reverted layout: the flag value sits in the last int of the line
  flag = flag + RCCE_LINE_SIZE / sizeof(int) - 1;
#endif

  // flush/invalidate so the single read below sees the current flag value
#ifdef _OPENMP
  #pragma omp flush
#endif
#ifndef USE_FLAG_EXPERIMENTAL
  RC_cache_invalidate();
#endif

  if ((*flag) == val) {
    (*result) = 1;
  }
  else {
    (*result) = 0;
  }

  return(RCCE_SUCCESS);
}
|
||||
|
||||
#ifdef USE_TAGGED_FLAGS
|
||||
// Test once (no spinning) whether the flag equals `val`; *result is set to
// 1 on a match and 0 otherwise. On a match, and if `tag` is non-NULL, up to
// `len` bytes of tag data are copied out of the flag's cache line into
// `tag`. The copy is clamped to RCCE_LINE_SIZE - sizeof(int) bytes; a `len`
// of zero or less copies nothing. Always returns RCCE_SUCCESS.
int RCCE_test_tagged(RCCE_FLAG flag, RCCE_FLAG_STATUS val, int *result, void *tag, int len) {

  RCCE_FLAG flag_pos;
  // room left for tag data in a line once the flag int is accounted for
  const size_t max_tag = RCCE_LINE_SIZE - sizeof(int);

  // location of the flag value within its line
#ifndef USE_REVERTED_FLAGS
  flag_pos = flag;
#else
  flag_pos = flag + RCCE_LINE_SIZE / sizeof(int) -1;
#endif

  // flush/invalidate so the read below sees the current flag value; the
  // flush matches what the other wait/test routines in this file do
#ifdef _OPENMP
  #pragma omp flush
#endif
  RC_cache_invalidate();

  if((*flag_pos) != val) {
    (*result) = 0;
  }
  else {
    (*result) = 1;
  }

  // Compare in the unsigned domain only after ruling out negative values:
  // a negative int converted to size_t would look like a huge length and
  // be clamped to a maximal copy instead of being rejected.
  if((*result) && tag && len > 0) {
    size_t nbytes = (size_t) len;
    if (nbytes > max_tag) nbytes = max_tag;
#ifndef USE_REVERTED_FLAGS
    // tag data follows the flag int
    memcpy_scc(tag, &((char*)flag)[sizeof(int)], nbytes);
#else
    // tag data starts at the beginning of the line
    memcpy_scc(tag, &((char*)flag)[0], nbytes);
#endif
  }

  return(RCCE_SUCCESS);
}
|
||||
#endif
|
||||
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: RCCE_barrier
|
||||
//--------------------------------------------------------------------------------------
|
||||
// very simple, linear barrier
|
||||
//--------------------------------------------------------------------------------------
|
||||
int RCCE_barrier(RCCE_COMM *comm) {

  // Blocking linear barrier over the members of "comm". Every UE flips its
  // barrier variable; the ROOT member polls until all other members show the
  // same value and then writes the release flag of each of them, while the
  // non-root members wait on their release flag.
  // Returns RCCE_SUCCESS, or the result of RCCE_error_return() as soon as
  // one of the underlying RCCE calls reports an error.
  int arrived = 0;
  int idx, err;
  int ROOT = 0;
  volatile unsigned char cyclechar[RCCE_LINE_SIZE];
  volatile unsigned char valchar[RCCE_LINE_SIZE];
  volatile char *cycle = (volatile char *)cyclechar;
  volatile char *val   = (volatile char *)valchar;

  if (RCCE_debug_synch)
    fprintf(STDERR,"UE %d has checked into barrier\n", RCCE_IAM);

#ifdef USE_FAT_BARRIER

  // flip local barrier variable
#ifndef USE_FLAG_EXPERIMENTAL
  err = RCCE_get(cyclechar, (t_vcharp)(comm->gather[RCCE_IAM]), RCCE_LINE_SIZE, RCCE_IAM);
#else
  err = RCCE_get_flag(cyclechar, (t_vcharp)(comm->gather[RCCE_IAM]), RCCE_LINE_SIZE, RCCE_IAM);
#endif
  if (err)
    return(RCCE_error_return(RCCE_debug_synch,err));

  *cycle = !(*cycle);

#ifndef USE_FLAG_EXPERIMENTAL
  err = RCCE_put((t_vcharp)(comm->gather[RCCE_IAM]), cyclechar, RCCE_LINE_SIZE, RCCE_IAM);
#else
  err = RCCE_put_flag((t_vcharp)(comm->gather[RCCE_IAM]), cyclechar, RCCE_LINE_SIZE, RCCE_IAM);
#endif
  if (err)
    return(RCCE_error_return(RCCE_debug_synch,err));

  // the same line is additionally put with the ROOT member as target UE
  err = RCCE_put((t_vcharp)(comm->gather[RCCE_IAM]), cyclechar, RCCE_LINE_SIZE, comm->member[ROOT]);
  if (err)
    return(RCCE_error_return(RCCE_debug_synch,err));

  if (RCCE_IAM == comm->member[ROOT]) {
    // poll the gather flags; once all of them equal "cycle"
    // (i.e. arrived == comm->size), all UEs have reached the barrier
    while (arrived != comm->size) {
      // member #0 is the ROOT itself, so counting starts at one
      arrived = 1;
      for (idx = 1; idx < comm->size; idx++) {
        /* copy flag value out of the comm buffer */
#ifndef USE_FLAG_EXPERIMENTAL
        err = RCCE_get(valchar, (t_vcharp)(comm->gather[idx]), RCCE_LINE_SIZE, RCCE_IAM);
#else
        err = RCCE_get_flag(valchar, (t_vcharp)(comm->gather[idx]), RCCE_LINE_SIZE, RCCE_IAM);
#endif
        if (err)
          return(RCCE_error_return(RCCE_debug_synch,err));
        if (*val == *cycle)
          arrived++;
      }
    }
    // set release flags
    for (idx = 1; idx < comm->size; idx++) {
      err = RCCE_flag_write(&(comm->release), *cycle, comm->member[idx]);
      if (err)
        return(RCCE_error_return(RCCE_debug_synch,err));
    }
  }
  else {
    err = RCCE_wait_until(comm->release, *cycle);
    if (err)
      return(RCCE_error_return(RCCE_debug_synch,err));
  }

#else // !USE_FAT_BARRIER

  // flip local barrier variable
#ifndef USE_FLAG_EXPERIMENTAL
  err = RCCE_get(cyclechar, (t_vcharp)(comm->gather), RCCE_LINE_SIZE, RCCE_IAM);
#else
  err = RCCE_get_flag(cyclechar, (t_vcharp)(comm->gather), RCCE_LINE_SIZE, RCCE_IAM);
#endif
  if (err)
    return(RCCE_error_return(RCCE_debug_synch,err));

  *cycle = !(*cycle);

#ifndef USE_FLAG_EXPERIMENTAL
  err = RCCE_put((t_vcharp)(comm->gather), cyclechar, RCCE_LINE_SIZE, RCCE_IAM);
#else
  err = RCCE_put_flag((t_vcharp)(comm->gather), cyclechar, RCCE_LINE_SIZE, RCCE_IAM);
#endif
  if (err)
    return(RCCE_error_return(RCCE_debug_synch,err));

  if (RCCE_IAM == comm->member[ROOT]) {
    // read "remote" gather flags; once all of them equal "cycle"
    // (i.e. arrived == comm->size), all UEs have reached the barrier
    while (arrived != comm->size) {
      // member #0 is the ROOT itself, so counting starts at one
      arrived = 1;
      for (idx = 1; idx < comm->size; idx++) {
        /* copy flag value out of the comm buffer */
#ifndef USE_FLAG_EXPERIMENTAL
        err = RCCE_get(valchar, (t_vcharp)(comm->gather), RCCE_LINE_SIZE,
                       comm->member[idx]);
#else
        err = RCCE_get_flag(valchar, (t_vcharp)(comm->gather), RCCE_LINE_SIZE,
                            comm->member[idx]);
#endif
        if (err)
          return(RCCE_error_return(RCCE_debug_synch,err));
        if (*val == *cycle)
          arrived++;
      }
    }
    // set release flags
    for (idx = 1; idx < comm->size; idx++) {
      err = RCCE_flag_write(&(comm->release), *cycle, comm->member[idx]);
      if (err)
        return(RCCE_error_return(RCCE_debug_synch,err));
    }
  }
  else {
    err = RCCE_wait_until(comm->release, *cycle);
    if (err) {
      return(RCCE_error_return(RCCE_debug_synch,err));
    }
  }

#endif // !USE_FAT_BARRIER

  if (RCCE_debug_synch)
    fprintf(STDERR,"UE %d has cleared barrier\n", RCCE_IAM);

  return(RCCE_SUCCESS);
}
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: RCCE_nb_barrier
|
||||
//--------------------------------------------------------------------------------------
|
||||
// non-blocking version of the linear barrier
|
||||
//--------------------------------------------------------------------------------------
|
||||
int RCCE_nb_barrier(RCCE_COMM *comm) {

  // Non-blocking variant of the linear barrier. Progress is kept in the
  // communicator (comm->label, comm->count, comm->cycle), so the function
  // can return RCCE_PENDING and be called again later:
  //   label 1 -> re-enter the ROOT's polling loop
  //   label 2 -> re-enter the non-root test of the release flag
  // Returns RCCE_SUCCESS once the barrier has been passed, RCCE_PENDING
  // while it is still open, or the result of RCCE_error_return() on error.
  int idx, err;
  int ROOT = 0;
  volatile unsigned char cyclechar[RCCE_LINE_SIZE];
  volatile unsigned char valchar[RCCE_LINE_SIZE];
#ifdef USE_FLAG_EXPERIMENTAL
  volatile char *cycle = (volatile char *)cyclechar;
  volatile char *val   = (volatile char *)valchar;
#else
  volatile int *cycle = (volatile int *)cyclechar;
  volatile int *val   = (volatile int *)valchar;
#endif

  // resume a pending barrier at the point where the previous call left off
  switch (comm->label) {
    case 1:  goto label1;
    case 2:  goto label2;
    default: break;
  }

  comm->count = 0;

  if (RCCE_debug_synch)
    fprintf(STDERR,"UE %d has checked into barrier\n", RCCE_IAM);

#ifdef USE_FAT_BARRIER

  // flip local barrier variable
#ifndef USE_FLAG_EXPERIMENTAL
  err = RCCE_get(cyclechar, (t_vcharp)(comm->gather[RCCE_IAM]), RCCE_LINE_SIZE, RCCE_IAM);
#else
  err = RCCE_get_flag(cyclechar, (t_vcharp)(comm->gather[RCCE_IAM]), RCCE_LINE_SIZE, RCCE_IAM);
#endif
  if (err)
    return(RCCE_error_return(RCCE_debug_synch,err));

  *cycle = !(*cycle);

#ifndef USE_FLAG_EXPERIMENTAL
  err = RCCE_put((t_vcharp)(comm->gather[RCCE_IAM]), cyclechar, RCCE_LINE_SIZE, RCCE_IAM);
#else
  err = RCCE_put_flag((t_vcharp)(comm->gather[RCCE_IAM]), cyclechar, RCCE_LINE_SIZE, RCCE_IAM);
#endif
  if (err)
    return(RCCE_error_return(RCCE_debug_synch,err));

  // the same line is additionally put with the ROOT member as target UE
  err = RCCE_put((t_vcharp)(comm->gather[RCCE_IAM]), cyclechar, RCCE_LINE_SIZE, comm->member[ROOT]);
  if (err)
    return(RCCE_error_return(RCCE_debug_synch,err));

  if (RCCE_IAM == comm->member[ROOT]) {
    // poll the gather flags; once all of them equal "cycle"
    // (i.e. comm->count == comm->size), all UEs have reached the barrier
    comm->cycle = *cycle;
label1:
    while (comm->count != comm->size) {
      // member #0 is the ROOT itself, so counting starts at one
      comm->count = 1;
      for (idx = 1; idx < comm->size; idx++) {
        /* copy flag value out of the comm buffer */
#ifndef USE_FLAG_EXPERIMENTAL
        err = RCCE_get(valchar, (t_vcharp)(comm->gather[idx]), RCCE_LINE_SIZE, RCCE_IAM);
#else
        err = RCCE_get_flag(valchar, (t_vcharp)(comm->gather[idx]), RCCE_LINE_SIZE, RCCE_IAM);
#endif
        if (err)
          return(RCCE_error_return(RCCE_debug_synch,err));
        if (*val == comm->cycle)
          comm->count++;
      }
      // not everybody has arrived yet: remember where to resume and leave
      if (comm->count != comm->size) {
        comm->label = 1;
        return(RCCE_PENDING);
      }
    }
    // set release flags
    for (idx = 1; idx < comm->size; idx++) {
      err = RCCE_flag_write(&(comm->release), comm->cycle, comm->member[idx]);
      if (err)
        return(RCCE_error_return(RCCE_debug_synch,err));
    }
  }
  else {
    int test;
    comm->cycle = *cycle;
label2:
    RCCE_test_flag(comm->release, comm->cycle, &test);
    if (test == 0) {
      comm->label = 2;
      return(RCCE_PENDING);
    }
  }

#else // !USE_FAT_BARRIER

  // flip local barrier variable
#ifndef USE_FLAG_EXPERIMENTAL
  err = RCCE_get(cyclechar, (t_vcharp)(comm->gather[0]), RCCE_LINE_SIZE, RCCE_IAM);
#else
  err = RCCE_get_flag(cyclechar, (t_vcharp)(comm->gather[0]), RCCE_LINE_SIZE, RCCE_IAM);
#endif
  if (err)
    return(RCCE_error_return(RCCE_debug_synch,err));

  *cycle = !(*cycle);

#ifndef USE_FLAG_EXPERIMENTAL
  err = RCCE_put((t_vcharp)(comm->gather[0]), cyclechar, RCCE_LINE_SIZE, RCCE_IAM);
#else
  err = RCCE_put_flag((t_vcharp)(comm->gather[0]), cyclechar, RCCE_LINE_SIZE, RCCE_IAM);
#endif
  if (err)
    return(RCCE_error_return(RCCE_debug_synch,err));

  if (RCCE_IAM == comm->member[ROOT]) {
    // read "remote" gather flags; once all of them equal "cycle"
    // (i.e. comm->count == comm->size), all UEs have reached the barrier
    comm->cycle = *cycle;
label1:
    while (comm->count != comm->size) {
      // member #0 is the ROOT itself, so counting starts at one
      comm->count = 1;
      for (idx = 1; idx < comm->size; idx++) {
        /* copy flag value out of the comm buffer */
#ifndef USE_FLAG_EXPERIMENTAL
        err = RCCE_get(valchar, (t_vcharp)(comm->gather[0]), RCCE_LINE_SIZE,
                       comm->member[idx]);
#else
        err = RCCE_get_flag(valchar, (t_vcharp)(comm->gather[0]), RCCE_LINE_SIZE,
                            comm->member[idx]);
#endif
        if (err)
          return(RCCE_error_return(RCCE_debug_synch,err));
        if (*val == comm->cycle)
          comm->count++;
      }
      // not everybody has arrived yet: remember where to resume and leave
      if (comm->count != comm->size) {
        comm->label = 1;
        return(RCCE_PENDING);
      }
    }
    // set release flags
    for (idx = 1; idx < comm->size; idx++) {
      err = RCCE_flag_write(&(comm->release), comm->cycle, comm->member[idx]);
      if (err)
        return(RCCE_error_return(RCCE_debug_synch,err));
    }
  }
  else {
    int test;
    comm->cycle = *cycle;
label2:
    RCCE_test_flag(comm->release, comm->cycle, &test);
    if (test == 0) {
      comm->label = 2;
      return(RCCE_PENDING);
    }
  }

#endif // !USE_FAT_BARRIER

  // barrier passed: the next call starts a fresh barrier
  comm->label = 0;

  if (RCCE_debug_synch)
    fprintf(STDERR,"UE %d has cleared barrier\n", RCCE_IAM);

  return(RCCE_SUCCESS);
}
|
||||
|
||||
#endif
|
||||
|
||||
void RCCE_fence() {
  // intentionally empty: this build performs no action for a fence
}
|
||||
|
||||
#endif
|
290
hermit/usr/ircce/iRCCE.h
Normal file
290
hermit/usr/ircce/iRCCE.h
Normal file
|
@ -0,0 +1,290 @@
|
|||
//
|
||||
// Copyright 2010 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// [2010-10-25] added support for non-blocking send/recv operations
|
||||
// - iRCCE_isend(), ..._test(), ..._wait(), ..._push()
|
||||
// - iRCCE_irecv(), ..._test(), ..._wait(), ..._push()
|
||||
// by Carsten Clauss, Chair for Operating Systems,
|
||||
// RWTH Aachen University
|
||||
//
|
||||
// [2010-11-12] extracted non-blocking code into separate library
|
||||
// by Carsten Scholtes, University of Bayreuth
|
||||
//
|
||||
// [2010-12-09] added functions for a convenient handling of multiple
|
||||
// pending non-blocking requests
|
||||
// by Jacek Galowicz, Chair for Operating Systems
|
||||
// RWTH Aachen University
|
||||
//
|
||||
// [2011-04-19] added wildcard mechanism (iRCCE_ANY_SOURCE) for receiving
|
||||
// a message from an arbitrary remote rank
|
||||
// by Simon Pickartz, Chair for Operating Systems,
|
||||
// RWTH Aachen University
|
||||
//
|
||||
// [2011-06-16] iRCCE_ANY_LENGTH wildcard mechanism can only be used in
|
||||
// the SINGLEBITFLAGS=0 case (-> bigflags must be enabled!)
|
||||
//
|
||||
// [2011-06-27] merged iRCCE_ANY_SOURCE branch with trunk (iRCCE_ANY_LENGTH)
|
||||
//
|
||||
// [2011-11-03] - renamed blocking (pipelined) send/recv functions to
|
||||
// iRCCE_ssend() / iRCCE_srecv() (strictly synchronous!)
|
||||
// - added non-blocking but synchronous send/recv functions:
|
||||
// iRCCE_issend() / iRCCE_isrecv()
|
||||
// by Carsten Clauss, Chair for Operating Systems,
|
||||
// RWTH Aachen University
|
||||
//
|
||||
// [2012-10-29] - added functions for handling "Tagged Flags"
|
||||
// iRCCE_flag_read/write_tagged(), iRCCE_test/wait_tagged()
|
||||
// by Carsten Clauss, Chair for Operating Systems,
|
||||
// RWTH Aachen University
|
||||
//
|
||||
|
||||
#ifndef IRCCE_H
|
||||
#define IRCCE_H
|
||||
|
||||
#include "RCCE.h"
|
||||
|
||||
#define iRCCE_VERSION "2.0"
|
||||
#define iRCCE_FLAIR
|
||||
|
||||
#define iRCCE_SUCCESS RCCE_SUCCESS
|
||||
#define iRCCE_ERROR -1
|
||||
#define iRCCE_PENDING -2
|
||||
#define iRCCE_RESERVED -3
|
||||
#define iRCCE_NOT_ENQUEUED -4
|
||||
|
||||
#if !defined(SINGLEBITFLAGS) && !defined(RCCE_VERSION)
|
||||
#define _iRCCE_ANY_LENGTH_
|
||||
extern const int iRCCE_ANY_LENGTH;
|
||||
#endif
|
||||
|
||||
#if !defined(SINGLEBITFLAGS)
|
||||
#ifdef _OPENMP
|
||||
#define iRCCE_MAX_TAGGED_LEN (RCCE_LINE_SIZE - 2 * sizeof(int))
|
||||
#else
|
||||
#define iRCCE_MAX_TAGGED_LEN (RCCE_LINE_SIZE - sizeof(int))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
extern const int iRCCE_ANY_SOURCE;
|
||||
|
||||
// State of one send request; this is the handle type taken by
// iRCCE_isend(), iRCCE_issend() and the iRCCE_isend_test/wait/cancel calls.
typedef struct _iRCCE_SEND_REQUEST {
|
||||
char *privbuf; // source buffer in local private memory (send buffer)
|
||||
t_vcharp combuf; // intermediate buffer in MPB
|
||||
size_t chunk; // size of MPB available for this message (bytes)
|
||||
size_t subchunk1; // sub-chunks for the pipelined message transfer
|
||||
size_t subchunk2; // (second sub-chunk, see subchunk1)
|
||||
RCCE_FLAG *ready; // flag indicating whether receiver is ready
|
||||
RCCE_FLAG *sent; // flag indicating whether message has been sent by source
|
||||
RCCE_FLAG_STATUS flag_set_value; // used for iRCCE_ANY_LENGTH wildcard
|
||||
size_t size; // size of message (bytes)
|
||||
int dest; // UE that will receive the message
|
||||
int sync; // flag indicating whether send is synchronous or not
|
||||
|
||||
size_t wsize; // offset within send buffer when putting in "chunk" bytes
|
||||
size_t remainder; // bytes remaining to be sent
|
||||
size_t nbytes; // number of bytes to be sent in single RCCE_put call
|
||||
char *bufptr; // running pointer inside privbuf for current location
|
||||
|
||||
int label; // jump/goto label for the reentrance of the respective poll function
|
||||
int finished; // flag that indicates whether the request has already been finished
|
||||
|
||||
struct _iRCCE_SEND_REQUEST *next; // link to another send request (presumably the next entry of the send queue - TODO confirm)
|
||||
} iRCCE_SEND_REQUEST;
|
||||
|
||||
|
||||
// State of one receive request; this is the handle type taken by
// iRCCE_irecv(), iRCCE_isrecv() and the iRCCE_irecv_test/wait/cancel calls.
// NOTE(review): several field comments were copied verbatim from the send
// request; they are reworded below as assumptions that should be confirmed
// against the receive implementation.
typedef struct _iRCCE_RECV_REQUEST {
|
||||
char *privbuf; // buffer in local private memory (presumably the receive destination; original comment said "send buffer" - TODO confirm)
|
||||
t_vcharp combuf; // intermediate buffer in MPB
|
||||
size_t chunk; // size of MPB available for this message (bytes)
|
||||
size_t subchunk1; // sub-chunks for the pipelined message transfer
|
||||
size_t subchunk2; // (second sub-chunk, see subchunk1)
|
||||
RCCE_FLAG *ready; // flag indicating whether receiver is ready
|
||||
RCCE_FLAG *sent; // flag indicating whether message has been sent by source
|
||||
RCCE_FLAG_STATUS flag_set_value; // used for iRCCE_ANY_LENGTH wildcard
|
||||
size_t size; // size of message (bytes)
|
||||
int source; // UE that will send the message
|
||||
int sync; // flag indicating whether recv is synchronous or not
|
||||
|
||||
size_t wsize; // offset within the buffer when transferring in "chunk" bytes (presumably the receive buffer here - TODO confirm)
|
||||
size_t remainder; // bytes remaining to be transferred (presumably: still to be received - TODO confirm)
|
||||
size_t nbytes; // number of bytes moved in a single transfer call (original comment named RCCE_put; presumably RCCE_get on this side - TODO confirm)
|
||||
char *bufptr; // running pointer inside privbuf for current location
|
||||
|
||||
int label; // jump/goto label for the reentrance of the respective poll function
|
||||
int finished; // flag that indicates whether the request has already been finished
|
||||
int started; // flag that indicates whether message parts have already been received
|
||||
|
||||
struct _iRCCE_RECV_REQUEST *next; // link to another receive request (presumably the next entry of a receive queue - TODO confirm)
|
||||
} iRCCE_RECV_REQUEST;
|
||||
|
||||
#define iRCCE_WAIT_LIST_RECV_TYPE 0
|
||||
#define iRCCE_WAIT_LIST_SEND_TYPE 1
|
||||
|
||||
// One node of a singly linked wait list (see iRCCE_WAIT_LIST).
typedef struct _iRCCE_WAIT_LISTELEM {
|
||||
int type; // presumably iRCCE_WAIT_LIST_RECV_TYPE or iRCCE_WAIT_LIST_SEND_TYPE - TODO confirm
|
||||
struct _iRCCE_WAIT_LISTELEM * next; // following node of the list
|
||||
void * req; // untyped request pointer (presumably an iRCCE_SEND_REQUEST* or iRCCE_RECV_REQUEST*, selected by "type" - TODO confirm)
|
||||
} iRCCE_WAIT_LISTELEM;
|
||||
|
||||
// Handle for a list of iRCCE_WAIT_LISTELEM nodes; this is the type taken by
// iRCCE_init_wait_list(), iRCCE_add_to_wait_list() and the
// iRCCE_test/wait_all/any functions.
typedef struct _iRCCE_WAIT_LIST {
|
||||
iRCCE_WAIT_LISTELEM * first; // pointer to the first node (name suggests list head)
|
||||
iRCCE_WAIT_LISTELEM * last; // pointer to the last node (name suggests list tail)
|
||||
} iRCCE_WAIT_LIST;
|
||||
|
||||
#ifdef AIR
|
||||
// Handle for one Atomic Increment Register (AIR). Without OpenMP the two
// members are pointers that iRCCE_init() sets to consecutive int slots of a
// MallocConfigReg() mapping; with OpenMP they are plain integers.
typedef volatile struct _iRCCE_AIR {
|
||||
#ifndef _OPENMP
|
||||
int * counter; // dereferenced by iRCCE_atomic_inc() to obtain the value
|
||||
int * init; // written by iRCCE_atomic_write(), read by iRCCE_atomic_read()
|
||||
#else
|
||||
int counter; // incremented by iRCCE_atomic_inc() inside an omp critical section
|
||||
int init; // kept equal to counter by iRCCE_atomic_inc()/iRCCE_atomic_write()
|
||||
#endif
|
||||
} iRCCE_AIR;
|
||||
#endif
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
//
|
||||
// THE iRCCE API:
|
||||
//
|
||||
// Initialize function:
|
||||
int iRCCE_init(void);
|
||||
//
|
||||
// Non-blocking send/recv functions:
|
||||
int iRCCE_isend(char *, ssize_t, int, iRCCE_SEND_REQUEST *);
|
||||
int iRCCE_isend_test(iRCCE_SEND_REQUEST *, int *);
|
||||
int iRCCE_isend_wait(iRCCE_SEND_REQUEST *);
|
||||
int iRCCE_isend_push(void);
|
||||
int iRCCE_irecv(char *, ssize_t, int, iRCCE_RECV_REQUEST *);
|
||||
int iRCCE_irecv_test(iRCCE_RECV_REQUEST *, int *);
|
||||
int iRCCE_irecv_wait(iRCCE_RECV_REQUEST *);
|
||||
int iRCCE_irecv_push(void);
|
||||
//
|
||||
// Pipelined send/recv functions: (synchronous and blocking)
|
||||
int iRCCE_ssend(char *, ssize_t, int);
|
||||
int iRCCE_srecv(char *, ssize_t, int);
|
||||
int iRCCE_srecv_test(char *, ssize_t, int, int*);
|
||||
//
|
||||
// Non-blocking pipelined send/recv functions:
|
||||
int iRCCE_issend(char *, ssize_t, int, iRCCE_SEND_REQUEST *);
|
||||
int iRCCE_isrecv(char *, ssize_t, int, iRCCE_RECV_REQUEST *);
|
||||
//
|
||||
// SCC-customized put/get and memcpy functions:
|
||||
int iRCCE_put(t_vcharp, t_vcharp, int, int);
|
||||
int iRCCE_get(t_vcharp, t_vcharp, int, int);
|
||||
void* iRCCE_memcpy_put(void*, const void*, size_t);
|
||||
void* iRCCE_memcpy_get(void*, const void*, size_t);
|
||||
t_vcharp iRCCE_malloc(size_t);
|
||||
#define iRCCE_memcpy iRCCE_memcpy_put
|
||||
//
|
||||
// Blocking and non-blocking 'probe' functions for incoming messages:
|
||||
int iRCCE_probe(int, int*);
|
||||
int iRCCE_iprobe(int, int*, int*);
|
||||
//
|
||||
// Wait/test-all/any functions:
|
||||
void iRCCE_init_wait_list(iRCCE_WAIT_LIST*);
|
||||
void iRCCE_add_to_wait_list(iRCCE_WAIT_LIST*, iRCCE_SEND_REQUEST *, iRCCE_RECV_REQUEST *);
|
||||
int iRCCE_test_all(iRCCE_WAIT_LIST*, int *);
|
||||
int iRCCE_wait_all(iRCCE_WAIT_LIST*);
|
||||
int iRCCE_test_any(iRCCE_WAIT_LIST*, iRCCE_SEND_REQUEST **, iRCCE_RECV_REQUEST **);
|
||||
int iRCCE_wait_any(iRCCE_WAIT_LIST*, iRCCE_SEND_REQUEST **, iRCCE_RECV_REQUEST **);
|
||||
//
|
||||
// Query functions for request handle parameters:
|
||||
int iRCCE_get_dest(iRCCE_SEND_REQUEST*);
|
||||
int iRCCE_get_source(iRCCE_RECV_REQUEST*);
|
||||
int iRCCE_get_size(iRCCE_SEND_REQUEST*, iRCCE_RECV_REQUEST*);
|
||||
int iRCCE_get_length(void);
|
||||
//
|
||||
// Cancel functions for yet not started non-blocking requests:
|
||||
int iRCCE_isend_cancel(iRCCE_SEND_REQUEST *, int *);
|
||||
int iRCCE_irecv_cancel(iRCCE_RECV_REQUEST *, int *);
|
||||
//
|
||||
// Functions for handling tagged flags: (need whole cache line per flag)
|
||||
#ifndef SINGLEBITFLAGS
|
||||
int iRCCE_flag_alloc_tagged(RCCE_FLAG *);
|
||||
int iRCCE_flag_write_tagged(RCCE_FLAG *, RCCE_FLAG_STATUS, int, void *, int);
|
||||
int iRCCE_flag_read_tagged(RCCE_FLAG, RCCE_FLAG_STATUS *, int, void *, int);
|
||||
int iRCCE_wait_tagged(RCCE_FLAG, RCCE_FLAG_STATUS, void *, int);
|
||||
int iRCCE_test_tagged(RCCE_FLAG, RCCE_FLAG_STATUS, int *, void *, int);
|
||||
int iRCCE_get_max_tagged_len(void);
|
||||
#endif
|
||||
//
|
||||
// Functions for handling Atomic Increment Registers (AIR):
|
||||
#ifdef AIR
|
||||
int iRCCE_atomic_alloc(iRCCE_AIR **);
|
||||
int iRCCE_atomic_inc(iRCCE_AIR*, int*);
|
||||
int iRCCE_atomic_read(iRCCE_AIR*, int*);
|
||||
int iRCCE_atomic_write(iRCCE_AIR*, int);
|
||||
#endif
|
||||
//
|
||||
// Improved Collectives:
|
||||
int iRCCE_barrier(RCCE_COMM*);
|
||||
int iRCCE_bcast(char *, size_t, int, RCCE_COMM);
|
||||
int iRCCE_mcast(char *, size_t, int);
|
||||
int iRCCE_msend(char *, ssize_t);
|
||||
int iRCCE_mrecv(char *, ssize_t, int);
|
||||
//
|
||||
// Functions from the GORY RCCE interface mapped to iRCCE:
|
||||
t_vcharp iRCCE_malloc(size_t);
|
||||
int iRCCE_flag_alloc(RCCE_FLAG *);
|
||||
int iRCCE_flag_write(RCCE_FLAG *, RCCE_FLAG_STATUS, int);
|
||||
int iRCCE_flag_read(RCCE_FLAG, RCCE_FLAG_STATUS *, int);
|
||||
int iRCCE_wait_until(RCCE_FLAG, RCCE_FLAG_STATUS);
|
||||
//
|
||||
// Please Note: Since we're running in NON-GORY mode, there are no "free()" functions!
|
||||
//
|
||||
///////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Just for convenience:
|
||||
#if 1
|
||||
#define RCCE_isend iRCCE_isend
|
||||
#define RCCE_isend_test iRCCE_isend_test
|
||||
#define RCCE_isend_wait iRCCE_isend_wait
|
||||
#define RCCE_isend_push iRCCE_isend_push
|
||||
#define RCCE_irecv iRCCE_irecv
|
||||
#define RCCE_irecv_test iRCCE_irecv_test
|
||||
#define RCCE_irecv_wait iRCCE_irecv_wait
|
||||
#define RCCE_irecv_push iRCCE_irecv_push
|
||||
#define RCCE_SEND_REQUEST iRCCE_SEND_REQUEST
|
||||
#define RCCE_RECV_REQUEST iRCCE_RECV_REQUEST
|
||||
#ifdef _iRCCE_TAGGED_FLAGS_
|
||||
#define RCCE_flag_write_tagged iRCCE_flag_write_tagged
|
||||
#define RCCE_flag_read_tagged iRCCE_flag_read_tagged
|
||||
#define RCCE_wait_tagged iRCCE_wait_tagged
|
||||
#define RCCE_test_tagged iRCCE_test_tagged
|
||||
#define RCCE_flag_alloc_tagged iRCCE_flag_alloc_tagged
|
||||
#define RCCE_flag_free_tagged iRCCE_flag_free_tagged
|
||||
#endif
|
||||
#endif
|
||||
//
|
||||
#if 1
|
||||
#define iRCCE_send iRCCE_ssend
|
||||
#define iRCCE_recv iRCCE_srecv
|
||||
#define iRCCE_recv_test iRCCE_srecv_test
|
||||
#endif
|
||||
//
|
||||
#if 1
|
||||
#define iRCCE_issend_test iRCCE_isend_test
|
||||
#define iRCCE_issend_wait iRCCE_isend_wait
|
||||
#define iRCCE_issend_push iRCCE_isend_push
|
||||
#define iRCCE_isrecv_test iRCCE_irecv_test
|
||||
#define iRCCE_isrecv_wait iRCCE_irecv_wait
|
||||
#define iRCCE_isrecv_push iRCCE_irecv_push
|
||||
#endif
|
||||
//
|
||||
///////////////////////////////////////////////////////////////
|
||||
|
||||
#endif
|
||||
|
195
hermit/usr/ircce/iRCCE_admin.c
Normal file
195
hermit/usr/ircce/iRCCE_admin.c
Normal file
|
@ -0,0 +1,195 @@
|
|||
//***************************************************************************************
|
||||
// Administrative routines.
|
||||
//***************************************************************************************
|
||||
//
|
||||
// Author: Rob F. Van der Wijngaart
|
||||
// Intel Corporation
|
||||
// Date: 008/30/2010
|
||||
//
|
||||
//***************************************************************************************
|
||||
//
|
||||
//
|
||||
// Copyright 2010 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// [2010-10-25] added support for non-blocking send/recv operations
|
||||
// - iRCCE_isend(), ..._test(), ..._wait(), ..._push()
|
||||
// - iRCCE_irecv(), ..._test(), ..._wait(), ..._push()
|
||||
// by Carsten Clauss, Chair for Operating Systems,
|
||||
// RWTH Aachen University
|
||||
//
|
||||
// [2010-11-12] extracted non-blocking code into separate library
|
||||
// by Carsten Scholtes
|
||||
//
|
||||
// [2011-02-21] added support for multiple incoming queues
|
||||
// (one recv queue per remote rank)
|
||||
//
|
||||
// [2011-04-19] added wildcard mechanism (iRCCE_ANY_SOURCE) for receiving
|
||||
// a message from an arbitrary remote rank
|
||||
// by Simon Pickartz, Chair for Operating Systems,
|
||||
// RWTH Aachen University
|
||||
//
|
||||
// [2011-06-27] merged iRCCE_ANY_SOURCE branch with trunk (iRCCE_ANY_LENGTH)
|
||||
//
|
||||
|
||||
#include "RCCE.h"
|
||||
#if defined(SCC) && !defined(__hermit__)
|
||||
#include "SCC_API.h"
|
||||
#endif
|
||||
#include "iRCCE_lib.h"
|
||||
|
||||
// send request queue
|
||||
iRCCE_SEND_REQUEST* iRCCE_isend_queue;
|
||||
// recv request queue
|
||||
iRCCE_RECV_REQUEST* iRCCE_irecv_queue[RCCE_MAXNP];
|
||||
|
||||
// recv request queue for those with source = iRCCE_ANY_SOURCE
|
||||
iRCCE_RECV_REQUEST* iRCCE_irecv_any_source_queue;
|
||||
|
||||
// global variables for inquiring recent source rank and recent message length
|
||||
int iRCCE_recent_source = -1;
|
||||
int iRCCE_recent_length = 0;
|
||||
|
||||
#ifdef _iRCCE_ANY_LENGTH_
|
||||
const int iRCCE_ANY_LENGTH = -1 >> 1;
|
||||
#endif
|
||||
|
||||
const int iRCCE_ANY_SOURCE = -1;
|
||||
|
||||
#ifdef AIR
|
||||
iRCCE_AIR iRCCE_atomic_inc_regs[2*RCCE_MAXNP];
|
||||
int iRCCE_atomic_alloc_counter = 0;
|
||||
iRCCE_AIR* iRCCE_atomic_barrier[2];
|
||||
#endif
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: iRCCE_init
|
||||
//--------------------------------------------------------------------------------------
|
||||
// initialize the library
|
||||
//--------------------------------------------------------------------------------------
|
||||
int iRCCE_init(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
#ifdef AIR
|
||||
#ifndef _OPENMP
|
||||
int * air_base = (int *) MallocConfigReg(FPGA_BASE + 0xE000);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
for(i=0; i<RCCE_MAXNP; i++) {
|
||||
iRCCE_irecv_queue[i] = NULL;
|
||||
}
|
||||
|
||||
iRCCE_isend_queue = NULL;
|
||||
|
||||
iRCCE_irecv_any_source_queue = NULL;
|
||||
|
||||
#ifdef AIR
|
||||
#ifndef _OPENMP
|
||||
// Assign and Initialize First Set of Atomic Increment Registers
|
||||
for (i = 0; i < RCCE_MAXNP; i++)
|
||||
{
|
||||
iRCCE_atomic_inc_regs[i].counter = air_base + 2*i;
|
||||
iRCCE_atomic_inc_regs[i].init = air_base + 2*i + 1;
|
||||
if(RCCE_IAM == 0)
|
||||
*iRCCE_atomic_inc_regs[i].init = 0;
|
||||
}
|
||||
// Assign and Initialize Second Set of Atomic Increment Registers
|
||||
air_base = (int *) MallocConfigReg(FPGA_BASE + 0xF000);
|
||||
for (i = 0; i < RCCE_MAXNP; i++)
|
||||
{
|
||||
iRCCE_atomic_inc_regs[RCCE_MAXNP+i].counter = air_base + 2*i;
|
||||
iRCCE_atomic_inc_regs[RCCE_MAXNP+i].init = air_base + 2*i + 1;
|
||||
if(RCCE_IAM == 0)
|
||||
*iRCCE_atomic_inc_regs[RCCE_MAXNP+i].init = 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
// We need two AIRs for iRCCE_barrier();
|
||||
iRCCE_atomic_alloc(&iRCCE_atomic_barrier[0]);
|
||||
iRCCE_atomic_alloc(&iRCCE_atomic_barrier[1]);
|
||||
#endif
|
||||
|
||||
RCCE_barrier(&RCCE_COMM_WORLD);
|
||||
|
||||
return (iRCCE_SUCCESS);
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// Functions from the GORY RCCE interface:
|
||||
//--------------------------------------------------------------------------------------
|
||||
// ... (more or less) just wrapped by respective iRCCE functions
|
||||
//--------------------------------------------------------------------------------------
|
||||
|
||||
t_vcharp iRCCE_malloc(size_t size)
|
||||
{
|
||||
t_vcharp result;
|
||||
int count;
|
||||
|
||||
// new flag takes exactly one cache line, whether using single bit flags or not
|
||||
if (size % RCCE_LINE_SIZE != 0) return NULL;
|
||||
|
||||
// if chunk size becomes zero, we have allocated too many flags
|
||||
if (size > RCCE_chunk) return NULL;
|
||||
|
||||
result = RCCE_flags_start;
|
||||
|
||||
// reduce maximum size of message payload chunk
|
||||
RCCE_chunk -= size;
|
||||
|
||||
// move running pointer to next available flags line
|
||||
RCCE_flags_start += size;
|
||||
|
||||
// move running pointer to new start of payload data area
|
||||
RCCE_buff_ptr += size;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
int iRCCE_flag_alloc(RCCE_FLAG *flag)
{
  // Thin wrapper: forwards to the tagged-flag allocator unless the library
  // is built with SINGLEBITFLAGS, in which case plain RCCE is used.
#ifdef SINGLEBITFLAGS
  return RCCE_flag_alloc(flag);
#else
  return iRCCE_flag_alloc_tagged(flag);
#endif
}
|
||||
|
||||
int iRCCE_flag_write(RCCE_FLAG *flag, RCCE_FLAG_STATUS val, int ID)
{
  // Thin wrapper: writes the flag through the tagged-flag call with no tag
  // (NULL, length 0) unless built with SINGLEBITFLAGS, in which case plain
  // RCCE is used.
#ifdef SINGLEBITFLAGS
  return RCCE_flag_write(flag, val, ID);
#else
  return iRCCE_flag_write_tagged(flag, val, ID, NULL, 0);
#endif
}
|
||||
|
||||
int iRCCE_flag_read(RCCE_FLAG flag, RCCE_FLAG_STATUS *val, int ID)
{
  // Thin wrapper: reads the flag through the tagged-flag call with no tag
  // (NULL, length 0) unless built with SINGLEBITFLAGS, in which case plain
  // RCCE is used.
#ifdef SINGLEBITFLAGS
  return RCCE_flag_read(flag, val, ID);
#else
  return iRCCE_flag_read_tagged(flag, val, ID, NULL, 0);
#endif
}
|
||||
|
||||
int iRCCE_wait_until(RCCE_FLAG flag, RCCE_FLAG_STATUS val)
{
  // Waits until "flag" has the value "val" and returns the status of the
  // underlying call. Without SINGLEBITFLAGS the tagged-flag wait is used
  // with no tag (NULL, length 0); with SINGLEBITFLAGS the plain RCCE call
  // is used, like in the sibling iRCCE_flag_* wrappers.
#if !defined(SINGLEBITFLAGS)
  return iRCCE_wait_tagged(flag, val, NULL, 0);
#else
  // must be the RCCE function: calling iRCCE_wait_until() here would
  // recurse into this very wrapper without end
  return RCCE_wait_until(flag, val);
#endif
}
|
195
hermit/usr/ircce/iRCCE_atomic.c
Normal file
195
hermit/usr/ircce/iRCCE_atomic.c
Normal file
|
@ -0,0 +1,195 @@
|
|||
//***************************************************************************************
|
||||
// Functions for handling Atomic Increment Registers (AIR).
|
||||
//***************************************************************************************
|
||||
//
|
||||
// Copyright 2012, Chair for Operating Systems, RWTH Aachen University
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
|
||||
#include "iRCCE_lib.h"
|
||||
|
||||
#ifdef AIR
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: iRCCE_atomic_alloc
|
||||
//--------------------------------------------------------------------------------------
|
||||
// Allocates a new AIR register; returns iRCCE_ERROR if all AIRs are already allocated
|
||||
//--------------------------------------------------------------------------------------
|
||||
int iRCCE_atomic_alloc(iRCCE_AIR** reg)
{
  // Hands out the next unused Atomic Increment Register via "*reg" and
  // resets it to 0 with iRCCE_atomic_write().
  // Up to 2 * RCCE_NP registers can be allocated: allocation number k uses
  // entry RC_COREID[k] of the first register set for k < RCCE_NP, and entry
  // RC_COREID[k - RCCE_NP] of the second set (offset RCCE_MAXNP in
  // iRCCE_atomic_inc_regs) for k >= RCCE_NP.
  // Returns iRCCE_SUCCESS, or iRCCE_ERROR once all registers are in use.
  int slot = iRCCE_atomic_alloc_counter;
  int next_reg;

  if (slot >= 2 * RCCE_NP) {
    return iRCCE_ERROR;
  }

  if (slot < RCCE_NP) {
    next_reg = RC_COREID[slot];
  }
  else {
    // second register set: wrap the index back into the range of the
    // participating UEs instead of reading RC_COREID beyond entry RCCE_NP-1
    // (the original tested "> RCCE_NP" and indexed with the raw counter)
    next_reg = RC_COREID[slot - RCCE_NP] + RCCE_MAXNP;
  }

  (*reg) = &iRCCE_atomic_inc_regs[next_reg];

#ifdef _OPENMP
  // only the master thread advances the shared allocation counter
#pragma omp master
  {
    iRCCE_atomic_alloc_counter++;
  }
#pragma omp barrier
#else
  iRCCE_atomic_alloc_counter++;
#endif

  iRCCE_atomic_write((*reg), 0);

  return iRCCE_SUCCESS;
}
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: iRCCE_atomic_inc
|
||||
//--------------------------------------------------------------------------------------
|
||||
// Increments an AIR register and returns its previous content
|
||||
//--------------------------------------------------------------------------------------
|
||||
int iRCCE_atomic_inc(iRCCE_AIR* reg, int* value)
{
  // Increments the register "reg" and stores the value obtained from it in
  // "*value". "value" may be NULL if the caller is not interested in the
  // result. Always returns iRCCE_SUCCESS.
  int discard;

  // redirect a NULL result pointer to a scratch variable
  // (the original assigned "&value", i.e. the address of the pointer itself,
  //  which left the NULL case broken)
  if(value == NULL) value = &discard;

#ifndef _OPENMP
  // a single read of the counter register; presumably the hardware performs
  // the increment as a side effect of this read - TODO confirm against the
  // SCC documentation
  (*value) = (*reg->counter);
#else
#pragma omp critical
  {
    (*value) = reg->counter;
    reg->counter++;
    reg->init = reg->counter;
  }
#endif

  return iRCCE_SUCCESS;
}
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: iRCCE_atomic_read
|
||||
//--------------------------------------------------------------------------------------
|
||||
// Returns the current value of an AIR register
|
||||
//--------------------------------------------------------------------------------------
|
||||
int iRCCE_atomic_read(iRCCE_AIR* reg, int* value)
{
	// Copies the current register content into *value without modifying it.
	// Always returns iRCCE_SUCCESS.
#ifdef _OPENMP
	// emulation: the register is an ordinary struct guarded by a critical section
#pragma omp critical
	{
		(*value) = reg->init;
	}
#else
	// read through the pointer stored in reg->init
	(*value) = (*reg->init);
#endif

	return iRCCE_SUCCESS;
}
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: iRCCE_atomic_write
|
||||
//--------------------------------------------------------------------------------------
|
||||
// Initializes an AIR register by writing a start value
|
||||
//--------------------------------------------------------------------------------------
|
||||
int iRCCE_atomic_write(iRCCE_AIR* reg, int value)
{
	// Sets the register to the given start value.
	// Always returns iRCCE_SUCCESS.
#ifdef _OPENMP
	// emulation: keep both fields of the emulated register in step
#pragma omp critical
	{
		reg->counter = value;
		reg->init = value;
	}
#else
	// write through the pointer stored in reg->init
	(*reg->init) = value;
#endif

	return iRCCE_SUCCESS;
}
|
||||
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: iRCCE_barrier
|
||||
//--------------------------------------------------------------------------------------
|
||||
// A barrier version based on the Atomic Increment Registers (AIR); if AIRs are not
|
||||
// supported, the function makes a fall-back to the common RCCE_barrier().
|
||||
//--------------------------------------------------------------------------------------
|
||||
|
||||
static void RC_wait(int wait) {
|
||||
#ifndef _OPENMP
|
||||
asm volatile( "movl %%eax,%%ecx\n\t"
|
||||
"test:nop\n\t"
|
||||
"loop test"
|
||||
: /* no output registers */
|
||||
: "a" (wait)
|
||||
: "%ecx" );
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
static int idx = 0;
|
||||
static unsigned int rnd = 0;
|
||||
#ifdef _OPENMP
|
||||
#pragma omp threadprivate (idx, rnd)
|
||||
#endif
|
||||
|
||||
int iRCCE_barrier(RCCE_COMM *comm)
|
||||
{
|
||||
int backoff = BACKOFF_MIN, wait, i = 0;
|
||||
int counter;
|
||||
|
||||
if(comm == NULL) comm = &RCCE_COMM_WORLD;
|
||||
|
||||
if (comm == &RCCE_COMM_WORLD) {
|
||||
|
||||
iRCCE_atomic_inc(iRCCE_atomic_barrier[idx], &counter);
|
||||
if (counter < (comm->size-1))
|
||||
{
|
||||
iRCCE_atomic_read(iRCCE_atomic_barrier[idx], &counter);
|
||||
while (counter > 0)
|
||||
{
|
||||
rnd = rnd * 1103515245u + 12345u;
|
||||
wait = BACKOFF_MIN + (rnd % (backoff << i));
|
||||
RC_wait(wait);
|
||||
if (wait < BACKOFF_MAX) i++;
|
||||
|
||||
iRCCE_atomic_read(iRCCE_atomic_barrier[idx], &counter);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
iRCCE_atomic_write(iRCCE_atomic_barrier[idx], 0);
|
||||
}
|
||||
|
||||
idx = !idx;
|
||||
|
||||
return(RCCE_SUCCESS);
|
||||
}
|
||||
else
|
||||
{
|
||||
return RCCE_barrier(comm);
|
||||
}
|
||||
}
|
||||
|
||||
#else // !AIR
|
||||
|
||||
int iRCCE_barrier(RCCE_COMM *comm)
{
	// Without AIR support this is RCCE_barrier(); NULL selects the world
	// communicator.
	RCCE_COMM *target = (comm != NULL) ? comm : &RCCE_COMM_WORLD;

	return RCCE_barrier(target);
}
|
||||
|
||||
#endif // !AIR
|
78
hermit/usr/ircce/iRCCE_get.c
Normal file
78
hermit/usr/ircce/iRCCE_get.c
Normal file
|
@ -0,0 +1,78 @@
|
|||
//***************************************************************************************
|
||||
// Get data from communication buffer.
|
||||
//***************************************************************************************
|
||||
//
|
||||
// Author: Rob F. Van der Wijngaart
|
||||
// Intel Corporation
|
||||
// Date: 008/30/2010
|
||||
//
|
||||
//***************************************************************************************
|
||||
//
|
||||
// Copyright 2010 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// [2010-11-03] switched to SCC-optimized memcpy() functions in scc_memcpy.h:
|
||||
// - memcpy_to_mpb()
|
||||
// - memcpy_from_mpb()
|
||||
// by Stefan Lankes, Carsten Clauss, Chair for Operating Systems,
|
||||
// RWTH Aachen University
|
||||
//
|
||||
#include "iRCCE_lib.h"
|
||||
|
||||
#if (defined COPPERRIDGE || defined SCC) && !defined(__hermit__)
|
||||
#include "scc_memcpy.h"
|
||||
#endif
|
||||
|
||||
void* iRCCE_memcpy_get(void *dest, const void *src, size_t count)
{
	// Copies count bytes from src to dest and returns the result of the
	// underlying copy routine: memcpy_from_mpb() on SCC builds outside
	// HermitCore, plain memcpy() everywhere else.
#if !defined(__hermit__) && (defined COPPERRIDGE || defined SCC)
	void *copied = memcpy_from_mpb(dest, src, count);
#else
	void *copied = memcpy(dest, src, count);
#endif
	return copied;
}
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: iRCCE_get
|
||||
//--------------------------------------------------------------------------------------
|
||||
// copy data from address "source" in the remote MPB to address "target" in either the
|
||||
// local MPB, or in the calling UE's private memory. We do not test to see if a move
|
||||
// into the calling UE's private memory stays within allocated memory *
|
||||
//--------------------------------------------------------------------------------------
|
||||
int iRCCE_get(
|
||||
t_vcharp target, // target buffer, MPB or private memory
|
||||
t_vcharp source, // source buffer, MPB
|
||||
int num_bytes, // number of bytes to copy (must be multiple of cache line size
|
||||
int ID // rank of source UE
|
||||
) {
|
||||
|
||||
// in non-GORY mode we only need to retain the MPB source shift; we
|
||||
// already know the source is in the MPB, not private memory
|
||||
source = RCCE_comm_buffer[ID]+(source-RCCE_comm_buffer[RCCE_IAM]);
|
||||
|
||||
// do the actual copy, making sure we copy fresh data
|
||||
#ifdef _OPENMP
|
||||
#pragma omp flush
|
||||
#endif
|
||||
RC_cache_invalidate();
|
||||
|
||||
iRCCE_memcpy_get((void *)target, (void *)source, num_bytes);
|
||||
|
||||
// flush data to make sure it is visible to all threads; cannot use a flush list
|
||||
// because it concerns malloced space
|
||||
#ifdef _OPENMP
|
||||
#pragma omp flush
|
||||
#endif
|
||||
return(iRCCE_SUCCESS);
|
||||
}
|
709
hermit/usr/ircce/iRCCE_irecv.c
Normal file
709
hermit/usr/ircce/iRCCE_irecv.c
Normal file
|
@ -0,0 +1,709 @@
|
|||
//***************************************************************************************
|
||||
// Synchronized receive routines.
|
||||
//***************************************************************************************
|
||||
//
|
||||
// Author: Rob F. Van der Wijngaart
|
||||
// Intel Corporation
|
||||
// Date: 008/30/2010
|
||||
//
|
||||
//***************************************************************************************
|
||||
//
|
||||
// Copyright 2010 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// [2010-10-25] added support for non-blocking send/recv operations
|
||||
// - iRCCE_isend(), ..._test(), ..._wait(), ..._push()
|
||||
// - iRCCE_irecv(), ..._test(), ..._wait(), ..._push()
|
||||
// by Carsten Clauss, Chair for Operating Systems,
|
||||
// RWTH Aachen University
|
||||
//
|
||||
// [2010-11-12] extracted non-blocking code into separate library
|
||||
// by Carsten Scholtes
|
||||
//
|
||||
// [2010-12-09] added cancel functions for non-blocking send/recv requests
|
||||
// by Carsten Clauss
|
||||
//
|
||||
// [2011-02-21] added support for multiple incoming queues
|
||||
// (one recv queue per remote rank)
|
||||
//
|
||||
// [2011-04-19] added wildcard mechanism (iRCCE_ANY_SOURCE) for receiving
|
||||
// a message from an arbitrary remote rank
|
||||
// by Simon Pickartz, Chair for Operating Systems,
|
||||
// RWTH Aachen University
|
||||
//
|
||||
// [2011-06-27] merged iRCCE_ANY_SOURCE branch with trunk (iRCCE_ANY_LENGTH)
|
||||
//
|
||||
// [2011-08-02] added iRCCE_iprobe() function for probing for incoming messages
|
||||
//
|
||||
// [2011-11-03] added non-blocking but synchronous send/recv functions:
|
||||
// iRCCE_issend() / iRCCE_isrecv()
|
||||
//
|
||||
|
||||
#include "iRCCE_lib.h"
|
||||
|
||||
#if (defined COPPERRIDGE || defined SCC) && !defined(__hermit__)
|
||||
#include "scc_memcpy.h"
|
||||
#else
|
||||
#define memcpy_scc memcpy
|
||||
#endif
|
||||
|
||||
#ifdef SINGLEBITFLAGS
|
||||
#warning iRCCE_ANY_LENGTH: for using this wildcard, SINGLEBITFLAGS must be disabled! (make SINGLEBITFLAGS=0)
|
||||
#endif
|
||||
|
||||
#ifdef RCCE_VERSION
|
||||
#warning iRCCE_ANY_LENGTH: for using this wildcard, iRCCE must be built against RCCE release V1.0.13!
|
||||
#endif
|
||||
|
||||
static int iRCCE_push_recv_request(iRCCE_RECV_REQUEST *request) {

	// Progress routine of a single receive request. The transfer runs in
	// three stages (full chunks, remaining whole cache lines, final partial
	// cache line). Whenever the sender's flag is not yet set, the stage is
	// recorded in request->label and iRCCE_PENDING is returned; the next
	// call jumps straight back to that point. Returns iRCCE_SUCCESS once
	// the whole message has arrived.

	char padline[RCCE_LINE_SIZE];	// bounce buffer for the final partial cache line
	int test;			// result of iRCCE_test_flag()

	if(request->finished) return(iRCCE_SUCCESS);

	// synchronous requests are driven by their own routine
	if(request->sync) return iRCCE_push_srecv_request(request);

	// resume at the point where the previous call gave up
	switch(request->label) {
	case 1: goto label1;
	case 2: goto label2;
	case 3: goto label3;
	default: break;
	}

#ifdef _iRCCE_ANY_LENGTH_
	// the flag value doubles as the message length
	RCCE_flag_read(*(request->sent), &(request->flag_set_value), RCCE_IAM);
	if(request->flag_set_value == 0) {
		return(iRCCE_PENDING);
	}
	request->size = (size_t)request->flag_set_value;
#endif

	// stage 1: receive data in units of the available MPB chunk size
	while(request->wsize < (request->size / request->chunk) * request->chunk) {
		request->bufptr = request->privbuf + request->wsize;
		request->nbytes = request->chunk;
label1:
		iRCCE_test_flag(*(request->sent), request->flag_set_value, &test);
		if(!test) {
			request->label = 1;
			return(iRCCE_PENDING);
		}
		request->started = 1;

		RCCE_flag_write(request->sent, RCCE_FLAG_UNSET, RCCE_IAM);
		// copy data from source's MPB space to private memory
		iRCCE_get((t_vcharp)request->bufptr, request->combuf, request->nbytes, request->source);

		// tell the source that its comm buffer has been drained
		RCCE_flag_write(request->ready, request->flag_set_value, request->source);

		request->wsize += request->chunk;
	}

	request->remainder = request->size % request->chunk;
	// nothing left over: done
	if(!request->remainder) goto done;

	// stage 2: whole cache lines of the remainder
	request->bufptr = request->privbuf + (request->size / request->chunk) * request->chunk;
	request->nbytes = request->remainder - request->remainder % RCCE_LINE_SIZE;
	if(request->nbytes) {
label2:
		iRCCE_test_flag(*(request->sent), request->flag_set_value, &test);
		if(!test) {
			request->label = 2;
			return(iRCCE_PENDING);
		}
		request->started = 1;

		RCCE_flag_write(request->sent, RCCE_FLAG_UNSET, RCCE_IAM);
		// copy data from source's MPB space to private memory
		iRCCE_get((t_vcharp)request->bufptr, request->combuf, request->nbytes, request->source);

		// tell the source that its comm buffer has been drained
		RCCE_flag_write(request->ready, request->flag_set_value, request->source);
	}

	request->remainder = request->size % request->chunk;
	request->remainder = request->remainder % RCCE_LINE_SIZE;
	if(!request->remainder) goto done;

	// stage 3: fewer bytes than a cache line are left. A full line is
	// fetched into padline and only the bytes that belong to the message
	// are copied on to the destination.
	request->bufptr = request->privbuf + (request->size / request->chunk) * request->chunk + request->nbytes;
	request->nbytes = RCCE_LINE_SIZE;
label3:
	iRCCE_test_flag(*(request->sent), request->flag_set_value, &test);
	if(!test) {
		request->label = 3;
		return(iRCCE_PENDING);
	}
	request->started = 1;

	RCCE_flag_write(request->sent, RCCE_FLAG_UNSET, RCCE_IAM);
	// copy data from source's MPB space to the bounce buffer
	iRCCE_get((t_vcharp)padline, request->combuf, request->nbytes, request->source);
	memcpy_scc(request->bufptr,padline,request->remainder);

	// tell the source that its comm buffer has been drained
	RCCE_flag_write(request->ready, request->flag_set_value, request->source);

done:
	// remember source and length of the most recently completed receive
	if(iRCCE_recent_source != request->source) iRCCE_recent_source = request->source;
	if(iRCCE_recent_length != request->size) iRCCE_recent_length = request->size;
	request->finished = 1;
	return(iRCCE_SUCCESS);
}
|
||||
|
||||
static void iRCCE_init_recv_request(
|
||||
char *privbuf, // source buffer in local private memory (send buffer)
|
||||
t_vcharp combuf, // intermediate buffer in MPB
|
||||
size_t chunk, // size of MPB available for this message (bytes)
|
||||
RCCE_FLAG *ready, // flag indicating whether receiver is ready
|
||||
RCCE_FLAG *sent, // flag indicating whether message has been sent by source
|
||||
size_t size, // size of message (bytes)
|
||||
int source, // UE that will send the message
|
||||
int sync, // flag indicating whether recv is synchronous or not
|
||||
iRCCE_RECV_REQUEST *request
|
||||
) {
|
||||
|
||||
request->privbuf = privbuf;
|
||||
request->combuf = combuf;
|
||||
request->chunk = chunk;
|
||||
request->ready = ready;
|
||||
request->sent = sent;
|
||||
request->size = size;
|
||||
request->source = source;
|
||||
|
||||
request->sync = sync;
|
||||
request->subchunk1 = chunk / 2;
|
||||
request->subchunk1 = ( (chunk / 2) / RCCE_LINE_SIZE ) * RCCE_LINE_SIZE;
|
||||
request->subchunk2 = chunk - request->subchunk1;
|
||||
|
||||
request->wsize = 0;
|
||||
request->remainder = 0;
|
||||
request->nbytes = 0;
|
||||
request->bufptr = NULL;
|
||||
|
||||
request->label = 0;
|
||||
request->finished = 0;
|
||||
request->started = 0;
|
||||
|
||||
request->next = NULL;
|
||||
|
||||
#ifndef _iRCCE_ANY_LENGTH_
|
||||
request->flag_set_value = RCCE_FLAG_SET;
|
||||
#else
|
||||
request->flag_set_value = (RCCE_FLAG_STATUS)size;
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
static int iRCCE_irecv_search_source() {
|
||||
int i, j;
|
||||
int res = iRCCE_ANY_SOURCE;
|
||||
|
||||
for( i=0; i<RCCE_NP*3; ++i ){
|
||||
j =i%RCCE_NP;
|
||||
if ( j == RCCE_IAM )
|
||||
continue;
|
||||
|
||||
// only take source if recv-queue is empty
|
||||
if(!iRCCE_irecv_queue[j]) {
|
||||
int test;
|
||||
iRCCE_test_flag(RCCE_sent_flag[j], 0, &test);
|
||||
if(!test) {
|
||||
res = j;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: iRCCE_irecv
|
||||
//--------------------------------------------------------------------------------------
|
||||
// non-blocking recv function; returns a handle of type iRCCE_RECV_REQUEST
|
||||
//--------------------------------------------------------------------------------------
|
||||
// request record used when the caller passes no request (blocking receive)
static iRCCE_RECV_REQUEST blocking_irecv_request;
// landing place for the byte of a zero-length synchronous receive; it must
// outlive the call because a non-blocking request is completed later
static char sync_recv_dummy;
#ifdef _OPENMP
#pragma omp threadprivate (blocking_irecv_request, sync_recv_dummy)
#endif
// Common implementation of iRCCE_irecv() (sync == 0) and iRCCE_isrecv()
// (sync == 1).
//   privbuf: destination buffer
//   size:    message length in bytes
//   source:  sending rank, or iRCCE_ANY_SOURCE
//   request: request handle; NULL turns the call into a blocking receive
// Returns iRCCE_SUCCESS if the message has been received completely,
// iRCCE_PENDING if the request is first in its queue but not finished,
// iRCCE_RESERVED if it was queued behind other requests or still has no
// source, or the result of RCCE_error_return() for an invalid source rank.
inline static int iRCCE_irecv_generic(char *privbuf, ssize_t size, int source, iRCCE_RECV_REQUEST *request, int sync) {

	if(request == NULL){
		request = &blocking_irecv_request;

		// find source (blocking)
		if( source == iRCCE_ANY_SOURCE ){
			int i;
			for( i=0;;i=(i+1)%RCCE_NP ){

				if( (!iRCCE_irecv_queue[i]) && (i != RCCE_IAM) ) {
					int test;
					iRCCE_test_flag(RCCE_sent_flag[i], 0, &test);
					if(!test) {
						source = i;
						break;
					}
				}
			}
		}
	}

	if(size == 0) {
		if(sync) {
			// just synchronize: receive one byte into static storage.
			// The address of the by-value parameter `size` must not be
			// used here, it dangles as soon as this function returns
			// with the request still pending.
			size = 1;
			privbuf = &sync_recv_dummy;
		} else
			size = -1;
	}

	if(size <= 0) {
#ifdef _iRCCE_ANY_LENGTH_
		if(size != iRCCE_ANY_LENGTH)
#endif
		{
			// nothing to receive: hand back an already finished request.
			// With the wildcard source there is no sent flag to point to.
			iRCCE_init_recv_request(privbuf, RCCE_buff_ptr, RCCE_chunk,
						&RCCE_ready_flag[RCCE_IAM],
						(source == iRCCE_ANY_SOURCE) ? NULL : &RCCE_sent_flag[source],
						size, source, sync, request);
			request->finished = 1;
			return(iRCCE_SUCCESS);
		}
	}

	if( source == iRCCE_ANY_SOURCE ) {
		source = iRCCE_irecv_search_source(); // first try to find a source

		if( source == iRCCE_ANY_SOURCE ){ // queue request if no source available

			iRCCE_init_recv_request(privbuf, RCCE_buff_ptr, RCCE_chunk,
						&RCCE_ready_flag[RCCE_IAM], NULL,
						size, iRCCE_ANY_SOURCE, sync, request);

			// append the request to irecv_any_source_queue
			if( iRCCE_irecv_any_source_queue == NULL ){
				iRCCE_irecv_any_source_queue = request;
			}
			else {
				iRCCE_RECV_REQUEST* run = iRCCE_irecv_any_source_queue;
				while( run->next != NULL ) run = run->next;
				run->next = request;
			}
			return iRCCE_RESERVED;
		}
	}

	if (source<0 || source >= RCCE_NP)
		return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ID));

	iRCCE_init_recv_request(privbuf, RCCE_buff_ptr, RCCE_chunk,
				&RCCE_ready_flag[RCCE_IAM], &RCCE_sent_flag[source],
				size, source, sync, request);

	if(iRCCE_irecv_queue[source] == NULL) {

		// nothing pending for this source: try to receive right away
		if(iRCCE_push_recv_request(request) == iRCCE_SUCCESS) {
			return(iRCCE_SUCCESS);
		}

		iRCCE_irecv_queue[source] = request;

		if(request == &blocking_irecv_request) {
			iRCCE_irecv_wait(request);
			return(iRCCE_SUCCESS);
		}

		return(iRCCE_PENDING);
	}
	else {
		// other requests for this source come first: append to its queue
		iRCCE_RECV_REQUEST *run = iRCCE_irecv_queue[source];
		while(run->next != NULL) run = run->next;
		run->next = request;

		if(request == &blocking_irecv_request) {
			iRCCE_irecv_wait(request);
			return(iRCCE_SUCCESS);
		}

		return(iRCCE_RESERVED);
	}
}
|
||||
|
||||
int iRCCE_irecv(char *privbuf, ssize_t size, int dest, iRCCE_RECV_REQUEST *request) {

	// non-blocking receive: the generic routine with the sync flag cleared;
	// `dest` is passed on as the source rank
	const int sync = 0;

	return iRCCE_irecv_generic(privbuf, size, dest, request, sync);
}
|
||||
|
||||
int iRCCE_isrecv(char *privbuf, ssize_t size, int dest, iRCCE_RECV_REQUEST *request) {

	// non-blocking but synchronous receive: the generic routine with the
	// sync flag set; `dest` is passed on as the source rank
	const int sync = 1;

	return iRCCE_irecv_generic(privbuf, size, dest, request, sync);
}
|
||||
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: iRCCE_iprobe
|
||||
//--------------------------------------------------------------------------------------
|
||||
// probe for incoming messages (non-blocking / does not receive)
|
||||
//--------------------------------------------------------------------------------------
|
||||
int iRCCE_iprobe(int source, int* test_rank, int* test_flag)
|
||||
{
|
||||
// determine source of request if given source = iRCCE_ANY_SOURCE
|
||||
if( source == iRCCE_ANY_SOURCE ) {
|
||||
|
||||
source = iRCCE_irecv_search_source(); // first try to find a source
|
||||
}
|
||||
else {
|
||||
int res;
|
||||
iRCCE_test_flag(RCCE_sent_flag[source], RCCE_FLAG_SET, &res);
|
||||
|
||||
if(!res) source = iRCCE_ANY_SOURCE;
|
||||
}
|
||||
|
||||
if(source != iRCCE_ANY_SOURCE) { // message found:
|
||||
|
||||
if (test_rank != NULL) (*test_rank) = source;
|
||||
if (test_flag != NULL) (*test_flag) = 1;
|
||||
|
||||
#ifdef _iRCCE_ANY_LENGTH_
|
||||
{
|
||||
int size = iRCCE_ANY_LENGTH;
|
||||
RCCE_flag_read(RCCE_sent_flag[source], &size, RCCE_IAM);
|
||||
if(iRCCE_recent_length != size) iRCCE_recent_length = size;
|
||||
}
|
||||
#endif
|
||||
if(iRCCE_recent_source != source) iRCCE_recent_source = source;
|
||||
}
|
||||
else {
|
||||
if (test_rank != NULL) (*test_rank) = iRCCE_ANY_SOURCE;
|
||||
if (test_flag != NULL) (*test_flag) = 0;
|
||||
}
|
||||
|
||||
return iRCCE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: iRCCE_irecv_test
|
||||
//--------------------------------------------------------------------------------------
|
||||
// test function for completion of the requested non-blocking recv operation
|
||||
// Just provide NULL instead of the testvar if you don't need it
|
||||
//--------------------------------------------------------------------------------------
|
||||
int iRCCE_irecv_test(iRCCE_RECV_REQUEST *request, int *test) {

	// Makes progress on a receive request and reports whether it is done.
	//   request: request to test; NULL tests all pending receive requests
	//   test:    if not NULL, set to 1 when the request (or all requests)
	//            completed, else to 0
	// Returns iRCCE_SUCCESS (completed), iRCCE_PENDING (first in its queue,
	// not yet complete) or iRCCE_RESERVED (no source yet, or queued behind
	// other requests).

	int source;

	if(request == NULL) {

		if(iRCCE_irecv_push() == iRCCE_SUCCESS) {
			if (test) (*test) = 1;
			return(iRCCE_SUCCESS);
		}
		else {
			if (test) (*test) = 0;
			return(iRCCE_PENDING);
		}
	}

	// A finished request is complete whatever its source field says. This
	// has to be checked first: a request that was finished without ever
	// being queued may still carry the wildcard source and must not be
	// looked up in the any-source queue.
	if(request->finished) {
		if (test) (*test) = 1;
		return(iRCCE_SUCCESS);
	}

	// does request still have no source?
	if( request->source == iRCCE_ANY_SOURCE ) {
		request->source = iRCCE_irecv_search_source();

		if( request->source == iRCCE_ANY_SOURCE ) {
			if (test) (*test) = 0;
			return iRCCE_RESERVED;
		}

		// a source was found: take request out of the any-source queue
		if( request == iRCCE_irecv_any_source_queue ) {
			iRCCE_irecv_any_source_queue = iRCCE_irecv_any_source_queue->next;
		}
		else {
			// stop at the end of the list instead of running past it
			// when the request is not queued
			iRCCE_RECV_REQUEST* run = iRCCE_irecv_any_source_queue;
			while( (run != NULL) && (run->next != request) ) run = run->next;
			if( run != NULL ) run->next = request->next;
		}

		request->next = NULL;
		request->sent = &RCCE_sent_flag[request->source]; // set senders flag
		source = request->source;

		// queue request in iRCCE_irecv_queue
		if(iRCCE_irecv_queue[source] == NULL) {

			if(iRCCE_push_recv_request(request) == iRCCE_SUCCESS) {
				if (test) (*test) = 1;
				return(iRCCE_SUCCESS);
			}

			iRCCE_irecv_queue[source] = request;

			if(request == &blocking_irecv_request) {
				iRCCE_irecv_wait(request);
				if (test) (*test) = 1;
				return(iRCCE_SUCCESS);
			}

			if (test) (*test) = 0;
			return(iRCCE_PENDING);
		}
		else {
			iRCCE_RECV_REQUEST *run = iRCCE_irecv_queue[source];
			while(run->next != NULL) run = run->next;
			run->next = request;

			if(request == &blocking_irecv_request) {
				iRCCE_irecv_wait(request);
				if (test) (*test) = 1;
				return(iRCCE_SUCCESS);
			}

			// queued behind other requests, so not complete: report 0,
			// as every other iRCCE_RESERVED return of this function does
			if (test) (*test) = 0;
			return(iRCCE_RESERVED);
		}
	}

	source = request->source;

	// only the request at the head of its queue can make progress
	if(iRCCE_irecv_queue[source] != request) {
		if (test) (*test) = 0;
		return(iRCCE_RESERVED);
	}

	iRCCE_push_recv_request(request);

	if(request->finished) {
		iRCCE_irecv_queue[source] = request->next;

		if (test) (*test) = 1;
		return(iRCCE_SUCCESS);
	}

	if (test) (*test) = 0;
	return(iRCCE_PENDING);
}
|
||||
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: iRCCE_irecv_push
|
||||
//--------------------------------------------------------------------------------------
|
||||
// progress function for pending requests in the irecv queue
|
||||
//--------------------------------------------------------------------------------------
|
||||
static int iRCCE_irecv_push_source(int source) {
|
||||
|
||||
iRCCE_RECV_REQUEST *request = iRCCE_irecv_queue[source];
|
||||
|
||||
if(request == NULL) {
|
||||
return(iRCCE_SUCCESS);
|
||||
}
|
||||
|
||||
if(request->finished) {
|
||||
return(iRCCE_SUCCESS);
|
||||
}
|
||||
|
||||
iRCCE_push_recv_request(request);
|
||||
|
||||
if(request->finished) {
|
||||
iRCCE_irecv_queue[source] = request->next;
|
||||
return(iRCCE_SUCCESS);
|
||||
}
|
||||
|
||||
return(iRCCE_PENDING);
|
||||
}
|
||||
|
||||
int iRCCE_irecv_push(void) {
|
||||
iRCCE_RECV_REQUEST* help_request;
|
||||
|
||||
// first check sourceless requests
|
||||
if( iRCCE_irecv_any_source_queue != NULL) {
|
||||
while( iRCCE_irecv_any_source_queue != NULL ) {
|
||||
iRCCE_irecv_any_source_queue->source = iRCCE_irecv_search_source();
|
||||
|
||||
if( iRCCE_irecv_any_source_queue->source == iRCCE_ANY_SOURCE ) {
|
||||
|
||||
break;
|
||||
}
|
||||
// source found for first request in iRCCE_irecv_any_source_queue
|
||||
else {
|
||||
// set senders flag
|
||||
iRCCE_irecv_any_source_queue->sent = &RCCE_sent_flag[iRCCE_irecv_any_source_queue->source];
|
||||
|
||||
// take request out of irecv_any_source_queue
|
||||
help_request = iRCCE_irecv_any_source_queue;
|
||||
iRCCE_irecv_any_source_queue = iRCCE_irecv_any_source_queue->next;
|
||||
help_request->next = NULL;
|
||||
|
||||
// put request into irecv_queue
|
||||
if(iRCCE_irecv_queue[help_request->source] == NULL) {
|
||||
iRCCE_irecv_queue[help_request->source] = help_request;
|
||||
}
|
||||
else {
|
||||
iRCCE_RECV_REQUEST *run = iRCCE_irecv_queue[help_request->source];
|
||||
while(run->next != NULL) run = run->next;
|
||||
run->next = help_request;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
int i, j;
|
||||
int retval = iRCCE_SUCCESS;
|
||||
|
||||
for(i=0; i<RCCE_NP; i++) {
|
||||
|
||||
j = iRCCE_irecv_push_source(i);
|
||||
|
||||
if(j != iRCCE_SUCCESS) {
|
||||
retval = j;
|
||||
}
|
||||
}
|
||||
|
||||
return (iRCCE_irecv_any_source_queue == NULL)? retval : iRCCE_RESERVED;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: iRCCE_irecv_wait
|
||||
//--------------------------------------------------------------------------------------
|
||||
// just wait for completion of the requested non-blocking recv operation
|
||||
//--------------------------------------------------------------------------------------
|
||||
int iRCCE_irecv_wait(iRCCE_RECV_REQUEST *request) {

	// Blocks until `request` is finished or, for request == NULL, until
	// iRCCE_irecv_push() reports iRCCE_SUCCESS. The send side is pushed in
	// every iteration as well. Always returns iRCCE_SUCCESS.

	if(request == NULL) {
		for(;;) {
			iRCCE_isend_push();
			if(iRCCE_irecv_push() == iRCCE_SUCCESS) break;
		}
		return(iRCCE_SUCCESS);
	}

	while(!request->finished) {
		iRCCE_irecv_push();
		iRCCE_isend_push();
	}

	return(iRCCE_SUCCESS);
}
|
||||
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: iRCCE_irecv_cancel
|
||||
//--------------------------------------------------------------------------------------
|
||||
// try to cancel a pending non-blocking recv request
|
||||
//--------------------------------------------------------------------------------------
|
||||
int iRCCE_irecv_cancel(iRCCE_RECV_REQUEST *request, int *test) {

	// Tries to cancel a pending non-blocking receive request.
	//   request: request to cancel
	//   test:    if not NULL, set to 1 if the request was removed, else 0
	// Returns iRCCE_SUCCESS if the request was removed from its queue,
	// iRCCE_PENDING if parts of the message have already been received,
	// iRCCE_NOT_ENQUEUED if the request is NULL, finished or not queued.

	int source;
	iRCCE_RECV_REQUEST *run;

	if( (request == NULL) || (request->finished) ) {
		if (test) (*test) = 0;
		return iRCCE_NOT_ENQUEUED;
	}

	// request without a source: it can only be in the any-source queue
	if( request->source == iRCCE_ANY_SOURCE ) {

		// the head of the queue has no predecessor and is unlinked here
		if( iRCCE_irecv_any_source_queue == request ) {
			iRCCE_irecv_any_source_queue = request->next;

			if (test) (*test) = 1;
			return iRCCE_SUCCESS;
		}

		// the queue may be empty, so run itself is checked as well
		for( run = iRCCE_irecv_any_source_queue; (run != NULL) && (run->next != NULL); run = run->next ) {
			if( run->next == request ) {
				run->next = run->next->next;

				if (test) (*test) = 1;
				return iRCCE_SUCCESS;
			}
		}

		if (test) (*test) = 0;
		return iRCCE_NOT_ENQUEUED;
	}

	source = request->source;

	if(iRCCE_irecv_queue[source] == NULL) {
		if (test) (*test) = 0;
		return iRCCE_NOT_ENQUEUED;
	}

	if(iRCCE_irecv_queue[source] == request) {

		// have parts of the message already been received?
		if(request->started) {
			if (test) (*test) = 0;
			return iRCCE_PENDING;
		}
		else {
			// no, thus the request can be cancelled just in time:
			iRCCE_irecv_queue[source] = request->next;
			if (test) (*test) = 1;
			return iRCCE_SUCCESS;
		}
	}

	for(run = iRCCE_irecv_queue[source]; run->next != NULL; run = run->next) {

		// request found --> remove it from recv queue:
		if(run->next == request) {

			run->next = run->next->next;

			if (test) (*test) = 1;
			return iRCCE_SUCCESS;
		}
	}

	if (test) (*test) = 0;
	return iRCCE_NOT_ENQUEUED;
}
|
||||
|
||||
|
411
hermit/usr/ircce/iRCCE_isend.c
Normal file
411
hermit/usr/ircce/iRCCE_isend.c
Normal file
|
@ -0,0 +1,411 @@
|
|||
//***************************************************************************************
|
||||
// Non-blocking send routines.
|
||||
//***************************************************************************************
|
||||
//
|
||||
// Author: Rob F. Van der Wijngaart
|
||||
// Intel Corporation
|
||||
// Date: 008/30/2010
|
||||
//
|
||||
//***************************************************************************************
|
||||
//
|
||||
// Copyright 2010 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// [2010-10-25] added support for non-blocking send/recv operations
|
||||
// - iRCCE_isend(), ..._test(), ..._wait(), ..._push()
|
||||
// - iRCCE_irecv(), ..._test(), ..._wait(), ..._push()
|
||||
// by Carsten Clauss, Chair for Operating Systems,
|
||||
// RWTH Aachen University
|
||||
//
|
||||
// [2010-11-12] extracted non-blocking code into separate library
|
||||
// by Carsten Scholtes
|
||||
//
|
||||
// [2010-12-09] added cancel functions for non-blocking send/recv requests
|
||||
// by Carsten Clauss
|
||||
//
|
||||
// [2011-11-03] added non-blocking but synchronous send/recv functions:
|
||||
// iRCCE_issend() / iRCCE_isrecv()
|
||||
//
|
||||
|
||||
#ifdef GORY
|
||||
#error iRCCE _cannot_ be built in GORY mode!
|
||||
#endif
|
||||
|
||||
#include "iRCCE_lib.h"
|
||||
|
||||
#if (defined COPPERRIDGE || defined SCC) && !defined(__hermit__)
|
||||
#include "scc_memcpy.h"
|
||||
#else
|
||||
#define memcpy_scc memcpy
|
||||
#endif
|
||||
|
||||
// Calls iRCCE_test_flag() on the request's "ready" flag against the value this
// request waits for and returns the resulting test indicator.
static int iRCCE_send_receiver_ready(iRCCE_SEND_REQUEST *request) {

	int flag_matches;

	iRCCE_test_flag(*(request->ready), request->flag_set_value, &flag_matches);
	return flag_matches;
}

// Progress function for one send request.
//
// The transfer runs in up to three phases (full chunks, remaining whole cache
// lines, a final padded partial line). After each put the function checks the
// "ready" flag; if it is not yet set, the phase number is stored in
// request->label and iRCCE_PENDING is returned. The next call jumps straight
// back to the corresponding flag check instead of repeating the put.
//
// Returns iRCCE_SUCCESS once request->finished is set, iRCCE_PENDING otherwise.
// Synchronous requests are handed to iRCCE_push_ssend_request().
static int iRCCE_push_send_request(iRCCE_SEND_REQUEST *request) {

	char padline[RCCE_LINE_SIZE]; // staging line for a trailing partial cache line

	if (request->finished) return iRCCE_SUCCESS;

	if (request->sync) return iRCCE_push_ssend_request(request);

	// resume at the flag check where a previous call had to give up
	switch (request->label) {
		case 1:  goto label1;
		case 2:  goto label2;
		case 3:  goto label3;
		default: break;
	}

	// phase 1: send data in units of the available chunk size of the comm buffer
	while (request->wsize < (request->size / request->chunk) * request->chunk) {

		request->bufptr = request->privbuf + request->wsize;
		request->nbytes = request->chunk;

		// copy private data to own comm buffer and announce it to the destination
		iRCCE_put(request->combuf, (t_vcharp) request->bufptr, request->nbytes, RCCE_IAM);
		RCCE_flag_write(request->sent, request->flag_set_value, request->dest);

label1:
		// has the destination signalled that it is ready?
		if (!iRCCE_send_receiver_ready(request)) {
			request->label = 1;
			return iRCCE_PENDING;
		}
		RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);

		request->wsize += request->chunk;
	}

	request->remainder = request->size % request->chunk;

	// message was a multiple of the chunk size: done
	if (request->remainder == 0) {
		request->finished = 1;
		return iRCCE_SUCCESS;
	}

	// phase 2: remainder of data, whole cache lines only
	request->bufptr = request->privbuf + (request->size / request->chunk) * request->chunk;
	request->nbytes = request->remainder - request->remainder % RCCE_LINE_SIZE;

	if (request->nbytes) {

		iRCCE_put(request->combuf, (t_vcharp)request->bufptr, request->nbytes, RCCE_IAM);
		RCCE_flag_write(request->sent, request->flag_set_value, request->dest);

label2:
		if (!iRCCE_send_receiver_ready(request)) {
			request->label = 2;
			return iRCCE_PENDING;
		}
		RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);
	}

	// what is left now is less than one cache line
	request->remainder = request->size % request->chunk;
	request->remainder = request->remainder % RCCE_LINE_SIZE;

	if (request->remainder == 0) {
		request->finished = 1;
		return iRCCE_SUCCESS;
	}

	// phase 3: the partial line is copied into a line-sized staging buffer
	// first, then a full line is put into the comm buffer
	request->bufptr = request->privbuf + (request->size / request->chunk) * request->chunk + request->nbytes;
	request->nbytes = RCCE_LINE_SIZE;

	memcpy_scc(padline, request->bufptr, request->remainder);
	iRCCE_put(request->combuf, (t_vcharp)padline, request->nbytes, RCCE_IAM);
	RCCE_flag_write(request->sent, request->flag_set_value, request->dest);

label3:
	if (!iRCCE_send_receiver_ready(request)) {
		request->label = 3;
		return iRCCE_PENDING;
	}
	RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);

	request->finished = 1;
	return iRCCE_SUCCESS;
}
|
||||
|
||||
static void iRCCE_init_send_request(
|
||||
char *privbuf, // source buffer in local private memory (send buffer)
|
||||
t_vcharp combuf, // intermediate buffer in MPB
|
||||
size_t chunk, // size of MPB available for this message (bytes)
|
||||
RCCE_FLAG *ready, // flag indicating whether receiver is ready
|
||||
RCCE_FLAG *sent, // flag indicating whether message has been sent by source
|
||||
size_t size, // size of message (bytes)
|
||||
int dest, // UE that will receive the message
|
||||
int sync, // flag indicating whether send is synchronous or not
|
||||
iRCCE_SEND_REQUEST *request
|
||||
) {
|
||||
|
||||
request->privbuf = privbuf;
|
||||
request->combuf = combuf;
|
||||
request->chunk = chunk;
|
||||
request->ready = ready;
|
||||
request->sent = sent;
|
||||
request->size = size;
|
||||
request->dest = dest;
|
||||
|
||||
request->sync = sync;
|
||||
request->subchunk1 = ( (chunk / 2) / RCCE_LINE_SIZE ) * RCCE_LINE_SIZE;
|
||||
request->subchunk2 = chunk - request->subchunk1;
|
||||
|
||||
request->wsize = 0;
|
||||
request->remainder = 0;
|
||||
request->nbytes = 0;
|
||||
request->bufptr = NULL;
|
||||
|
||||
request->label = 0;
|
||||
|
||||
request->finished = 0;
|
||||
|
||||
request->next = NULL;
|
||||
|
||||
#ifndef _iRCCE_ANY_LENGTH_
|
||||
request->flag_set_value = RCCE_FLAG_SET;
|
||||
#else
|
||||
request->flag_set_value = (RCCE_FLAG_STATUS)size;
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: iRCCE_isend
|
||||
//--------------------------------------------------------------------------------------
|
||||
// non-blocking send function; returns a handle of type iRCCE_SEND_REQUEST
|
||||
//--------------------------------------------------------------------------------------
|
||||
static iRCCE_SEND_REQUEST blocking_isend_request;
#ifdef _OPENMP
#pragma omp threadprivate (blocking_isend_request)
#endif

// Common implementation of iRCCE_isend() (sync == 0) and iRCCE_issend() (sync != 0).
//
//   privbuf  send buffer in private memory
//   size     message size in bytes; 0 sends nothing for a plain send, and a
//            single dummy byte (pure synchronization) for a synchronous send
//   dest     UE that will receive the message
//   request  request handle; NULL selects the internal blocking request, in
//            which case the call waits until the send has completed
//   sync     non-zero for a synchronous send
//
// Returns iRCCE_SUCCESS if the send completed within the call, iRCCE_PENDING
// if the request became the head of the send queue, iRCCE_RESERVED if it was
// appended behind other requests, or the result of RCCE_error_return() when
// dest is out of range.
inline static int iRCCE_isend_generic(char *privbuf, ssize_t size, int dest, iRCCE_SEND_REQUEST *request, int sync) {

	// Payload for zero-length synchronous sends. It must outlive this call,
	// because a queued request keeps the pointer in request->privbuf and reads
	// it when it is progressed later; pointing at the by-value parameter
	// `size` (as was done before) left a dangling pointer in that case.
	// The value 1 matches the first byte of size == 1 on little-endian targets.
	static char sync_dummy_byte = 1;

	iRCCE_SEND_REQUEST *run;

	if(request == NULL) request = &blocking_isend_request;

	if(size == 0) {
		if(sync) {
			// just synchronize:
			size = 1;
			privbuf = &sync_dummy_byte;
		} else
			size = -1;
	}

	// nothing to transfer: hand back an already finished request
	if(size < 0) {
		iRCCE_init_send_request(privbuf, RCCE_buff_ptr, RCCE_chunk,
		                        &RCCE_ready_flag[dest], &RCCE_sent_flag[RCCE_IAM],
		                        size, dest, sync, request);
		request->finished = 1;
		return(iRCCE_SUCCESS);
	}

	if (dest<0 || dest >= RCCE_NP)
		return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ID));

	iRCCE_init_send_request(privbuf, RCCE_buff_ptr, RCCE_chunk,
	                        &RCCE_ready_flag[dest], &RCCE_sent_flag[RCCE_IAM],
	                        size, dest, sync, request);

	if(iRCCE_isend_queue == NULL) {

		// queue is empty: try to make progress right away
		if(iRCCE_push_send_request(request) == iRCCE_SUCCESS)
			return(iRCCE_SUCCESS);

		iRCCE_isend_queue = request;

		if(request == &blocking_isend_request) {
			iRCCE_isend_wait(request);
			return(iRCCE_SUCCESS);
		}

		return(iRCCE_PENDING);
	}

	// other requests are in flight: append to the tail of the queue
	for(run = iRCCE_isend_queue; run->next != NULL; run = run->next)
		;
	run->next = request;

	if(request == &blocking_isend_request) {
		iRCCE_isend_wait(request);
		return(iRCCE_SUCCESS);
	}

	return(iRCCE_RESERVED);
}
|
||||
|
||||
// Non-blocking send: forwards to iRCCE_isend_generic() with the sync flag cleared.
int iRCCE_isend(char *privbuf, ssize_t size, int dest, iRCCE_SEND_REQUEST *request) {

	const int synchronous = 0;

	return iRCCE_isend_generic(privbuf, size, dest, request, synchronous);
}
|
||||
|
||||
// Non-blocking synchronous send: forwards to iRCCE_isend_generic() with the sync flag set.
int iRCCE_issend(char *privbuf, ssize_t size, int dest, iRCCE_SEND_REQUEST *request) {

	const int synchronous = 1;

	return iRCCE_isend_generic(privbuf, size, dest, request, synchronous);
}
|
||||
|
||||
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: iRCCE_isend_push
|
||||
//--------------------------------------------------------------------------------------
|
||||
// progress function for pending requests in the isend queue
|
||||
//--------------------------------------------------------------------------------------
|
||||
int iRCCE_isend_push(void) {
|
||||
|
||||
iRCCE_SEND_REQUEST *request = iRCCE_isend_queue;
|
||||
|
||||
if(request == NULL) {
|
||||
return(iRCCE_SUCCESS);
|
||||
}
|
||||
|
||||
if(request->finished) {
|
||||
return(iRCCE_SUCCESS);
|
||||
}
|
||||
|
||||
iRCCE_push_send_request(request);
|
||||
|
||||
if(request->finished) {
|
||||
iRCCE_isend_queue = request->next;
|
||||
return(iRCCE_SUCCESS);
|
||||
}
|
||||
|
||||
return(iRCCE_PENDING);
|
||||
}
|
||||
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: iRCCE_isend_test
|
||||
//--------------------------------------------------------------------------------------
|
||||
// test function for completion of the requested non-blocking send operation
|
||||
// Just provide NULL instead of testvar if you don't need it
|
||||
//--------------------------------------------------------------------------------------
|
||||
// Tests a send request for completion, progressing the send queue on the way.
//
//   request  request to test; NULL tests whether the whole send queue is empty
//   test     optional out-parameter (may be NULL): set to 1 on completion, 0 otherwise
//
// Returns iRCCE_SUCCESS on completion, iRCCE_PENDING if the request (or the
// queue) is still in progress, and iRCCE_RESERVED if the request is not yet at
// the head of the queue.
int iRCCE_isend_test(iRCCE_SEND_REQUEST *request, int *test) {

	int done   = 0;
	int status = iRCCE_PENDING;

	if (request == NULL) {
		// no specific request: push once and report whether the queue drained
		iRCCE_isend_push();
		if (iRCCE_isend_queue == NULL) {
			done   = 1;
			status = iRCCE_SUCCESS;
		}
	}
	else if (request->finished) {
		done   = 1;
		status = iRCCE_SUCCESS;
	}
	else {
		// give the current head a chance to finish so that this request may move up
		if (iRCCE_isend_queue != request)
			iRCCE_isend_push();

		if (iRCCE_isend_queue != request) {
			status = iRCCE_RESERVED;
		}
		else {
			iRCCE_push_send_request(request);

			if (request->finished) {
				iRCCE_isend_queue = request->next;
				done   = 1;
				status = iRCCE_SUCCESS;
			}
		}
	}

	if (test != NULL) *test = done;
	return status;
}
|
||||
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: iRCCE_isend_wait
|
||||
//--------------------------------------------------------------------------------------
|
||||
// just wait for completion of the requested non-blocking send operation
|
||||
//--------------------------------------------------------------------------------------
|
||||
// Busy-waits until the given send request is finished or, when request is
// NULL, until the send queue is empty. Both the send and the receive queue are
// pushed on every iteration. Always returns iRCCE_SUCCESS.
int iRCCE_isend_wait(iRCCE_SEND_REQUEST *request) {

	if (request == NULL) {
		// wait for the whole send queue to drain
		for (;;) {
			if (iRCCE_isend_queue == NULL) break;
			iRCCE_isend_push();
			iRCCE_irecv_push();
		}
		return iRCCE_SUCCESS;
	}

	// wait for this particular request
	for (;;) {
		if (request->finished) break;
		iRCCE_isend_push();
		iRCCE_irecv_push();
	}
	return iRCCE_SUCCESS;
}
|
||||
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: iRCCE_isend_cancel
|
||||
//--------------------------------------------------------------------------------------
|
||||
// try to cancel a pending non-blocking send request
|
||||
//--------------------------------------------------------------------------------------
|
||||
// Tries to remove a send request from the send queue.
//
//   request  request to cancel
//   test     optional out-parameter (may be NULL): 1 if the request was removed, 0 otherwise
//
// Returns iRCCE_SUCCESS if the request was unlinked, iRCCE_PENDING if it is the
// head of the queue (the head is never removed here), and iRCCE_NOT_ENQUEUED if
// it is NULL, already finished, or not found in the queue.
int iRCCE_isend_cancel(iRCCE_SEND_REQUEST *request, int *test) {

	iRCCE_SEND_REQUEST **link;
	int removed = 0;
	int status  = iRCCE_NOT_ENQUEUED;

	if (request != NULL && !request->finished && iRCCE_isend_queue != NULL) {

		if (iRCCE_isend_queue == request) {
			status = iRCCE_PENDING;
		}
		else {
			// walk the "next" links behind the head and unlink the request if present
			for (link = &iRCCE_isend_queue->next; *link != NULL; link = &(*link)->next) {
				if (*link == request) {
					*link   = request->next;
					removed = 1;
					status  = iRCCE_SUCCESS;
					break;
				}
			}
		}
	}

	if (test != NULL) *test = removed;
	return status;
}
|
62
hermit/usr/ircce/iRCCE_lib.h
Normal file
62
hermit/usr/ircce/iRCCE_lib.h
Normal file
|
@ -0,0 +1,62 @@
|
|||
//
|
||||
// Copyright 2010 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// [2010-10-25] added support for non-blocking send/recv operations
|
||||
// - iRCCE_isend(), ..._test(), ..._wait(), ..._push()
|
||||
// - iRCCE_irecv(), ..._test(), ..._wait(), ..._push()
|
||||
// by Carsten Clauss, Chair for Operating Systems,
|
||||
// RWTH Aachen University
|
||||
//
|
||||
// [2010-11-12] extracted non-blocking code into separate library
|
||||
// by Carsten Scholtes
|
||||
//
|
||||
// [2011-04-19] added wildcard mechanism (iRCCE_ANY_SOURCE) for receiving
|
||||
// a message from an arbitrary remote rank
|
||||
// by Simon Pickartz, Chair for Operating Systems,
|
||||
// RWTH Aachen University
|
||||
//
|
||||
// [2011-06-27] merged iRCCE_ANY_SOURCE branch with trunk (iRCCE_ANY_LENGTH)
|
||||
//
|
||||
|
||||
#ifndef IRCCE_LIB_H
|
||||
#define IRCCE_LIB_H
|
||||
|
||||
#include "RCCE_lib.h"
|
||||
#include "iRCCE.h"
|
||||
|
||||
#ifdef AIR
|
||||
#define FPGA_BASE 0xf9000000
|
||||
#define BACKOFF_MIN 8
|
||||
#define BACKOFF_MAX 256
|
||||
extern iRCCE_AIR iRCCE_atomic_inc_regs[];
|
||||
extern int iRCCE_atomic_alloc_counter;
|
||||
extern iRCCE_AIR* iRCCE_atomic_barrier[2];
|
||||
#endif
|
||||
|
||||
extern iRCCE_SEND_REQUEST* iRCCE_isend_queue;
|
||||
extern iRCCE_RECV_REQUEST* iRCCE_irecv_queue[RCCE_MAXNP];
|
||||
extern iRCCE_RECV_REQUEST* iRCCE_irecv_any_source_queue;
|
||||
extern int iRCCE_recent_source;
|
||||
extern int iRCCE_recent_length;
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp threadprivate (iRCCE_isend_queue, iRCCE_irecv_queue, iRCCE_irecv_any_source_queue, iRCCE_recent_source, iRCCE_recent_length)
|
||||
#endif
|
||||
|
||||
int iRCCE_test_flag(RCCE_FLAG, RCCE_FLAG_STATUS, int *);
|
||||
int iRCCE_push_ssend_request(iRCCE_SEND_REQUEST *request);
|
||||
int iRCCE_push_srecv_request(iRCCE_RECV_REQUEST *request);
|
||||
|
||||
#endif
|
289
hermit/usr/ircce/iRCCE_mcast.c
Normal file
289
hermit/usr/ircce/iRCCE_mcast.c
Normal file
|
@ -0,0 +1,289 @@
|
|||
//***************************************************************************************
|
||||
//
|
||||
// Copyright 2010 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// [2010-11-26] added xxx
|
||||
// by Carsten Clauss, Chair for Operating Systems,
|
||||
// RWTH Aachen University
|
||||
//
|
||||
|
||||
#include "iRCCE_lib.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#if (defined COPPERRIDGE || defined SCC) && !defined(__hermit__)
|
||||
#include "scc_memcpy.h"
|
||||
#else
|
||||
#define memcpy_scc memcpy
|
||||
#endif
|
||||
|
||||
static int iRCCE_msend_general(
|
||||
char *privbuf, // source buffer in local private memory (send buffer)
|
||||
t_vcharp combuf, // intermediate buffer in MPB
|
||||
size_t chunk, // size of MPB available for this message (bytes)
|
||||
RCCE_FLAG *sent, // flag indicating whether message has been sent by source
|
||||
ssize_t size // size of message (bytes)
|
||||
) {
|
||||
|
||||
char padline[RCCE_LINE_SIZE]; // copy buffer, used if message not multiple of line size
|
||||
size_t wsize, // offset within send buffer when putting in "chunk" bytes
|
||||
remainder, // bytes remaining to be sent
|
||||
nbytes; // number of bytes to be sent in single iRCCE_put call
|
||||
char *bufptr; // running pointer inside privbuf for current location
|
||||
size_t subchunk1, subchunk2; // sub-chunks for the pipelined message transfer
|
||||
int ue;
|
||||
|
||||
#ifndef _iRCCE_ANY_LENGTH_
|
||||
#define FLAG_SET_VALUE RCCE_FLAG_SET
|
||||
#else
|
||||
RCCE_FLAG_STATUS FLAG_SET_VALUE = (RCCE_FLAG_STATUS)size;
|
||||
#endif
|
||||
// send data in units of available chunk size of comm buffer
|
||||
for (wsize=0; wsize< (size/chunk)*chunk; wsize+=chunk) {
|
||||
|
||||
bufptr = privbuf + wsize;
|
||||
nbytes = chunk;
|
||||
|
||||
// copy private data to own comm buffer
|
||||
RCCE_put(combuf, (t_vcharp) bufptr, nbytes, RCCE_IAM);
|
||||
|
||||
for(ue=0; ue<RCCE_NP; ue++)
|
||||
if(ue!=RCCE_IAM) RCCE_flag_write(sent, FLAG_SET_VALUE, ue);
|
||||
|
||||
iRCCE_barrier(NULL);
|
||||
}
|
||||
|
||||
remainder = size%chunk;
|
||||
// if nothing is left over, we are done
|
||||
if (!remainder) return(iRCCE_SUCCESS);
|
||||
|
||||
// send remainder of data--whole cache lines
|
||||
bufptr = privbuf + (size/chunk)*chunk;
|
||||
nbytes = remainder - remainder%RCCE_LINE_SIZE;
|
||||
if (nbytes) {
|
||||
// copy private data to own comm buffer
|
||||
iRCCE_put(combuf, (t_vcharp)bufptr, nbytes, RCCE_IAM);
|
||||
for(ue=0; ue<RCCE_NP; ue++)
|
||||
if(ue!=RCCE_IAM) RCCE_flag_write(sent, FLAG_SET_VALUE, ue);
|
||||
|
||||
iRCCE_barrier(NULL);
|
||||
}
|
||||
|
||||
remainder = remainder%RCCE_LINE_SIZE;
|
||||
if (!remainder) return(iRCCE_SUCCESS);
|
||||
|
||||
// remainder is less than a cache line. This must be copied into appropriately sized
|
||||
// intermediate space before it can be sent to the receiver
|
||||
bufptr = privbuf + (size/chunk)*chunk + nbytes;
|
||||
nbytes = RCCE_LINE_SIZE;
|
||||
|
||||
// copy private data to own comm buffer
|
||||
memcpy_scc(padline, bufptr, remainder);
|
||||
iRCCE_put(combuf, (t_vcharp)padline, nbytes, RCCE_IAM);
|
||||
|
||||
for(ue=0; ue<RCCE_NP; ue++)
|
||||
if(ue!=RCCE_IAM) RCCE_flag_write(sent, FLAG_SET_VALUE, ue);
|
||||
|
||||
iRCCE_barrier(NULL);
|
||||
|
||||
return(iRCCE_SUCCESS);
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: iRCCE_msend
|
||||
//--------------------------------------------------------------------------------------
|
||||
// pipelined multicast send function (blocking and synchronous!)
|
||||
//--------------------------------------------------------------------------------------
|
||||
int iRCCE_msend(char *privbuf, ssize_t size) {
|
||||
|
||||
if(size <= 0) return(iRCCE_SUCCESS);
|
||||
|
||||
while(iRCCE_isend_queue != NULL) {
|
||||
|
||||
// wait for completion of pending non-blocking requests
|
||||
iRCCE_isend_push();
|
||||
iRCCE_irecv_push();
|
||||
}
|
||||
|
||||
return(iRCCE_msend_general(privbuf, RCCE_buff_ptr, RCCE_chunk,
|
||||
&RCCE_sent_flag[RCCE_IAM], size));
|
||||
}
|
||||
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: iRCCE_mrecv_general
|
||||
//--------------------------------------------------------------------------------------
|
||||
// multicast receive function
|
||||
//--------------------------------------------------------------------------------------
|
||||
static int iRCCE_mrecv_general(
|
||||
char *privbuf, // destination buffer in local private memory (receive buffer)
|
||||
t_vcharp combuf, // intermediate buffer in MPB
|
||||
size_t chunk, // size of MPB available for this message (bytes)
|
||||
RCCE_FLAG *sent, // flag indicating whether message has been sent by source
|
||||
ssize_t size, // size of message (bytes)
|
||||
int source // UE that sent the message
|
||||
) {
|
||||
|
||||
char padline[RCCE_LINE_SIZE]; // copy buffer, used if message not multiple of line size
|
||||
size_t wsize, // offset within receive buffer when pulling in "chunk" bytes
|
||||
remainder, // bytes remaining to be received
|
||||
nbytes; // number of bytes to be received in single iRCCE_get call
|
||||
char *bufptr; // running pointer inside privbuf for current location
|
||||
size_t subchunk1, subchunk2; // sub-chunks for the pipelined message transfer
|
||||
|
||||
#ifndef _iRCCE_ANY_LENGTH_
|
||||
#define FLAG_SET_VALUE RCCE_FLAG_SET
|
||||
#else
|
||||
RCCE_FLAG_STATUS FLAG_SET_VALUE;
|
||||
|
||||
while (1) {
|
||||
RCCE_flag_read(*sent, &size, RCCE_IAM);
|
||||
if(size!=0) break;
|
||||
}
|
||||
FLAG_SET_VALUE = (RCCE_FLAG_STATUS)size;
|
||||
#endif
|
||||
|
||||
if(iRCCE_recent_source != source) iRCCE_recent_source = source;
|
||||
if(iRCCE_recent_length != size) iRCCE_recent_length = size;
|
||||
|
||||
// receive data in units of available chunk size of MPB
|
||||
for (wsize=0; wsize< (size/chunk)*chunk; wsize+=chunk) {
|
||||
|
||||
bufptr = privbuf + wsize;
|
||||
nbytes = chunk;
|
||||
|
||||
RCCE_wait_until(*sent, RCCE_FLAG_SET);
|
||||
RCCE_flag_write(sent, RCCE_FLAG_UNSET, RCCE_IAM);
|
||||
|
||||
// copy data from local MPB space to private memory
|
||||
RCCE_get((t_vcharp)bufptr, combuf, nbytes, source);
|
||||
|
||||
iRCCE_barrier(NULL);
|
||||
}
|
||||
|
||||
remainder = size%chunk;
|
||||
// if nothing is left over, we are done
|
||||
if (!remainder) return(iRCCE_SUCCESS);
|
||||
|
||||
// receive remainder of data--whole cache lines
|
||||
bufptr = privbuf + (size/chunk)*chunk;
|
||||
nbytes = remainder - remainder % RCCE_LINE_SIZE;
|
||||
if (nbytes) {
|
||||
|
||||
RCCE_wait_until(*sent, FLAG_SET_VALUE);
|
||||
RCCE_flag_write(sent, RCCE_FLAG_UNSET, RCCE_IAM);
|
||||
|
||||
// copy data from local MPB space to private memory
|
||||
iRCCE_get((t_vcharp)bufptr, combuf, nbytes, source);
|
||||
|
||||
iRCCE_barrier(NULL);
|
||||
}
|
||||
|
||||
remainder = remainder % RCCE_LINE_SIZE;
|
||||
if (!remainder) return(iRCCE_SUCCESS);
|
||||
|
||||
// remainder is less than cache line. This must be copied into appropriately sized
|
||||
// intermediate space before exact number of bytes get copied to the final destination
|
||||
bufptr = privbuf + (size/chunk)*chunk + nbytes;
|
||||
nbytes = RCCE_LINE_SIZE;
|
||||
|
||||
RCCE_wait_until(*sent, FLAG_SET_VALUE);
|
||||
RCCE_flag_write(sent, RCCE_FLAG_UNSET, RCCE_IAM);
|
||||
|
||||
// copy data from local MPB space to private memory
|
||||
iRCCE_get((t_vcharp)padline, combuf, nbytes, source);
|
||||
memcpy_scc(bufptr, padline, remainder);
|
||||
|
||||
iRCCE_barrier(NULL);
|
||||
|
||||
return(iRCCE_SUCCESS);
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: iRCCE_mrecv
|
||||
//--------------------------------------------------------------------------------------
|
||||
// multicast recv function (blocking!)
|
||||
//--------------------------------------------------------------------------------------
|
||||
int iRCCE_mrecv(char *privbuf, ssize_t size, int source) {
|
||||
|
||||
int ignore = 0;
|
||||
|
||||
if(size <= 0) {
|
||||
#ifdef _iRCCE_ANY_LENGTH_
|
||||
if (size != iRCCE_ANY_LENGTH)
|
||||
#endif
|
||||
{
|
||||
return(iRCCE_SUCCESS);
|
||||
}
|
||||
}
|
||||
|
||||
// determine source of request if given source = iRCCE_ANY_SOURCE
|
||||
if (source == iRCCE_ANY_SOURCE) {
|
||||
|
||||
// wait for completion of _all_ pending non-blocking requests:
|
||||
iRCCE_irecv_wait(NULL);
|
||||
|
||||
int i, res;
|
||||
for( i=0;;i=(i+1)%RCCE_NP ){
|
||||
iRCCE_test_flag(RCCE_sent_flag[i], RCCE_FLAG_SET, &res);
|
||||
if ( (i != RCCE_IAM) && (res) ) {
|
||||
source = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// wait for completion of pending (ans source-related) non-blocking requests:
|
||||
while(iRCCE_irecv_queue[source] != NULL) {
|
||||
iRCCE_irecv_push();
|
||||
iRCCE_isend_push();
|
||||
}
|
||||
|
||||
if (source<0 || source >= RCCE_NP)
|
||||
return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ID));
|
||||
else {
|
||||
return(iRCCE_mrecv_general(privbuf, RCCE_buff_ptr, RCCE_chunk,
|
||||
&RCCE_sent_flag[source], size, source));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: iRCCE_mcast
|
||||
//--------------------------------------------------------------------------------------
|
||||
// multicast based on msend() and mrecv()
|
||||
//--------------------------------------------------------------------------------------
|
||||
int iRCCE_mcast(char *buf, size_t size, int root)
|
||||
{
|
||||
if(RCCE_IAM != root) {
|
||||
return iRCCE_mrecv(buf, size, root);
|
||||
} else {
|
||||
return iRCCE_msend(buf, size);
|
||||
}
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: iRCCE_bcast
|
||||
//--------------------------------------------------------------------------------------
|
||||
// wrapper function for using iRCCE's multicast feature
|
||||
//--------------------------------------------------------------------------------------
|
||||
// Broadcast wrapper that uses iRCCE's multicast where it is applicable.
//
// iRCCE_mcast() takes no communicator argument, so it is only used when
// `comm` compares equal to RCCE_COMM_WORLD; any other communicator is handled
// by RCCE_bcast(), which receives `comm`. (The branches were previously the
// other way round, sending non-world communicators through the
// communicator-unaware multicast.)
//
// NOTE(review): the comparison is a bytewise memcmp of the two RCCE_COMM
// objects - confirm the type has no padding that could make equal
// communicators compare unequal.
int iRCCE_bcast(char *buf, size_t size, int root, RCCE_COMM comm)
{
	if(memcmp(&comm, &RCCE_COMM_WORLD, sizeof(RCCE_COMM)) == 0) {
		return iRCCE_mcast(buf, size, root);
	} else {
		return RCCE_bcast(buf, size, root, comm);
	}
}
|
82
hermit/usr/ircce/iRCCE_put.c
Normal file
82
hermit/usr/ircce/iRCCE_put.c
Normal file
|
@ -0,0 +1,82 @@
|
|||
//***************************************************************************************
|
||||
// Put data into communication buffer.
|
||||
//***************************************************************************************
|
||||
//
|
||||
// Author: Rob F. Van der Wijngaart
|
||||
// Intel Corporation
|
||||
// Date: 008/30/2010
|
||||
//
|
||||
//***************************************************************************************
|
||||
//
|
||||
// Copyright 2010 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// [2010-11-03] switched to SCC-optimized memcpy() functions in scc_memcpy.h:
|
||||
// - memcpy_to_mpb()
|
||||
// - memcpy_from_mpb()
|
||||
// by Stefan Lankes, Carsten Clauss, Chair for Operating Systems,
|
||||
// RWTH Aachen University
|
||||
//
|
||||
#include "iRCCE_lib.h"
|
||||
|
||||
#if (defined COPPERRIDGE || defined SCC) && !defined(__hermit__)
|
||||
#include "scc_memcpy.h"
|
||||
#else
|
||||
#define memcpy_to_mpb memcpy
|
||||
#endif
|
||||
|
||||
// Copies `count` bytes from src to dest and returns the value handed back by
// the copy routine: memcpy_to_mpb() when COPPERRIDGE or SCC is defined,
// plain memcpy() otherwise.
void* iRCCE_memcpy_put(void *dest, const void *src, size_t count)
{
	void *copied;

#if defined COPPERRIDGE || defined SCC
	copied = memcpy_to_mpb(dest, src, count);
#else
	copied = memcpy(dest, src, count);
#endif

	return copied;
}
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: iRCCE_put
|
||||
//--------------------------------------------------------------------------------------
|
||||
// copy data from address "source" in the local MPB or the calling UE's private memory
|
||||
// to address "target" in the remote MPB. We do not test to see if a move from the
|
||||
// calling UE's private memory stays within allocated memory
|
||||
//--------------------------------------------------------------------------------------
|
||||
int iRCCE_put(
|
||||
t_vcharp target, // target buffer, MPB
|
||||
t_vcharp source, // source buffer, MPB or private memory
|
||||
int num_bytes,
|
||||
int ID
|
||||
) {
|
||||
|
||||
// in non-GORY mode we only need to retain the MPB target shift; we
|
||||
// already know the target is in the MPB, not private memory
|
||||
target = RCCE_comm_buffer[ID]+(target-RCCE_comm_buffer[RCCE_IAM]);
|
||||
|
||||
// make sure that any data that has been put in our MPB by another UE is visible
|
||||
#ifdef _OPENMP
|
||||
#pragma omp flush
|
||||
#endif
|
||||
|
||||
// do the actual copy
|
||||
RC_cache_invalidate();
|
||||
|
||||
iRCCE_memcpy_put((void *)target, (void *)source, num_bytes);
|
||||
|
||||
// flush data to make it visible to all threads; cannot use flush list because it
|
||||
// concerns malloced space
|
||||
#ifdef _OPENMP
|
||||
#pragma omp flush
|
||||
#endif
|
||||
return(iRCCE_SUCCESS);
|
||||
}
|
497
hermit/usr/ircce/iRCCE_srecv.c
Normal file
497
hermit/usr/ircce/iRCCE_srecv.c
Normal file
|
@ -0,0 +1,497 @@
|
|||
//***************************************************************************************
|
||||
// Non-blocking receive routines.
|
||||
//***************************************************************************************
|
||||
//
|
||||
// Author: Rob F. Van der Wijngaart
|
||||
// Intel Corporation
|
||||
// Date: 008/30/2010
|
||||
//
|
||||
//***************************************************************************************
|
||||
//
|
||||
// Copyright 2010 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// [2010-11-26] added a _pipelined_ version of blocking send/recv
|
||||
// by Carsten Clauss, Chair for Operating Systems,
|
||||
// RWTH Aachen University
|
||||
//
|
||||
// [2011-04-19] added wildcard mechanism (iRCCE_ANY_SOURCE) for receiving
|
||||
// a message from an arbitrary remote rank
|
||||
// by Simon Pickartz, Chair for Operating Systems,
|
||||
// RWTH Aachen University
|
||||
//
|
||||
// [2011-05-31] added iRCCE_ANY_LENGTH wildcard mechanism
|
||||
// by Carsten Clauss
|
||||
//
|
||||
// [2011-06-27] merged iRCCE_ANY_SOURCE branch with trunk (iRCCE_ANY_LENGTH)
|
||||
//
|
||||
// [2011-08-02] added iRCCE_iprobe() function for probing for incoming messages
|
||||
//
|
||||
// [2011-11-03] added internal push function for non-blocking synchronous send
|
||||
// iRCCE_push_srecv_request() (called by iRCCE_push_recv_request)
|
||||
//
|
||||
|
||||
#include "iRCCE_lib.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#if (defined COPPERRIDGE || defined SCC) && !defined(__hermit__)
|
||||
#include "scc_memcpy.h"
|
||||
#else
|
||||
#define memcpy_scc memcpy
|
||||
#endif
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: iRCCE_srecv_general
|
||||
//--------------------------------------------------------------------------------------
|
||||
// pipelined receive function
|
||||
//--------------------------------------------------------------------------------------
|
||||
static int iRCCE_srecv_general(
|
||||
char *privbuf, // destination buffer in local private memory (receive buffer)
|
||||
t_vcharp combuf, // intermediate buffer in MPB
|
||||
size_t chunk, // size of MPB available for this message (bytes)
|
||||
RCCE_FLAG *ready, // flag indicating whether receiver is ready
|
||||
RCCE_FLAG *sent, // flag indicating whether message has been sent by source
|
||||
ssize_t size, // size of message (bytes)
|
||||
int source, // UE that sent the message
|
||||
int *test // if 1 upon entry, do nonblocking receive; if message available
|
||||
// set to 1, otherwise to 0
|
||||
) {
|
||||
|
||||
char padline[RCCE_LINE_SIZE]; // copy buffer, used if message not multiple of line size
|
||||
size_t wsize, // offset within receive buffer when pulling in "chunk" bytes
|
||||
remainder, // bytes remaining to be received
|
||||
nbytes; // number of bytes to be received in single iRCCE_get call
|
||||
int first_test; // only use first chunk to determine if message has been received yet
|
||||
char *bufptr; // running pointer inside privbuf for current location
|
||||
size_t subchunk1, subchunk2; // sub-chunks for the pipelined message transfer
|
||||
|
||||
#ifndef _iRCCE_ANY_LENGTH_
|
||||
#define FLAG_SET_VALUE RCCE_FLAG_SET
|
||||
#else
|
||||
RCCE_FLAG_STATUS FLAG_SET_VALUE;
|
||||
|
||||
while (1) {
|
||||
RCCE_flag_read(*sent, &size, RCCE_IAM);
|
||||
if(size!=0) break;
|
||||
}
|
||||
FLAG_SET_VALUE = (RCCE_FLAG_STATUS)size;
|
||||
#endif
|
||||
|
||||
if(iRCCE_recent_source != source) iRCCE_recent_source = source;
|
||||
if(iRCCE_recent_length != size) iRCCE_recent_length = size;
|
||||
|
||||
first_test = 1;
|
||||
|
||||
for (wsize=0; wsize < (size/chunk)*chunk; wsize+=chunk) {
|
||||
|
||||
if (*test && first_test) {
|
||||
first_test = 0;
|
||||
iRCCE_test_flag(*sent, RCCE_FLAG_SET, test);
|
||||
if (!(*test)) return(iRCCE_PENDING);
|
||||
}
|
||||
|
||||
if(wsize == 0) {
|
||||
// allign sub-chunks to cache line granularity:
|
||||
subchunk1 = ( (chunk / 2) / RCCE_LINE_SIZE ) * RCCE_LINE_SIZE;
|
||||
subchunk2 = chunk - subchunk1;
|
||||
}
|
||||
|
||||
bufptr = privbuf + wsize;
|
||||
nbytes = subchunk1;
|
||||
|
||||
RCCE_wait_until(*sent, FLAG_SET_VALUE);
|
||||
RCCE_flag_write(sent, RCCE_FLAG_UNSET, RCCE_IAM);
|
||||
RCCE_flag_write(ready, RCCE_FLAG_SET, source);
|
||||
iRCCE_get((t_vcharp)bufptr, combuf, nbytes, source);
|
||||
|
||||
bufptr = privbuf + wsize + subchunk1;
|
||||
nbytes = subchunk2;
|
||||
|
||||
RCCE_wait_until(*sent, FLAG_SET_VALUE);
|
||||
RCCE_flag_write(sent, RCCE_FLAG_UNSET, RCCE_IAM);
|
||||
RCCE_flag_write(ready, RCCE_FLAG_SET, source);
|
||||
iRCCE_get((t_vcharp)bufptr, combuf + subchunk1, nbytes, source);
|
||||
}
|
||||
|
||||
remainder = size%chunk;
|
||||
// if nothing is left over, we are done
|
||||
if (!remainder) return(iRCCE_SUCCESS);
|
||||
|
||||
// receive remainder of data--whole cache lines
|
||||
bufptr = privbuf + (size/chunk)*chunk;
|
||||
nbytes = remainder - remainder % RCCE_LINE_SIZE;
|
||||
if (nbytes) {
|
||||
// if function is called in test mode, check if first chunk has been sent already.
|
||||
// If so, proceed as usual. If not, exit immediately
|
||||
if (*test && first_test) {
|
||||
first_test = 0;
|
||||
iRCCE_test_flag(*sent, RCCE_FLAG_SET, test);
|
||||
if (!(*test)) return(iRCCE_PENDING);
|
||||
}
|
||||
|
||||
RCCE_wait_until(*sent, FLAG_SET_VALUE);
|
||||
RCCE_flag_write(sent, RCCE_FLAG_UNSET, RCCE_IAM);
|
||||
// copy data from local MPB space to private memory
|
||||
iRCCE_get((t_vcharp)bufptr, combuf, nbytes, source);
|
||||
|
||||
// tell the source I have moved data out of its comm buffer
|
||||
RCCE_flag_write(ready, RCCE_FLAG_SET, source);
|
||||
}
|
||||
|
||||
remainder = remainder % RCCE_LINE_SIZE;
|
||||
if (!remainder) return(iRCCE_SUCCESS);
|
||||
|
||||
// remainder is less than cache line. This must be copied into appropriately sized
|
||||
// intermediate space before exact number of bytes get copied to the final destination
|
||||
bufptr = privbuf + (size/chunk)*chunk + nbytes;
|
||||
nbytes = RCCE_LINE_SIZE;
|
||||
|
||||
// if function is called in test mode, check if first chunk has been sent already.
|
||||
// If so, proceed as usual. If not, exit immediately
|
||||
if (*test && first_test) {
|
||||
first_test = 0;
|
||||
iRCCE_test_flag(*sent, RCCE_FLAG_SET, test);
|
||||
if (!(*test)) return(iRCCE_PENDING);
|
||||
}
|
||||
|
||||
RCCE_wait_until(*sent, FLAG_SET_VALUE);
|
||||
RCCE_flag_write(sent, RCCE_FLAG_UNSET, RCCE_IAM);
|
||||
|
||||
// copy data from local MPB space to private memory
|
||||
iRCCE_get((t_vcharp)padline, combuf, nbytes, source);
|
||||
memcpy_scc(bufptr, padline, remainder);
|
||||
|
||||
// tell the source I have moved data out of its comm buffer
|
||||
RCCE_flag_write(ready, RCCE_FLAG_SET, source);
|
||||
|
||||
return(iRCCE_SUCCESS);
|
||||
}
|
||||
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: iRCCE_srecv
|
||||
//--------------------------------------------------------------------------------------
|
||||
// pipelined recv function (blocking!)
|
||||
//--------------------------------------------------------------------------------------
|
||||
int iRCCE_srecv(char *privbuf, ssize_t size, int source) {
|
||||
|
||||
int ignore = 0;
|
||||
|
||||
if(size < 0) {
|
||||
#ifdef _iRCCE_ANY_LENGTH_
|
||||
if (size != iRCCE_ANY_LENGTH)
|
||||
#endif
|
||||
{
|
||||
return(iRCCE_SUCCESS);
|
||||
}
|
||||
}
|
||||
|
||||
if(size == 0) {
|
||||
// just synchronize:
|
||||
size = 1;
|
||||
privbuf = (char*)&size;
|
||||
}
|
||||
|
||||
// determine source of request if given source = iRCCE_ANY_SOURCE
|
||||
if (source == iRCCE_ANY_SOURCE) {
|
||||
|
||||
// wait for completion of _all_ pending non-blocking requests:
|
||||
iRCCE_irecv_wait(NULL);
|
||||
|
||||
int i, res;
|
||||
for( i=0;;i=(i+1)%RCCE_NP ){
|
||||
iRCCE_test_flag(RCCE_sent_flag[i], RCCE_FLAG_SET, &res);
|
||||
if ( (i != RCCE_IAM) && (res) ) {
|
||||
source = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// wait for completion of pending (ans source-related) non-blocking requests:
|
||||
while(iRCCE_irecv_queue[source] != NULL) {
|
||||
iRCCE_irecv_push();
|
||||
iRCCE_isend_push();
|
||||
}
|
||||
|
||||
#if !defined(SINGLEBITFLAGS) && !defined(RCCE_VERSION)
|
||||
if(size <= iRCCE_MAX_TAGGED_LEN) {
|
||||
#ifndef _iRCCE_ANY_LENGTH_
|
||||
#define FLAG_SET_VALUE RCCE_FLAG_SET
|
||||
#else
|
||||
RCCE_FLAG_STATUS FLAG_SET_VALUE;
|
||||
|
||||
if(size == iRCCE_ANY_LENGTH) {
|
||||
while (1) {
|
||||
RCCE_flag_read(RCCE_sent_flag[source], &size, RCCE_IAM);
|
||||
if(size!=0) break;
|
||||
}
|
||||
}
|
||||
FLAG_SET_VALUE = (RCCE_FLAG_STATUS)size;
|
||||
#endif
|
||||
if(size <= iRCCE_MAX_TAGGED_LEN) {
|
||||
// just wait and then read the tagged flag with payload:
|
||||
iRCCE_wait_tagged(RCCE_sent_flag[source], FLAG_SET_VALUE, privbuf, size);
|
||||
|
||||
RCCE_flag_write(&RCCE_sent_flag[source], RCCE_FLAG_UNSET, RCCE_IAM);
|
||||
RCCE_flag_write(&RCCE_ready_flag[RCCE_IAM], RCCE_FLAG_SET, source);
|
||||
|
||||
return(RCCE_SUCCESS);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (source<0 || source >= RCCE_NP)
|
||||
return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ID));
|
||||
else {
|
||||
return(iRCCE_srecv_general(privbuf, RCCE_buff_ptr, RCCE_chunk,
|
||||
&RCCE_ready_flag[RCCE_IAM], &RCCE_sent_flag[source],
|
||||
size, source, &ignore));
|
||||
}
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: iRCCE_probe
|
||||
//--------------------------------------------------------------------------------------
|
||||
// probe for incoming messages (blocking / does not receive)
|
||||
//--------------------------------------------------------------------------------------
|
||||
int iRCCE_probe(int source, int* test_rank)
|
||||
{
|
||||
// determine source of request if given source = iRCCE_ANY_SOURCE
|
||||
if (source == iRCCE_ANY_SOURCE) {
|
||||
|
||||
// wait for completion of _all_ pending non-blocking requests:
|
||||
iRCCE_irecv_wait(NULL);
|
||||
|
||||
int i, res;
|
||||
for( i=0;;i=(i+1)%RCCE_NP ){
|
||||
iRCCE_test_flag(RCCE_sent_flag[i], RCCE_FLAG_SET, &res);
|
||||
if ( (i != RCCE_IAM) && (res) ) {
|
||||
source = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
int res;
|
||||
do {
|
||||
iRCCE_test_flag(RCCE_sent_flag[source], RCCE_FLAG_SET, &res);
|
||||
}
|
||||
while(!res);
|
||||
}
|
||||
|
||||
if (test_rank != NULL) {
|
||||
(*test_rank) = source;
|
||||
}
|
||||
|
||||
#ifdef _iRCCE_ANY_LENGTH_
|
||||
{
|
||||
int size;
|
||||
RCCE_flag_read(RCCE_sent_flag[source], &size, RCCE_IAM);
|
||||
if(iRCCE_recent_length != size) iRCCE_recent_length = size;
|
||||
}
|
||||
#endif
|
||||
if(iRCCE_recent_source != source) iRCCE_recent_source = source;
|
||||
|
||||
return iRCCE_SUCCESS;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: iRCCE_srecv_test
|
||||
//--------------------------------------------------------------------------------------
|
||||
// pipelined recv function (non-blocking / analogous to RCCE_recv_test function)
|
||||
//--------------------------------------------------------------------------------------
|
||||
int iRCCE_srecv_test(char *privbuf, ssize_t size, int source, int *test) {
|
||||
|
||||
if(test == NULL) return iRCCE_recv(privbuf, size, source);
|
||||
|
||||
if(size <= 0) {
|
||||
#ifdef _iRCCE_ANY_LENGTH_
|
||||
if(size != iRCCE_ANY_LENGTH)
|
||||
#endif
|
||||
{
|
||||
(*test) = 1;
|
||||
return(iRCCE_SUCCESS);
|
||||
}
|
||||
}
|
||||
|
||||
// determine source of request if given source = iRCCE_ANY_SOURCE
|
||||
if (source == iRCCE_ANY_SOURCE) {
|
||||
|
||||
// check whether there are still pending non-blocking receive requests:
|
||||
if(iRCCE_irecv_push() != iRCCE_SUCCESS) {
|
||||
(*test) = 0;
|
||||
return(iRCCE_PENDING);
|
||||
}
|
||||
|
||||
int i, res;
|
||||
for( i=0; i<RCCE_NP; i++){
|
||||
iRCCE_test_flag(RCCE_sent_flag[i], RCCE_FLAG_SET, &res);
|
||||
if ( (i != RCCE_IAM) && (res) ) {
|
||||
source = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (source == iRCCE_ANY_SOURCE) {
|
||||
// currently, there is no message available (from any source):
|
||||
(*test) = 0;
|
||||
return (iRCCE_PENDING);
|
||||
}
|
||||
|
||||
|
||||
if(iRCCE_irecv_queue[source] != NULL) {
|
||||
|
||||
// push pending non-blocking requests
|
||||
iRCCE_irecv_push();
|
||||
iRCCE_isend_push();
|
||||
|
||||
if(iRCCE_irecv_queue[source] != NULL) {
|
||||
(*test) = 0;
|
||||
return (iRCCE_PENDING);
|
||||
}
|
||||
}
|
||||
|
||||
if (source<0 || source >= RCCE_NP)
|
||||
return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ID));
|
||||
else {
|
||||
(*test) = 1;
|
||||
return(iRCCE_srecv_general(privbuf, RCCE_buff_ptr, RCCE_chunk,
|
||||
&RCCE_ready_flag[RCCE_IAM], &RCCE_sent_flag[source],
|
||||
size, source, test));
|
||||
}
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: iRCCE_push_srecv_request
|
||||
//--------------------------------------------------------------------------------------
|
||||
// pipelined push for recv function (non-blocking and strictly synchronous!)
|
||||
//--------------------------------------------------------------------------------------
|
||||
int iRCCE_push_srecv_request(iRCCE_RECV_REQUEST *request) {

  // Advances a non-blocking receive request as far as possible without
  // waiting. Progress lives in *request; request->label records at which of
  // the four flag probes the previous call had to give up, so that the next
  // call resumes exactly there. Returns iRCCE_PENDING while data is missing
  // and iRCCE_SUCCESS once the request is finished.

  char padline[RCCE_LINE_SIZE]; // staging line for a trailing partial cache line
  int arrived;                  // result of iRCCE_test_flag()

  if(request->finished) return(iRCCE_SUCCESS);

  // resume at the probe recorded by an earlier, pending call
  switch(request->label) {
    case 1:  goto label1;
    case 2:  goto label2;
    case 3:  goto label3;
    case 4:  goto label4;
    default: break;
  }

#ifdef _iRCCE_ANY_LENGTH_
  // the value of the sent flag is taken as the message length; zero means
  // that nothing has been announced yet
  RCCE_flag_read(*(request->sent), &(request->flag_set_value), RCCE_IAM);
  if(request->flag_set_value == 0) {
    return(iRCCE_PENDING);
  }
  request->size = (size_t)request->flag_set_value;
#endif

  // receive data in units of available chunk size of MPB
  for (; request->wsize < (request->size / request->chunk) * request->chunk; request->wsize += request->chunk) {

    // first sub-chunk
    request->bufptr = request->privbuf + request->wsize;
    request->nbytes = request->subchunk1;
label1:
    iRCCE_test_flag(*(request->sent), request->flag_set_value, &arrived);
    if(!arrived) {
      request->label = 1;
      return(iRCCE_PENDING);
    }
    request->started = 1;

    RCCE_flag_write(request->sent, RCCE_FLAG_UNSET, RCCE_IAM);
    RCCE_flag_write(request->ready, RCCE_FLAG_SET, request->source);
    iRCCE_get((t_vcharp)request->bufptr, request->combuf, request->nbytes, request->source);

    // second sub-chunk
    request->bufptr = request->privbuf + request->wsize + request->subchunk1;
    request->nbytes = request->subchunk2;

label2:
    iRCCE_test_flag(*(request->sent), request->flag_set_value, &arrived);
    if(!arrived) {
      request->label = 2;
      return(iRCCE_PENDING);
    }

    RCCE_flag_write(request->sent, RCCE_FLAG_UNSET, RCCE_IAM);
    RCCE_flag_write(request->ready, RCCE_FLAG_SET, request->source);
    iRCCE_get((t_vcharp)request->bufptr, request->combuf + request->subchunk1, request->nbytes, request->source);
  }

  request->remainder = request->size % request->chunk;
  // if nothing is left over, we are done
  if (!request->remainder) goto finish;

  // receive remainder of data--whole cache lines
  request->bufptr = request->privbuf + (request->size / request->chunk) * request->chunk;
  request->nbytes = request->remainder - request->remainder % RCCE_LINE_SIZE;
  if (request->nbytes) {
label3:
    iRCCE_test_flag(*(request->sent), request->flag_set_value, &arrived);
    if(!arrived) {
      request->label = 3;
      return(iRCCE_PENDING);
    }
    request->started = 1;

    RCCE_flag_write(request->sent, RCCE_FLAG_UNSET, RCCE_IAM);
    // copy data from source's MPB space to private memory
    iRCCE_get((t_vcharp)request->bufptr, request->combuf, request->nbytes, request->source);

    // tell the source I have moved data out of its comm buffer
    RCCE_flag_write(request->ready, RCCE_FLAG_SET, request->source);
  }

  // recomputed from size/chunk because a resumed call may enter at label3
  request->remainder = request->size % request->chunk;
  request->remainder = request->remainder % RCCE_LINE_SIZE;
  if (!request->remainder) goto finish;

  // remainder is less than cache line. This must be copied into appropriately sized
  // intermediate space before exact number of bytes get copied to the final destination
  request->bufptr = request->privbuf + (request->size / request->chunk) * request->chunk + request->nbytes;
  request->nbytes = RCCE_LINE_SIZE;
label4:
  iRCCE_test_flag(*(request->sent), request->flag_set_value, &arrived);
  if(!arrived) {
    request->label = 4;
    return(iRCCE_PENDING);
  }
  request->started = 1;

  RCCE_flag_write(request->sent, RCCE_FLAG_UNSET, RCCE_IAM);
  // fetch one full cache line, then keep only the bytes that belong to the message
  iRCCE_get((t_vcharp)padline, request->combuf, request->nbytes, request->source);
  memcpy_scc(request->bufptr,padline,request->remainder);

  // tell the source I have moved data out of its comm buffer
  RCCE_flag_write(request->ready, RCCE_FLAG_SET, request->source);

finish:
  // common completion path: publish source and length of the message just received
  if(iRCCE_recent_source != request->source) iRCCE_recent_source = request->source;
  if(iRCCE_recent_length != request->size) iRCCE_recent_length = request->size;
  request->finished = 1;
  return(iRCCE_SUCCESS);
}
|
||||
|
282
hermit/usr/ircce/iRCCE_ssend.c
Normal file
282
hermit/usr/ircce/iRCCE_ssend.c
Normal file
|
@ -0,0 +1,282 @@
|
|||
//***************************************************************************************
|
||||
// Synchronized send routines.
|
||||
//***************************************************************************************
|
||||
//
|
||||
// Author: Rob F. Van der Wijngaart
|
||||
// Intel Corporation
|
||||
// Date: 008/30/2010
|
||||
//
|
||||
//***************************************************************************************
|
||||
//
|
||||
// Copyright 2010 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// [2010-11-26] added a _pipelined_ version of blocking send/recv
|
||||
// by Carsten Clauss, Chair for Operating Systems,
|
||||
// RWTH Aachen University
|
||||
//
|
||||
// [2011-05-31] added iRCCE_ANY_LENGTH wildcard mechanism
|
||||
// by Carsten Clauss
|
||||
//
|
||||
// [2011-11-03] added internal push function for non-blocking synchronous send
|
||||
// iRCCE_push_ssend_request() (called by iRCCE_push_send_request)
|
||||
//
|
||||
|
||||
#include "iRCCE_lib.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#if (defined COPPERRIDGE || defined SCC) && !defined(__hermit__)
|
||||
#include "scc_memcpy.h"
|
||||
#else
|
||||
#define memcpy_scc memcpy
|
||||
#endif
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: iRCCE_ssend_general
|
||||
//--------------------------------------------------------------------------------------
|
||||
// pipelined send function
|
||||
//--------------------------------------------------------------------------------------
|
||||
static int iRCCE_ssend_general(
|
||||
char *privbuf, // source buffer in local private memory (send buffer)
|
||||
t_vcharp combuf, // intermediate buffer in MPB
|
||||
size_t chunk, // size of MPB available for this message (bytes)
|
||||
RCCE_FLAG *ready, // flag indicating whether receiver is ready
|
||||
RCCE_FLAG *sent, // flag indicating whether message has been sent by source
|
||||
ssize_t size, // size of message (bytes)
|
||||
int dest // UE that will receive the message
|
||||
) {
|
||||
|
||||
char padline[RCCE_LINE_SIZE]; // copy buffer, used if message not multiple of line size
|
||||
size_t wsize, // offset within send buffer when putting in "chunk" bytes
|
||||
remainder, // bytes remaining to be sent
|
||||
nbytes; // number of bytes to be sent in single iRCCE_put call
|
||||
char *bufptr; // running pointer inside privbuf for current location
|
||||
size_t subchunk1, subchunk2; // sub-chunks for the pipelined message transfer
|
||||
|
||||
#ifndef _iRCCE_ANY_LENGTH_
|
||||
#define FLAG_SET_VALUE RCCE_FLAG_SET
|
||||
#else
|
||||
RCCE_FLAG_STATUS FLAG_SET_VALUE = (RCCE_FLAG_STATUS)size;
|
||||
#endif
|
||||
|
||||
for (wsize = 0; wsize < (size/chunk)*chunk; wsize+=chunk) {
|
||||
|
||||
if(wsize == 0) {
|
||||
// allign sub-chunks to cache line granularity:
|
||||
subchunk1 = ( (chunk / 2) / RCCE_LINE_SIZE ) * RCCE_LINE_SIZE;
|
||||
subchunk2 = chunk - subchunk1;
|
||||
}
|
||||
|
||||
bufptr = privbuf + wsize;
|
||||
nbytes = subchunk1;
|
||||
|
||||
iRCCE_put(combuf, (t_vcharp) bufptr, nbytes, RCCE_IAM);
|
||||
|
||||
RCCE_flag_write(sent, FLAG_SET_VALUE, dest);
|
||||
|
||||
RCCE_wait_until(*ready, RCCE_FLAG_SET);
|
||||
RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
|
||||
|
||||
bufptr = privbuf + wsize + subchunk1;
|
||||
nbytes = subchunk2;
|
||||
|
||||
iRCCE_put(combuf + subchunk1, (t_vcharp) bufptr, nbytes, RCCE_IAM);
|
||||
|
||||
RCCE_flag_write(sent, FLAG_SET_VALUE, dest);
|
||||
|
||||
RCCE_wait_until(*ready, RCCE_FLAG_SET);
|
||||
RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
|
||||
}
|
||||
|
||||
remainder = size%chunk;
|
||||
// if nothing is left over, we are done
|
||||
if (!remainder) return(iRCCE_SUCCESS);
|
||||
|
||||
// send remainder of data--whole cache lines
|
||||
bufptr = privbuf + (size/chunk)*chunk;
|
||||
nbytes = remainder - remainder%RCCE_LINE_SIZE;
|
||||
if (nbytes) {
|
||||
// copy private data to own comm buffer
|
||||
iRCCE_put(combuf, (t_vcharp)bufptr, nbytes, RCCE_IAM);
|
||||
RCCE_flag_write(sent, FLAG_SET_VALUE, dest);
|
||||
// wait for the destination to be ready to receive a message
|
||||
RCCE_wait_until(*ready, RCCE_FLAG_SET);
|
||||
RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
|
||||
}
|
||||
|
||||
remainder = remainder%RCCE_LINE_SIZE;
|
||||
if (!remainder) return(iRCCE_SUCCESS);
|
||||
|
||||
// remainder is less than a cache line. This must be copied into appropriately sized
|
||||
// intermediate space before it can be sent to the receiver
|
||||
bufptr = privbuf + (size/chunk)*chunk + nbytes;
|
||||
nbytes = RCCE_LINE_SIZE;
|
||||
|
||||
// copy private data to own comm buffer
|
||||
memcpy_scc(padline, bufptr, remainder);
|
||||
iRCCE_put(combuf, (t_vcharp)padline, nbytes, RCCE_IAM);
|
||||
RCCE_flag_write(sent, FLAG_SET_VALUE, dest);
|
||||
|
||||
// wait for the destination to be ready to receive a message
|
||||
RCCE_wait_until(*ready, RCCE_FLAG_SET);
|
||||
RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
|
||||
|
||||
return(iRCCE_SUCCESS);
|
||||
}
|
||||
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: iRCCE_ssend
|
||||
//--------------------------------------------------------------------------------------
|
||||
// pipelined send function (blocking and synchronous!)
|
||||
//--------------------------------------------------------------------------------------
|
||||
int iRCCE_ssend(char *privbuf, ssize_t size, int dest) {
|
||||
|
||||
if(size < 0) return(iRCCE_SUCCESS);
|
||||
|
||||
if(size == 0) {
|
||||
// just synchronize:
|
||||
size = 1;
|
||||
privbuf = (char*)&size;
|
||||
}
|
||||
|
||||
while(iRCCE_isend_queue != NULL) {
|
||||
|
||||
// wait for completion of pending non-blocking requests
|
||||
iRCCE_isend_push();
|
||||
iRCCE_irecv_push();
|
||||
}
|
||||
|
||||
#if !defined(SINGLEBITFLAGS) && !defined(RCCE_VERSION)
|
||||
if(size <= iRCCE_MAX_TAGGED_LEN) {
|
||||
// just write the tagged 'sent' flag (with payload) and wait for 'ready' flag:
|
||||
iRCCE_flag_write_tagged(&RCCE_sent_flag[RCCE_IAM], (RCCE_FLAG_STATUS)size, dest, privbuf, size);
|
||||
|
||||
RCCE_wait_until(RCCE_ready_flag[dest], RCCE_FLAG_SET);
|
||||
RCCE_flag_write(&RCCE_ready_flag[dest], RCCE_FLAG_UNSET, RCCE_IAM);
|
||||
|
||||
return(RCCE_SUCCESS);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (dest<0 || dest >= RCCE_NP)
|
||||
return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ID));
|
||||
else
|
||||
return(iRCCE_ssend_general(privbuf, RCCE_buff_ptr, RCCE_chunk,
|
||||
&RCCE_ready_flag[dest], &RCCE_sent_flag[RCCE_IAM],
|
||||
size, dest));
|
||||
}
|
||||
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: iRCCE_push_ssend_request
|
||||
//--------------------------------------------------------------------------------------
|
||||
// pipelined push for send function (non-blocking and strictly synchronous!)
|
||||
//--------------------------------------------------------------------------------------
|
||||
int iRCCE_push_ssend_request(iRCCE_SEND_REQUEST *request) {

  // Advances a non-blocking synchronous send request as far as possible
  // without waiting. Progress lives in *request; request->label records at
  // which of the four ready-flag probes the previous call had to give up, so
  // that the next call resumes exactly there. Returns iRCCE_PENDING while the
  // ready flag is not yet set and iRCCE_SUCCESS once the request is finished.

  char padline[RCCE_LINE_SIZE]; // staging line for a trailing partial cache line
  int acked;                    // result of iRCCE_test_flag()

  if(request->finished) return(iRCCE_SUCCESS);

  // resume at the probe recorded by an earlier, pending call
  switch(request->label) {
    case 1:  goto label1;
    case 2:  goto label2;
    case 3:  goto label3;
    case 4:  goto label4;
    default: break;
  }

  // send data in units of available chunk size of comm buffer
  for (request->wsize = 0; request->wsize < (request->size / request->chunk) * request->chunk; request->wsize += request->chunk) {

    // first sub-chunk
    request->bufptr = request->privbuf + request->wsize;
    request->nbytes = request->subchunk1;

    iRCCE_put(request->combuf, (t_vcharp) request->bufptr, request->nbytes, RCCE_IAM);
    RCCE_flag_write(request->sent, request->flag_set_value, request->dest);
label1:
    iRCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &acked);
    if(!acked) {
      request->label = 1;
      return(iRCCE_PENDING);
    }
    RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);

    // second sub-chunk goes into the upper part of the comm buffer
    request->bufptr = request->privbuf + request->wsize + request->subchunk1;
    request->nbytes = request->subchunk2;

    iRCCE_put(request->combuf + request->subchunk1, (t_vcharp) request->bufptr, request->nbytes, RCCE_IAM);
    RCCE_flag_write(request->sent, request->flag_set_value, request->dest);
label2:
    iRCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &acked);
    if(!acked) {
      request->label = 2;
      return(iRCCE_PENDING);
    }
    RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);
  }

  request->remainder = request->size % request->chunk;
  // if nothing is left over, we are done
  if (!request->remainder) goto finish;

  // send remainder of data--whole cache lines
  request->bufptr = request->privbuf + (request->size / request->chunk) * request->chunk;
  request->nbytes = request->remainder - request->remainder % RCCE_LINE_SIZE;
  if (request->nbytes) {
    // copy private data to own comm buffer
    iRCCE_put(request->combuf, (t_vcharp)request->bufptr, request->nbytes, RCCE_IAM);
    RCCE_flag_write(request->sent, request->flag_set_value, request->dest);
    // probe the ready flag; give up for now if it is not set yet
label3:
    iRCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &acked);
    if(!acked) {
      request->label = 3;
      return(iRCCE_PENDING);
    }
    RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);
  }

  // recomputed from size/chunk because a resumed call may enter at label3
  request->remainder = request->size % request->chunk;
  request->remainder = request->remainder%RCCE_LINE_SIZE;
  // if nothing is left over, we are done
  if (!request->remainder) goto finish;

  // remainder is less than a cache line. This must be copied into appropriately sized
  // intermediate space before it can be sent to the receiver
  request->bufptr = request->privbuf + (request->size / request->chunk) * request->chunk + request->nbytes;
  request->nbytes = RCCE_LINE_SIZE;
  // copy private data to own comm buffer
  memcpy(padline,request->bufptr,request->remainder);
  iRCCE_put(request->combuf, (t_vcharp)padline, request->nbytes, RCCE_IAM);
  RCCE_flag_write(request->sent, request->flag_set_value, request->dest);
  // probe the ready flag; give up for now if it is not set yet
label4:
  iRCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &acked);
  if(!acked) {
    request->label = 4;
    return(iRCCE_PENDING);
  }
  RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);

finish:
  // common completion path
  request->finished = 1;
  return(iRCCE_SUCCESS);
}
|
279
hermit/usr/ircce/iRCCE_synch.c
Normal file
279
hermit/usr/ircce/iRCCE_synch.c
Normal file
|
@ -0,0 +1,279 @@
|
|||
///*************************************************************************************
|
||||
// Synchronization functions.
|
||||
// Single-bit and whole-cache-line flags are sufficiently different that we provide
|
||||
// separate implementations of the synchronization routines for each case
|
||||
//**************************************************************************************
|
||||
//
|
||||
// Author: Rob F. Van der Wijngaart
|
||||
// Intel Corporation
|
||||
// Date: 008/30/2010
|
||||
//
|
||||
//**************************************************************************************
|
||||
//
|
||||
// Copyright 2010 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// [2010-10-25] added support for non-blocking send/recv operations
|
||||
// - iRCCE_isend(), ..._test(), ..._wait(), ..._push()
|
||||
// - iRCCE_irecv(), ..._test(), ..._wait(), ..._push()
|
||||
// by Carsten Clauss, Chair for Operating Systems,
|
||||
// RWTH Aachen University
|
||||
//
|
||||
// [2010-11-12] extracted non-blocking code into separate library
|
||||
// by Carsten Scholtes
|
||||
//
|
||||
// [2011-01-21] updated the datatype of RCCE_FLAG according to the
|
||||
// recent version of RCCE
|
||||
//
|
||||
// [2011-04-12] added macro test for rcce version
|
||||
//
|
||||
// [2012-11-06] add barrier implementation as described in:
|
||||
// USENIX HotPar'12 Eval. Hardw. Synch. Supp. SCC
|
||||
// by Pablo Reble
|
||||
//
|
||||
#include "iRCCE_lib.h"
|
||||
|
||||
#ifdef SINGLEBITFLAGS
|
||||
#warning iRCCE_TAGGED_FLAGS: for using this feature, SINGLEBITFLAGS must be disabled! (make SINGLEBITFLAGS=0)
|
||||
#endif
|
||||
|
||||
#ifdef SINGLEBITFLAGS
|
||||
|
||||
int iRCCE_test_flag(RCCE_FLAG flag, RCCE_FLAG_STATUS val, int *result) {

  // Non-blocking probe of a single-bit flag: the flag is read exactly once.
  // *result is set to 1 if the flag currently equals val and to 0 otherwise;
  // the function itself always returns iRCCE_SUCCESS.
#ifdef RCCE_VERSION
  // this is a newer version than V1.0.13
  t_vcharp flag_byte = flag.flag_addr;
#else
  t_vcharp flag_line = flag.line_address;
#endif
  int matches;

  // flush/invalidate first so that the read below sees the most recent
  // value of the flag
#ifdef _OPENMP
  #pragma omp flush
#endif
  RC_cache_invalidate();

#ifdef RCCE_VERSION
  // this is a newer version than V1.0.13
  matches = (RCCE_bit_value(flag_byte, (flag.location)%RCCE_FLAGS_PER_BYTE) == val);
#else
  matches = (RCCE_bit_value(flag_line, flag.location) == val);
#endif

  (*result) = matches ? 1 : 0;

  return(iRCCE_SUCCESS);
}
|
||||
|
||||
#else
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// LOCKLESS SYNCHRONIZATION USING ONE WHOLE CACHE LINE PER FLAG //
|
||||
//////////////////////////////////////////////////////////////////
|
||||
|
||||
int iRCCE_test_flag(RCCE_FLAG flag, RCCE_FLAG_STATUS val, int *result) {

  // Non-blocking probe of a whole-cache-line flag: the flag is read exactly
  // once. *result is set to 1 if the flag currently equals val and to 0
  // otherwise; the function itself always returns iRCCE_SUCCESS.
  int matches;

  // flush/invalidate first so that the read below sees the most recent
  // value of the flag
#ifdef _OPENMP
  #pragma omp flush
#endif
  RC_cache_invalidate();

#ifdef RCCE_VERSION
  matches = ((RCCE_FLAG_STATUS)(*flag.flag_addr) == val);
#else
  matches = ((*flag) == val);
#endif

  (*result) = matches ? 1 : 0;

  return(iRCCE_SUCCESS);
}
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// FUNCTIONS FOR HANDLING TAGGED FLAGS (NEED WHOLE CACHE LINE PER FLAG) //
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
||||
int iRCCE_flag_alloc_tagged(RCCE_FLAG *flag)
{
  // Allocate a flag that occupies one full line (RCCE_LINE_SIZE bytes) so
  // that a tag can be stored next to the flag value.
#ifndef RCCE_VERSION
  // older RCCE: the regular flag allocator is used as is
  return RCCE_flag_alloc(flag);
#else
  // this is a newer version than V1.0.13
  flag->flag_addr = RCCE_malloc(RCCE_LINE_SIZE);

  if (flag->flag_addr) return(RCCE_SUCCESS);

  // RCCE_malloc() handed back a null pointer
  return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_UNDEFINED));
#endif
}
|
||||
|
||||
int iRCCE_flag_write_tagged(RCCE_FLAG *flag, RCCE_FLAG_STATUS val, int ID, void *tag, int len) {

  // Write a flag value together with an optional tag as one full line.
  //
  //   flag : flag to be written
  //   val  : flag value, stored in the first int of the line
  //   ID   : forwarded unchanged to iRCCE_put()
  //   tag  : optional payload stored right behind the flag value (may be NULL)
  //   len  : payload length in bytes; values above iRCCE_MAX_TAGGED_LEN are
  //          clamped, values <= 0 mean that no payload is copied
  //
  // Returns the result of iRCCE_put() passed through RCCE_error_return().

  // staging buffer for one line; {0} zero-fills it whatever RCCE_LINE_SIZE is
  unsigned char val_array[RCCE_LINE_SIZE] = {0};
  int error;

  *(int *) val_array = val;
#ifdef _OPENMP
  // the value is additionally stored in the last int of the line; this is
  // the position iRCCE_wait_tagged() polls under _OPENMP
  *(int *) &val_array[RCCE_LINE_SIZE-sizeof(int)] = val;
#endif

  // a negative length must never reach the copy routine
  if(tag && (len > 0))
  {
    if(len > iRCCE_MAX_TAGGED_LEN) len = iRCCE_MAX_TAGGED_LEN;
    iRCCE_memcpy_put(&val_array[sizeof(int)], tag, len);
  }

#ifdef RCCE_VERSION
  error = iRCCE_put(flag->flag_addr, val_array, RCCE_LINE_SIZE, ID);
#else
  error = iRCCE_put((t_vcharp)(*flag), val_array, RCCE_LINE_SIZE, ID);
#endif

  return(RCCE_error_return(RCCE_debug_synch,error));
}
|
||||
|
||||
int iRCCE_flag_read_tagged(RCCE_FLAG flag, RCCE_FLAG_STATUS *val, int ID, void *tag, int len) {

  // Fetch a full flag line via iRCCE_get() and extract flag value and tag.
  //
  //   flag : flag to be read
  //   val  : receives the flag value (may be NULL)
  //   ID   : forwarded unchanged to iRCCE_get()
  //   tag  : receives the payload stored behind the flag value (may be NULL);
  //          only filled if val is given and the value read is non-zero
  //   len  : number of payload bytes wanted; values above
  //          iRCCE_MAX_TAGGED_LEN are clamped, values <= 0 copy nothing
  //
  // Returns RCCE_SUCCESS, or the iRCCE_get() error passed through
  // RCCE_error_return().

  unsigned char val_array[RCCE_LINE_SIZE];
  int error;

#ifdef RCCE_VERSION
  error = iRCCE_get(val_array, flag.flag_addr, RCCE_LINE_SIZE, ID);
#else
  error = iRCCE_get(val_array, (t_vcharp)flag, RCCE_LINE_SIZE, ID);
#endif
  if(error) return(RCCE_error_return(RCCE_debug_synch,error));

  if(val) {
#ifdef _OPENMP
    // under _OPENMP the copy in the last int of the line is authoritative
    *val = *(int *)&val_array[RCCE_LINE_SIZE-sizeof(int)];
#else
    *val = *(int *)val_array;
#endif
  }

  // a negative length must never reach the copy routine
  if( (val) && (*val) && (tag) && (len > 0) ) {
    if(len > iRCCE_MAX_TAGGED_LEN) len = iRCCE_MAX_TAGGED_LEN;
    iRCCE_memcpy_put(tag, &val_array[sizeof(int)], len);
  }

  return(RCCE_SUCCESS);
}
|
||||
|
||||
int iRCCE_wait_tagged(RCCE_FLAG flag, RCCE_FLAG_STATUS val, void *tag, int len) {

  // Busy-wait until the flag holds val, then copy out the tag stored behind
  // the flag value.
  //
  //   flag : flag to poll
  //   val  : value to wait for
  //   tag  : receives the payload (may be NULL)
  //   len  : number of payload bytes wanted; values above
  //          iRCCE_MAX_TAGGED_LEN are clamped, values <= 0 copy nothing
  //
  // Always returns RCCE_SUCCESS.

#ifndef RCCE_VERSION
  RCCE_FLAG flag_pos = flag;
#ifdef _OPENMP
  // under _OPENMP the value is polled in the last int of the line
  flag_pos = flag + RCCE_LINE_SIZE / sizeof(int) - 1;
#endif
#endif

  // always flush/invalidate to ensure we read the most recent value of the
  // flag; keep reading it until it has the required value
  do {
#ifdef _OPENMP
    #pragma omp flush
#endif
    RC_cache_invalidate();
#ifdef RCCE_VERSION
  // this is a newer version than V1.0.13
#ifdef _OPENMP
  } while ((RCCE_FLAG_STATUS)(*( ((int*)flag.flag_addr) + RCCE_LINE_SIZE / sizeof(int) - 1)) != val);
#else
  } while ((RCCE_FLAG_STATUS)(*flag.flag_addr) != val);
#endif
#else
  } while ((*flag_pos) != val);
#endif

  // a negative length must never reach the copy routine
  if(tag && (len > 0)) {
    if(len > iRCCE_MAX_TAGGED_LEN) len = iRCCE_MAX_TAGGED_LEN;
#ifdef RCCE_VERSION
    iRCCE_memcpy_put(tag, &((char*)flag.flag_addr)[sizeof(int)], len);
#else
    iRCCE_memcpy_put(tag, &((char*)flag)[sizeof(int)], len);
#endif
  }

  return(RCCE_SUCCESS);
}
|
||||
|
||||
int iRCCE_test_tagged(RCCE_FLAG flag, RCCE_FLAG_STATUS val, int *result, void *tag, int len) {

  // Non-blocking counterpart of iRCCE_wait_tagged(): the flag is read once.
  //
  //   flag   : flag to probe
  //   val    : value to compare against
  //   result : set to 1 if the flag holds val, to 0 otherwise
  //   tag    : receives the payload stored behind the flag value (may be
  //            NULL); only filled if the flag matched
  //   len    : number of payload bytes wanted; values above
  //            iRCCE_MAX_TAGGED_LEN are clamped, values <= 0 copy nothing
  //
  // Always returns RCCE_SUCCESS.

  int matches;

#ifndef RCCE_VERSION
  RCCE_FLAG flag_pos = flag;
#ifdef _OPENMP
  // under _OPENMP the value is tested in the last int of the line
  flag_pos = flag + RCCE_LINE_SIZE / sizeof(int) - 1;
#endif
#endif

  // flush/invalidate first so that the read below sees the most recent
  // value of the flag
#ifdef _OPENMP
  #pragma omp flush
#endif
  RC_cache_invalidate();

#ifdef RCCE_VERSION
  // this is a newer version than V1.0.13
#ifdef _OPENMP
  // test the same position iRCCE_wait_tagged() polls under _OPENMP (the
  // last int of the line), as the non-RCCE_VERSION path already does
  matches = ((RCCE_FLAG_STATUS)(*( ((int*)flag.flag_addr) + RCCE_LINE_SIZE / sizeof(int) - 1)) == val);
#else
  matches = ((RCCE_FLAG_STATUS)(*flag.flag_addr) == val);
#endif
#else
  matches = ((*flag_pos) == val);
#endif

  (*result) = matches ? 1 : 0;

  // a negative length must never reach the copy routine
  if((*result) && tag && (len > 0)) {
    if(len > iRCCE_MAX_TAGGED_LEN) len = iRCCE_MAX_TAGGED_LEN;
#ifdef RCCE_VERSION
    iRCCE_memcpy_put(tag, &((char*)flag.flag_addr)[sizeof(int)], len);
#else
    iRCCE_memcpy_put(tag, &((char*)flag)[sizeof(int)], len);
#endif
  }

  return(RCCE_SUCCESS);
}
|
||||
|
||||
int iRCCE_get_max_tagged_len(void)
|
||||
{
|
||||
return iRCCE_MAX_TAGGED_LEN;
|
||||
}
|
||||
#endif
|
324
hermit/usr/ircce/iRCCE_waitlist.c
Normal file
324
hermit/usr/ircce/iRCCE_waitlist.c
Normal file
|
@ -0,0 +1,324 @@
|
|||
/****************************************************************************************
|
||||
* Functions for a convenient handling of multiple outstanding non-blocking requests
|
||||
****************************************************************************************
|
||||
*
|
||||
* Authors: Jacek Galowicz, Carsten Clauss
|
||||
* Chair for Operating Systems, RWTH Aachen University
|
||||
* Date: 2010-12-09
|
||||
*
|
||||
****************************************************************************************
|
||||
*
|
||||
* Copyright 2010 Jacek Galowicz, Chair for Operating Systems,
|
||||
* RWTH Aachen University
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "iRCCE_lib.h"
|
||||
|
||||
void iRCCE_init_wait_list(iRCCE_WAIT_LIST *list)
{
	// An empty list has neither a head nor a tail element
	list->last  = NULL;
	list->first = NULL;
}
|
||||
|
||||
static void iRCCE_add_wait_list_generic(iRCCE_WAIT_LIST *list, iRCCE_WAIT_LISTELEM * elem)
{
	// Append elem at the tail of the singly linked list
	if (list->first != NULL) {
		// non-empty list: hook the element behind the current tail
		list->last->next = elem;
	}
	else {
		// empty list: the element becomes the head as well
		list->first = elem;
	}

	list->last = elem;
}
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: iRCCE_add_send_to_wait_list
|
||||
//--------------------------------------------------------------------------------------
|
||||
// Function for adding Send requests to the waitall-queue
|
||||
//--------------------------------------------------------------------------------------
|
||||
void iRCCE_add_send_to_wait_list(iRCCE_WAIT_LIST *list, iRCCE_SEND_REQUEST * req)
{
	// Wrap the send request into a freshly allocated list element and
	// append it to the list. The element is released again by
	// iRCCE_test_all() / iRCCE_test_any() once the request has completed.
	// NOTE(review): the result of malloc() is used without a NULL check.
	iRCCE_WAIT_LISTELEM *node = (iRCCE_WAIT_LISTELEM*)malloc(sizeof(iRCCE_WAIT_LISTELEM));

	node->req = (void*)req;
	node->type = iRCCE_WAIT_LIST_SEND_TYPE;
	node->next = NULL;

	iRCCE_add_wait_list_generic(list, node);
}
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: iRCCE_add_recv_to_wait_list
|
||||
//--------------------------------------------------------------------------------------
|
||||
// Function for adding Recv requests to the waitall-queue
|
||||
//--------------------------------------------------------------------------------------
|
||||
void iRCCE_add_recv_to_wait_list(iRCCE_WAIT_LIST *list, iRCCE_RECV_REQUEST * req)
{
	// Wrap the recv request into a freshly allocated list element and
	// append it to the list. The element is released again by
	// iRCCE_test_all() / iRCCE_test_any() once the request has completed.
	// NOTE(review): the result of malloc() is used without a NULL check.
	iRCCE_WAIT_LISTELEM *node = (iRCCE_WAIT_LISTELEM*)malloc(sizeof(iRCCE_WAIT_LISTELEM));

	node->req = (void*)req;
	node->type = iRCCE_WAIT_LIST_RECV_TYPE;
	node->next = NULL;

	iRCCE_add_wait_list_generic(list, node);
}
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: iRCCE_add_to_wait_list
|
||||
//--------------------------------------------------------------------------------------
|
||||
// Function for adding Send and/or Recv requests to the waitall-queue
|
||||
//--------------------------------------------------------------------------------------
|
||||
void iRCCE_add_to_wait_list(iRCCE_WAIT_LIST *list, iRCCE_SEND_REQUEST * send_req, iRCCE_RECV_REQUEST * recv_req)
{
	// Either request may be NULL; a send request is enqueued ahead of a
	// recv request when both are given
	if (send_req != NULL) {
		iRCCE_add_send_to_wait_list(list, send_req);
	}

	if (recv_req != NULL) {
		iRCCE_add_recv_to_wait_list(list, recv_req);
	}
}
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: iRCCE_wait_all
|
||||
//--------------------------------------------------------------------------------------
|
||||
// Blocking wait for completion of all enqueued send and recv calls
|
||||
//--------------------------------------------------------------------------------------
|
||||
int iRCCE_wait_all(iRCCE_WAIT_LIST *list)
{
	// Busy-poll iRCCE_test_all() until it reports that nothing is pending
	int state;

	do {
		state = iRCCE_test_all(list, NULL);
	} while (state != iRCCE_SUCCESS);

	return iRCCE_SUCCESS;
}
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: iRCCE_test_all
|
||||
//--------------------------------------------------------------------------------------
|
||||
// Nonblocking test for completion of all enqueued send and recv calls
|
||||
// Just provide NULL instead of testvar if you don't need it
|
||||
//--------------------------------------------------------------------------------------
|
||||
int iRCCE_test_all(iRCCE_WAIT_LIST *list, int *test)
{
	// Walk the whole list once and test every enqueued request. Elements
	// whose request has completed are unlinked and freed; all others stay
	// enqueued.
	//
	//   list : wait list to process
	//   test : if not NULL, receives 1 when no request is pending any more
	//          and 0 otherwise
	//
	// Returns iRCCE_SUCCESS if every request has completed (the list is
	// empty afterwards), iRCCE_PENDING otherwise.
	int retval = iRCCE_SUCCESS;
	int req_state;
	iRCCE_WAIT_LISTELEM *pElem = list->first;
	iRCCE_WAIT_LISTELEM *pLastElem = NULL;	// last element kept in the list
	iRCCE_WAIT_LISTELEM *pTemp;

	while (pElem != NULL) {
		if (pElem->type == iRCCE_WAIT_LIST_SEND_TYPE)
			req_state = iRCCE_isend_test((iRCCE_SEND_REQUEST*)pElem->req, NULL);
		else
			req_state = iRCCE_irecv_test((iRCCE_RECV_REQUEST*)pElem->req, NULL);

		if (req_state == iRCCE_SUCCESS) {
			// Remove this element from the list
			pTemp = pElem->next;

			if (pLastElem == NULL) {
				// no predecessor was kept: the element is the head
				list->first = pTemp;
			}
			else {
				pLastElem->next = pTemp;
			}

			// Also move the tail when it is removed. This includes the
			// case of the element being head and tail at once, so that
			// list->last never refers to freed memory.
			if (pElem == list->last) {
				list->last = pLastElem;
			}

			free(pElem);
			pElem = pTemp;
		}
		else {
			retval = iRCCE_PENDING;

			pLastElem = pElem;
			pElem = pElem->next;
		}
	}

	if (test) {
		(*test) = (retval == iRCCE_SUCCESS) ? 1 : 0;
	}

	return retval;
}
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: iRCCE_wait_any
|
||||
//--------------------------------------------------------------------------------------
|
||||
// Blocking wait for completion of any enqueued send and recv request
|
||||
//--------------------------------------------------------------------------------------
|
||||
int iRCCE_wait_any(iRCCE_WAIT_LIST *list, iRCCE_SEND_REQUEST ** send_request, iRCCE_RECV_REQUEST ** recv_request)
{
	// Busy-poll iRCCE_test_any() until it reports a completed request; the
	// completed request is handed back through send_request / recv_request
	int state;

	do {
		state = iRCCE_test_any(list, send_request, recv_request);
	} while (state != iRCCE_SUCCESS);

	return iRCCE_SUCCESS;
}
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTION: iRCCE_test_any
|
||||
//--------------------------------------------------------------------------------------
|
||||
// Nonblocking test for completion of any enqueued send or recv request
|
||||
//--------------------------------------------------------------------------------------
|
||||
int iRCCE_test_any(iRCCE_WAIT_LIST *list, iRCCE_SEND_REQUEST ** send_request, iRCCE_RECV_REQUEST ** recv_request)
{
	// Walk the list and stop at the first request that has completed. Its
	// list element is unlinked and freed and the request is handed back.
	//
	//   list         : wait list to process
	//   send_request : if not NULL, receives the completed request when it
	//                  is a send request, NULL otherwise
	//   recv_request : if not NULL, receives the completed request when it
	//                  is a recv request, NULL otherwise
	//
	// Returns iRCCE_SUCCESS if a completed request was found and
	// iRCCE_PENDING otherwise (both output pointers are set to NULL then).
	int req_state;
	int is_send;
	iRCCE_WAIT_LISTELEM *pElem = list->first;
	iRCCE_WAIT_LISTELEM *pLastElem = NULL;	// predecessor of pElem

	while (pElem != NULL) {
		is_send = (pElem->type == iRCCE_WAIT_LIST_SEND_TYPE);

		if (is_send)
			req_state = iRCCE_isend_test((iRCCE_SEND_REQUEST*)pElem->req, NULL);
		else
			req_state = iRCCE_irecv_test((iRCCE_RECV_REQUEST*)pElem->req, NULL);

		if (req_state == iRCCE_SUCCESS) {
			// Remove this element from the list
			if (pLastElem == NULL) {
				// no predecessor: the element is the head
				list->first = pElem->next;
			}
			else {
				pLastElem->next = pElem->next;
			}

			// Also move the tail when it is removed. This includes the
			// case of the element being head and tail at once, so that
			// list->last never refers to freed memory.
			if (pElem == list->last) {
				list->last = pLastElem;
			}

			// Report the request through the pointer matching its type
			// and clear the other one
			if (send_request) {
				(*send_request) = is_send ? (iRCCE_SEND_REQUEST*)pElem->req : NULL;
			}
			if (recv_request) {
				(*recv_request) = is_send ? NULL : (iRCCE_RECV_REQUEST*)pElem->req;
			}

			free(pElem);

			return iRCCE_SUCCESS;
		}

		pLastElem = pElem;
		pElem = pElem->next;
	}

	// nothing has completed yet
	if (send_request) {
		(*send_request) = NULL;
	}
	if (recv_request) {
		(*recv_request) = NULL;
	}

	return iRCCE_PENDING;
}
|
||||
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// FUNCTIONS: iRCCE_get_dest, iRCCE_get_source, iRCCE_get_size, iRCCE_get_length, iRCCE_get_status
|
||||
//--------------------------------------------------------------------------------------
|
||||
// Functions to determine the respective sender/receiver after test_any() / wait_any()
|
||||
// (Can also be used after receiving a message via wildcard mechanism!)
|
||||
//--------------------------------------------------------------------------------------
|
||||
int iRCCE_get_dest(iRCCE_SEND_REQUEST *request)
{
	// Without a request there is nothing to report
	if (request == NULL) {
		return iRCCE_ERROR;
	}

	return request->dest;
}
|
||||
//--------------------------------------------------------------------------------------
|
||||
int iRCCE_get_source(iRCCE_RECV_REQUEST *request)
{
	// Without a request, fall back to the global iRCCE_recent_source
	if (request == NULL) {
		return iRCCE_recent_source;
	}

	return request->source;
}
|
||||
//--------------------------------------------------------------------------------------
|
||||
int iRCCE_get_size(iRCCE_SEND_REQUEST * send_req, iRCCE_RECV_REQUEST * recv_req)
{
	// The send request takes precedence when both requests are given
	if (send_req != NULL) {
		return send_req->size;
	}

	if (recv_req != NULL) {
		return recv_req->size;
	}

	// Neither request given: fall back to the global iRCCE_recent_length
	return iRCCE_recent_length;
}
|
||||
//--------------------------------------------------------------------------------------
|
||||
int iRCCE_get_length(void)
|
||||
{
|
||||
return iRCCE_recent_length;
|
||||
}
|
||||
//--------------------------------------------------------------------------------------
|
||||
int iRCCE_get_status(iRCCE_SEND_REQUEST * send_req, iRCCE_RECV_REQUEST * recv_req)
{
	// Report the state of a request; the send request takes precedence when
	// both are given:
	//   iRCCE_SUCCESS  - the request is marked as finished
	//   iRCCE_PENDING  - the request is the head of its queue
	//   iRCCE_RESERVED - the request is neither finished nor the queue head
	//   iRCCE_ERROR    - no request was given at all
	if (send_req != NULL) {
		if (send_req->finished) {
			return(iRCCE_SUCCESS);
		}

		if (iRCCE_isend_queue == send_req) {
			return(iRCCE_PENDING);
		}

		return(iRCCE_RESERVED);
	}

	if (recv_req != NULL) {
		if (recv_req->finished) {
			return(iRCCE_SUCCESS);
		}

		// recv requests are queued per source
		if (iRCCE_irecv_queue[recv_req->source] == recv_req) {
			return(iRCCE_PENDING);
		}

		return(iRCCE_RESERVED);
	}

	return iRCCE_ERROR;
}
|
110
hermit/usr/ircce/syscall.h
Normal file
110
hermit/usr/ircce/syscall.h
Normal file
|
@ -0,0 +1,110 @@
|
|||
/*
|
||||
* Copyright (c) 2011, Stefan Lankes, RWTH Aachen University
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef __SYSCALL_H__
|
||||
#define __SYSCALL_H__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* system call numbers, handed to syscall() as its first argument */
#define __NR_exit			0
#define __NR_write			1
#define __NR_open			2
#define __NR_close			3
#define __NR_read			4
#define __NR_lseek			5
#define __NR_unlink			6
#define __NR_getpid			7
#define __NR_kill			8
#define __NR_fstat			9
#define __NR_sbrk			10
#define __NR_fork			11
#define __NR_wait			12
#define __NR_execve			13
#define __NR_times			14
#define __NR_accept			15
#define __NR_bind			16
#define __NR_closesocket		17
#define __NR_connect			18
#define __NR_listen			19
#define __NR_recv			20
#define __NR_send			21
#define __NR_socket			22
#define __NR_getsockopt			23
#define __NR_setsockopt			24
#define __NR_gethostbyname		25
#define __NR_sendto			26
#define __NR_recvfrom			27
#define __NR_select			28
#define __NR_stat			29
#define __NR_dup			30
#define __NR_dup2			31
#define __NR_msleep			32
#define __NR_yield			33
#define __NR_sem_init			34
#define __NR_sem_destroy		35
#define __NR_sem_wait			36
#define __NR_sem_post			37
#define __NR_sem_timedwait		38
#define __NR_getprio			39
#define __NR_setprio			40
#define __NR_clone			41
#define __NR_sem_cancelablewait		42
#define __NR_get_ticks			43
#define __NR_rcce_init			44
#define __NR_rcce_fini			45
#define __NR_rcce_malloc		46
|
||||
|
||||
inline static long
syscall(int nr, unsigned long arg0, unsigned long arg1, unsigned long arg2)
{
	long ret;

	// Register usage as given by the constraints below: the call number is
	// passed in and the result returned via the "a" register; the three
	// arguments use the "D", "S" and "d" registers.
	// note: syscall stores the return address in rcx and rflags in r11,
	// hence both are listed as clobbered
	__asm__ __volatile__ ("syscall"
		: "=a" (ret)
		: "a" (nr), "D" (arg0), "S" (arg1), "d" (arg2)
		: "memory", "%rcx", "%r11");

	return ret;
}
|
||||
|
||||
/*
 * Convenience wrappers around syscall() for zero to three arguments.
 * Unused argument slots are passed as 0. Every macro argument is
 * parenthesized before it is cast, so that an expression such as
 * SYSCALL1(nr, a + b) casts the whole expression and not only "a".
 */
#define SYSCALL0(NR) \
	syscall((NR), 0, 0, 0)
#define SYSCALL1(NR, ARG0) \
	syscall((NR), (unsigned long)(ARG0), 0, 0)
#define SYSCALL2(NR, ARG0, ARG1) \
	syscall((NR), (unsigned long)(ARG0), (unsigned long)(ARG1), 0)
#define SYSCALL3(NR, ARG0, ARG1, ARG2) \
	syscall((NR), (unsigned long)(ARG0), (unsigned long)(ARG1), (unsigned long)(ARG2))
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
Loading…
Add table
Reference in a new issue