///************************************************************************************* // Synchronization functions. // Single-bit and whole-cache-line flags are sufficiently different that we provide // separate implementations of the synchronization routines for each case //************************************************************************************** // // Author: Rob F. Van der Wijngaart // Intel Corporation // Date: 008/30/2010 // //************************************************************************************** // // Copyright 2010 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // #include "RCCE_lib.h" #ifdef __hermit__ #include "rte_memcpy.h" #define memcpy_scc rte_memcpy #elif defined(COPPERRIDGE) #include "scc_memcpy.h" #else #define memcpy_scc memcpy #endif #ifdef USE_BYTE_FLAGS #include "RCCE_byte_synch.c" #else #ifdef SINGLEBITFLAGS ////////////////////////////////////////////////////////////////// // LOCKING SYNCHRONIZATION USING ONE BIT PER FLAG ////////////////////////////////////////////////////////////////// //-------------------------------------------------------------------------------------- // FUNCTION: RCCE_wait_until //-------------------------------------------------------------------------------------- // wait until flag in local MPB becomes set or unset. To avoid reading stale data from // the cache instead of new flag value from the MPB, issue MPB cache invalidation before // each read, including within the spin cycle //-------------------------------------------------------------------------------------- int RCCE_wait_until(RCCE_FLAG flag, RCCE_FLAG_STATUS val) { t_vcharp cflag; cflag = flag.line_address; // avoid tests if we use the simplified API #ifdef GORY if (val != RCCE_FLAG_UNSET && val != RCCE_FLAG_SET) return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_STATUS_UNDEFINED)); if (!cflag) return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_NOT_ALLOCATED)); // check to see if flag is properly contained in the local comm buffer if (cflag - RCCE_comm_buffer[RCCE_IAM]>=0 && cflag+RCCE_LINE_SIZE - (RCCE_comm_buffer[RCCE_IAM] + RCCE_BUFF_SIZE)<0){} else { return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_NOT_IN_COMM_BUFFER)); } #endif // always flush/invalidate to ensure we read the most recent value of *flag // keep reading it until it has the required value do { #ifdef _OPENMP #pragma omp flush #endif RC_cache_invalidate(); } while ((RCCE_bit_value(cflag, flag.location) != val)); return(RCCE_SUCCESS); } int RCCE_test_flag(RCCE_FLAG flag, RCCE_FLAG_STATUS val, int *result) { t_vcharp cflag; cflag = flag.line_address; // avoid tests if we use the simplified API #ifdef GORY if (val != RCCE_FLAG_UNSET && val != RCCE_FLAG_SET) return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_STATUS_UNDEFINED)); if (!cflag) return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_NOT_ALLOCATED)); // check to see if flag is properly contained in the local comm buffer if (cflag - RCCE_comm_buffer[RCCE_IAM]>=0 && cflag+RCCE_LINE_SIZE - (RCCE_comm_buffer[RCCE_IAM] + RCCE_BUFF_SIZE)<0){} else { return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_NOT_IN_COMM_BUFFER)); } #endif // always flush/invalidate to ensure we read the most recent value of *flag // keep reading it until it has the required value #ifdef _OPENMP #pragma omp flush #endif RC_cache_invalidate(); if(RCCE_bit_value(cflag, flag.location) != val) { (*result) = 0; } else { (*result) = 1; } return(RCCE_SUCCESS); } //-------------------------------------------------------------------------------------- // FUNCTION: RCCE_barrier //-------------------------------------------------------------------------------------- // very simple, linear barrier //-------------------------------------------------------------------------------------- int RCCE_barrier(RCCE_COMM *comm) { t_vchar cyclechar[RCCE_LINE_SIZE] __attribute__ ((aligned (RCCE_LINE_SIZE))); t_vchar valchar [RCCE_LINE_SIZE] __attribute__ ((aligned (RCCE_LINE_SIZE))); int counter, i, error; int ROOT = 0; t_vcharp gatherp, releasep; RCCE_FLAG_STATUS cycle; counter = 0; gatherp = comm->gather.line_address; if (RCCE_debug_synch) fprintf(STDERR,"UE %d has checked into barrier\n", RCCE_IAM); // flip local barrier variable if (error = RCCE_get(cyclechar, gatherp, RCCE_LINE_SIZE, RCCE_IAM)) return(RCCE_error_return(RCCE_debug_synch,error)); cycle = RCCE_flip_bit_value(cyclechar, comm->gather.location); if (error = RCCE_put(comm->gather.line_address, cyclechar, RCCE_LINE_SIZE, RCCE_IAM)) return(RCCE_error_return(RCCE_debug_synch,error)); if (RCCE_IAM==comm->member[ROOT]) { // read "remote" gather flags; once all equal "cycle" (i.e counter==comm->size), // we know all UEs have reached the barrier while (counter != comm->size) { // skip the first member (#0), because that is the ROOT for (counter=i=1; isize; i++) { // copy flag values out of comm buffer if (error = RCCE_get(valchar, comm->gather.line_address, RCCE_LINE_SIZE, comm->member[i])) return(RCCE_error_return(RCCE_debug_synch,error)); if (RCCE_bit_value(valchar, comm->gather.location) == cycle) counter++; } } // set release flags for (i=1; isize; i++) if (error = RCCE_flag_write(&(comm->release), cycle, comm->member[i])) return(RCCE_error_return(RCCE_debug_synch,error)); } else { if (error = RCCE_wait_until(comm->release, cycle)) return(RCCE_error_return(RCCE_debug_synch,error)); } if (RCCE_debug_synch) fprintf(STDERR,"UE %d has cleared barrier\n", RCCE_IAM); return(RCCE_SUCCESS); } #else ////////////////////////////////////////////////////////////////// // LOCKLESS SYNCHRONIZATION USING ONE WHOLE CACHE LINE PER FLAG // ////////////////////////////////////////////////////////////////// //-------------------------------------------------------------------------------------- // FUNCTION: RCCE_wait_until //-------------------------------------------------------------------------------------- // wait until flag in local MPB becomes set or unset. To avoid reading stale data from // the cache instead of new flag value from the MPB, issue MPB cache invalidation before // each read, including within the spin cycle //-------------------------------------------------------------------------------------- int RCCE_wait_until(RCCE_FLAG flag, RCCE_FLAG_STATUS val) { t_vcharp cflag; cflag = (t_vcharp) flag; #ifdef GORY if (val != RCCE_FLAG_UNSET && val != RCCE_FLAG_SET) return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_STATUS_UNDEFINED)); if (!cflag) return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_NOT_ALLOCATED)); // check to see if flag is properly contained in the local comm buffer if (cflag - RCCE_comm_buffer[RCCE_IAM]>=0 && cflag+RCCE_LINE_SIZE - (RCCE_comm_buffer[RCCE_IAM] + RCCE_BUFF_SIZE)<0){} else { return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_NOT_IN_COMM_BUFFER)); } #endif #ifdef USE_REVERTED_FLAGS flag = flag + RCCE_LINE_SIZE / sizeof(int) - 1; #endif // always flush/invalidate to ensure we read the most recent value of *flag // keep reading it until it has the required value. We only need to read the // first int of the MPB cache line containing the flag #ifndef USE_FLAG_EXPERIMENTAL do { #ifdef _OPENMP #pragma omp flush #endif RC_cache_invalidate(); } while ((*flag) != val); #else if (RCCE_debug_synch) fprintf(STDERR,"UE %d wait flag: %x from address %X \n", RCCE_IAM,val,flag); flag = RCCE_flag_buffer[RCCE_IAM]+(flag-RCCE_comm_buffer[RCCE_IAM]); while ((*flag) != val); #endif return(RCCE_SUCCESS); } #ifdef USE_TAGGED_FLAGS int RCCE_wait_tagged(RCCE_FLAG flag, RCCE_FLAG_STATUS val, void *tag, int len) { int i, j; RCCE_FLAG flag_pos; #ifndef USE_REVERTED_FLAGS flag_pos = flag; #else flag_pos = flag + RCCE_LINE_SIZE / sizeof(int) - 1; #endif do { #ifdef _OPENMP #pragma omp flush #endif RC_cache_invalidate(); } while ((*flag_pos) != val); if(tag) { if( len > ( RCCE_LINE_SIZE - sizeof(int) ) ) len = RCCE_LINE_SIZE - sizeof(int); #ifndef USE_REVERTED_FLAGS memcpy_scc(tag, &((char*)flag)[sizeof(int)], len); #else memcpy_scc(tag, &((char*)flag)[0], len); #endif } return(RCCE_SUCCESS); } #endif int RCCE_test_flag(RCCE_FLAG flag, RCCE_FLAG_STATUS val, int *result) { t_vcharp cflag; cflag = (t_vcharp) flag; #ifdef GORY if (val != RCCE_FLAG_UNSET && val != RCCE_FLAG_SET) return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_STATUS_UNDEFINED)); if (!cflag) return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_NOT_ALLOCATED)); // check to see if flag is properly contained in the local comm buffer if (cflag - RCCE_comm_buffer[RCCE_IAM]>=0 && cflag+RCCE_LINE_SIZE - (RCCE_comm_buffer[RCCE_IAM] + RCCE_BUFF_SIZE)<0){} else { return(RCCE_error_return(RCCE_debug_synch,RCCE_ERROR_FLAG_NOT_IN_COMM_BUFFER)); } #endif #ifdef USE_REVERTED_FLAGS flag = flag + RCCE_LINE_SIZE / sizeof(int) - 1; #endif // always flush/invalidate to ensure we read the most recent value of *flag // keep reading it until it has the required value. We only need to read the // first int of the MPB cache line containing the flag #ifdef _OPENMP #pragma omp flush #endif #ifndef USE_FLAG_EXPERIMENTAL RC_cache_invalidate(); #endif if((*flag) != val) { (*result) = 0; } else { (*result) = 1; } return(RCCE_SUCCESS); } #ifdef USE_TAGGED_FLAGS int RCCE_test_tagged(RCCE_FLAG flag, RCCE_FLAG_STATUS val, int *result, void *tag, int len) { int i, j; RCCE_FLAG flag_pos; #ifndef USE_REVERTED_FLAGS flag_pos = flag; #else flag_pos = flag + RCCE_LINE_SIZE / sizeof(int) -1; #endif RC_cache_invalidate(); if((*flag_pos) != val) { (*result) = 0; } else { (*result) = 1; } if((*result) && tag) { if( len > ( RCCE_LINE_SIZE - sizeof(int) ) ) len = RCCE_LINE_SIZE - sizeof(int); #ifndef USE_REVERTED_FLAGS memcpy_scc(tag, &((char*)flag)[sizeof(int)], len); #else memcpy_scc(tag, &((char*)flag)[0], len); #endif } return(RCCE_SUCCESS); } #endif //-------------------------------------------------------------------------------------- // FUNCTION: RCCE_barrier //-------------------------------------------------------------------------------------- // very simple, linear barrier //-------------------------------------------------------------------------------------- int RCCE_barrier(RCCE_COMM *comm) { volatile unsigned char cyclechar[RCCE_LINE_SIZE] __attribute__ ((aligned (RCCE_LINE_SIZE))); volatile unsigned char valchar[RCCE_LINE_SIZE] __attribute__ ((aligned (RCCE_LINE_SIZE))); volatile char *cycle; volatile char *val; int counter, i, error; int ROOT = 0; counter = 0; cycle = (volatile char *)cyclechar; val = (volatile char *)valchar; if (RCCE_debug_synch) fprintf(STDERR,"UE %d has checked into barrier\n", RCCE_IAM); #ifdef USE_FAT_BARRIER // flip local barrier variable #ifndef USE_FLAG_EXPERIMENTAL if ((error = RCCE_get(cyclechar, (t_vcharp)(comm->gather[RCCE_IAM]), RCCE_LINE_SIZE, RCCE_IAM))) #else if ((error = RCCE_get_flag(cyclechar, (t_vcharp)(comm->gather[RCCE_IAM]), RCCE_LINE_SIZE, RCCE_IAM))) #endif return(RCCE_error_return(RCCE_debug_synch,error)); *cycle = !(*cycle); #ifndef USE_FLAG_EXPERIMENTAL if ((error = RCCE_put((t_vcharp)(comm->gather[RCCE_IAM]), cyclechar, RCCE_LINE_SIZE, RCCE_IAM))) #else if ((error = RCCE_put_flag((t_vcharp)(comm->gather[RCCE_IAM]), cyclechar, RCCE_LINE_SIZE, RCCE_IAM))) #endif return(RCCE_error_return(RCCE_debug_synch,error)); if ((error = RCCE_put((t_vcharp)(comm->gather[RCCE_IAM]), cyclechar, RCCE_LINE_SIZE, comm->member[ROOT]))) return(RCCE_error_return(RCCE_debug_synch,error)); if (RCCE_IAM==comm->member[ROOT]) { // read "remote" gather flags; once all equal "cycle" (i.e counter==comm->size), // we know all UEs have reached the barrier while (counter != comm->size) { // skip the first member (#0), because that is the ROOT for (counter=i=1; isize; i++) { /* copy flag values out of comm buffer */ #ifndef USE_FLAG_EXPERIMENTAL if ((error = RCCE_get(valchar, (t_vcharp)(comm->gather[i]), RCCE_LINE_SIZE, RCCE_IAM))) #else if ((error = RCCE_get_flag(valchar, (t_vcharp)(comm->gather[i]), RCCE_LINE_SIZE, RCCE_IAM))) #endif return(RCCE_error_return(RCCE_debug_synch,error)); if (*val == *cycle) counter++; } } // set release flags for (i=1; isize; i++) { if ((error = RCCE_flag_write(&(comm->release), *cycle, comm->member[i]))) return(RCCE_error_return(RCCE_debug_synch,error)); } } else { if ((error = RCCE_wait_until(comm->release, *cycle))) return(RCCE_error_return(RCCE_debug_synch,error)); } #else // !USE_FAT_BARRIER // flip local barrier variable #ifndef USE_FLAG_EXPERIMENTAL if ((error = RCCE_get(cyclechar, (t_vcharp)(comm->gather), RCCE_LINE_SIZE, RCCE_IAM))) #else if ((error = RCCE_get_flag(cyclechar, (t_vcharp)(comm->gather), RCCE_LINE_SIZE, RCCE_IAM))) #endif return(RCCE_error_return(RCCE_debug_synch,error)); *cycle = !(*cycle); #ifndef USE_FLAG_EXPERIMENTAL if ((error = RCCE_put((t_vcharp)(comm->gather), cyclechar, RCCE_LINE_SIZE, RCCE_IAM))) #else if ((error = RCCE_put_flag((t_vcharp)(comm->gather), cyclechar, RCCE_LINE_SIZE, RCCE_IAM))) #endif return(RCCE_error_return(RCCE_debug_synch,error)); if (RCCE_IAM==comm->member[ROOT]) { // read "remote" gather flags; once all equal "cycle" (i.e counter==comm->size), // we know all UEs have reached the barrier while (counter != comm->size) { // skip the first member (#0), because that is the ROOT for (counter=i=1; isize; i++) { /* copy flag values out of comm buffer */ #ifndef USE_FLAG_EXPERIMENTAL if ((error = RCCE_get(valchar, (t_vcharp)(comm->gather), RCCE_LINE_SIZE, comm->member[i]))) #else if ((error = RCCE_get_flag(valchar, (t_vcharp)(comm->gather), RCCE_LINE_SIZE, comm->member[i]))) #endif return(RCCE_error_return(RCCE_debug_synch,error)); if (*val == *cycle) counter++; } } // set release flags for (i=1; isize; i++) { if ((error = RCCE_flag_write(&(comm->release), *cycle, comm->member[i]))) return(RCCE_error_return(RCCE_debug_synch,error)); } } else { if ((error = RCCE_wait_until(comm->release, *cycle))) { return(RCCE_error_return(RCCE_debug_synch,error)); } } #endif // !USE_FAT_BARRIER if (RCCE_debug_synch) fprintf(STDERR,"UE %d has cleared barrier\n", RCCE_IAM); return(RCCE_SUCCESS); } //-------------------------------------------------------------------------------------- // FUNCTION: RCCE_nb_barrier //-------------------------------------------------------------------------------------- // non-blocking version of the linear barrier //-------------------------------------------------------------------------------------- int RCCE_nb_barrier(RCCE_COMM *comm) { volatile unsigned char cyclechar[RCCE_LINE_SIZE] __attribute__ ((aligned (RCCE_LINE_SIZE))); volatile unsigned char valchar[RCCE_LINE_SIZE] __attribute__ ((aligned (RCCE_LINE_SIZE))); int i, error; int ROOT = 0; #ifdef USE_FLAG_EXPERIMENTAL volatile char *cycle; volatile char *val; cycle = (volatile char *)cyclechar; val = (volatile char *)valchar; #else volatile int *cycle; volatile int *val; cycle = (volatile int *)cyclechar; val = (volatile int *)valchar; #endif if(comm->label == 1) goto label1; if(comm->label == 2) goto label2; comm->count = 0; if (RCCE_debug_synch) fprintf(STDERR,"UE %d has checked into barrier\n", RCCE_IAM); #ifdef USE_FAT_BARRIER // flip local barrier variable #ifndef USE_FLAG_EXPERIMENTAL if ((error = RCCE_get(cyclechar, (t_vcharp)(comm->gather[RCCE_IAM]), RCCE_LINE_SIZE, RCCE_IAM))) #else if ((error = RCCE_get_flag(cyclechar, (t_vcharp)(comm->gather[RCCE_IAM]), RCCE_LINE_SIZE, RCCE_IAM))) #endif return(RCCE_error_return(RCCE_debug_synch,error)); *cycle = !(*cycle); #ifndef USE_FLAG_EXPERIMENTAL if ((error = RCCE_put((t_vcharp)(comm->gather[RCCE_IAM]), cyclechar, RCCE_LINE_SIZE, RCCE_IAM))) #else if ((error = RCCE_put_flag((t_vcharp)(comm->gather[RCCE_IAM]), cyclechar, RCCE_LINE_SIZE, RCCE_IAM))) #endif return(RCCE_error_return(RCCE_debug_synch,error)); if ((error = RCCE_put((t_vcharp)(comm->gather[RCCE_IAM]), cyclechar, RCCE_LINE_SIZE, comm->member[ROOT]))) return(RCCE_error_return(RCCE_debug_synch,error)); if (RCCE_IAM==comm->member[ROOT]) { // read "remote" gather flags; once all equal "cycle" (i.e counter==comm->size), // we know all UEs have reached the barrier comm->cycle = *cycle; label1: while (comm->count != comm->size) { // skip the first member (#0), because that is the ROOT for (comm->count=i=1; isize; i++) { /* copy flag values out of comm buffer */ #ifndef USE_FLAG_EXPERIMENTAL if ((error = RCCE_get(valchar, (t_vcharp)(comm->gather[i]), RCCE_LINE_SIZE, RCCE_IAM))) #else if ((error = RCCE_get_flag(valchar, (t_vcharp)(comm->gather[i]), RCCE_LINE_SIZE, RCCE_IAM))) #endif return(RCCE_error_return(RCCE_debug_synch,error)); if (*val == comm->cycle) comm->count++; } if(comm->count != comm->size) { comm->label = 1; return(RCCE_PENDING); } } // set release flags for (i=1; isize; i++) { if ((error = RCCE_flag_write(&(comm->release), comm->cycle, comm->member[i]))) return(RCCE_error_return(RCCE_debug_synch,error)); } } else { int test; comm->cycle = *cycle; label2: RCCE_test_flag(comm->release, comm->cycle, &test); if(!test) { comm->label = 2; return(RCCE_PENDING); } } comm->label = 0; #else // !USE_FAT_BARRIER // flip local barrier variable #ifndef USE_FLAG_EXPERIMENTAL if ((error = RCCE_get(cyclechar, (t_vcharp)(comm->gather[0]), RCCE_LINE_SIZE, RCCE_IAM))) #else if ((error = RCCE_get_flag(cyclechar, (t_vcharp)(comm->gather[0]), RCCE_LINE_SIZE, RCCE_IAM))) #endif return(RCCE_error_return(RCCE_debug_synch,error)); *cycle = !(*cycle); #ifndef USE_FLAG_EXPERIMENTAL if ((error = RCCE_put((t_vcharp)(comm->gather[0]), cyclechar, RCCE_LINE_SIZE, RCCE_IAM))) #else if ((error = RCCE_put_flag((t_vcharp)(comm->gather[0]), cyclechar, RCCE_LINE_SIZE, RCCE_IAM))) #endif return(RCCE_error_return(RCCE_debug_synch,error)); if (RCCE_IAM==comm->member[ROOT]) { // read "remote" gather flags; once all equal "cycle" (i.e counter==comm->size), // we know all UEs have reached the barrier comm->cycle = *cycle; label1: while (comm->count != comm->size) { // skip the first member (#0), because that is the ROOT for (comm->count=i=1; isize; i++) { /* copy flag values out of comm buffer */ #ifndef USE_FLAG_EXPERIMENTAL if ((error = RCCE_get(valchar, (t_vcharp)(comm->gather[0]), RCCE_LINE_SIZE, comm->member[i]))) #else if ((error = RCCE_get_flag(valchar, (t_vcharp)(comm->gather[0]), RCCE_LINE_SIZE, comm->member[i]))) #endif return(RCCE_error_return(RCCE_debug_synch,error)); if (*val == comm->cycle) comm->count++; } if(comm->count != comm->size) { comm->label = 1; return(RCCE_PENDING); } } // set release flags for (i=1; isize; i++) { if ((error = RCCE_flag_write(&(comm->release), comm->cycle, comm->member[i]))) return(RCCE_error_return(RCCE_debug_synch,error)); } } else { int test; comm->cycle = *cycle; label2: RCCE_test_flag(comm->release, comm->cycle, &test); if(!test) { comm->label = 2; return(RCCE_PENDING); } } comm->label = 0; #endif // !USE_FAT_BARRIER if (RCCE_debug_synch) fprintf(STDERR,"UE %d has cleared barrier\n", RCCE_IAM); return(RCCE_SUCCESS); } #endif void RCCE_fence() { return; } #endif