/*
 * Copyright 2011 Stefan Lankes, Alexander Pilz, Maximilian Marx, Michael Ober,
 *                Chair for Operating Systems, RWTH Aachen University
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

/*
 * Jacobi iteration test that uses the SVM (svm_malloc / svm_barrier /
 * svm_flush / svm_invalidate) and RCCE interfaces.
 *
 * NOTE(review): this copy of the file is damaged. The header names of the
 * eight #include directives below are missing, and several loop headers and
 * loop bodies further down have been cut out (each such place is marked).
 * Every surviving token is kept unchanged and in its original order; the
 * file has to be restored from the upstream repository before it can be
 * compiled. Indentation inside the damaged regions is approximate.
 */

/* NOTE(review): header names missing in this copy - restore from upstream. */
#include
#include
#include
#include
#include
#include
#include
#include
#include "tests.h"

/* The whole file is compiled only for these configurations. */
#if (defined(START_KERNEL_JACOBI) || defined(START_CHIEFTEST)) && defined(CONFIG_ROCKCREEK)

/* Dimension passed to generate_empty_matrix() and used to size X / X_old. */
#define MATRIX_SIZE 256
/* Not referenced in the visible code (may be used in a cut-out region). */
#define MAXVALUE 1337
/* Size in bytes of the `temp` buffer allocated in jacobi(). */
#define PAGE_SIZE 4096
#define CACHE_SIZE (256*1024)
/* Not referenced in the visible code. */
#define SIZE ((MATRIX_SIZE+1)*MATRIX_SIZE*sizeof(double)+2*MATRIX_SIZE*sizeof(double)+10*PAGE_SIZE+CACHE_SIZE)
/* Rounds x up to a multiple of a; the mask form assumes a is a power of two. */
#define ALIGN(x,a) (((x)+(a)-1)&~((a)-1))
/* Bit mask applied to the generator state in rand(): keeps the low 15 bits. */
#define RAND_MAX 32767

/* SVM model used for the matrix, X and X_old, and for most barriers. */
//#define SVM_TYPE SVM_STRONG
#define SVM_TYPE SVM_LAZYRELEASE

/*
 * Absolute value of x.
 * NOTE(review): the expansion is not wrapped in parentheses as a whole, so
 * using it as an operand (e.g. `fabs(a) > b`) binds `> b` to the last branch
 * of the conditional only. x is also evaluated twice. Check every use in the
 * restored file before changing the macro.
 */
#define fabs(x) (x) >= 0 ? (x) : -1.0*(x)

/* State of the pseudo-random generator below; 0 means "not seeded". */
static unsigned int seed = 0;

/*
 * Stores s as the new generator state.
 * Always returns 0.
 */
static int srand(unsigned int s) {
	seed = s;

	return 0;
}

/*
 * Pseudo-random generator based on Minimal Standard by
 * Lewis, Goodman, and Miller in 1969.
 *
 *   I[j+1] = a*I[j] (mod m)
 *
 * where a = 16807
 *       m = 2147483647
 *
 * Using Schrage's algorithm, a*I[j] (mod m) can be rewritten as:
 *
 *   a*(I[j] mod q) - r*{I[j]/q}      if >= 0
 *   a*(I[j] mod q) - r*{I[j]/q} + m  otherwise
 *
 * where: {} denotes integer division
 *        q = {m/a} = 127773
 *        r = m (mod a) = 2836
 *
 * note that the seed value of 0 cannot be used in the calculation as
 * it results in 0 itself
 *
 * This implementation substitutes 0x12345987 when the stored seed is 0,
 * writes the new state back to `seed`, and returns only the low 15 bits of
 * the state (state & RAND_MAX), i.e. a value in 0..32767.
 */
static int rand(void) {
	long k;
	long s = (long)(seed);

	/* a zero state would stay zero forever, so replace it */
	if (s == 0)
		s = 0x12345987;

	/* Schrage step: k = {s/q}, s = a*(s mod q) - r*k */
	k = s / 127773;
	s = 16807 * (s - k * 127773) - 2836 * k;
	if (s < 0)
		s += 2147483647;

	seed = (unsigned int)s;

	return (int)(s & RAND_MAX);
}

/*
 * Emits the two opcode bytes 0x0f 0x0a, which the original author labels
 * CL1FLUSHMB. Not called anywhere in the visible code.
 */
static inline void cache_invalidate(void) {
	asm volatile ( ".byte 0x0f; .byte 0x0a;\n" ); // CL1FLUSHMB
}

/*
 * Allocates the matrix storage and (in the part that is cut out of this
 * copy) fills it.
 *
 * A      - out: receives an array of N+1 row pointers obtained from
 *          kmalloc(); (*A)[0] points to one svm_malloc() region of
 *          (N+1)*N doubles.
 * N      - matrix dimension.
 * rankID - index of the calling core, used to select its row range.
 *
 * Returns 0 on success and -2 if either allocation fails.
 *
 * NOTE(review): both `return -2` paths leave before the svm_barrier() call
 * that follows them; if svm_barrier() must be reached by every core, the
 * other cores may wait forever - confirm. The second failure path also does
 * not release *A.
 */
static int generate_empty_matrix(double*** A , unsigned int N, int rankID) {
	unsigned int iCnt;
	int i,j;
	unsigned int iter_start, iter_end, pad;
	int num = RCCE_NP;

	/*
	 * Split the N rows among `num` cores. If the even share is not a
	 * multiple of 4, it is rounded down to one and the first p cores take
	 * 4 extra rows each.
	 */
	pad = N/num;
	if (pad % 4) {
		pad -= pad % 4;
		unsigned int p = (N - num * pad) / 4;

		/* NOTE(review): int compared with unsigned int (rankID is converted). */
		if (rankID < p) {
			iter_start = rankID*(pad+4);
			iter_end = (rankID+1)*(pad+4);
		} else {
			iter_start = p*(pad+4)+(rankID-p)*pad;
			iter_end = p*(pad+4)+(rankID+1-p)*pad;
		}
	} else {
		iter_start = rankID*pad;
		iter_end = (rankID+1)*pad;
	}
	/* NOTE(review): %d is used for unsigned int arguments. */
	kprintf("iter_start %d, iter_end %d\n", iter_start, iter_end);

	/* array of row pointers */
	*A = (double**) kmalloc((N+1)*sizeof(double*));
	if (*A == NULL)
		return -2; /* Error */

	svm_barrier(SVM_TYPE);

	/* one contiguous region for all matrix elements, stored in row 0 */
	**A = (double*) svm_malloc((N+1)*N*sizeof(double), SVM_TYPE);
	if (**A == NULL)
		return -2; /* Error */

	svm_barrier(SVM_TYPE);

	/*
	 * NOTE(review): DAMAGED REGION. Everything between the start of the
	 * loop condition and the end of a comment about diagonal dominance is
	 * missing in this copy (including the declaration of `sum` and the
	 * opening braces that the closing braces below belong to). The next
	 * line is the unmodified residue and is not valid C; its last two
	 * characters are the terminator of the lost comment.
	 */
	for(iCnt=1; iCnt Sum |A[i][j]| with (i != j) */
			/* row i: diagonal element and column N both get sum + 2.0 */
			(*A)[i][i] = sum + 2.0;
			(*A)[i][N] += sum + 2.0;
		}

		svm_flush(0);
		svm_invalidate();
	}

	svm_barrier(SVM_TYPE);

	return 0;
}

/*
 * Entry point of the Jacobi test.
 *
 * Steps that are visible in this copy:
 *   - build the matrix with generate_empty_matrix(),
 *   - allocate X and X_old (MATRIX_SIZE doubles each, SVM_TYPE) and `temp`
 *     (PAGE_SIZE bytes, SVM_LAZYRELEASE),
 *   - run the iteration (mostly cut out), combining per-core `norm` values
 *     through temp[0] under RCCE lock 0,
 *   - take the stop time with rdtsc() and print the results.
 *
 * argv is not used in the visible code.
 *
 * Returns -1 if matrix generation or an allocation fails, otherwise `err`
 * (declared in a cut-out region; set to 1 in the visible "wrong result"
 * branch).
 */
int jacobi(void* argv) {
	volatile double* temp = NULL;
	volatile double* swap;
	unsigned int i, j, k, iter_start, iter_end, pad;
	unsigned int iterations = 0;
	double error, norm, max = 0.0;
	double** A=0;
	volatile double* X;
	volatile double* X_old;
	double xi;
	uint64_t start, stop;
	int rankID, num;

	rankID = RCCE_IAM;
	num = RCCE_NP;

	if (generate_empty_matrix(&A,MATRIX_SIZE,rankID) < 0) {
		kprintf("generate_empty_matrix() failed...\n");
		return -1;
	}

	if (rankID == 0)
		kprintf("generate_empty_matrix() done...\n");

	svm_barrier(SVM_TYPE);

	X = (double*) svm_malloc(MATRIX_SIZE*sizeof(double), SVM_TYPE);
	X_old = (double*) svm_malloc(MATRIX_SIZE*sizeof(double), SVM_TYPE);
	if (X == NULL || X_old == NULL) {
		kprintf("X or X_old is NULL...\n");
		return -1;
	}

	/* scratch page; temp[0] is used below to combine `norm` across cores */
	temp = (double*) svm_malloc(PAGE_SIZE, SVM_LAZYRELEASE);
	if (temp == NULL) {
		kprintf("temp is NULL...\n");
		return -1;
	}

	if (rankID == 0) {
		memset((void*)temp, 0x00, PAGE_SIZE);
		/*
		 * NOTE(review): DAMAGED REGION. The rest of this loop and all
		 * code up to the middle of an `if` condition is missing
		 * (presumably the initialisation of X / X_old, the computation
		 * of iter_start / iter_end, the start time and the head of the
		 * iteration loop over k - confirm against upstream). The next
		 * line is the unmodified residue and is not valid C.
		 */
		for(i=0; i 1) && (rankID == 0)) {
			/* write always a complete cache line */
			memset((void*)temp, 0, CACHE_LINE);
			svm_flush(0);
		}
		svm_barrier(SVM_TYPE);

		/*
		 * NOTE(review): DAMAGED REGION. The loop over this core's rows
		 * and the code up to the middle of an `if` condition is missing,
		 * including the #if that the #endif below closes. The next line
		 * is the unmodified residue and is not valid C.
		 */
		for(i=iter_start; i 1) {
				/*
				 * Under RCCE lock 0: add the running total from
				 * temp[0] to the local norm and store the sum back.
				 */
				RCCE_acquire_lock(0);
				svm_invalidate();
				norm += temp[0];
				temp[0] = norm;
				svm_flush(0);
				RCCE_release_lock(0);
				svm_barrier(SVM_LAZYRELEASE);
				/* after the barrier, read the combined value */
				norm = temp[0];
			}
#endif

			/* check the break condition */
			norm /= (double) MATRIX_SIZE;
			/* the break is commented out, so this test has no effect */
			if (norm < 0.0000001)
				; //break;
		} else {
			svm_barrier(SVM_TYPE);
		}

		//if (k % 100 == 0)
		//	kprintf("k = %d\n", k);
	}

	stop = rdtsc();

	if (MATRIX_SIZE < 16) {
		kprintf("Print the solution...\n");
		/* print solution */
		/*
		 * NOTE(review): DAMAGED REGION. The print loop, the closing
		 * brace of this `if`, the declaration of `err` and the head of
		 * the verification loop (which evidently compares X[i] with 1.0
		 * and tracks `max`) are missing. The next line is the
		 * unmodified residue and is not valid C.
		 */
		for(i=0; i 0.01) {
			kprintf("Result is on position %d wrong (%d/10000 != 1.0, error %d/10000)\n", i, (int) (10000.0*X[i]), (int) (10000.0*error));
			err = 1;
		}
	}

	/* values are scaled by 10000 and printed as integers */
	kprintf("maximal error is %d/10000\n", (int) (10000.0*max));

	kprintf("\nmatrix size: %d x %d\n", MATRIX_SIZE, MATRIX_SIZE);
	/* NOTE(review): %d is used for an unsigned int argument. */
	kprintf("number of iterations: %d\n", iterations);
	/* presumably get_cpu_frequency() is in MHz, making the quotient ms - confirm */
	kprintf("Calculation time: %llu ms (%llu ticks)\n", (stop-start)/(1000ULL*get_cpu_frequency()), stop-start);

	return err;
}

#endif