2011-12-21 05:22:16 -08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Copyright 2011 Stefan Lankes, Alexander Pilz, Maximilian Marx, Michael Ober,
|
|
|
|
* Chair for Operating Systems, RWTH Aachen University
|
|
|
|
*
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at
|
|
|
|
*
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <metalsvm/stdio.h>
|
|
|
|
#include <metalsvm/stdlib.h>
|
|
|
|
#include <metalsvm/string.h>
|
|
|
|
#include <asm/svm.h>
|
|
|
|
#include <asm/RCCE.h>
|
|
|
|
#include <asm/RCCE_lib.h>
|
|
|
|
#include <asm/SCC_API.h>
|
|
|
|
#include <asm/irqflags.h>
|
|
|
|
|
2012-07-16 21:55:42 +02:00
|
|
|
#include "tests.h"
|
|
|
|
|
2012-09-17 14:58:58 +02:00
|
|
|
#if (defined(START_KERNEL_JACOBI) || defined(START_CHIEFTEST)) && defined(CONFIG_ROCKCREEK)
|
2012-04-24 10:35:24 +02:00
|
|
|
|
2011-12-21 05:22:16 -08:00
|
|
|
/* Dimension N of the N x N coefficient matrix solved by jacobi(). */
#define MATRIX_SIZE	256
/* Scale factor applied to the random off-diagonal matrix entries. */
#define MAXVALUE	1337
#define PAGE_SIZE	4096
#define CACHE_SIZE	(256*1024)
/*
 * Byte count of (MATRIX_SIZE+1)*MATRIX_SIZE doubles plus two vectors of
 * MATRIX_SIZE doubles, ten pages and one CACHE_SIZE.
 */
#define SIZE		((MATRIX_SIZE+1)*MATRIX_SIZE*sizeof(double)+2*MATRIX_SIZE*sizeof(double)+10*PAGE_SIZE+CACHE_SIZE)
/* Round x up to the next multiple of a; the mask only works if a is a power of two. */
#define ALIGN(x,a)	(((x)+(a)-1)&~((a)-1))
/* Largest value returned by the local rand(); also used as its bit mask. */
#define RAND_MAX	32767

/* Memory model handed to svm_malloc() and svm_barrier(). */
//#define SVM_TYPE	SVM_STRONG
#define SVM_TYPE	SVM_LAZYRELEASE

/*
 * Absolute value of a floating-point expression.
 *
 * The whole replacement list is parenthesised so that the macro can be used
 * as an operand (e.g. 2.0 * fabs(x) or 1.0 - fabs(x)) without the
 * surrounding operator binding to the condition of the ternary.
 * Note: x is evaluated twice, so it must not have side effects.
 */
#define fabs(x)		((x) >= 0 ? (x) : -1.0*(x))
|
2011-12-21 05:22:16 -08:00
|
|
|
|
|
|
|
/*
 * Current state of the pseudo-random generator below.
 * A value of 0 is replaced by a fixed constant inside rand().
 */
static unsigned int seed = 0;

/*
 * Set the state of the pseudo-random generator.
 *
 * s - new generator state
 *
 * Always returns 0.
 */
static int srand(unsigned int s)
{
	seed = s;

	return 0;
}
|
|
|
|
|
|
|
|
/* Pseudo-random generator based on Minimal Standard by
|
|
|
|
Lewis, Goodman, and Miller in 1969.
|
|
|
|
|
|
|
|
I[j+1] = a*I[j] (mod m)
|
|
|
|
|
|
|
|
where a = 16807
|
|
|
|
m = 2147483647
|
|
|
|
|
|
|
|
Using Schrage's algorithm, a*I[j] (mod m) can be rewritten as:
|
|
|
|
|
|
|
|
a*(I[j] mod q) - r*{I[j]/q} if >= 0
|
|
|
|
a*(I[j] mod q) - r*{I[j]/q} + m otherwise
|
|
|
|
|
|
|
|
where: {} denotes integer division
|
|
|
|
q = {m/a} = 127773
|
|
|
|
r = m (mod a) = 2836
|
|
|
|
|
|
|
|
note that the seed value of 0 cannot be used in the calculation as
|
|
|
|
it results in 0 itself
|
|
|
|
*/
|
|
|
|
|
|
|
|
static int rand(void)
|
|
|
|
{
|
|
|
|
long k;
|
|
|
|
long s = (long)(seed);
|
|
|
|
if (s == 0)
|
|
|
|
s = 0x12345987;
|
|
|
|
k = s / 127773;
|
|
|
|
s = 16807 * (s - k * 127773) - 2836 * k;
|
|
|
|
if (s < 0)
|
|
|
|
s += 2147483647;
|
|
|
|
seed = (unsigned int)s;
|
|
|
|
return (int)(s & RAND_MAX);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Emit the raw two-byte opcode 0x0f 0x0a (labelled CL1FLUSHMB in this file).
 * NOTE(review): presumably the SCC-specific instruction that drops the
 * message-buffer-tagged lines from the L1 cache - confirm against the SCC
 * documentation.
 */
static inline void cache_invalidate(void)
{
	__asm__ __volatile__ ( ".byte 0x0f; .byte 0x0a;\n" );	// CL1FLUSHMB
}
|
|
|
|
|
|
|
|
static int generate_empty_matrix(double*** A , unsigned int N, int rankID) {
|
|
|
|
unsigned int iCnt;
|
|
|
|
int i,j;
|
|
|
|
unsigned int iter_start, iter_end, pad;
|
|
|
|
int num = RCCE_NP;
|
|
|
|
|
|
|
|
pad = N/num;
|
|
|
|
if (pad % 4) {
|
|
|
|
pad -= pad % 4;
|
|
|
|
|
|
|
|
unsigned int p = (N - num * pad) / 4;
|
|
|
|
|
|
|
|
if (rankID < p) {
|
|
|
|
iter_start = rankID*(pad+4);
|
|
|
|
iter_end = (rankID+1)*(pad+4);
|
|
|
|
} else {
|
|
|
|
iter_start = p*(pad+4)+(rankID-p)*pad;
|
|
|
|
iter_end = p*(pad+4)+(rankID+1-p)*pad;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
iter_start = rankID*pad;
|
|
|
|
iter_end = (rankID+1)*pad;
|
|
|
|
}
|
|
|
|
kprintf("iter_start %d, iter_end %d\n", iter_start, iter_end);
|
|
|
|
|
|
|
|
*A = (double**) kmalloc((N+1)*sizeof(double*));
|
|
|
|
|
|
|
|
if (*A == NULL)
|
|
|
|
return -2; /* Error */
|
|
|
|
|
|
|
|
svm_barrier(SVM_TYPE);
|
|
|
|
|
|
|
|
**A = (double*) svm_malloc((N+1)*N*sizeof(double), SVM_TYPE);
|
|
|
|
|
|
|
|
if (**A == NULL)
|
|
|
|
return -2; /* Error */
|
|
|
|
|
|
|
|
svm_barrier(SVM_TYPE);
|
|
|
|
|
|
|
|
for(iCnt=1; iCnt<N; iCnt++) { /* Assign pointers in the first "real index"; Value from 1 to N (0 yet set, value N means N+1) */
|
|
|
|
(*A)[iCnt] = &((*A)[0][iCnt*(N+1)]);
|
|
|
|
}
|
|
|
|
|
|
|
|
for(i=iter_start; i<iter_end; i++)
|
|
|
|
{
|
|
|
|
for(j=0; j<=N; j++)
|
|
|
|
(*A)[i][j] = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
svm_flush(0);
|
|
|
|
svm_invalidate();
|
|
|
|
svm_barrier(SVM_TYPE);
|
|
|
|
|
|
|
|
if(rankID == 0)
|
|
|
|
{
|
|
|
|
srand( 42 ) ; /* init random number generator */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* initialize the system of linear equations
|
|
|
|
* the result vector is one
|
|
|
|
*/
|
|
|
|
for (i = 0; i < N; i++)
|
|
|
|
{
|
|
|
|
double sum = 0.0;
|
|
|
|
|
|
|
|
for (j = 0; j < N; j++)
|
|
|
|
{
|
|
|
|
if (i != j)
|
|
|
|
{
|
|
|
|
double c = ((double)rand()) / ((double)RAND_MAX) * MAXVALUE;
|
|
|
|
|
|
|
|
sum += fabs(c);
|
|
|
|
(*A)[i][j] = c;
|
|
|
|
(*A)[i][N] += c;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The Jacobi method will always converge if the matrix A is strictly or irreducibly diagonally dominant.
|
|
|
|
* Strict row diagonal dominance means that for each row, the absolute value of the diagonal term is
|
|
|
|
* greater than the sum of absolute values of other terms: |A[i][i]| > Sum |A[i][j]| with (i != j)
|
|
|
|
*/
|
|
|
|
|
|
|
|
(*A)[i][i] = sum + 2.0;
|
|
|
|
(*A)[i][N] += sum + 2.0;
|
|
|
|
}
|
|
|
|
|
|
|
|
svm_flush(0);
|
|
|
|
svm_invalidate();
|
|
|
|
}
|
|
|
|
|
|
|
|
svm_barrier(SVM_TYPE);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int jacobi(void* argv)
|
|
|
|
{
|
|
|
|
volatile double* temp = NULL;
|
|
|
|
volatile double* swap;
|
|
|
|
unsigned int i, j, k, iter_start, iter_end, pad;
|
|
|
|
unsigned int iterations = 0;
|
|
|
|
double error, norm, max = 0.0;
|
|
|
|
double** A=0;
|
|
|
|
volatile double* X;
|
|
|
|
volatile double* X_old;
|
|
|
|
double xi;
|
|
|
|
uint64_t start, stop;
|
|
|
|
int rankID, num;
|
|
|
|
|
|
|
|
rankID = RCCE_IAM;
|
|
|
|
num = RCCE_NP;
|
|
|
|
|
|
|
|
if (generate_empty_matrix(&A,MATRIX_SIZE,rankID) < 0)
|
|
|
|
{
|
|
|
|
kprintf("generate_empty_matrix() failed...\n");
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
if (rankID == 0)
|
|
|
|
kprintf("generate_empty_matrix() done...\n");
|
|
|
|
|
|
|
|
svm_barrier(SVM_TYPE);
|
|
|
|
|
|
|
|
X = (double*) svm_malloc(MATRIX_SIZE*sizeof(double), SVM_TYPE);
|
|
|
|
X_old = (double*) svm_malloc(MATRIX_SIZE*sizeof(double), SVM_TYPE);
|
|
|
|
|
|
|
|
if (X == NULL || X_old == NULL)
|
|
|
|
{
|
|
|
|
kprintf("X or X_old is NULL...\n");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
temp = (double*) svm_malloc(PAGE_SIZE, SVM_LAZYRELEASE);
|
|
|
|
if (temp == NULL)
|
|
|
|
{
|
|
|
|
kprintf("temp is NULL...\n");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (rankID == 0) {
|
|
|
|
memset((void*)temp, 0x00, PAGE_SIZE);
|
|
|
|
for(i=0; i<MATRIX_SIZE; i++)
|
|
|
|
{
|
|
|
|
X[i] = ((double)rand()) / ((double)RAND_MAX) * 10.0;
|
|
|
|
X_old[i] = 0.0;
|
|
|
|
}
|
|
|
|
|
|
|
|
svm_flush(0);
|
|
|
|
svm_invalidate();
|
|
|
|
}
|
|
|
|
|
|
|
|
svm_barrier(SVM_TYPE);
|
|
|
|
|
|
|
|
if (rankID == 0)
|
|
|
|
kprintf("start calculation...\n");
|
|
|
|
|
|
|
|
svm_barrier(SVM_TYPE);
|
|
|
|
|
|
|
|
pad = MATRIX_SIZE/num;
|
|
|
|
if (pad % 4) {
|
|
|
|
pad -= pad % 4;
|
|
|
|
|
|
|
|
unsigned int p = (MATRIX_SIZE - num * pad) / 4;
|
|
|
|
|
|
|
|
if (rankID < p) {
|
|
|
|
iter_start = rankID*(pad+4);
|
|
|
|
iter_end = (rankID+1)*(pad+4);
|
|
|
|
} else {
|
|
|
|
iter_start = p*(pad+4)+(rankID-p)*pad;
|
|
|
|
iter_end = p*(pad+4)+(rankID+1-p)*pad;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
iter_start = rankID*pad;
|
|
|
|
iter_end = (rankID+1)*pad;
|
|
|
|
}
|
|
|
|
|
|
|
|
start = rdtsc();
|
|
|
|
|
|
|
|
//while(1)
|
|
|
|
for(k=0; k<865000; k++)
|
|
|
|
{
|
|
|
|
iterations++;
|
|
|
|
|
|
|
|
swap = X_old;
|
|
|
|
X_old = X;
|
|
|
|
X = swap;
|
|
|
|
|
|
|
|
for(i=iter_start; i<iter_end; i++)
|
|
|
|
{
|
|
|
|
for(j=0, xi=0.0; j<i; j++)
|
|
|
|
xi += A[i][j]* X_old[j];
|
|
|
|
|
|
|
|
for(j=i+1; j<MATRIX_SIZE; j++)
|
|
|
|
xi += A[i][j] * X_old[j];
|
|
|
|
X[i] = (A[i][MATRIX_SIZE] - xi) / A[i][i];
|
|
|
|
}
|
|
|
|
|
|
|
|
if (iterations % 5000 == 0 ) {/* calculate the Euclidean norm between X_old and X*/
|
|
|
|
norm = 0.0;
|
|
|
|
|
|
|
|
#if 1
|
|
|
|
svm_barrier(SVM_TYPE);
|
|
|
|
for(i=0; i<MATRIX_SIZE; i++)
|
|
|
|
norm += (X_old[i] - X[i]) * (X_old[i] - X[i]);
|
|
|
|
svm_barrier(SVM_TYPE);
|
|
|
|
#else
|
|
|
|
if ((num > 1) && (rankID == 0)) { /* write always a complete cache line */
|
|
|
|
memset((void*)temp, 0, CACHE_LINE);
|
|
|
|
svm_flush(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
svm_barrier(SVM_TYPE);
|
|
|
|
|
|
|
|
for(i=iter_start; i<iter_end; i++)
|
|
|
|
norm += (X_old[i] - X[i]) * (X_old[i] - X[i]);
|
|
|
|
|
|
|
|
if (num > 1) {
|
|
|
|
RCCE_acquire_lock(0);
|
|
|
|
svm_invalidate();
|
|
|
|
norm += temp[0];
|
|
|
|
temp[0] = norm;
|
|
|
|
svm_flush(0);
|
|
|
|
RCCE_release_lock(0);
|
|
|
|
|
|
|
|
svm_barrier(SVM_LAZYRELEASE);
|
|
|
|
norm = temp[0];
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* check the break condition */
|
|
|
|
norm /= (double) MATRIX_SIZE;
|
|
|
|
|
|
|
|
if (norm < 0.0000001)
|
|
|
|
; //break;
|
|
|
|
} else {
|
|
|
|
svm_barrier(SVM_TYPE);
|
|
|
|
}
|
|
|
|
|
|
|
|
//if (k % 100 == 0)
|
|
|
|
// kprintf("k = %d\n", k);
|
|
|
|
}
|
|
|
|
|
|
|
|
stop = rdtsc();
|
|
|
|
|
2011-12-21 08:26:03 -08:00
|
|
|
if (MATRIX_SIZE < 16) {
|
|
|
|
kprintf("Print the solution...\n");
|
|
|
|
/* print solution */
|
2011-12-21 05:22:16 -08:00
|
|
|
for(i=0; i<MATRIX_SIZE; i++) {
|
2011-12-21 08:26:03 -08:00
|
|
|
for(j=0; j<MATRIX_SIZE; j++)
|
|
|
|
kprintf("%u/100\t", (uint32_t) (100*A[i][j]));
|
|
|
|
kprintf("*\t%u/100\t=\t%u/100\n", (uint32_t) (100*X[i]), (uint32_t) (100*A[i][MATRIX_SIZE]));
|
2011-12-21 05:22:16 -08:00
|
|
|
}
|
|
|
|
}
|
2011-12-21 08:26:03 -08:00
|
|
|
kprintf("Check the result...\n");
|
|
|
|
|
|
|
|
/*
|
|
|
|
* check the result
|
|
|
|
* X[i] have to be 1
|
|
|
|
*/
|
2012-09-17 14:51:10 +02:00
|
|
|
int err=0;
|
2011-12-21 08:26:03 -08:00
|
|
|
for(i=0; i<MATRIX_SIZE; i++) {
|
|
|
|
double diff = X[i] - 1.0;
|
|
|
|
|
|
|
|
error = fabs(diff);
|
|
|
|
if (max < error)
|
|
|
|
max = error;
|
2012-09-17 14:51:10 +02:00
|
|
|
if (error > 0.01) {
|
2011-12-21 08:26:03 -08:00
|
|
|
kprintf("Result is on position %d wrong (%d/10000 != 1.0, error %d/10000)\n", i, (int) (10000.0*X[i]), (int) (10000.0*error));
|
2012-09-17 14:51:10 +02:00
|
|
|
err = 1;
|
|
|
|
}
|
2011-12-21 08:26:03 -08:00
|
|
|
}
|
|
|
|
kprintf("maximal error is %d/10000\n", (int) (10000.0*max));
|
|
|
|
|
|
|
|
kprintf("\nmatrix size: %d x %d\n", MATRIX_SIZE, MATRIX_SIZE);
|
|
|
|
kprintf("number of iterations: %d\n", iterations);
|
|
|
|
kprintf("Calculation time: %llu ms (%llu ticks)\n", (stop-start)/(1000ULL*get_cpu_frequency()), stop-start);
|
2011-12-21 05:22:16 -08:00
|
|
|
|
2012-09-17 14:51:10 +02:00
|
|
|
return err;
|
2011-12-21 05:22:16 -08:00
|
|
|
}
|
2012-04-24 10:35:24 +02:00
|
|
|
|
|
|
|
#endif
|