metalsvm/apps/jacobi.c

378 lines
8.4 KiB
C
Raw Permalink Normal View History

2011-12-21 05:22:16 -08:00
/*
* Copyright 2011 Stefan Lankes, Alexander Pilz, Maximilian Marx, Michael Ober,
* Chair for Operating Systems, RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
#include <metalsvm/stdio.h>
#include <metalsvm/stdlib.h>
#include <metalsvm/string.h>
#include <asm/svm.h>
#include <asm/RCCE.h>
#include <asm/RCCE_lib.h>
#include <asm/SCC_API.h>
#include <asm/irqflags.h>
#include "tests.h"
#if (defined(START_KERNEL_JACOBI) || defined(START_CHIEFTEST)) && defined(CONFIG_ROCKCREEK)
2011-12-21 05:22:16 -08:00
#define MATRIX_SIZE 256
#define MAXVALUE 1337
#define PAGE_SIZE 4096
#define CACHE_SIZE (256*1024)
#define SIZE ((MATRIX_SIZE+1)*MATRIX_SIZE*sizeof(double)+2*MATRIX_SIZE*sizeof(double)+10*PAGE_SIZE+CACHE_SIZE)
#define ALIGN(x,a) (((x)+(a)-1)&~((a)-1))
#define RAND_MAX 32767
//#define SVM_TYPE SVM_STRONG
#define SVM_TYPE SVM_LAZYRELEASE
2011-12-21 08:26:03 -08:00
/*
 * Absolute value of a floating-point expression.
 *
 * The whole expansion is parenthesized so the macro can be used as an operand
 * of a larger expression (e.g. 2.0 * fabs(x)) without the surrounding
 * operators binding to the comparison inside it.
 * The argument is evaluated twice, so it must not have side effects.
 */
#define fabs(x) ((x) >= 0 ? (x) : -1.0*(x))
2011-12-21 05:22:16 -08:00
/*
 * Current state of the pseudo-random generator implemented by rand().
 * A value of 0 is replaced by a fixed constant inside rand().
 */
static unsigned int seed = 0;

/*
 * Seed the pseudo-random generator.
 *
 * s: new generator state.
 *
 * Always returns 0.
 */
static int srand(unsigned int s)
{
	seed = s;

	return 0;
}
/*
 * Pseudo-random generator: the "Minimal Standard" multiplicative congruential
 * generator of Lewis, Goodman and Miller (1969),
 *
 *     I[j+1] = a * I[j] (mod m),   a = 16807,   m = 2147483647
 *
 * The product is evaluated with Schrage's algorithm:
 *
 *     a * (I[j] mod q) - r * (I[j] / q)        if that is >= 0
 *     a * (I[j] mod q) - r * (I[j] / q) + m    otherwise
 *
 * where "/" is integer division, q = m / a = 127773 and r = m mod a = 2836.
 *
 * A state of 0 would map to 0 forever, so it is replaced by a fixed non-zero
 * constant before stepping.
 *
 * Stores the new state in "seed" and returns its bits selected by RAND_MAX.
 */
static int rand(void)
{
	enum {
		LCG_A = 16807,		/* multiplier a */
		LCG_M = 2147483647,	/* modulus m */
		LCG_Q = 127773,		/* m / a */
		LCG_R = 2836		/* m mod a */
	};
	long state = (long)(seed);
	long quotient;

	if (!state)
		state = 0x12345987;

	quotient = state / LCG_Q;
	state = LCG_A * (state % LCG_Q) - LCG_R * quotient;
	if (state < 0)
		state += LCG_M;

	seed = (unsigned int)state;

	return (int)(state & RAND_MAX);
}
/*
 * Execute the instruction hand-encoded as the opcode bytes 0x0f 0x0a
 * (named CL1FLUSHMB by the original author).
 * NOTE(review): presumably the SCC-specific L1 flush for message-buffer-typed
 * lines - confirm against the SCC documentation.
 *
 * The "memory" clobber keeps the compiler from caching memory values in
 * registers or moving loads/stores across the flush.
 */
static inline void cache_invalidate(void)
{
	asm volatile ( ".byte 0x0f; .byte 0x0a;\n" ::: "memory" ); // CL1FLUSHMB
}
/*
 * Allocate and initialise the augmented matrix [A|b] of a linear system whose
 * solution is the all-ones vector.
 *
 * A:      out parameter; receives an array of row pointers. Rows 0..N-1 point
 *         into one contiguous svm_malloc()ed block of N rows with N+1 doubles
 *         each; column N of a row holds the right-hand side.
 * N:      dimension of the system.
 * rankID: rank of the calling core; selects the rows this core zeroes.
 *
 * Every core zeroes its own slice of rows, then rank 0 alone fills in random
 * off-diagonal coefficients, a strictly dominant diagonal and the row sums as
 * right-hand side.
 *
 * Returns 0 on success and -2 if an allocation fails.
 * NOTE(review): on svm_malloc() failure the row-pointer array is not released,
 * and a core that returns early skips the remaining barriers.
 */
static int generate_empty_matrix(double*** A , unsigned int N, int rankID) {
	unsigned int iCnt;
	unsigned int i, j;
	unsigned int iter_start, iter_end, pad, p;
	int num = RCCE_NP;

	/*
	 * Split the rows into slices whose size is a multiple of 4.
	 * Each core gets "pad" rows; the first p cores get 4 more each until
	 * the remainder is used up. p is computed unconditionally: N/num being
	 * a multiple of 4 does not imply that N is divisible by num, and
	 * skipping the remainder would leave the trailing rows without owner.
	 */
	pad = N/num;
	pad -= pad % 4;
	p = (N - num * pad) / 4;
	if ((unsigned int) rankID < p) {
		iter_start = rankID*(pad+4);
		iter_end = iter_start+pad+4;
	} else {
		iter_start = p*(pad+4)+(rankID-p)*pad;
		iter_end = iter_start+pad;
	}
	/* the last core also takes the N % 4 rows that do not fill a group of 4 */
	if (rankID == num-1)
		iter_end = N;

	kprintf("iter_start %u, iter_end %u\n", iter_start, iter_end);

	/* private array of row pointers */
	*A = (double**) kmalloc((N+1)*sizeof(double*));
	if (*A == NULL)
		return -2; /* Error */

	svm_barrier(SVM_TYPE);

	/* matrix storage: N rows with N+1 doubles each */
	**A = (double*) svm_malloc((N+1)*N*sizeof(double), SVM_TYPE);
	if (**A == NULL)
		return -2; /* Error */

	svm_barrier(SVM_TYPE);

	/* row 0 is already set by the allocation above; derive rows 1..N-1 from it */
	for(iCnt=1; iCnt<N; iCnt++)
		(*A)[iCnt] = &((*A)[0][iCnt*(N+1)]);

	/* clear the rows owned by this core, including the right-hand-side column */
	for(i=iter_start; i<iter_end; i++)
	{
		for(j=0; j<=N; j++)
			(*A)[i][j] = 0;
	}

	svm_flush(0);
	svm_invalidate();
	svm_barrier(SVM_TYPE);

	if(rankID == 0)
	{
		srand( 42 ) ; /* init random number generator */

		/*
		 * initialize the system of linear equations
		 * the result vector is one
		 */
		for (i = 0; i < N; i++)
		{
			double sum = 0.0;

			for (j = 0; j < N; j++)
			{
				if (i != j)
				{
					double c = ((double)rand()) / ((double)RAND_MAX) * MAXVALUE;

					sum += fabs(c);
					(*A)[i][j] = c;
					/* accumulate the row sum, which relies on the zeroing above */
					(*A)[i][N] += c;
				}
			}

			/*
			 * The Jacobi method will always converge if the matrix A is strictly or irreducibly diagonally dominant.
			 * Strict row diagonal dominance means that for each row, the absolute value of the diagonal term is
			 * greater than the sum of absolute values of other terms: |A[i][i]| > Sum |A[i][j]| with (i != j)
			 */
			(*A)[i][i] = sum + 2.0;
			(*A)[i][N] += sum + 2.0;
		}

		svm_flush(0);
		svm_invalidate();
	}

	svm_barrier(SVM_TYPE);

	return 0;
}
int jacobi(void* argv)
{
volatile double* temp = NULL;
volatile double* swap;
unsigned int i, j, k, iter_start, iter_end, pad;
unsigned int iterations = 0;
double error, norm, max = 0.0;
double** A=0;
volatile double* X;
volatile double* X_old;
double xi;
uint64_t start, stop;
int rankID, num;
rankID = RCCE_IAM;
num = RCCE_NP;
if (generate_empty_matrix(&A,MATRIX_SIZE,rankID) < 0)
{
kprintf("generate_empty_matrix() failed...\n");
return -1;
}
if (rankID == 0)
kprintf("generate_empty_matrix() done...\n");
svm_barrier(SVM_TYPE);
X = (double*) svm_malloc(MATRIX_SIZE*sizeof(double), SVM_TYPE);
X_old = (double*) svm_malloc(MATRIX_SIZE*sizeof(double), SVM_TYPE);
if (X == NULL || X_old == NULL)
{
kprintf("X or X_old is NULL...\n");
return -1;
}
temp = (double*) svm_malloc(PAGE_SIZE, SVM_LAZYRELEASE);
if (temp == NULL)
{
kprintf("temp is NULL...\n");
return -1;
}
if (rankID == 0) {
memset((void*)temp, 0x00, PAGE_SIZE);
for(i=0; i<MATRIX_SIZE; i++)
{
X[i] = ((double)rand()) / ((double)RAND_MAX) * 10.0;
X_old[i] = 0.0;
}
svm_flush(0);
svm_invalidate();
}
svm_barrier(SVM_TYPE);
if (rankID == 0)
kprintf("start calculation...\n");
svm_barrier(SVM_TYPE);
pad = MATRIX_SIZE/num;
if (pad % 4) {
pad -= pad % 4;
unsigned int p = (MATRIX_SIZE - num * pad) / 4;
if (rankID < p) {
iter_start = rankID*(pad+4);
iter_end = (rankID+1)*(pad+4);
} else {
iter_start = p*(pad+4)+(rankID-p)*pad;
iter_end = p*(pad+4)+(rankID+1-p)*pad;
}
} else {
iter_start = rankID*pad;
iter_end = (rankID+1)*pad;
}
start = rdtsc();
//while(1)
for(k=0; k<865000; k++)
{
iterations++;
swap = X_old;
X_old = X;
X = swap;
for(i=iter_start; i<iter_end; i++)
{
for(j=0, xi=0.0; j<i; j++)
xi += A[i][j]* X_old[j];
for(j=i+1; j<MATRIX_SIZE; j++)
xi += A[i][j] * X_old[j];
X[i] = (A[i][MATRIX_SIZE] - xi) / A[i][i];
}
if (iterations % 5000 == 0 ) {/* calculate the Euclidean norm between X_old and X*/
norm = 0.0;
#if 1
svm_barrier(SVM_TYPE);
for(i=0; i<MATRIX_SIZE; i++)
norm += (X_old[i] - X[i]) * (X_old[i] - X[i]);
svm_barrier(SVM_TYPE);
#else
if ((num > 1) && (rankID == 0)) { /* write always a complete cache line */
memset((void*)temp, 0, CACHE_LINE);
svm_flush(0);
}
svm_barrier(SVM_TYPE);
for(i=iter_start; i<iter_end; i++)
norm += (X_old[i] - X[i]) * (X_old[i] - X[i]);
if (num > 1) {
RCCE_acquire_lock(0);
svm_invalidate();
norm += temp[0];
temp[0] = norm;
svm_flush(0);
RCCE_release_lock(0);
svm_barrier(SVM_LAZYRELEASE);
norm = temp[0];
}
#endif
/* check the break condition */
norm /= (double) MATRIX_SIZE;
if (norm < 0.0000001)
; //break;
} else {
svm_barrier(SVM_TYPE);
}
//if (k % 100 == 0)
// kprintf("k = %d\n", k);
}
stop = rdtsc();
2011-12-21 08:26:03 -08:00
if (MATRIX_SIZE < 16) {
kprintf("Print the solution...\n");
/* print solution */
2011-12-21 05:22:16 -08:00
for(i=0; i<MATRIX_SIZE; i++) {
2011-12-21 08:26:03 -08:00
for(j=0; j<MATRIX_SIZE; j++)
kprintf("%u/100\t", (uint32_t) (100*A[i][j]));
kprintf("*\t%u/100\t=\t%u/100\n", (uint32_t) (100*X[i]), (uint32_t) (100*A[i][MATRIX_SIZE]));
2011-12-21 05:22:16 -08:00
}
}
2011-12-21 08:26:03 -08:00
kprintf("Check the result...\n");
/*
* check the result
* X[i] have to be 1
*/
int err=0;
2011-12-21 08:26:03 -08:00
for(i=0; i<MATRIX_SIZE; i++) {
double diff = X[i] - 1.0;
error = fabs(diff);
if (max < error)
max = error;
if (error > 0.01) {
2011-12-21 08:26:03 -08:00
kprintf("Result is on position %d wrong (%d/10000 != 1.0, error %d/10000)\n", i, (int) (10000.0*X[i]), (int) (10000.0*error));
err = 1;
}
2011-12-21 08:26:03 -08:00
}
kprintf("maximal error is %d/10000\n", (int) (10000.0*max));
kprintf("\nmatrix size: %d x %d\n", MATRIX_SIZE, MATRIX_SIZE);
kprintf("number of iterations: %d\n", iterations);
kprintf("Calculation time: %llu ms (%llu ticks)\n", (stop-start)/(1000ULL*get_cpu_frequency()), stop-start);
2011-12-21 05:22:16 -08:00
return err;
2011-12-21 05:22:16 -08:00
}
#endif