add jacobi example
This commit is contained in:
parent
e03d155fc0
commit
e3e77baee5
3 changed files with 376 additions and 4 deletions
|
@ -1,4 +1,4 @@
|
|||
C_source := tests.c echo.c netio.c laplace.c gfx_client.c gfx_generic.c
|
||||
C_source := tests.c echo.c netio.c jacobi.c laplace.c gfx_client.c gfx_generic.c
|
||||
MODULE := apps
|
||||
|
||||
include $(TOPDIR)/Makefile.inc
|
||||
|
|
370
apps/jacobi.c
Normal file
370
apps/jacobi.c
Normal file
|
@ -0,0 +1,370 @@
|
|||
|
||||
/*
|
||||
* Copyright 2011 Stefan Lankes, Alexander Pilz, Maximilian Marx, Michael Ober,
|
||||
* Chair for Operating Systems, RWTH Aachen University
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <metalsvm/stdio.h>
|
||||
#include <metalsvm/stdlib.h>
|
||||
#include <metalsvm/string.h>
|
||||
#include <asm/svm.h>
|
||||
#include <asm/RCCE.h>
|
||||
#include <asm/RCCE_lib.h>
|
||||
#include <asm/SCC_API.h>
|
||||
#include <asm/irqflags.h>
|
||||
|
||||
#define MATRIX_SIZE 256
|
||||
#define MAXVALUE 1337
|
||||
#define PAGE_SIZE 4096
|
||||
#define CACHE_SIZE (256*1024)
|
||||
#define SIZE ((MATRIX_SIZE+1)*MATRIX_SIZE*sizeof(double)+2*MATRIX_SIZE*sizeof(double)+10*PAGE_SIZE+CACHE_SIZE)
|
||||
#define ALIGN(x,a) (((x)+(a)-1)&~((a)-1))
|
||||
#define RAND_MAX 32767
|
||||
|
||||
//#define SVM_TYPE SVM_STRONG
|
||||
#define SVM_TYPE SVM_LAZYRELEASE
|
||||
|
||||
/*
 * Absolute value of x.
 *
 * Both the argument and the whole expansion are parenthesised so that the
 * macro behaves like a function call inside larger expressions, e.g.
 * fabs(a - b) negates the complete difference and 2 * fabs(v) multiplies
 * the result rather than the comparison.
 * Note: x is evaluated more than once, so it must not have side effects.
 */
#define fabs(x) ((x) >= 0 ? (x) : -(x))
|
||||
|
||||
static unsigned int seed = 0;
|
||||
|
||||
static int srand(unsigned int s)
|
||||
{
|
||||
seed = s;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Pseudo-random generator based on Minimal Standard by
|
||||
Lewis, Goodman, and Miller in 1969.
|
||||
|
||||
I[j+1] = a*I[j] (mod m)
|
||||
|
||||
where a = 16807
|
||||
m = 2147483647
|
||||
|
||||
Using Schrage's algorithm, a*I[j] (mod m) can be rewritten as:
|
||||
|
||||
a*(I[j] mod q) - r*{I[j]/q} if >= 0
|
||||
a*(I[j] mod q) - r*{I[j]/q} + m otherwise
|
||||
|
||||
where: {} denotes integer division
|
||||
q = {m/a} = 127773
|
||||
r = m (mod a) = 2836
|
||||
|
||||
note that the seed value of 0 cannot be used in the calculation as
|
||||
it results in 0 itself
|
||||
*/
|
||||
|
||||
static int rand(void)
|
||||
{
|
||||
long k;
|
||||
long s = (long)(seed);
|
||||
if (s == 0)
|
||||
s = 0x12345987;
|
||||
k = s / 127773;
|
||||
s = 16807 * (s - k * 127773) - 2836 * k;
|
||||
if (s < 0)
|
||||
s += 2147483647;
|
||||
seed = (unsigned int)s;
|
||||
return (int)(s & RAND_MAX);
|
||||
}
|
||||
|
||||
/*
 * Emit the two raw opcode bytes 0x0f 0x0a.
 * The original annotation names this encoding CL1FLUSHMB; the bytes are
 * emitted via .byte, presumably because the assembler has no mnemonic
 * for it -- TODO confirm against the SCC documentation.
 */
static inline void cache_invalidate(void)
{
	__asm__ __volatile__ ( ".byte 0x0f; .byte 0x0a;\n" );
}
|
||||
|
||||
static int generate_empty_matrix(double*** A , unsigned int N, int rankID) {
|
||||
unsigned int iCnt;
|
||||
int i,j;
|
||||
unsigned int iter_start, iter_end, pad;
|
||||
int num = RCCE_NP;
|
||||
|
||||
pad = N/num;
|
||||
if (pad % 4) {
|
||||
pad -= pad % 4;
|
||||
|
||||
unsigned int p = (N - num * pad) / 4;
|
||||
|
||||
if (rankID < p) {
|
||||
iter_start = rankID*(pad+4);
|
||||
iter_end = (rankID+1)*(pad+4);
|
||||
} else {
|
||||
iter_start = p*(pad+4)+(rankID-p)*pad;
|
||||
iter_end = p*(pad+4)+(rankID+1-p)*pad;
|
||||
}
|
||||
} else {
|
||||
iter_start = rankID*pad;
|
||||
iter_end = (rankID+1)*pad;
|
||||
}
|
||||
kprintf("iter_start %d, iter_end %d\n", iter_start, iter_end);
|
||||
|
||||
*A = (double**) kmalloc((N+1)*sizeof(double*));
|
||||
|
||||
if (*A == NULL)
|
||||
return -2; /* Error */
|
||||
|
||||
svm_barrier(SVM_TYPE);
|
||||
|
||||
**A = (double*) svm_malloc((N+1)*N*sizeof(double), SVM_TYPE);
|
||||
|
||||
if (**A == NULL)
|
||||
return -2; /* Error */
|
||||
|
||||
svm_barrier(SVM_TYPE);
|
||||
|
||||
for(iCnt=1; iCnt<N; iCnt++) { /* Assign pointers in the first "real index"; Value from 1 to N (0 yet set, value N means N+1) */
|
||||
(*A)[iCnt] = &((*A)[0][iCnt*(N+1)]);
|
||||
}
|
||||
|
||||
for(i=iter_start; i<iter_end; i++)
|
||||
{
|
||||
for(j=0; j<=N; j++)
|
||||
(*A)[i][j] = 0;
|
||||
}
|
||||
|
||||
svm_flush(0);
|
||||
svm_invalidate();
|
||||
svm_barrier(SVM_TYPE);
|
||||
|
||||
if(rankID == 0)
|
||||
{
|
||||
srand( 42 ) ; /* init random number generator */
|
||||
|
||||
/*
|
||||
* initialize the system of linear equations
|
||||
* the result vector is one
|
||||
*/
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
double sum = 0.0;
|
||||
|
||||
for (j = 0; j < N; j++)
|
||||
{
|
||||
if (i != j)
|
||||
{
|
||||
double c = ((double)rand()) / ((double)RAND_MAX) * MAXVALUE;
|
||||
|
||||
sum += fabs(c);
|
||||
(*A)[i][j] = c;
|
||||
(*A)[i][N] += c;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The Jacobi method will always converge if the matrix A is strictly or irreducibly diagonally dominant.
|
||||
* Strict row diagonal dominance means that for each row, the absolute value of the diagonal term is
|
||||
* greater than the sum of absolute values of other terms: |A[i][i]| > Sum |A[i][j]| with (i != j)
|
||||
*/
|
||||
|
||||
(*A)[i][i] = sum + 2.0;
|
||||
(*A)[i][N] += sum + 2.0;
|
||||
}
|
||||
|
||||
svm_flush(0);
|
||||
svm_invalidate();
|
||||
}
|
||||
|
||||
svm_barrier(SVM_TYPE);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int jacobi(void* argv)
|
||||
{
|
||||
volatile double* temp = NULL;
|
||||
volatile double* swap;
|
||||
unsigned int i, j, k, iter_start, iter_end, pad;
|
||||
unsigned int iterations = 0;
|
||||
double error, norm, max = 0.0;
|
||||
double** A=0;
|
||||
volatile double* X;
|
||||
volatile double* X_old;
|
||||
double xi;
|
||||
uint64_t start, stop;
|
||||
int rankID, num;
|
||||
|
||||
rankID = RCCE_IAM;
|
||||
num = RCCE_NP;
|
||||
|
||||
if (generate_empty_matrix(&A,MATRIX_SIZE,rankID) < 0)
|
||||
{
|
||||
kprintf("generate_empty_matrix() failed...\n");
|
||||
return -1;
|
||||
|
||||
}
|
||||
|
||||
if (rankID == 0)
|
||||
kprintf("generate_empty_matrix() done...\n");
|
||||
|
||||
svm_barrier(SVM_TYPE);
|
||||
|
||||
X = (double*) svm_malloc(MATRIX_SIZE*sizeof(double), SVM_TYPE);
|
||||
X_old = (double*) svm_malloc(MATRIX_SIZE*sizeof(double), SVM_TYPE);
|
||||
|
||||
if (X == NULL || X_old == NULL)
|
||||
{
|
||||
kprintf("X or X_old is NULL...\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
temp = (double*) svm_malloc(PAGE_SIZE, SVM_LAZYRELEASE);
|
||||
if (temp == NULL)
|
||||
{
|
||||
kprintf("temp is NULL...\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (rankID == 0) {
|
||||
memset((void*)temp, 0x00, PAGE_SIZE);
|
||||
for(i=0; i<MATRIX_SIZE; i++)
|
||||
{
|
||||
X[i] = ((double)rand()) / ((double)RAND_MAX) * 10.0;
|
||||
X_old[i] = 0.0;
|
||||
}
|
||||
|
||||
svm_flush(0);
|
||||
svm_invalidate();
|
||||
}
|
||||
|
||||
svm_barrier(SVM_TYPE);
|
||||
|
||||
if (rankID == 0)
|
||||
kprintf("start calculation...\n");
|
||||
|
||||
svm_barrier(SVM_TYPE);
|
||||
|
||||
pad = MATRIX_SIZE/num;
|
||||
if (pad % 4) {
|
||||
pad -= pad % 4;
|
||||
|
||||
unsigned int p = (MATRIX_SIZE - num * pad) / 4;
|
||||
|
||||
if (rankID < p) {
|
||||
iter_start = rankID*(pad+4);
|
||||
iter_end = (rankID+1)*(pad+4);
|
||||
} else {
|
||||
iter_start = p*(pad+4)+(rankID-p)*pad;
|
||||
iter_end = p*(pad+4)+(rankID+1-p)*pad;
|
||||
}
|
||||
} else {
|
||||
iter_start = rankID*pad;
|
||||
iter_end = (rankID+1)*pad;
|
||||
}
|
||||
|
||||
start = rdtsc();
|
||||
|
||||
//while(1)
|
||||
for(k=0; k<865000; k++)
|
||||
{
|
||||
iterations++;
|
||||
|
||||
swap = X_old;
|
||||
X_old = X;
|
||||
X = swap;
|
||||
|
||||
for(i=iter_start; i<iter_end; i++)
|
||||
{
|
||||
for(j=0, xi=0.0; j<i; j++)
|
||||
xi += A[i][j]* X_old[j];
|
||||
|
||||
for(j=i+1; j<MATRIX_SIZE; j++)
|
||||
xi += A[i][j] * X_old[j];
|
||||
X[i] = (A[i][MATRIX_SIZE] - xi) / A[i][i];
|
||||
}
|
||||
|
||||
if (iterations % 5000 == 0 ) {/* calculate the Euclidean norm between X_old and X*/
|
||||
norm = 0.0;
|
||||
|
||||
#if 1
|
||||
svm_barrier(SVM_TYPE);
|
||||
for(i=0; i<MATRIX_SIZE; i++)
|
||||
norm += (X_old[i] - X[i]) * (X_old[i] - X[i]);
|
||||
svm_barrier(SVM_TYPE);
|
||||
#else
|
||||
if ((num > 1) && (rankID == 0)) { /* write always a complete cache line */
|
||||
memset((void*)temp, 0, CACHE_LINE);
|
||||
svm_flush(0);
|
||||
}
|
||||
|
||||
svm_barrier(SVM_TYPE);
|
||||
|
||||
for(i=iter_start; i<iter_end; i++)
|
||||
norm += (X_old[i] - X[i]) * (X_old[i] - X[i]);
|
||||
|
||||
if (num > 1) {
|
||||
RCCE_acquire_lock(0);
|
||||
svm_invalidate();
|
||||
norm += temp[0];
|
||||
temp[0] = norm;
|
||||
svm_flush(0);
|
||||
RCCE_release_lock(0);
|
||||
|
||||
svm_barrier(SVM_LAZYRELEASE);
|
||||
norm = temp[0];
|
||||
}
|
||||
#endif
|
||||
|
||||
/* check the break condition */
|
||||
norm /= (double) MATRIX_SIZE;
|
||||
|
||||
if (norm < 0.0000001)
|
||||
; //break;
|
||||
} else {
|
||||
svm_barrier(SVM_TYPE);
|
||||
}
|
||||
|
||||
//if (k % 100 == 0)
|
||||
// kprintf("k = %d\n", k);
|
||||
}
|
||||
|
||||
stop = rdtsc();
|
||||
|
||||
if(rankID==0)
|
||||
{
|
||||
if (MATRIX_SIZE < 16) {
|
||||
kprintf("Print the solution...\n");
|
||||
/* print solution */
|
||||
for(i=0; i<MATRIX_SIZE; i++) {
|
||||
for(j=0; j<MATRIX_SIZE; j++)
|
||||
kprintf("%u/100\t", (uint32_t) (100*A[i][j]));
|
||||
kprintf("*\t%u/100\t=\t%u/100\n", (uint32_t) (100*X[i]), (uint32_t) (100*A[i][MATRIX_SIZE]));
|
||||
}
|
||||
}
|
||||
kprintf("Check the result...\n");
|
||||
|
||||
/*
|
||||
* check the result
|
||||
* X[i] have to be 1
|
||||
*/
|
||||
for(i=0; i<MATRIX_SIZE; i++) {
|
||||
error = fabs(X[i] - 1.0f);
|
||||
|
||||
if (max < error)
|
||||
max = error;
|
||||
if (error > 0.01f)
|
||||
kprintf("Result is on position %d wrong (%u/100 != 1.0)\n", i, (uint32_t) (100*X[i]));
|
||||
}
|
||||
kprintf("maximal error is %u/100\n", (uint32_t) (100*max));
|
||||
|
||||
kprintf("\nmatrix size: %d x %d\n", MATRIX_SIZE, MATRIX_SIZE);
|
||||
kprintf("number of iterations: %d\n", iterations);
|
||||
kprintf("Calculation time: %llu ms (%llu ticks)\n", (stop-start)/(1000ULL*get_cpu_frequency()), stop-start);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -41,6 +41,7 @@ static mailbox_int32_t mbox;
|
|||
static int val = 0;
|
||||
|
||||
int laplace(void* arg);
|
||||
int jacobi(void* arg);
|
||||
|
||||
static int consumer(void* arg)
|
||||
{
|
||||
|
@ -100,8 +101,8 @@ static int foo(void* arg)
|
|||
static int mail_ping(void* arg) {
|
||||
int i;
|
||||
|
||||
for(i=0; i<5; ++i)
|
||||
icc_mail_ping();
|
||||
//for(i=0; i<5; ++i)
|
||||
// icc_mail_ping();
|
||||
for(i=0; i<5; ++i)
|
||||
icc_mail_ping_irq();
|
||||
//icc_mail_ping_jitter();
|
||||
|
@ -427,12 +428,13 @@ int test_init(void)
|
|||
//create_kernel_task(NULL, join_test, NULL, NORMAL_PRIO);
|
||||
//create_kernel_task(NULL, producer, , NORMAL_PRIO);
|
||||
//create_kernel_task(NULL, consumer, NULL, NORMAL_PRIO);
|
||||
create_kernel_task(NULL, mail_ping, NULL, NORMAL_PRIO);
|
||||
//create_kernel_task(NULL, mail_ping, NULL, NORMAL_PRIO);
|
||||
//create_kernel_task(NULL, mail_noise, NULL, NORMAL_PRIO);
|
||||
//create_kernel_task(NULL, svm_test, NULL, NORMAL_PRIO);
|
||||
//create_kernel_task(NULL, svm_bench, NULL, NORMAL_PRIO);
|
||||
//create_kernel_task(NULL, pi, NULL, NORMAL_PRIO);
|
||||
//create_kernel_task(NULL, laplace, NULL, NORMAL_PRIO);
|
||||
create_kernel_task(NULL, jacobi, NULL, NORMAL_PRIO);
|
||||
//create_user_task(NULL, "/bin/hello", argv);
|
||||
//create_user_task(NULL, "/bin/tests", argv);
|
||||
//create_user_task(NULL, "/bin/jacobi", argv);
|
||||
|
|
Loading…
Add table
Reference in a new issue