From df250721b54e7b658077b8620b1c6eebf58391d7 Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Tue, 19 Apr 2011 07:23:00 +0200
Subject: [PATCH 01/36] use logical operation instead of / and % to increase
 the performance

---
 mm/memory.c | 54 ++++++++++++++++++++++++++---------------------------
 1 file changed, 27 insertions(+), 27 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index cf7f5e48..ed6057e7 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -52,8 +52,8 @@ extern const void kernel_end;
 
 inline static int page_marked(unsigned int i)
 {
-	unsigned int index = i / 8;
-	unsigned int mod = i % 8;
+	unsigned int index = i >> 3;
+	unsigned int mod = i & 0x7;
 
 	return  (bitmap[index] & (1 << mod));
 }
@@ -65,8 +65,8 @@ inline static int page_unmarked(unsigned int i)
 
 inline static void page_set_mark(unsigned int i)
 {
-	unsigned int index = i / 8;
-	unsigned int mod = i % 8;
+	unsigned int index = i >> 3;
+	unsigned int mod = i & 0x7;
 
 	//if (page_marked(i))
 	//	kprintf("page %u is alread marked\n", i);
@@ -104,7 +104,7 @@ int mmu_init(void)
 				end_addr = addr + mmap->len;
  
 				while (addr < end_addr) {
-					page_clear_mark(addr / PAGE_SIZE);
+					page_clear_mark(addr >> PAGE_SHIFT);
 					addr += PAGE_SIZE;
 					atomic_int32_inc(&total_pages);
 					atomic_int32_inc(&total_available_pages);
@@ -129,13 +129,13 @@ int mmu_init(void)
 		/*
 		 * Mark the mb_info as used.
  		 */
-		page_set_mark((size_t)mb_info / PAGE_SIZE);
+		page_set_mark((size_t)mb_info >> PAGE_SHIFT);
 		atomic_int32_inc(&total_allocated_pages);
 		atomic_int32_dec(&total_available_pages);
 
 		for(i=0; i<mb_info->mods_count; i++, mmodule++) {
 			for(addr=mmodule->mod_start; addr<mmodule->mod_end; addr+=PAGE_SIZE) {
-				page_set_mark(addr / PAGE_SIZE);
+				page_set_mark(addr >> PAGE_SHIFT);
 				atomic_int32_inc(&total_allocated_pages);
 				atomic_int32_dec(&total_available_pages);
 			}
@@ -143,7 +143,7 @@ int mmu_init(void)
 	} 
 #elif defined(CONFIG_ROCKCREEK)
 	for(addr=PRIVATE_MEM1_START; addr<PRIVATE_MEM1_END; addr+=PAGE_SIZE) {
-		page_clear_mark(addr / PAGE_SIZE);
+		page_clear_mark(addr >> PAGE_SHIFT);
 		if (addr > addr + PAGE_SIZE)
 			break;
 		atomic_int32_inc(&total_pages);
@@ -151,7 +151,7 @@ int mmu_init(void)
 	}
 
 	for(addr=PRIVATE_MEM2_START; addr<PRIVATE_MEM2_END; addr+=PAGE_SIZE) {
-		page_clear_mark(addr / PAGE_SIZE);
+		page_clear_mark(addr >> PAGE_SHIFT);
 		if (addr > addr + PAGE_SIZE)
 			break;
 		atomic_int32_inc(&total_pages);
@@ -161,7 +161,7 @@ int mmu_init(void)
 	/*
 	 * Mark the bootinfo as used.
 	 */
-	page_set_mark((size_t)bootinfo / PAGE_SIZE);
+	page_set_mark((size_t)bootinfo >> PAGE_SHIFT);
 	atomic_int32_inc(&total_allocated_pages);
 	atomic_int32_dec(&total_available_pages);
 
@@ -170,7 +170,7 @@ int mmu_init(void)
 	 * Therefore, we set these pages as used.
 	 */
 	for(addr=bootinfo->addr; addr < bootinfo->addr+bootinfo->size; addr+=PAGE_SIZE) {
-		page_set_mark(addr / PAGE_SIZE);
+		page_set_mark(addr >> PAGE_SHIFT);
 		atomic_int32_inc(&total_allocated_pages);
 		atomic_int32_dec(&total_available_pages);
 	}
@@ -179,19 +179,19 @@ int mmu_init(void)
 #endif
 
 	kernel_size = (size_t) &kernel_end - (size_t) &kernel_start;
-	if (kernel_size % PAGE_SIZE)
-		kernel_size += PAGE_SIZE - kernel_size % PAGE_SIZE;
-	atomic_int32_add(&total_allocated_pages, kernel_size/PAGE_SIZE);
-	atomic_int32_sub(&total_available_pages, kernel_size/PAGE_SIZE);
+	if (kernel_size & (PAGE_SIZE-1))
+		kernel_size += PAGE_SIZE - (kernel_size & (PAGE_SIZE-1));
+	atomic_int32_add(&total_allocated_pages, kernel_size >> PAGE_SHIFT);
+	atomic_int32_sub(&total_available_pages, kernel_size >> PAGE_SHIFT);
 
 	/* set kernel space as used */
-	for(i=(size_t) &kernel_start / PAGE_SIZE; i < (size_t) &kernel_end / PAGE_SIZE; i++)
+	for(i=(size_t) &kernel_start >> PAGE_SHIFT; i < (size_t) &kernel_end >> PAGE_SHIFT; i++)
 		page_set_mark(i);
-	if ((size_t) &kernel_end % PAGE_SIZE)
+	if ((size_t) &kernel_end & (PAGE_SIZE-1))
 		page_set_mark(i);
 
-	alloc_start = (size_t) &kernel_end / PAGE_SIZE;	
-	if ((size_t) &kernel_end % PAGE_SIZE)
+	alloc_start = (size_t) &kernel_end >> PAGE_SHIFT;	
+	if ((size_t) &kernel_end & (PAGE_SIZE-1))
 		alloc_start++;
 
 	return paging_init();
@@ -217,7 +217,7 @@ size_t get_pages(uint32_t npages)
 next_try:
 	while((k < BITMAP_SIZE) && page_marked(i)) {
 		k++;
-		i = (i+1) % BITMAP_SIZE;
+		i = (i+1) & (BITMAP_SIZE-1);
 	}
 
 	if (k >= BITMAP_SIZE)
@@ -225,7 +225,7 @@ next_try:
 
 	for(j=1; (j<npages) && (i+j < BITMAP_SIZE) && (k < BITMAP_SIZE); j++, k++) {
 		if (page_marked(i+j)) {
-			i = (i+j) % BITMAP_SIZE;
+			i = (i+j) & (BITMAP_SIZE-1);
 			goto next_try;
 		}
 	}
@@ -259,7 +259,7 @@ oom:
 
 int put_page(size_t phyaddr)
 {
-	uint32_t index = phyaddr / PAGE_SIZE;
+	uint32_t index = phyaddr >> PAGE_SHIFT;
 
 	if (BUILTIN_EXPECT(!phyaddr, 0))
 		return -EINVAL;
@@ -277,9 +277,9 @@ int put_page(size_t phyaddr)
 void* mem_allocation(size_t sz, uint32_t flags)
 {
 	size_t phyaddr, viraddr;
-	uint32_t npages = sz / PAGE_SIZE;
+	uint32_t npages = sz >> PAGE_SHIFT;
 
-	if (sz % PAGE_SIZE)
+	if (sz & (PAGE_SIZE-1))
 		npages++;
 
 	phyaddr = get_pages(npages);
@@ -304,8 +304,8 @@ void kfree(void* addr, size_t sz)
 	if (BUILTIN_EXPECT(!addr && !sz, 0))
 		return;
 
-        npages = sz / PAGE_SIZE;
-	if (sz % PAGE_SIZE)
+        npages = sz >> PAGE_SHIFT;
+	if (sz & (PAGE_SIZE-1))
 		npages++;
 
 	spinlock_lock(&bitmap_lock);
@@ -316,7 +316,7 @@ void kfree(void* addr, size_t sz)
 		if (!phyaddr)
 			continue;
 
-		index = phyaddr / PAGE_SIZE;
+		index = phyaddr >> PAGE_SHIFT;
 		page_clear_mark(index);
 		
 	}

From 3fea08710d70e1a401ec734ab5397e16242c3518 Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Tue, 19 Apr 2011 20:01:18 +0200
Subject: [PATCH 02/36] use logical operations instead of / and % to increase
 the performance

---
 arch/x86/mm/page.c                | 16 ++++++++--------
 include/metalsvm/config.h.example |  1 +
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/arch/x86/mm/page.c b/arch/x86/mm/page.c
index cd3f210b..4e5c4f7c 100644
--- a/arch/x86/mm/page.c
+++ b/arch/x86/mm/page.c
@@ -640,8 +640,8 @@ int arch_paging_init(void)
 	 * Set the page table and page directory entries for the kernel. We map the kernel's physical address 
 	 * to the same virtual address.
 	 */
-	npages = ((size_t) &kernel_end - (size_t) &kernel_start) / PAGE_SIZE;
-	if ((size_t)&kernel_end % PAGE_SIZE)
+	npages = ((size_t) &kernel_end - (size_t) &kernel_start) >> PAGE_SHIFT;
+	if ((size_t)&kernel_end & (PAGE_SIZE-1))
 		npages++;
 	map_region((size_t)&kernel_start, (size_t)&kernel_start, npages, MAP_KERNEL_SPACE);
 
@@ -686,8 +686,8 @@ int arch_paging_init(void)
 
 		for(i=0; i<mb_info->mods_count; i++, mmodule++) {
 			// map physical address to the same virtual address
-			npages = (mmodule->mod_end - mmodule->mod_start) / PAGE_SIZE;
-			if (mmodule->mod_end % PAGE_SIZE)
+			npages = (mmodule->mod_end - mmodule->mod_start) >> PAGE_SHIFT;
+			if (mmodule->mod_end & (PAGE_SIZE-1))
 				npages++;
 			map_region((size_t) (mmodule->mod_start), (size_t) (mmodule->mod_start), npages, MAP_KERNEL_SPACE);
 		}
@@ -699,17 +699,17 @@ int arch_paging_init(void)
 	map_region(SCC_BOOTINFO, SCC_BOOTINFO, 1, MAP_KERNEL_SPACE);
 
 	// map the initial ramdisk
-	npages = bootinfo->size / PAGE_SIZE;
-	if (bootinfo->size % PAGE_SIZE)
+	npages = bootinfo->size >> PAGE_SHIFT;
+	if (bootinfo->size & (PAGE_SIZE-1))
 		npages++;
 	map_region(bootinfo->addr, bootinfo->addr, npages, MAP_KERNEL_SPACE);
 
 	// map SCC's configuration registers
-	viraddr = map_region(CRB_X0_Y0, CRB_X0_Y0, (CRB_OWN-CRB_X0_Y0+16*1024*1024)/PAGE_SIZE, MAP_KERNEL_SPACE|MAP_NO_CACHE);
+	viraddr = map_region(CRB_X0_Y0, CRB_X0_Y0, (CRB_OWN-CRB_X0_Y0+16*1024*1024) >> PAGE_SHIFT, MAP_KERNEL_SPACE|MAP_NO_CACHE);
 	kprintf("Map configuration registers at 0x%x\n", viraddr);
 
 	// map SCC's message passing buffers
-	viraddr = map_region(MPB_X0_Y0, MPB_X0_Y0, (MPB_OWN-MPB_X0_Y0+16*1024*1024)/PAGE_SIZE, MAP_KERNEL_SPACE|MAP_MPE);
+	viraddr = map_region(MPB_X0_Y0, MPB_X0_Y0, (MPB_OWN-MPB_X0_Y0+16*1024*1024) >> PAGE_SHIFT, MAP_KERNEL_SPACE|MAP_MPE);
 	kprintf("Map message passing buffers at 0x%x\n", viraddr);
 #endif
 
diff --git a/include/metalsvm/config.h.example b/include/metalsvm/config.h.example
index b5c0536e..bb405699 100644
--- a/include/metalsvm/config.h.example
+++ b/include/metalsvm/config.h.example
@@ -32,6 +32,7 @@ extern "C" {
 #define KERNEL_STACK_SIZE	8192
 #define KMSG_SIZE		(128*1024)
 #define PAGE_SIZE		4096
+#define PAGE_SHIFT		12
 #define CACHE_LINE		64
 #define MAILBOX_SIZE		8
 #define TIMER_FREQ		100	/* in HZ */

From 6e255fe27e21707970abc3e2ab6ba3e6082ac9af Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Tue, 19 Apr 2011 20:18:38 +0200
Subject: [PATCH 03/36] add jacobi solver as example program

---
 newlib/examples/Makefile |   9 +-
 newlib/examples/jacobi.c | 200 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 208 insertions(+), 1 deletion(-)
 create mode 100644 newlib/examples/jacobi.c

diff --git a/newlib/examples/Makefile b/newlib/examples/Makefile
index 38a21cc6..e1a373f9 100644
--- a/newlib/examples/Makefile
+++ b/newlib/examples/Makefile
@@ -3,6 +3,7 @@ NEWLIB = ../x86/i586-metalsvm-elf32
 MAKE = make
 STRIP_DEBUG = --strip-debug
 KEEP_DEBUG = --only-keep-debug
+LDFLAGS =
 
 # other implicit rules
 %.o : %.c
@@ -10,7 +11,13 @@ KEEP_DEBUG = --only-keep-debug
 
 default: all
 	
-all: hello tests
+all: hello tests jacobi
+
+jacobi: jacobi.o
+	$(CC_FOR_TARGET) -T link.ld -o $@ $(LDFLAGS)  $< -lm
+	$(OBJCOPY_FOR_TARGET) $(KEEP_DEBUG) $@ $@.sym
+	$(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $@
+	chmod a-x $@.sym
 	
 tests: tests.o
 	$(CC_FOR_TARGET) -T link.ld -o $@ $(LDFLAGS) $<
diff --git a/newlib/examples/jacobi.c b/newlib/examples/jacobi.c
new file mode 100644
index 00000000..a899d87e
--- /dev/null
+++ b/newlib/examples/jacobi.c
@@ -0,0 +1,200 @@
+/*
+ * Copyright 2011 Stefan Lankes, Alexander Pilz, Maximilian Marx, Michael Ober,
+ * 		  Chair for Operating Systems, RWTH Aachen University
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <time.h>
+#include <unistd.h>
+#include <errno.h>
+#undef errno
+extern int errno;
+
+#define MATRIX_SIZE 	256
+#define MAXVALUE 	1337
+#define PAGE_SIZE 	4096
+#define CACHE_SIZE      (256*1024)
+#define ALIGN(x,a)	(((x)+(a)-1)&~((a)-1))
+
+static int generate_empty_matrix(double*** A , unsigned int N) {
+	unsigned int iCnt;	/* row index used while setting up the row pointers */
+	int i,j;
+	/* Allocates and fills an N x (N+1) linear system in *A; returns 0 on success, -2 if an allocation fails. */
+	*A = (double**) malloc((N+1)*sizeof(double*));	/* row-pointer table with N+1 slots (slots 0..N-1 are assigned below) */
+
+	if (*A == NULL) 
+		return -2;	/* allocation of the row-pointer table failed */
+
+	(*A)[0] = (double*) malloc((N+1)*N*sizeof(double));	/* one contiguous buffer: N rows of N+1 doubles */
+
+	if (**A == NULL)
+		return -2;	/* buffer allocation failed; NOTE(review): the row-pointer table in *A is not freed on this path */
+
+	for(iCnt=1; iCnt<N; iCnt++) { /* rows 1..N-1 start every N+1 doubles inside the buffer; row 0 is the buffer start itself */
+		(*A)[iCnt] = &((*A)[0][iCnt*(N+1)]);
+	}
+
+	memset(**A, 0, (N+1)*N*sizeof(double));      /* zero all coefficients and right-hand sides */
+
+	srand( 42 /*(unsigned) time(NULL)*/ ) ; /* fixed seed (the time-based seed is commented out) */
+
+	/*
+	 * Initialize the system of linear equations A*x = b.
+	 * Column N of row i holds b[i], accumulated as the sum of row i, so the exact solution is x[i] = 1.
+	 */
+	for (i = 0; i < N; i++) 
+	{
+		double sum = 0.0;	/* sum of |A[i][j]| over the off-diagonal entries of row i */
+
+		for (j = 0; j < N; j++) 
+		{
+			if (i != j) 
+			{
+				double c = ((double)rand()) / ((double)RAND_MAX) * MAXVALUE;	/* random value in [0, MAXVALUE] */
+
+				sum += fabs(c);
+				(*A)[i][j] = c;
+				(*A)[i][N] += c;	/* accumulate the right-hand side b[i] */
+			}
+		}
+
+		/*
+		 * The Jacobi method will always converge if the matrix A is strictly or irreducibly diagonally dominant.
+		 * Strict row diagonal dominance means that for each row, the absolute value of the diagonal term is
+		 * greater than the sum of absolute values of other terms: |A[i][i]| > Sum |A[i][j]| with (i != j)
+		 */
+
+		(*A)[i][i] = sum + 2.0;	/* diagonal exceeds the off-diagonal absolute row sum by 2 */
+		(*A)[i][N] += sum + 2.0;	/* add the diagonal term so b[i] stays equal to the full row sum */
+	}
+
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	double*       temp;
+	unsigned int  i, j, iter_start, iter_end;
+	unsigned int  iterations = 0;
+	double        error, norm, norm_res, max = 0.0;
+	double** A=0;
+	double* X;
+	double* X_old, xi;
+	double start,stop;
+
+	if (generate_empty_matrix(&A,MATRIX_SIZE) < 0)
+	{
+		printf("generate_empty_matrix() failed...\n");
+		fflush(stdout);
+		exit(-1);
+
+	}
+
+	printf("generate_empty_matrix() done...\n");
+	fflush(stdout);
+
+	X=(double*) malloc(MATRIX_SIZE*sizeof(double));
+	X_old=(double*) malloc(MATRIX_SIZE*sizeof(double));
+	if(X == NULL || X_old == NULL)
+	{
+		printf("X or X_old is NULL...\n");
+		exit(-1);
+	}
+
+	for(i=0; i<MATRIX_SIZE; i++) 
+	{
+		X[i] = ((double)rand()) / ((double)RAND_MAX) * 10.0;
+		X_old[i] = 0.0;
+	}
+
+	printf("start calculation...\n");
+	fflush(stdout);
+
+	iter_start = 0;
+	iter_end = MATRIX_SIZE;
+
+	//start = RCCE_wtime();
+
+	while(1) 
+	{
+		iterations++;
+	
+		temp = X_old;
+		X_old = X;
+		X = temp;
+
+		for (i=iter_start; i<iter_end; i++) 
+		{	
+			for(j=0, xi=0.0; j<i; j++)
+				xi += A[i][j]* X_old[j];
+
+			for(j=i+1; j<MATRIX_SIZE; j++)
+				xi += A[i][j] * X_old[j];
+			X[i] = (A[i][MATRIX_SIZE] - xi) / A[i][i];
+		}
+
+		if (iterations % 5000 == 0 ) {/* calculate the Euclidean norm between X_old and X*/
+			norm_res = norm = 0.0;
+			for (i=iter_start; i<iter_end; i++)
+				norm += (X_old[i] - X[i]) * (X_old[i] - X[i]);
+
+			/* check the break condition */
+			norm_res /= (double) MATRIX_SIZE;
+			
+			if (norm_res < 0.0000001)
+				break;
+		}
+	}
+
+	//stop = RCCE_wtime();
+	
+	if (MATRIX_SIZE < 16) {
+		printf("Print the solution...\n");
+		/* print solution */
+		for(i=0; i<MATRIX_SIZE; i++) {
+			for(j=0; j<MATRIX_SIZE; j++) 
+				printf("%8.2f\t", A[i][j]);
+			printf("*\t%8.2f\t=\t%8.2f\n", X[i], A[i][MATRIX_SIZE]);
+		}
+	}
+	printf("Check the result...\n");
+
+	/* 
+	 * check the result 
+	 * X[i] have to be 1
+	 */
+	for(i=0; i<MATRIX_SIZE; i++) {
+		error = fabs(X[i] - 1.0f);
+
+		if (max < error)
+			max = error;
+			if (error > 0.01f)
+				printf("Result is on position %d wrong (%f != 1.0)\n", i, X[i]);
+	}
+	printf("maximal error is %f\n", max);
+
+	printf("\nmatrix size: %d x %d\n", MATRIX_SIZE, MATRIX_SIZE);
+	printf("number of iterations: %d\n", iterations);
+	//printf("calculation time: %f s\n", stop-start);
+
+	free((void*) X_old);
+	free((void*) X);
+
+	return 0;
+}

From ea19b157814ca1afe005ea878ef88b3f2f4cb6cc Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Wed, 20 Apr 2011 11:34:52 +0200
Subject: [PATCH 04/36] - add room to the FPU context

---
 arch/x86/include/asm/tasks_types.h | 67 ++++++++++++++++++++++++++++++
 include/metalsvm/tasks_types.h     |  7 +++-
 2 files changed, 72 insertions(+), 2 deletions(-)
 create mode 100644 arch/x86/include/asm/tasks_types.h

diff --git a/arch/x86/include/asm/tasks_types.h b/arch/x86/include/asm/tasks_types.h
new file mode 100644
index 00000000..f0095bf3
--- /dev/null
+++ b/arch/x86/include/asm/tasks_types.h
@@ -0,0 +1,67 @@
+/* 
+ * Copyright 2011 Stefan Lankes, Chair for Operating Systems,
+ *                               RWTH Aachen University
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This file is part of MetalSVM.
+ *
+ */
+
+#ifndef __ARCH_TASKS_TYPES__
+#define __ARCH_TASKS_TYPES__
+
+#include <metalsvm/stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct {	/* presumably mirrors the 32-bit protected-mode FSAVE/FRSTOR image (assumes 32-bit long) -- verify against the Intel SDM */
+	long	cwd;		/* control word */
+	long	swd;		/* status word */
+	long	twd;		/* tag word */
+	long	fip;		/* instruction pointer offset */
+	long	fcs;		/* instruction pointer selector */
+	long	foo;		/* operand (data) pointer offset */
+	long	fos;		/* operand (data) pointer selector */
+	long	st_space[20];	/* 8 x87 registers, 10 bytes each = 80 bytes */
+	long	status;		/* NOTE(review): lies beyond the 108-byte hardware image; looks like a software-only status word -- confirm */
+} i387_fsave_t;
+
+typedef struct i387_fxsave_struct {	/* presumably mirrors the 512-byte FXSAVE/FXRSTOR image (assumes 32-bit long) -- verify against the Intel SDM */
+	unsigned short	cwd;	/* control word */
+	unsigned short	swd;	/* status word */
+	unsigned short	twd;	/* tag word (abridged form in the FXSAVE image) */
+	unsigned short	fop;	/* last x87 opcode */
+	long	fip;		/* instruction pointer offset */
+	long	fcs;		/* instruction pointer selector */
+	long	foo;		/* operand (data) pointer offset */
+	long	fos;		/* operand (data) pointer selector */
+	long	mxcsr;		/* SSE control/status register */
+	long	reserved;	/* NOTE(review): this slot is MXCSR_MASK in the Intel layout -- confirm it may be treated as reserved */
+	long	st_space[32];	/* 8 x87 registers, 16 bytes each = 128 bytes */
+	long	xmm_space[32];	/* 8 XMM registers, 16 bytes each = 128 bytes */
+	long	padding[56];
+} i387_fxsave_t __attribute__ ((aligned (16)));
+
+union fpu_union {
+	i387_fsave_t	fsave;
+	i387_fxsave_t	fxsave;
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/include/metalsvm/tasks_types.h b/include/metalsvm/tasks_types.h
index 73ca13d6..29fa5337 100644
--- a/include/metalsvm/tasks_types.h
+++ b/include/metalsvm/tasks_types.h
@@ -33,6 +33,7 @@
 #include <metalsvm/vma.h>
 #include <metalsvm/spinlock_types.h>
 #include <metalsvm/mailbox_types.h>
+#include <asm/tasks_types.h>
 #include <asm/atomic.h>
 
 #ifdef __cplusplus
@@ -57,9 +58,9 @@ typedef struct task {
 	uint32_t                status;
 	/// Usage in number of pages
 	atomic_int32_t   	user_usage;
-  /// Avoids concurrent access to the page directory
+	/// Avoids concurrent access to the page directory
 	spinlock_t		pgd_lock;	
-  /// pointer to the page directory
+	/// pointer to the page directory
 	struct page_dir*	pgd;            
 	/// Lock for the VMA_list
 	spinlock_t		vma_lock;
@@ -69,6 +70,8 @@ typedef struct task {
 	mailbox_wait_msg_t	inbox;	
 	/// Mail outbox array
 	mailbox_wait_msg_t*	outbox[MAX_TASKS];
+	/// FPU state
+	union fpu_union		fpu_state;
 } __attribute__((packed))  task_t;
 
 #ifdef __cplusplus

From 7e0179f5f7cd319cfbc3a1604e053589d1124441 Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Wed, 20 Apr 2011 15:16:22 +0200
Subject: [PATCH 05/36] first version to support the FPU

- seems to work
- currently, SIMD instructions are not supported
---
 arch/x86/include/asm/tasks_types.h |  6 +++++-
 arch/x86/kernel/entry.asm          | 13 ++++++++-----
 arch/x86/kernel/isrs.c             | 20 +++++++++++++++++++-
 include/metalsvm/tasks_types.h     |  4 +++-
 kernel/tasks.c                     |  9 +++++++--
 5 files changed, 42 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/tasks_types.h b/arch/x86/include/asm/tasks_types.h
index f0095bf3..138fbd1c 100644
--- a/arch/x86/include/asm/tasks_types.h
+++ b/arch/x86/include/asm/tasks_types.h
@@ -55,11 +55,15 @@ typedef struct i387_fxsave_struct {
 	long	padding[56];
 } i387_fxsave_t __attribute__ ((aligned (16)));
 
-union fpu_union {
+union fpu_state {
 	i387_fsave_t	fsave;
 	i387_fxsave_t	fxsave;
 };
 
+static inline void save_fpu_state(union fpu_state* state) {	// stores the current x87 FPU context into state->fsave
+	asm volatile ("fsave %0; fwait" : "=m"((*state).fsave));	// NOTE(review): per the Intel SDM, fsave also re-initializes the FPU after storing the image -- confirm callers expect that
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/arch/x86/kernel/entry.asm b/arch/x86/kernel/entry.asm
index 1a325dba..ca3afaa1 100644
--- a/arch/x86/kernel/entry.asm
+++ b/arch/x86/kernel/entry.asm
@@ -64,16 +64,19 @@ ALIGN 4
 stublet:
 ; initialize stack pointer.
     mov esp, default_stack_pointer
-; enable cache and turn on FPU exceptions
     mov eax, cr0
-    ; enable cache
-    and eax, 0x9fffffff
-    ; ...and turn on FPU exceptions
-    or eax, 0x20
+; enable cache, disable paging and fpu emulation
+    and eax, 0x3ffffffb
+; ...monitor coprocessor and turn on FPU exceptions
+    or eax, 0x22
     mov cr0, eax
 ; clears the current pgd entry
     xor eax, eax
     mov cr3, eax
+; disable SSE support (TODO)
+    mov eax, cr4
+    and eax, 0xfffbf9ff
+    mov cr4, eax
 ; interpret multiboot information
     extern multiboot_init
     push ebx
diff --git a/arch/x86/kernel/isrs.c b/arch/x86/kernel/isrs.c
index 63075d00..6df9526e 100644
--- a/arch/x86/kernel/isrs.c
+++ b/arch/x86/kernel/isrs.c
@@ -74,6 +74,7 @@ extern void isr30(void);
 extern void isr31(void);
 
 static void fault_handler(struct state *s);
+static void fpu_handler(struct state *s);
 
 /* 
  * This is a very repetitive function... it's not hard, it's
@@ -158,6 +159,23 @@ void isrs_install(void)
 	// install the default handler
 	for(i=0; i<32; i++)
 		irq_install_handler(i, fault_handler);
+
+	// set handler for fpu exceptions
+	irq_uninstall_handler(7);
+	irq_install_handler(7, fpu_handler);
+}
+
+static void fpu_handler(struct state *s)
+{
+	task_t* task = per_core(current_task);
+
+	kputs("got FPU exception\n");
+	asm volatile ("clts"); // clear the TS flag of cr0
+	if (!task->fpu_used)  {
+		task->fpu_used = 1;
+		asm volatile ("finit");
+	} else 
+		asm volatile ("frstor %0" :: "m"(task->fpu.fsave)); // restore fpu state
 }
 
 /** @brief Exception messages
@@ -189,7 +207,7 @@ static void fault_handler(struct state *s)
 {
 	if (s->int_no < 32) {
 		kputs(exception_messages[s->int_no]);
-		kputs(" Exception.\n");
+		kprintf(" Exception. (%d)\n", s->int_no);
 		
 		/* Now, we signalize that we have handled the interrupt */
 		if (apic_is_enabled())
diff --git a/include/metalsvm/tasks_types.h b/include/metalsvm/tasks_types.h
index 29fa5337..c46ec4b4 100644
--- a/include/metalsvm/tasks_types.h
+++ b/include/metalsvm/tasks_types.h
@@ -66,12 +66,14 @@ typedef struct task {
 	spinlock_t		vma_lock;
 	/// List of VMAs
 	vma_t*			vma_list;
+	/// Is set, when the FPU is used
+	uint32_t		fpu_used;
 	/// Mail inbox
 	mailbox_wait_msg_t	inbox;	
 	/// Mail outbox array
 	mailbox_wait_msg_t*	outbox[MAX_TASKS];
 	/// FPU state
-	union fpu_union		fpu_state;
+	union fpu_state		fpu;
 } __attribute__((packed))  task_t;
 
 #ifdef __cplusplus
diff --git a/kernel/tasks.c b/kernel/tasks.c
index 6be430ce..3ffb49b9 100644
--- a/kernel/tasks.c
+++ b/kernel/tasks.c
@@ -48,7 +48,7 @@ DEFINE_PER_CORE(task_t*, current_task, NULL);
  * A task's id will be its position in this array.
  */
 static task_t task_table[MAX_TASKS] = {[0 ... MAX_TASKS-1] = {0, TASK_INVALID, ATOMIC_INIT(0), \
-			 SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL}};
+			 SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, 0}};
 static spinlock_irqsave_t table_lock = SPINLOCK_IRQSAVE_INIT;
 
 /** @brief helper function for the assembly code to determine the current task
@@ -67,6 +67,7 @@ int multitasking_init(void) {
 		memset(task_table[0].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
 		per_core(current_task) = task_table+0;
 		per_core(current_task)->pgd = get_boot_pgd();
+		task_table[0].fpu_used = 0;
 		return 0;
 	}
 
@@ -189,6 +190,7 @@ static int create_task(tid_t* id, entry_point_t ep, void* arg)
 
 			ret = create_default_frame(task_table+i, ep, arg);
 
+			task_table[i].fpu_used = 0;
 			task_table[i].status = TASK_READY;
 			break;
 		}
@@ -250,6 +252,7 @@ int sys_fork(void)
 			mailbox_wait_msg_init(&task_table[i].inbox);
 			memset(task_table[i].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
 			task_table[i].outbox[per_core(current_task)->id] = &per_core(current_task)->inbox; 
+			task_table[i].fpu_used = 0x00;
 
 			ret = arch_fork(task_table+i);
 
@@ -707,7 +710,9 @@ void scheduler(void)
 			if (per_core(current_task)->status == TASK_RUNNING)
 				per_core(current_task)->status = TASK_READY;
 			task_table[new_id].status = TASK_RUNNING;
-	
+
+			if (per_core(current_task)->fpu_used)
+				save_fpu_state(&(per_core(current_task)->fpu));	
 			per_core(current_task) = task_table+new_id;
 			goto get_task_out;
 		}

From 8a515c9925b11d64ac466ee968f8343ff8c99be8 Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Wed, 20 Apr 2011 20:41:51 +0200
Subject: [PATCH 06/36] remove bug in the calculation of the break condition

---
 newlib/examples/jacobi.c | 44 ++++++++++++++++++----------------------
 1 file changed, 20 insertions(+), 24 deletions(-)

diff --git a/newlib/examples/jacobi.c b/newlib/examples/jacobi.c
index a899d87e..6c0e7a20 100644
--- a/newlib/examples/jacobi.c
+++ b/newlib/examples/jacobi.c
@@ -1,6 +1,6 @@
 /*
- * Copyright 2011 Stefan Lankes, Alexander Pilz, Maximilian Marx, Michael Ober,
- * 		  Chair for Operating Systems, RWTH Aachen University
+ * Copyright 2010-2011 Stefan Lankes
+ *                     Chair for Operating Systems, RWTH Aachen University
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -26,10 +26,10 @@
 #undef errno
 extern int errno;
 
-#define MATRIX_SIZE 	256
-#define MAXVALUE 	1337
-#define PAGE_SIZE 	4096
-#define CACHE_SIZE      (256*1024)
+#define MATRIX_SIZE 	128
+#define MAXVALUE	1337
+#define PAGE_SIZE	4096
+#define CACHE_SIZE	(256*1024)
 #define ALIGN(x,a)	(((x)+(a)-1)&~((a)-1))
 
 static int generate_empty_matrix(double*** A , unsigned int N) {
@@ -89,28 +89,26 @@ static int generate_empty_matrix(double*** A , unsigned int N) {
 
 int main(int argc, char **argv)
 {
-	double*       temp;
-	unsigned int  i, j, iter_start, iter_end;
-	unsigned int  iterations = 0;
-	double        error, norm, norm_res, max = 0.0;
-	double** A=0;
-	double* X;
-	double* X_old, xi;
-	double start,stop;
+	double*		temp;
+	unsigned int	i, j, iter_start, iter_end;
+	unsigned int	iterations = 0;
+	double		error, norm, max = 0.0;
+	double**	A=0;
+	double*		X;
+	double*		X_old, xi;
+	double		start,stop;
 
 	if (generate_empty_matrix(&A,MATRIX_SIZE) < 0)
 	{
 		printf("generate_empty_matrix() failed...\n");
-		fflush(stdout);
 		exit(-1);
 
 	}
 
 	printf("generate_empty_matrix() done...\n");
-	fflush(stdout);
 
-	X=(double*) malloc(MATRIX_SIZE*sizeof(double));
-	X_old=(double*) malloc(MATRIX_SIZE*sizeof(double));
+	X = (double*) malloc(MATRIX_SIZE*sizeof(double));
+	X_old = (double*) malloc(MATRIX_SIZE*sizeof(double));
 	if(X == NULL || X_old == NULL)
 	{
 		printf("X or X_old is NULL...\n");
@@ -124,7 +122,6 @@ int main(int argc, char **argv)
 	}
 
 	printf("start calculation...\n");
-	fflush(stdout);
 
 	iter_start = 0;
 	iter_end = MATRIX_SIZE;
@@ -142,7 +139,7 @@ int main(int argc, char **argv)
 		for (i=iter_start; i<iter_end; i++) 
 		{	
 			for(j=0, xi=0.0; j<i; j++)
-				xi += A[i][j]* X_old[j];
+				xi += A[i][j] * X_old[j];
 
 			for(j=i+1; j<MATRIX_SIZE; j++)
 				xi += A[i][j] * X_old[j];
@@ -150,14 +147,13 @@ int main(int argc, char **argv)
 		}
 
 		if (iterations % 5000 == 0 ) {/* calculate the Euclidean norm between X_old and X*/
-			norm_res = norm = 0.0;
+			norm = 0.0;
 			for (i=iter_start; i<iter_end; i++)
 				norm += (X_old[i] - X[i]) * (X_old[i] - X[i]);
 
 			/* check the break condition */
-			norm_res /= (double) MATRIX_SIZE;
-			
-			if (norm_res < 0.0000001)
+			norm /= (double) MATRIX_SIZE;		
+			if (norm < 0.0000001)
 				break;
 		}
 	}

From faa41e25e2045421f75a39e19a491d184950bb55 Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Wed, 20 Apr 2011 21:23:22 +0200
Subject: [PATCH 07/36] add basic support of sbrk and fix bug in saving of the
 fpu context

---
 arch/x86/kernel/isrs.c              | 11 ++++++---
 arch/x86/mm/page.c                  | 23 ++++++++++++++++-
 include/metalsvm/tasks_types.h      | 12 +++++++--
 kernel/syscall.c                    | 33 +++++++++++++++++++++++++
 kernel/tasks.c                      | 38 ++++++++++++++++++++---------
 kernel/tests.c                      |  1 +
 newlib/src/libgloss/metalsvm/sbrk.c | 29 ++++++++++------------
 7 files changed, 113 insertions(+), 34 deletions(-)

diff --git a/arch/x86/kernel/isrs.c b/arch/x86/kernel/isrs.c
index 6df9526e..cfb45389 100644
--- a/arch/x86/kernel/isrs.c
+++ b/arch/x86/kernel/isrs.c
@@ -169,13 +169,16 @@ static void fpu_handler(struct state *s)
 {
 	task_t* task = per_core(current_task);
 
-	kputs("got FPU exception\n");
 	asm volatile ("clts"); // clear the TS flag of cr0
-	if (!task->fpu_used)  {
-		task->fpu_used = 1;
+	if (!(task->flags & TASK_FPU_INIT))  {
+		// use the FPU at the first time => Initialize FPU
 		asm volatile ("finit");
-	} else 
+		task->flags = task->flags|TASK_FPU_INIT|TASK_FPU_USED;
+	} else {
+		// restore the FPU context 
 		asm volatile ("frstor %0" :: "m"(task->fpu.fsave)); // restore fpu state
+		task->flags |= TASK_FPU_USED;
+	}
 }
 
 /** @brief Exception messages
diff --git a/arch/x86/mm/page.c b/arch/x86/mm/page.c
index 4e5c4f7c..75f34015 100644
--- a/arch/x86/mm/page.c
+++ b/arch/x86/mm/page.c
@@ -588,7 +588,28 @@ int print_paging_tree(size_t viraddr)
 
 static void pagefault_handler(struct state *s)
 {
-	kprintf("PAGE FAULT: Task %u got page fault at %p (irq %d)\n", per_core(current_task)->id, read_cr2(), s->int_no);
+	task_t* task = per_core(current_task);
+	size_t viraddr = read_cr2();
+	size_t phyaddr;
+
+	if ((viraddr >= task->start_heap) && (viraddr <= task->end_heap) && (viraddr > KERNEL_SPACE)) {
+		viraddr = viraddr & 0xFFFFF000;
+
+		phyaddr = get_page();
+		if (BUILTIN_EXPECT(!phyaddr, 0))
+			goto default_handler;
+
+		if (map_region(viraddr, phyaddr, 1, MAP_USER_SPACE|MAP_HEAP) == viraddr) {
+			memset((void*) viraddr, 0x00, PAGE_SIZE);
+			return;
+		}
+		
+		kprintf("Could not map 0x%x at 0x%x\n", phyaddr, viraddr);
+		put_page(phyaddr);
+	}
+
+default_handler:
+	kprintf("PAGE FAULT: Task %u got page fault at %p (irq %d)\n", task->id, viraddr, s->int_no);
 	kprintf("Register state: eax = 0x%x, ebx = 0x%x, ecx = 0x%x, edx = 0x%x, edi = 0x%x, esi = 0x%x, ebp = 0x%x, esp = 0x%x\n", 
 		s->eax, s->ebx, s->ecx, s->edx, s->edi, s->esi, s->ebp, s->esp);
 
diff --git a/include/metalsvm/tasks_types.h b/include/metalsvm/tasks_types.h
index c46ec4b4..58f65cf2 100644
--- a/include/metalsvm/tasks_types.h
+++ b/include/metalsvm/tasks_types.h
@@ -47,6 +47,10 @@ extern "C" {
 #define TASK_FINISHED	4
 #define TASK_IDLE	5
 
+#define TASK_DEFAULT_FLAGS	0
+#define TASK_FPU_INIT		(1 << 0)
+#define TASK_FPU_USED		(1 << 1)
+
 typedef int (STDCALL *entry_point_t)(void*);
 struct page_dir;
 
@@ -66,8 +70,12 @@ typedef struct task {
 	spinlock_t		vma_lock;
 	/// List of VMAs
 	vma_t*			vma_list;
-	/// Is set, when the FPU is used
-	uint32_t		fpu_used;
+	/// Additional status flags. For instance, to signalize the using of the FPU
+	uint32_t		flags;
+	/// Start address of the heap
+	uint32_t		start_heap;
+	/// End address of the heap
+	uint32_t		end_heap;
 	/// Mail inbox
 	mailbox_wait_msg_t	inbox;	
 	/// Mail outbox array
diff --git a/kernel/syscall.c b/kernel/syscall.c
index 76ce8e10..95a39fc6 100644
--- a/kernel/syscall.c
+++ b/kernel/syscall.c
@@ -22,6 +22,7 @@
 #include <metalsvm/syscall.h>
 #include <metalsvm/tasks.h>
 #include <metalsvm/errno.h>
+#include <metalsvm/spinlock.h>
 
 static int sys_write(int fildes, const char *buf, size_t len)
 {
@@ -37,6 +38,32 @@ static int sys_write(int fildes, const char *buf, size_t len)
 	return len;
 }
 
+static int sys_sbrk(int incr)
+{
+	task_t* task = per_core(current_task);
+	vma_t* tmp = NULL;
+	int ret;
+	// Moves the current task's heap end by incr bytes and returns the previous heap end (cast to int).
+	spinlock_lock(&task->vma_lock);
+
+	tmp = task->vma_list;	// walk the VMA list for the area that contains the current heap end
+        while(tmp && !((task->end_heap >= tmp->start) && (task->end_heap <= tmp->end)))
+		tmp = tmp->next;
+
+	ret = (int) task->end_heap;	// heap end before the change; this is the value handed back to the caller
+	task->end_heap += incr;	// NOTE(review): end_heap is uint32_t, so this wraps on overflow/underflow -- confirm callers cannot trigger that
+	if (task->end_heap < task->start_heap)
+		task->end_heap = task->start_heap;	// never let the heap end drop below the heap start
+	
+	// grow the containing VMA if the heap now ends at or beyond its end (the VMA is never shrunk here)
+	if (tmp && (tmp->end <= task->end_heap))
+		tmp->end = task->end_heap;
+
+	spinlock_unlock(&task->vma_lock);
+
+	return ret;	// NOTE(review): there is no failure path; no error code is ever returned
+}
+
 int syscall_handler(uint32_t sys_nr, ...)
 {
 	int ret = -EINVAL;
@@ -64,6 +91,12 @@ int syscall_handler(uint32_t sys_nr, ...)
 	case __NR_close:
 		ret = 0;
 		break;
+	case __NR_sbrk: {
+			int incr = va_arg(vl, int);
+
+			ret = sys_sbrk(incr);
+			break;
+		}
 	case __NR_getpid:
 		ret = per_core(current_task)->id;
 		break;
diff --git a/kernel/tasks.c b/kernel/tasks.c
index 3ffb49b9..ae699687 100644
--- a/kernel/tasks.c
+++ b/kernel/tasks.c
@@ -48,7 +48,7 @@ DEFINE_PER_CORE(task_t*, current_task, NULL);
  * A task's id will be its position in this array.
  */
 static task_t task_table[MAX_TASKS] = {[0 ... MAX_TASKS-1] = {0, TASK_INVALID, ATOMIC_INIT(0), \
-			 SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, 0}};
+			 SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, 0, 0, 0}};
 static spinlock_irqsave_t table_lock = SPINLOCK_IRQSAVE_INIT;
 
 /** @brief helper function for the assembly code to determine the current task
@@ -67,7 +67,7 @@ int multitasking_init(void) {
 		memset(task_table[0].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
 		per_core(current_task) = task_table+0;
 		per_core(current_task)->pgd = get_boot_pgd();
-		task_table[0].fpu_used = 0;
+		task_table[0].flags = TASK_DEFAULT_FLAGS;
 		return 0;
 	}
 
@@ -190,7 +190,9 @@ static int create_task(tid_t* id, entry_point_t ep, void* arg)
 
 			ret = create_default_frame(task_table+i, ep, arg);
 
-			task_table[i].fpu_used = 0;
+			task_table[i].flags = TASK_DEFAULT_FLAGS;
+			task_table[i].start_heap = 0;
+			task_table[i].end_heap = 0;
 			task_table[i].status = TASK_READY;
 			break;
 		}
@@ -252,7 +254,10 @@ int sys_fork(void)
 			mailbox_wait_msg_init(&task_table[i].inbox);
 			memset(task_table[i].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
 			task_table[i].outbox[per_core(current_task)->id] = &per_core(current_task)->inbox; 
-			task_table[i].fpu_used = 0x00;
+			task_table[i].flags = per_core(current_task)->flags;
+			memcpy(&(task_table[i].fpu), &(per_core(current_task)->fpu), sizeof(union fpu_state));
+			task_table[i].start_heap = 0;
+			task_table[i].end_heap = 0;
 
 			ret = arch_fork(task_table+i);
 
@@ -352,8 +357,8 @@ static int load_task(load_args_t* largs)
 			if (!prog_header.virt_addr)
 				continue;
 
-			npages = (prog_header.mem_size / PAGE_SIZE);
-			if (prog_header.mem_size % PAGE_SIZE)
+			npages = (prog_header.mem_size >> PAGE_SHIFT);
+			if (prog_header.mem_size & (PAGE_SIZE-1))
 				npages++;
 
 			addr = get_pages(npages);
@@ -369,6 +374,10 @@ static int load_task(load_args_t* largs)
 			// clear pages
 			memset((void*) prog_header.virt_addr, 0, npages*PAGE_SIZE);
 
+			// set starting point of the heap
+			if (per_core(current_task)->start_heap < prog_header.virt_addr+prog_header.mem_size)
+				per_core(current_task)->start_heap = per_core(current_task)->end_heap = prog_header.virt_addr+prog_header.mem_size;
+
 			// load program
 			read_fs(node, (uint8_t*)prog_header.virt_addr, prog_header.file_size, prog_header.offset);
 
@@ -387,8 +396,8 @@ static int load_task(load_args_t* largs)
 
 		case ELF_PT_GNU_STACK: // Indicates stack executability
 			// create user-level stack
-			npages = DEFAULT_STACK_SIZE / PAGE_SIZE;
-			if (DEFAULT_STACK_SIZE % PAGE_SIZE)
+			npages = DEFAULT_STACK_SIZE >> PAGE_SHIFT;
+			if (DEFAULT_STACK_SIZE & (PAGE_SIZE-1))
 				npages++;
 
 			addr = get_pages(npages); 
@@ -475,6 +484,9 @@ static int load_task(load_args_t* largs)
 
 	kfree(largs, sizeof(load_args_t));
 
+	// clear fpu state
+	per_core(current_task)->flags &= ~(TASK_FPU_USED|TASK_FPU_INIT);
+
 	jump_to_user_code(header.entry, stack+offset);
 
 	return 0;
@@ -703,6 +715,12 @@ void scheduler(void)
 	if (per_core(current_task)->status == TASK_FINISHED)
 		per_core(current_task)->status = TASK_INVALID; 
 
+	/* if the task is using the FPU, we need to save the FPU context */
+	if (per_core(current_task)->flags & TASK_FPU_USED) {
+		save_fpu_state(&(per_core(current_task)->fpu));
+		per_core(current_task)->flags &= ~TASK_FPU_USED;
+	}
+
 	for(i=1, new_id=(per_core(current_task)->id + 1) % MAX_TASKS; 
 		i<MAX_TASKS; i++, new_id=(new_id+1) % MAX_TASKS) 
 	{
@@ -710,10 +728,8 @@ void scheduler(void)
 			if (per_core(current_task)->status == TASK_RUNNING)
 				per_core(current_task)->status = TASK_READY;
 			task_table[new_id].status = TASK_RUNNING;
-
-			if (per_core(current_task)->fpu_used)
-				save_fpu_state(&(per_core(current_task)->fpu));	
 			per_core(current_task) = task_table+new_id;
+
 			goto get_task_out;
 		}
 	}
diff --git a/kernel/tests.c b/kernel/tests.c
index fe26ffc6..08f3d45f 100644
--- a/kernel/tests.c
+++ b/kernel/tests.c
@@ -114,6 +114,7 @@ int test_init(void)
 	//create_kernel_task(NULL, consumer, NULL);
 	//create_user_task(NULL, "/bin/hello", argv);
 	create_user_task(NULL, "/bin/tests", argv);
+	//create_user_task(NULL, "/bin/jacobi", argv);
 
 	return 0;
 }
diff --git a/newlib/src/libgloss/metalsvm/sbrk.c b/newlib/src/libgloss/metalsvm/sbrk.c
index 356c5ee7..78002469 100644
--- a/newlib/src/libgloss/metalsvm/sbrk.c
+++ b/newlib/src/libgloss/metalsvm/sbrk.c
@@ -22,23 +22,20 @@
 #include <errno.h>
 #undef errno
 extern int errno;
+#include "warning.h"
+#include "syscall.h"
 
-#ifndef NULL
-#define NULL	((void*) 0)
-#endif
+void*
+_DEFUN (sbrk, (incr),
+        int incr)
+{
+	int ret;	/* previous heap end or a negative error code of the kernel */
 
-void *
-sbrk (incr)
-     int incr;
-{ 
-	extern char _end; // set by linker
-	static char *heap_end = NULL;
-	char *prev_heap_end;
+	ret = SYSCALL1(__NR_sbrk, incr);
+	if ((ret < 0x1000) && (ret > -0x1000)) {	/* other negative values are addresses above 2 GByte */
+		errno = (ret < 0) ? -ret : ENOMEM;	/* a value in 0..0xFFF carries no error code */
+		ret = -1;
+	}
 
-	if (!heap_end)
-		heap_end = &_end;
-	prev_heap_end = heap_end;
-	heap_end += incr;
-
-	return (void *) prev_heap_end;
+	return (void*) ret;
 } 

From e2ae62af2b33c8e8416e56223738d6e891ed5f93 Mon Sep 17 00:00:00 2001
From: Marian Ohligs <marian.ohligs@rwth-aachen.de>
Date: Wed, 20 Apr 2011 23:14:15 +0200
Subject: [PATCH 08/36] add initrd_write

---
 .gitignore  |  1 +
 fs/initrd.c | 15 +++++++++++++++
 2 files changed, 16 insertions(+)

diff --git a/.gitignore b/.gitignore
index 391bf0f5..6842d839 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,6 +7,7 @@
 *.img
 *.a
 *.log
+*.swp
 *.DS_Store
 tags
 include/metalsvm/config.h
diff --git a/fs/initrd.c b/fs/initrd.c
index c3094b44..fd62b3e6 100644
--- a/fs/initrd.c
+++ b/fs/initrd.c
@@ -90,6 +90,20 @@ static ssize_t initrd_read(vfs_node_t* node, uint8_t* buffer, size_t size, off_t
 	return size;
 }
 
+/* Overwrites a part of an initrd file in place and returns the number of bytes written. */
+static ssize_t initrd_write(vfs_node_t* node, uint8_t* buffer, size_t size, off_t offset)
+{
+	block_list_t* blist = &node->block_list;
+	char* data = (char*) blist->data[0];
+	// initrd_init stores the whole file in data[0] and its length in block_size => the file is not able to grow
+	if (!data || (offset < 0) || ((size_t) offset >= node->block_size))
+		return 0;
+	if (size > node->block_size - (size_t) offset)
+		size = node->block_size - (size_t) offset;	// truncate instead of writing behind the file
+	memcpy(data + offset, buffer, size);
+	return size;
+}
+
 static dirent_t* initrd_readdir(vfs_node_t* node, uint32_t index)
 {
 	uint32_t i, j, count;
@@ -302,6 +316,7 @@ int initrd_init(void)
 			memset(new_node, 0x00, sizeof(vfs_node_t));
 			new_node->type = FS_FILE;
 			new_node->read = initrd_read;
+			new_node->write = initrd_write;
 			new_node->block_size = file_desc->length;
 			new_node->block_list.data[0] = ((char*) header) + file_desc->offset;
 			spinlock_init(&new_node->lock);

From 40ce1fe457591a38b1a1db9f447af4fdb227e1cf Mon Sep 17 00:00:00 2001
From: Marian Ohligs <marian.ohligs@rwth-aachen.de>
Date: Thu, 21 Apr 2011 00:25:05 +0200
Subject: [PATCH 09/36] manual merge old readwrite branch

---
 .gitignore                          |   1 +
 drivers/char/null.c                 |   2 +
 drivers/stderr/stderr.c             | 106 ++++++++++++++++++++++++++++
 drivers/stdin/stdin.c               | 106 ++++++++++++++++++++++++++++
 drivers/stdout/stdout.c             | 106 ++++++++++++++++++++++++++++
 fs/fs.c                             |   1 +
 fs/initrd.c                         |   9 +++
 include/metalsvm/fs.h               |   2 +-
 include/metalsvm/fs_types.h         |  42 +++++++++++
 include/metalsvm/tasks_types.h      |   3 +
 kernel/syscall.c                    |  95 +++++++++++++++++++------
 kernel/tasks.c                      |   2 +-
 kernel/tests.c                      |   4 +-
 newlib/examples/hello.c             |  20 +++---
 newlib/examples/test                |   1 +
 newlib/src/libgloss/metalsvm/open.c |   2 +-
 tools/Makefile                      |   2 +-
 17 files changed, 465 insertions(+), 39 deletions(-)
 create mode 100644 drivers/stderr/stderr.c
 create mode 100644 drivers/stdin/stdin.c
 create mode 100644 drivers/stdout/stdout.c
 create mode 100644 include/metalsvm/fs_types.h
 create mode 100644 newlib/examples/test

diff --git a/.gitignore b/.gitignore
index 6842d839..7c04e4b0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,6 +14,7 @@ include/metalsvm/config.h
 Makefile
 tools/make_initrd
 newlib/examples/hello
+newlib/examples/jacobi
 newlib/examples/echo
 newlib/examples/tests
 newlib/tmp/*
diff --git a/drivers/char/null.c b/drivers/char/null.c
index 4fa0d840..b796899f 100644
--- a/drivers/char/null.c
+++ b/drivers/char/null.c
@@ -22,6 +22,8 @@
 #include <metalsvm/stdio.h>
 #include <metalsvm/errno.h>
 #include <metalsvm/fs.h>
+#include <metalsvm/spinlock.h>
+
 
 /* Implementation of a simple null device */
 
diff --git a/drivers/stderr/stderr.c b/drivers/stderr/stderr.c
new file mode 100644
index 00000000..effc5942
--- /dev/null
+++ b/drivers/stderr/stderr.c
@@ -0,0 +1,106 @@
+/* 
+ * Copyright 2010 Stefan Lankes, Chair for Operating Systems,
+ *                               RWTH Aachen University
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This file is part of MetalSVM. 
+ */
+
+#include <metalsvm/stdlib.h>
+#include <metalsvm/string.h>
+#include <metalsvm/stdio.h>
+#include <metalsvm/errno.h>
+#include <metalsvm/fs.h>
+#include <metalsvm/spinlock.h>
+
+/* Implementation of a simple stderr device */
+
+static ssize_t stderr_read(vfs_node_t* node, uint8_t* buffer, size_t size, off_t offset)
+{
+	return 0;	/* nothing is stored in buffer, so no byte may be reported as read */
+}
+
+static ssize_t stderr_write(vfs_node_t* node, uint8_t* buffer, size_t size,  off_t offset)
+{
+	size_t i;	/* buffer holds size bytes and need not be NUL-terminated, so "%s" is not usable */
+	for(kprintf("\nFehler: "), i=0; i<size; i++) kputchar(buffer[i]);
+	return size;
+}
+static int stderr_open(vfs_node_t* node)
+{
+	return 0;
+}
+
+static int stderr_close(vfs_node_t* node)
+{
+	return 0;
+}
+
+/*
+ * Creates the character device "name" in the directory "node".
+ * Returns 0 on success, -EINVAL for invalid arguments or if finddir_fs()
+ * already finds "name", and -ENOMEM if no memory or no unused entry is left.
+ */
+int stderr_init(vfs_node_t* node, const char* name)
+{
+	uint32_t i, j;
+	vfs_node_t* new_node;
+	dir_block_t* blockdir;
+	dirent_t* dirent;
+	block_list_t* blist;
+
+	if (BUILTIN_EXPECT(!node || !name, 0))
+		return -EINVAL;
+	if (BUILTIN_EXPECT(node->type != FS_DIRECTORY, 0))
+		return -EINVAL;
+	if (finddir_fs(node, name))
+		return -EINVAL;
+
+	new_node = kmalloc(sizeof(vfs_node_t));
+	if (BUILTIN_EXPECT(!new_node, 0))
+		return -ENOMEM;
+
+	memset(new_node, 0x00, sizeof(vfs_node_t));
+	new_node->type = FS_CHARDEVICE;
+	new_node->open = &stderr_open;
+	new_node->close = &stderr_close;
+	new_node->read = &stderr_read;
+	new_node->write = &stderr_write;
+	spinlock_init(&new_node->lock);
+
+	/* store the node in the first unused entry of the directory */
+	for(blist=&node->block_list; blist; blist=blist->next) {
+		for(i=0; i<MAX_DATABLOCKS; i++) {
+			if (!blist->data[i])
+				continue;
+			blockdir = (dir_block_t*) blist->data[i];
+			for(j=0; j<MAX_DIRENTRIES; j++) {
+				dirent = &blockdir->entries[j];
+				if (!dirent->vfs_node) {
+					dirent->vfs_node = new_node;
+					strncpy(dirent->name, name, MAX_FNAME);
+					return 0;
+				}
+			}
+		}
+	}
+
+	/*
+	 * No unused entry in any existing block: give up. Appending an empty
+	 * block_list would not add directory entries, so the walk ends here.
+	 */
+	kfree(new_node, sizeof(vfs_node_t));
+
+	return -ENOMEM;
+}
diff --git a/drivers/stdin/stdin.c b/drivers/stdin/stdin.c
new file mode 100644
index 00000000..6b848082
--- /dev/null
+++ b/drivers/stdin/stdin.c
@@ -0,0 +1,106 @@
+/* 
+ * Copyright 2010 Stefan Lankes, Chair for Operating Systems,
+ *                               RWTH Aachen University
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This file is part of MetalSVM. 
+ */
+
+#include <metalsvm/stdlib.h>
+#include <metalsvm/string.h>
+#include <metalsvm/stdio.h>
+#include <metalsvm/errno.h>
+#include <metalsvm/fs.h>
+#include <metalsvm/spinlock.h>
+
+/* Implementation of a simple stdin device */
+
+static ssize_t stdin_read(vfs_node_t* node, uint8_t* buffer, size_t size, off_t offset)
+{
+	kprintf("Keine Eingabe implementiert");
+	return 0;	/* input is not implemented: nothing is stored in buffer, so report 0 bytes */
+}
+
+static ssize_t stdin_write(vfs_node_t* node, uint8_t* buffer, size_t size,  off_t offset)
+{
+	return size;
+}
+
+static int stdin_open(vfs_node_t* node)
+{
+	return 0;
+}
+
+static int stdin_close(vfs_node_t* node)
+{
+	return 0;
+}
+
+/*
+ * Creates the character device "name" in the directory "node".
+ * Returns 0 on success, -EINVAL for invalid arguments or if finddir_fs()
+ * already finds "name", and -ENOMEM if no memory or no unused entry is left.
+ */
+int stdin_init(vfs_node_t* node, const char* name)
+{
+	uint32_t i, j;
+	vfs_node_t* new_node;
+	dir_block_t* blockdir;
+	dirent_t* dirent;
+	block_list_t* blist;
+
+	if (BUILTIN_EXPECT(!node || !name, 0))
+		return -EINVAL;
+	if (BUILTIN_EXPECT(node->type != FS_DIRECTORY, 0))
+		return -EINVAL;
+	if (finddir_fs(node, name))
+		return -EINVAL;
+
+	new_node = kmalloc(sizeof(vfs_node_t));
+	if (BUILTIN_EXPECT(!new_node, 0))
+		return -ENOMEM;
+
+	memset(new_node, 0x00, sizeof(vfs_node_t));
+	new_node->type = FS_CHARDEVICE;
+	new_node->open = &stdin_open;
+	new_node->close = &stdin_close;
+	new_node->read = &stdin_read;
+	new_node->write = &stdin_write;
+	spinlock_init(&new_node->lock);
+
+	/* store the node in the first unused entry of the directory */
+	for(blist=&node->block_list; blist; blist=blist->next) {
+		for(i=0; i<MAX_DATABLOCKS; i++) {
+			if (!blist->data[i])
+				continue;
+			blockdir = (dir_block_t*) blist->data[i];
+			for(j=0; j<MAX_DIRENTRIES; j++) {
+				dirent = &blockdir->entries[j];
+				if (!dirent->vfs_node) {
+					dirent->vfs_node = new_node;
+					strncpy(dirent->name, name, MAX_FNAME);
+					return 0;
+				}
+			}
+		}
+	}
+
+	/*
+	 * No unused entry in any existing block: give up. Appending an empty
+	 * block_list would not add directory entries, so the walk ends here.
+	 */
+	kfree(new_node, sizeof(vfs_node_t));
+
+	return -ENOMEM;
+}
diff --git a/drivers/stdout/stdout.c b/drivers/stdout/stdout.c
new file mode 100644
index 00000000..268a4009
--- /dev/null
+++ b/drivers/stdout/stdout.c
@@ -0,0 +1,106 @@
+/* 
+ * Copyright 2010 Stefan Lankes, Chair for Operating Systems,
+ *                               RWTH Aachen University
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This file is part of MetalSVM. 
+ */
+
+#include <metalsvm/stdlib.h>
+#include <metalsvm/string.h>
+#include <metalsvm/stdio.h>
+#include <metalsvm/errno.h>
+#include <metalsvm/fs.h>
+#include <metalsvm/spinlock.h>
+
+/* Implementation of a simple stdout device */
+
+static ssize_t stdout_read(vfs_node_t* node, uint8_t* buffer, size_t size, off_t offset)
+{
+	return 0;	/* nothing is stored in buffer, so no byte may be reported as read */
+}
+
+static ssize_t stdout_write(vfs_node_t* node, uint8_t* buffer, size_t size,  off_t offset)
+{
+	size_t i;	/* buffer holds size bytes and need not be NUL-terminated, so "%s" is not usable */
+	for(i=0; i<size; i++) kputchar(buffer[i]);
+	return size;
+}
+static int stdout_open(vfs_node_t* node)
+{
+	return 0;
+}
+
+static int stdout_close(vfs_node_t* node)
+{
+	return 0;
+}
+
+/*
+ * Creates the character device "name" in the directory "node".
+ * Returns 0 on success, -EINVAL for invalid arguments or if finddir_fs()
+ * already finds "name", and -ENOMEM if no memory or no unused entry is left.
+ */
+int stdout_init(vfs_node_t* node, const char* name)
+{
+	uint32_t i, j;
+	vfs_node_t* new_node;
+	dir_block_t* blockdir;
+	dirent_t* dirent;
+	block_list_t* blist;
+
+	if (BUILTIN_EXPECT(!node || !name, 0))
+		return -EINVAL;
+	if (BUILTIN_EXPECT(node->type != FS_DIRECTORY, 0))
+		return -EINVAL;
+	if (finddir_fs(node, name))
+		return -EINVAL;
+
+	new_node = kmalloc(sizeof(vfs_node_t));
+	if (BUILTIN_EXPECT(!new_node, 0))
+		return -ENOMEM;
+
+	memset(new_node, 0x00, sizeof(vfs_node_t));
+	new_node->type = FS_CHARDEVICE;
+	new_node->open = &stdout_open;
+	new_node->close = &stdout_close;
+	new_node->read = &stdout_read;
+	new_node->write = &stdout_write;
+	spinlock_init(&new_node->lock);
+
+	/* store the node in the first unused entry of the directory */
+	for(blist=&node->block_list; blist; blist=blist->next) {
+		for(i=0; i<MAX_DATABLOCKS; i++) {
+			if (!blist->data[i])
+				continue;
+			blockdir = (dir_block_t*) blist->data[i];
+			for(j=0; j<MAX_DIRENTRIES; j++) {
+				dirent = &blockdir->entries[j];
+				if (!dirent->vfs_node) {
+					dirent->vfs_node = new_node;
+					strncpy(dirent->name, name, MAX_FNAME);
+					return 0;
+				}
+			}
+		}
+	}
+
+	/*
+	 * No unused entry in any existing block: give up. Appending an empty
+	 * block_list would not add directory entries, so the walk ends here.
+	 */
+	kfree(new_node, sizeof(vfs_node_t));
+
+	return -ENOMEM;
+}
diff --git a/fs/fs.c b/fs/fs.c
index 1be0f206..a2a28e6e 100644
--- a/fs/fs.c
+++ b/fs/fs.c
@@ -21,6 +21,7 @@
 #include <metalsvm/string.h>
 #include <metalsvm/fs.h>
 #include <metalsvm/errno.h>
+#include <metalsvm/spinlock.h>
 
 vfs_node_t* fs_root = NULL;		// The root of the filesystem.
 
diff --git a/fs/initrd.c b/fs/initrd.c
index fd62b3e6..9f3f17da 100644
--- a/fs/initrd.c
+++ b/fs/initrd.c
@@ -24,6 +24,8 @@
 #include <metalsvm/errno.h>
 #include <asm/multiboot.h>
 #include <asm/processor.h>
+#include <metalsvm/spinlock.h>
+
 
 static vfs_node_t initrd_root;
 
@@ -268,6 +270,13 @@ int initrd_init(void)
 	tmp = mkdir_fs(fs_root, "dev");
 	/* create the character device "null" */
 	null_init(tmp, "null");
+	/* create the standard input device "stdin" */
+	stdin_init(tmp, "stdin");
+	/* create the standard output device "stdout" */
+	stdout_init(tmp, "stdout");
+	/* create the standard error-output device "stderr" */
+	stderr_init(tmp, "stderr");
+
 
 	/* For every module.. */
 #ifdef CONFIG_MULTIBOOT
diff --git a/include/metalsvm/fs.h b/include/metalsvm/fs.h
index 56eba752..b9dbfb2e 100644
--- a/include/metalsvm/fs.h
+++ b/include/metalsvm/fs.h
@@ -27,7 +27,7 @@
 #define __FS_H__
 
 #include <metalsvm/stddef.h>
-#include <metalsvm/spinlock.h>
+#include <metalsvm/spinlock_types.h>
 
 #define FS_FILE        0x01
 #define FS_DIRECTORY   0x02
diff --git a/include/metalsvm/fs_types.h b/include/metalsvm/fs_types.h
new file mode 100644
index 00000000..9b9df7e3
--- /dev/null
+++ b/include/metalsvm/fs_types.h
@@ -0,0 +1,42 @@
+/* 
+ * Copyright 2011 Stefan Lankes, Chair for Operating Systems,
+ *                               RWTH Aachen University
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This file is part of MetalSVM.
+ */
+
+#ifndef __FS_TYPES_H__
+#define __FS_TYPES_H__
+
+#include <metalsvm/stddef.h>
+#include <metalsvm/fs.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct fildes {
+        vfs_node_t* 	node;		/* VFS node the descriptor refers to; NULL marks an unused slot */
+        off_t 		offset;		/* position handed to read_fs(); advanced by the bytes transferred */
+} fildes_t;
+
+#define MAX_FILDES 10
+#define FS_INIT { [0 ... MAX_FILDES-1] = {NULL, 0} }
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/include/metalsvm/tasks_types.h b/include/metalsvm/tasks_types.h
index 58f65cf2..2ab22f8c 100644
--- a/include/metalsvm/tasks_types.h
+++ b/include/metalsvm/tasks_types.h
@@ -34,6 +34,7 @@
 #include <metalsvm/spinlock_types.h>
 #include <metalsvm/mailbox_types.h>
 #include <asm/tasks_types.h>
+#include <metalsvm/fs_types.h>
 #include <asm/atomic.h>
 
 #ifdef __cplusplus
@@ -70,6 +71,8 @@ typedef struct task {
 	spinlock_t		vma_lock;
 	/// List of VMAs
 	vma_t*			vma_list;
+	/// Filedescriptor table
+	fildes_t		fildes_table[MAX_FILDES];
 	/// Additional status flags. For instance, to signalize the using of the FPU
 	uint32_t		flags;
 	/// Start address of the heap
diff --git a/kernel/syscall.c b/kernel/syscall.c
index 95a39fc6..809ea4ec 100644
--- a/kernel/syscall.c
+++ b/kernel/syscall.c
@@ -24,19 +24,56 @@
 #include <metalsvm/errno.h>
 #include <metalsvm/spinlock.h>
 
-static int sys_write(int fildes, const char *buf, size_t len)
+static int sys_read(int fd, const char *buf, size_t len)
 {
-	int i;
-
-	if (BUILTIN_EXPECT(!buf, 0))
-		return -1;
-
-	for (i = 0; i<len; i++, buf++) {
-		kputchar(*buf);
-	}
-        
-	return len;
+	fildes_t* table = per_core(current_task)->fildes_table;
+	int readbytes;	/* value returned by read_fs() */
+	/* fd is supplied by the user task: it has to select an open entry of the table */
+	if ((fd < 0) || (fd >= MAX_FILDES) || !table[fd].node)
+		return -EINVAL;
+	/* Beware: nothing here limits the read to the size of the file! */
+	readbytes = read_fs(table[fd].node, (uint8_t*)buf, len, table[fd].offset);
+	if (readbytes > 0) table[fd].offset += readbytes;	/* an error code must not move the offset */
+	return readbytes;
 }
+ 
+static int sys_write(int fd, const char *buf, size_t len)
+{
+	fildes_t* table = per_core(current_task)->fildes_table;
+	int wrotebytes;	/* value returned by write_fs() */
+	/* fd is supplied by the user task: it has to select an open entry of the table */
+	if ((fd < 0) || (fd >= MAX_FILDES) || !table[fd].node)
+		return -EINVAL;
+	/* NOTE(review): the data is always written at offset 0, only the bookkeeping below uses the descriptor offset - confirm */
+	wrotebytes = write_fs(table[fd].node, (uint8_t*)buf, len, 0);
+	if (wrotebytes > 0)	/* an error code must not move the offset */
+		table[fd].offset += wrotebytes;
+	return wrotebytes;
+}
+
+static int sys_open(const char* file, int flags, int mode)
+{
+	fildes_t* table = per_core(current_task)->fildes_table;
+	int fd;	/* flags and mode are accepted, but not evaluated */
+	for (fd = 0; fd < MAX_FILDES; fd++) {	/* take the first unused slot of the table */
+		if (table[fd].node)
+			continue;
+		table[fd].offset = 0;
+		table[fd].node = findnode_fs((char*) file);
+		return table[fd].node ? fd : -EINVAL;	/* a NULL node leaves the slot unused => no valid descriptor */
+	}
+	kprintf("Unable to create filedescriptor");
+	return -EINVAL;
+}
+
+static int sys_close(int fd)
+{
+	if ((fd < 0) || (fd >= MAX_FILDES) || !per_core(current_task)->fildes_table[fd].node)
+		return -EINVAL;	/* fd is supplied by the user task and has to select an open entry */
+	close_fs(per_core(current_task)->fildes_table[fd].node);
+	per_core(current_task)->fildes_table[fd] = (fildes_t) {NULL, 0};	/* a NULL node marks the slot as unused */
+	return 0;
+}
 
 static int sys_sbrk(int incr)
 {
@@ -77,20 +114,34 @@ int syscall_handler(uint32_t sys_nr, ...)
 		sys_exit(va_arg(vl, uint32_t));
 		ret = 0;
 		break;
-	case __NR_write: {
-			int fildes = va_arg(vl, int);
+	case __NR_read: {
+			int fd = va_arg(vl, int);
 			const char* buf = va_arg(vl, const char*);
-			size_t len = va_arg(vl, size_t);
-		
-			ret = sys_write(fildes, buf, len);
+ 			size_t len = va_arg(vl, size_t);
+			kprintf("%s", buf);
+			//ret = sys_read(fd, buf, len);
 			break;
 		}
-	case __NR_open:
-		ret = 1;
-		break;
-	case __NR_close:
-		ret = 0;
-		break;
+	case __NR_write: {
+			int fd = va_arg(vl, int);
+			const char* buf = va_arg(vl, const char*);
+			size_t len = va_arg(vl, size_t);
+			kprintf("%s", buf);
+			//ret = sys_write(fd, buf, len);
+			break;
+		}
+	case __NR_open: {
+			const char* file = va_arg(vl, const char*);
+			int flags = va_arg(vl, int);
+			int mode = va_arg(vl, int);
+			//ret = sys_open(file, flags, mode);
+			break;
+		}
+	case __NR_close: {
+			int fd = va_arg(vl, int);
+			//ret = sys_close(fd);
+ 			break;
+ 		}
 	case __NR_sbrk: {
 			int incr = va_arg(vl, int);
 
diff --git a/kernel/tasks.c b/kernel/tasks.c
index ae699687..8f29b5a0 100644
--- a/kernel/tasks.c
+++ b/kernel/tasks.c
@@ -48,7 +48,7 @@ DEFINE_PER_CORE(task_t*, current_task, NULL);
  * A task's id will be its position in this array.
  */
 static task_t task_table[MAX_TASKS] = {[0 ... MAX_TASKS-1] = {0, TASK_INVALID, ATOMIC_INIT(0), \
-			 SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, 0, 0, 0}};
+			 SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, FS_INIT, 0, 0, 0}};
 static spinlock_irqsave_t table_lock = SPINLOCK_IRQSAVE_INIT;
 
 /** @brief helper function for the assembly code to determine the current task
diff --git a/kernel/tests.c b/kernel/tests.c
index 08f3d45f..c2d9a47b 100644
--- a/kernel/tests.c
+++ b/kernel/tests.c
@@ -108,12 +108,12 @@ int test_init(void)
 	sem_init(&consuming, 0);
 	mailbox_int32_init(&mbox);
 
-	create_kernel_task(NULL, foo, "Hello from foo1\n");
+	//create_kernel_task(NULL, foo, "Hello from foo1\n");
 	//create_kernel_task(NULL, join_test, NULL);
 	//create_kernel_task(NULL, producer, NULL);
 	//create_kernel_task(NULL, consumer, NULL);
 	//create_user_task(NULL, "/bin/hello", argv);
-	create_user_task(NULL, "/bin/tests", argv);
+	create_user_task(NULL, "/bin/hello", argv);
 	//create_user_task(NULL, "/bin/jacobi", argv);
 
 	return 0;
diff --git a/newlib/examples/hello.c b/newlib/examples/hello.c
index bffa4136..32382d08 100644
--- a/newlib/examples/hello.c
+++ b/newlib/examples/hello.c
@@ -27,17 +27,15 @@ extern int errno;
 
 int main(int argc, char** argv)
 {
-	int i;
-	const char str[] = "Hello World!!!\n";
-
-	for(i=0; environ[i]; i++) 
-		printf("environ[%d] = %s\n", i, environ[i]);
-	for(i=0; i<argc; i++)
-		printf("argv[%d] = %s\n", i, argv[i]);
-
-	write(1, str, strlen(str));
-
-	printf("Hello from printf!!!\n");
+	//int i;
+	//char* str = (char *)malloc(40 * sizeof(char));
+	//FILE* testfile;
+ 	printf("hallo");
+	//testfile = fopen("/bin/test", "w+r");
+	//setbuf(testfile, NULL);
+	//fread(str, 1, 10, testfile);
+	//fwrite("wtest1\n", 1, 7, testfile);
+	//fread(str, 1, 10, testfile);
 
 	return errno;
 }
diff --git a/newlib/examples/test b/newlib/examples/test
new file mode 100644
index 00000000..51d96d29
--- /dev/null
+++ b/newlib/examples/test
@@ -0,0 +1 @@
+HalloXA!!
diff --git a/newlib/src/libgloss/metalsvm/open.c b/newlib/src/libgloss/metalsvm/open.c
index c7f40c14..9127ab9f 100644
--- a/newlib/src/libgloss/metalsvm/open.c
+++ b/newlib/src/libgloss/metalsvm/open.c
@@ -34,7 +34,7 @@ _DEFUN (_open, (file, flags, mode),
 {
 	int ret;
 
-        ret = SYSCALL2(__NR_open, flags, mode);
+        ret = SYSCALL3(__NR_open, file, flags, mode);
 	if (ret < 0) {
 		errno = -ret;
 		ret = -1;
diff --git a/tools/Makefile b/tools/Makefile
index fb77c026..68348fcb 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -7,7 +7,7 @@ LDFLGAS =
 DEFINES=
 NASM = nasm
 NASMFLAGS = -fbin
-EXECFILES = $(shell find ../newlib/examples -perm -u+r+x -type f) 
+EXECFILES = $(shell find ../newlib/examples -perm -u+r+x -type f) ../newlib/examples/test 
 
 # other implicit rules
 %.o : %.c

From e765e16eb17d19243ba6ebd414042cc425beec1d Mon Sep 17 00:00:00 2001
From: Marian Ohligs <marian.ohligs@rwth-aachen.de>
Date: Thu, 21 Apr 2011 01:05:47 +0200
Subject: [PATCH 10/36] fixed a bug in create_user_task

---
 kernel/tasks.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/tasks.c b/kernel/tasks.c
index 8f29b5a0..e9341558 100644
--- a/kernel/tasks.c
+++ b/kernel/tasks.c
@@ -533,7 +533,7 @@ int create_user_task(tid_t* id, const char* fname, char** argv)
 		}
 	}
 
-	if (argc <= 0)
+	if (argc < 0)
 		return -EINVAL;
 	if (buffer_size >= MAX_ARGS)
 		return -EINVAL;

From 373d663fd06006052e34b4c7e5fd7281567ac3b2 Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Tue, 19 Apr 2011 07:17:07 -0700
Subject: [PATCH 11/36] add iRCCE code

---
 arch/x86/include/asm/iRCCE.h     | 154 ++++++++++++
 arch/x86/include/asm/iRCCE_lib.h |  39 +++
 arch/x86/scc/Makefile            |   2 +-
 arch/x86/scc/iRCCE_admin.c       |  67 ++++++
 arch/x86/scc/iRCCE_get.c         |  85 +++++++
 arch/x86/scc/iRCCE_irecv.c       | 393 +++++++++++++++++++++++++++++++
 arch/x86/scc/iRCCE_isend.c       | 355 ++++++++++++++++++++++++++++
 arch/x86/scc/iRCCE_put.c         |  87 +++++++
 arch/x86/scc/iRCCE_recv.c        | 190 +++++++++++++++
 arch/x86/scc/iRCCE_send.c        | 165 +++++++++++++
 arch/x86/scc/iRCCE_synch.c       | 127 ++++++++++
 11 files changed, 1663 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/include/asm/iRCCE.h
 create mode 100644 arch/x86/include/asm/iRCCE_lib.h
 create mode 100644 arch/x86/scc/iRCCE_admin.c
 create mode 100644 arch/x86/scc/iRCCE_get.c
 create mode 100644 arch/x86/scc/iRCCE_irecv.c
 create mode 100644 arch/x86/scc/iRCCE_isend.c
 create mode 100644 arch/x86/scc/iRCCE_put.c
 create mode 100644 arch/x86/scc/iRCCE_recv.c
 create mode 100644 arch/x86/scc/iRCCE_send.c
 create mode 100644 arch/x86/scc/iRCCE_synch.c

diff --git a/arch/x86/include/asm/iRCCE.h b/arch/x86/include/asm/iRCCE.h
new file mode 100644
index 00000000..8b878bfd
--- /dev/null
+++ b/arch/x86/include/asm/iRCCE.h
@@ -0,0 +1,154 @@
+// 
+// Copyright 2010 Intel Corporation
+// 
+//    Licensed under the Apache License, Version 2.0 (the "License");
+//    you may not use this file except in compliance with the License.
+//    You may obtain a copy of the License at
+// 
+//        http://www.apache.org/licenses/LICENSE-2.0
+// 
+//    Unless required by applicable law or agreed to in writing, software
+//    distributed under the License is distributed on an "AS IS" BASIS,
+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//    See the License for the specific language governing permissions and
+//    limitations under the License.
+// 
+//    [2010-10-25] added support for non-blocking send/recv operations
+//                 - iRCCE_isend(), ..._test(), ..._wait(), ..._push()
+//                 - iRCCE_irecv(), ..._test(), ..._wait(), ..._push()
+//                 by Carsten Clauss, Chair for Operating Systems,
+//                                    RWTH Aachen University
+//
+//    [2010-11-12] extracted non-blocking code into separate library
+//                 by Carsten Scholtes, University of Bayreuth
+//
+//    [2010-12-09] added functions for a convenient handling of multiple
+//                 pending non-blocking requests
+//                 by Jacek Galowicz, Chair for Operating Systems
+//                                    RWTH Aachen University
+//
+#ifndef IRCCE_H
+#define IRCCE_H
+
+#include <asm/RCCE.h>
+
+#define iRCCE_SUCCESS  RCCE_SUCCESS
+#define iRCCE_PENDING      -1
+#define iRCCE_RESERVED     -2
+#define iRCCE_NOT_ENQUEUED -3
+
+typedef struct _iRCCE_SEND_REQUEST {
+  char *privbuf;    // source buffer in local private memory (send buffer)
+  t_vcharp combuf;  // intermediate buffer in MPB
+  size_t chunk;     // size of MPB available for this message (bytes)
+  RCCE_FLAG *ready; // flag indicating whether receiver is ready
+  RCCE_FLAG *sent;  // flag indicating whether message has been sent by source
+  size_t size;      // size of message (bytes)
+  int dest;         // UE that will receive the message
+
+  size_t wsize;     // offset within send buffer when putting in "chunk" bytes
+  size_t remainder;  // bytes remaining to be sent
+  size_t nbytes;    // number of bytes to be sent in single RCCE_put call
+  char *bufptr;     // running pointer inside privbuf for current location
+
+  int label;        // jump/goto label for the reentrance of the respective poll function
+  int finished;     // flag that indicates whether the request has already been finished
+
+  struct _iRCCE_SEND_REQUEST *next;
+} iRCCE_SEND_REQUEST;
+
+
+typedef struct _iRCCE_RECV_REQUEST {
+  char *privbuf;    // destination buffer in local private memory (receive buffer)
+  t_vcharp combuf;  // intermediate buffer in MPB
+  size_t chunk;     // size of MPB available for this message (bytes)
+  RCCE_FLAG *ready; // flag indicating whether receiver is ready
+  RCCE_FLAG *sent;  // flag indicating whether message has been sent by source
+  size_t size;      // size of message (bytes)
+  int source;       // UE that will send the message
+
+  size_t wsize;     // offset within receive buffer when getting "chunk" bytes
+  size_t remainder; // bytes remaining to be received
+  size_t nbytes;    // number of bytes to be received in single iRCCE_get call
+  char *bufptr;     // running pointer inside privbuf for current location
+
+  int label;        // jump/goto label for the reentrance of the respective poll function
+  int finished;     // flag that indicates whether the request has already been finished
+  int started;      // flag that indicates whether message parts have already been received
+
+  struct _iRCCE_RECV_REQUEST *next;
+} iRCCE_RECV_REQUEST;
+
+#define iRCCE_WAIT_LIST_RECV_TYPE 0
+#define iRCCE_WAIT_LIST_SEND_TYPE 1
+
+typedef struct _iRCCE_WAIT_LISTELEM {
+	int type;
+	struct _iRCCE_WAIT_LISTELEM * next;
+	void * req;
+} iRCCE_WAIT_LISTELEM;
+
+typedef struct _iRCCE_WAIT_LIST {
+	iRCCE_WAIT_LISTELEM * first;
+	iRCCE_WAIT_LISTELEM * last;
+} iRCCE_WAIT_LIST;
+
+
+///////////////////////////////////////////////////////////////
+//
+//                       THE iRCCE API:
+//
+//  Initialize function:
+int   iRCCE_init(void);
+//
+//  Non-blocking send/recv functions:
+int   iRCCE_isend(char *, size_t, int, iRCCE_SEND_REQUEST *);
+int   iRCCE_isend_test(iRCCE_SEND_REQUEST *, int *);
+int   iRCCE_isend_wait(iRCCE_SEND_REQUEST *);
+int   iRCCE_isend_push(void);
+int   iRCCE_irecv(char *, size_t, int, iRCCE_RECV_REQUEST *);
+int   iRCCE_irecv_test(iRCCE_RECV_REQUEST *, int *);
+int   iRCCE_irecv_wait(iRCCE_RECV_REQUEST *);
+int   iRCCE_irecv_push(void);
+//
+//  Blocking but pipelined send/recv functions:
+int   iRCCE_send(char *, size_t, int);
+int   iRCCE_recv(char *, size_t, int);
+//
+//  SCC-customized put/get and memcpy functions:
+int   iRCCE_put(t_vcharp, t_vcharp, int, int);
+int   iRCCE_get(t_vcharp, t_vcharp, int, int);
+void* iRCCE_memcpy_put(void*, const void*, size_t);
+void* iRCCE_memcpy_get(void*, const void*, size_t);
+//
+//  Wait/test-all/any functions:
+void  iRCCE_init_wait_list(iRCCE_WAIT_LIST*);
+void  iRCCE_add_to_wait_list(iRCCE_WAIT_LIST*, iRCCE_SEND_REQUEST *, iRCCE_RECV_REQUEST *);
+int   iRCCE_test_all(iRCCE_WAIT_LIST*, int *);
+int   iRCCE_wait_all(iRCCE_WAIT_LIST*);
+int   iRCCE_test_any(iRCCE_WAIT_LIST*, iRCCE_SEND_REQUEST **, iRCCE_RECV_REQUEST **);
+int   iRCCE_wait_any(iRCCE_WAIT_LIST*, iRCCE_SEND_REQUEST **, iRCCE_RECV_REQUEST **);
+//
+//  Cancel functions for non-blocking requests that have not yet started:
+int   iRCCE_isend_cancel(iRCCE_SEND_REQUEST *, int *);
+int   iRCCE_irecv_cancel(iRCCE_RECV_REQUEST *, int *);
+//
+///////////////////////////////////////////////////////////////
+//
+//      Just for convenience:
+#if 1
+#define RCCE_isend        iRCCE_isend
+#define RCCE_isend_test   iRCCE_isend_test
+#define RCCE_isend_wait   iRCCE_isend_wait
+#define RCCE_isend_push   iRCCE_isend_push
+#define RCCE_irecv        iRCCE_irecv
+#define RCCE_irecv_test   iRCCE_irecv_test
+#define RCCE_irecv_wait   iRCCE_irecv_wait
+#define RCCE_irecv_push   iRCCE_irecv_push
+#define RCCE_SEND_REQUEST iRCCE_SEND_REQUEST
+#define RCCE_RECV_REQUEST iRCCE_RECV_REQUEST
+#endif
+///////////////////////////////////////////////////////////////
+
+#endif
+
diff --git a/arch/x86/include/asm/iRCCE_lib.h b/arch/x86/include/asm/iRCCE_lib.h
new file mode 100644
index 00000000..0d8b4e16
--- /dev/null
+++ b/arch/x86/include/asm/iRCCE_lib.h
@@ -0,0 +1,39 @@
+// 
+// Copyright 2010 Intel Corporation
+// 
+//    Licensed under the Apache License, Version 2.0 (the "License");
+//    you may not use this file except in compliance with the License.
+//    You may obtain a copy of the License at
+// 
+//        http://www.apache.org/licenses/LICENSE-2.0
+// 
+//    Unless required by applicable law or agreed to in writing, software
+//    distributed under the License is distributed on an "AS IS" BASIS,
+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//    See the License for the specific language governing permissions and
+//    limitations under the License.
+//
+//    [2010-10-25] added support for non-blocking send/recv operations
+//                 - iRCCE_isend(), ..._test(), ..._wait(), ..._push()
+//                 - iRCCE_irecv(), ..._test(), ..._wait(), ..._push()
+//                 by Carsten Clauss, Chair for Operating Systems,
+//                                    RWTH Aachen University
+//
+//    [2010-11-12] extracted non-blocking code into separate library
+//                 by Carsten Scholtes
+// 
+#ifndef IRCCE_LIB_H
+#define IRCCE_LIB_H
+#include <asm/iRCCE.h>
+#include <asm/RCCE_lib.h>
+
+extern iRCCE_SEND_REQUEST* iRCCE_isend_queue;
+extern iRCCE_RECV_REQUEST* iRCCE_irecv_queue[RCCE_MAXNP];
+#ifdef _OPENMP
+#pragma omp threadprivate (iRCCE_isend_queue, iRCCE_irecv_queue)
+#endif
+
+int iRCCE_test_flag(RCCE_FLAG, RCCE_FLAG_STATUS, int *);
+
+#endif
+
diff --git a/arch/x86/scc/Makefile b/arch/x86/scc/Makefile
index d4866f2b..f32bc11f 100644
--- a/arch/x86/scc/Makefile
+++ b/arch/x86/scc/Makefile
@@ -1,4 +1,4 @@
-C_source := scc_init.c SCC_API.c RCCE_malloc.c RCCE_shmalloc.c RCCE_debug.c RCCE_qsort.c RCCE_DCMflush.c RCCE_send.c RCCE_recv.c RCCE_flags.c RCCE_comm.c RCCE_put.c RCCE_get.c RCCE_synch.c RCCE_bcast.c RCCE_admin.c # RCCE_power_management.c
+C_source := scc_init.c SCC_API.c iRCCE_admin.c iRCCE_send.c iRCCE_isend.c iRCCE_irecv.c iRCCE_recv.c iRCCE_get.c iRCCE_put.c iRCCE_synch.c RCCE_malloc.c RCCE_shmalloc.c RCCE_debug.c RCCE_qsort.c RCCE_DCMflush.c RCCE_send.c RCCE_recv.c RCCE_flags.c RCCE_comm.c RCCE_put.c RCCE_get.c RCCE_synch.c RCCE_bcast.c RCCE_admin.c # RCCE_power_management.c
 ASM_source := 
 MODULE := arch_x86_scc
 
diff --git a/arch/x86/scc/iRCCE_admin.c b/arch/x86/scc/iRCCE_admin.c
new file mode 100644
index 00000000..c61d66b9
--- /dev/null
+++ b/arch/x86/scc/iRCCE_admin.c
@@ -0,0 +1,67 @@
+//***************************************************************************************
+// Administrative routines. 
+//***************************************************************************************
+//
+// Author: Rob F. Van der Wijngaart
+//         Intel Corporation
+// Date:   08/30/2010
+//
+//***************************************************************************************
+//
+// 
+// Copyright 2010 Intel Corporation
+// 
+//    Licensed under the Apache License, Version 2.0 (the "License");
+//    you may not use this file except in compliance with the License.
+//    You may obtain a copy of the License at
+// 
+//        http://www.apache.org/licenses/LICENSE-2.0
+// 
+//    Unless required by applicable law or agreed to in writing, software
+//    distributed under the License is distributed on an "AS IS" BASIS,
+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//    See the License for the specific language governing permissions and
+//    limitations under the License.
+// 
+//    [2010-10-25] added support for non-blocking send/recv operations
+//                 - iRCCE_isend(), ..._test(), ..._wait(), ..._push()
+//                 - iRCCE_irecv(), ..._test(), ..._wait(), ..._push()
+//                 by Carsten Clauss, Chair for Operating Systems,
+//                                    RWTH Aachen University
+//
+//    [2010-11-12] extracted non-blocking code into separate library
+//                 by Carsten Scholtes
+//
+//    [2011-02-21] added support for multiple incoming queues
+//                 (one recv queue per remote rank)
+// 
+
+#include <metalsvm/stddef.h>
+
+#ifdef CONFIG_ROCKCREEK
+
+#include <asm/iRCCE_lib.h>
+
+// send request queue
+iRCCE_SEND_REQUEST* iRCCE_isend_queue;
+// recv request queue
+iRCCE_RECV_REQUEST* iRCCE_irecv_queue[RCCE_MAXNP];
+
+//--------------------------------------------------------------------------------------
+// FUNCTION: iRCCE_init
+//--------------------------------------------------------------------------------------
+// initialize the library
+//--------------------------------------------------------------------------------------
+int iRCCE_init(void) {
+  int i;
+
+  for(i=0; i<RCCE_MAXNP; i++) {
+    iRCCE_irecv_queue[i] = NULL;
+  }
+
+  iRCCE_isend_queue = NULL;
+
+  return (iRCCE_SUCCESS);
+}
+
+#endif
diff --git a/arch/x86/scc/iRCCE_get.c b/arch/x86/scc/iRCCE_get.c
new file mode 100644
index 00000000..1b2efb83
--- /dev/null
+++ b/arch/x86/scc/iRCCE_get.c
@@ -0,0 +1,85 @@
+//***************************************************************************************
+// Get data from communication buffer. 
+//***************************************************************************************
+//
+// Author: Rob F. Van der Wijngaart
+//         Intel Corporation
+// Date:   08/30/2010
+//
+//***************************************************************************************
+// 
+// Copyright 2010 Intel Corporation
+// 
+//    Licensed under the Apache License, Version 2.0 (the "License");
+//    you may not use this file except in compliance with the License.
+//    You may obtain a copy of the License at
+// 
+//        http://www.apache.org/licenses/LICENSE-2.0
+// 
+//    Unless required by applicable law or agreed to in writing, software
+//    distributed under the License is distributed on an "AS IS" BASIS,
+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//    See the License for the specific language governing permissions and
+//    limitations under the License.
+//
+//    [2010-11-03] switched to SCC-optimized memcpy() functions in scc_memcpy.h:
+//                 - memcpy_to_mpb()
+//                 - memcpy_from_mpb() 
+//                 by Stefan Lankes, Carsten Clauss, Chair for Operating Systems,
+//                                                   RWTH Aachen University
+//
+#include <metalsvm/stdlib.h>
+#include <metalsvm/string.h>
+
+#ifdef CONFIG_ROCKCREEK
+
+#include <asm/iRCCE_lib.h>
+
+#ifdef COPPERRIDGE
+#include "scc_memcpy.h"
+#endif
+
+void* iRCCE_memcpy_get(void *dest, const void *src, size_t count)
+{
+#ifdef COPPERRIDGE
+  return memcpy_from_mpb(dest, src, count);
+#else
+  return memcpy(dest, src, count);
+#endif
+}
+
+//--------------------------------------------------------------------------------------
+// FUNCTION: iRCCE_get
+//--------------------------------------------------------------------------------------
+// copy data from address "source" in the remote MPB to address "target" in either the
+// local MPB, or in the calling UE's private memory. We do not test to see if a move
+// into the calling UE's private memory stays within allocated memory
+//--------------------------------------------------------------------------------------
+int iRCCE_get(
+  t_vcharp target, // target buffer, MPB or private memory
+  t_vcharp source, // source buffer, MPB
+  int num_bytes,   // number of bytes to copy (must be multiple of cache line size)
+  int ID           // rank of source UE
+  ) {
+
+  // in non-GORY mode we only need to retain the MPB source shift; we
+  // already know the source is in the MPB, not private memory
+  source = RCCE_comm_buffer[ID]+(source-RCCE_comm_buffer[RCCE_IAM]);
+  
+  // do the actual copy, making sure we copy fresh data                  
+#ifdef _OPENMP
+  #pragma omp flush
+#endif
+  RC_cache_invalidate();
+
+  iRCCE_memcpy_get((void *)target, (void *)source, num_bytes);
+
+  // flush data to make sure it is visible to all threads; cannot use a flush list 
+  // because it concerns malloced space                     
+#ifdef _OPENMP
+  #pragma omp flush
+#endif
+  return(iRCCE_SUCCESS);
+}
+
+#endif
diff --git a/arch/x86/scc/iRCCE_irecv.c b/arch/x86/scc/iRCCE_irecv.c
new file mode 100644
index 00000000..e7d5ad1d
--- /dev/null
+++ b/arch/x86/scc/iRCCE_irecv.c
@@ -0,0 +1,393 @@
+//***************************************************************************************
+// Non-blocking receive routines. 
+//***************************************************************************************
+//
+// Author: Rob F. Van der Wijngaart
+//         Intel Corporation
+// Date:   08/30/2010
+//
+//***************************************************************************************
+// 
+// Copyright 2010 Intel Corporation
+// 
+//    Licensed under the Apache License, Version 2.0 (the "License");
+//    you may not use this file except in compliance with the License.
+//    You may obtain a copy of the License at
+// 
+//        http://www.apache.org/licenses/LICENSE-2.0
+// 
+//    Unless required by applicable law or agreed to in writing, software
+//    distributed under the License is distributed on an "AS IS" BASIS,
+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//    See the License for the specific language governing permissions and
+//    limitations under the License.
+// 
+//    [2010-10-25] added support for non-blocking send/recv operations
+//                 - iRCCE_isend(), ..._test(), ..._wait(), ..._push()
+//                 - iRCCE_irecv(), ..._test(), ..._wait(), ..._push()
+//                 by Carsten Clauss, Chair for Operating Systems,
+//                                    RWTH Aachen University
+//
+//    [2010-11-12] extracted non-blocking code into separate library
+//                 by Carsten Scholtes
+//
+//    [2010-12-09] added cancel functions for non-blocking send/recv requests
+//                 by Carsten Clauss
+//
+//    [2011-02-21] added support for multiple incoming queues
+//                 (one recv queue per remote rank)
+//                  
+
+#include <metalsvm/stdlib.h>
+#include <metalsvm/string.h>
+
+#ifdef CONFIG_ROCKCREEK
+
+#include <asm/iRCCE_lib.h>
+
+static int iRCCE_push_recv_request(iRCCE_RECV_REQUEST *request) {
+
+	char padline[RCCE_LINE_SIZE]; // copy buffer, used if message not multiple of line size
+	int   test;                   // flag for calling iRCCE_test_flag()
+
+	if(request->finished) return(iRCCE_SUCCESS);
+
+	if(request->label == 1) goto label1;
+	if(request->label == 2) goto label2;
+	if(request->label == 3) goto label3;
+
+	// receive data in units of available chunk size of MPB 
+	for (; request->wsize < (request->size / request->chunk) * request->chunk; request->wsize += request->chunk) {
+		request->bufptr = request->privbuf + request->wsize;
+		request->nbytes = request->chunk;
+label1:
+		iRCCE_test_flag(*(request->sent), RCCE_FLAG_SET, &test);
+		if(!test) {
+			request->label = 1;
+			return(iRCCE_PENDING);
+		}
+		request->started = 1;
+
+		RCCE_flag_write(request->sent, RCCE_FLAG_UNSET, RCCE_IAM);
+		// copy data from source's MPB space to private memory 
+		iRCCE_get((t_vcharp)request->bufptr, request->combuf, request->nbytes, request->source);
+
+		// tell the source I have moved data out of its comm buffer
+		RCCE_flag_write(request->ready, RCCE_FLAG_SET, request->source);
+	}
+
+	request->remainder = request->size % request->chunk; 
+	// if nothing is left over, we are done 
+	if (!request->remainder) {
+		request->finished = 1;
+		return(iRCCE_SUCCESS);
+	}
+
+	// receive remainder of data--whole cache lines               
+	request->bufptr = request->privbuf + (request->size / request->chunk) * request->chunk;
+	request->nbytes = request->remainder - request->remainder % RCCE_LINE_SIZE;
+	if (request->nbytes) {
+label2:
+		iRCCE_test_flag(*(request->sent), RCCE_FLAG_SET, &test);
+		if(!test) {
+			request->label = 2;
+			return(iRCCE_PENDING);
+		}
+		request->started = 1;
+
+		RCCE_flag_write(request->sent, RCCE_FLAG_UNSET, RCCE_IAM);
+		// copy data from source's MPB space to private memory 
+		iRCCE_get((t_vcharp)request->bufptr, request->combuf, request->nbytes, request->source);
+
+		// tell the source I have moved data out of its comm buffer
+		RCCE_flag_write(request->ready, RCCE_FLAG_SET, request->source);
+	}
+
+	request->remainder = request->size % request->chunk; 
+	request->remainder = request->remainder % RCCE_LINE_SIZE;
+	if (!request->remainder) {
+		request->finished = 1;
+		return(iRCCE_SUCCESS);
+	}
+
+	// remainder is less than cache line. This must be copied into appropriately sized 
+	// intermediate space before exact number of bytes get copied to the final destination 
+	request->bufptr = request->privbuf + (request->size / request->chunk) * request->chunk + request->nbytes;
+	request->nbytes = RCCE_LINE_SIZE;
+label3:
+	iRCCE_test_flag(*(request->sent), RCCE_FLAG_SET, &test);
+	if(!test) {
+		request->label = 3;
+		return(iRCCE_PENDING);
+	}
+	request->started = 1;
+
+	RCCE_flag_write(request->sent, RCCE_FLAG_UNSET, RCCE_IAM);
+	// copy data from source's MPB space to private memory   
+	iRCCE_get((t_vcharp)padline, request->combuf, request->nbytes, request->source);
+	memcpy(request->bufptr,padline,request->remainder);
+
+	// tell the source I have moved data out of its comm buffer
+	RCCE_flag_write(request->ready, RCCE_FLAG_SET, request->source);
+
+	request->finished = 1;
+	return(iRCCE_SUCCESS);
+}
+
+static void iRCCE_init_recv_request(
+		char *privbuf,    // destination buffer in local private memory (receive buffer)
+		t_vcharp combuf,  // intermediate buffer in MPB
+		size_t chunk,     // size of MPB available for this message (bytes)
+		RCCE_FLAG *ready, // flag indicating whether receiver is ready
+		RCCE_FLAG *sent,  // flag indicating whether message has been sent by source
+		size_t size,      // size of message (bytes)
+		int source,       // UE that will send the message
+		iRCCE_RECV_REQUEST *request
+		) {
+
+	request->privbuf   = privbuf;
+	request->combuf    = combuf;
+	request->chunk     = chunk;
+	request->ready     = ready;
+	request->sent      = sent;
+	request->size      = size;
+	request->source    = source;
+
+	request->wsize     = 0;
+	request->remainder = 0;
+	request->nbytes    = 0;
+	request->bufptr    = NULL;
+
+	request->label     = 0;
+	request->finished  = 0;
+	request->started   = 0;
+
+	request->next      = NULL;
+
+	return;
+}
+
+
+//--------------------------------------------------------------------------------------
+// FUNCTION: iRCCE_irecv
+//--------------------------------------------------------------------------------------
+// non-blocking recv function; fills in a handle of type iRCCE_RECV_REQUEST
+//--------------------------------------------------------------------------------------
+static iRCCE_RECV_REQUEST blocking_irecv_request;
+int iRCCE_irecv(char *privbuf, size_t size, int source, iRCCE_RECV_REQUEST *request) {
+
+	if(request == NULL) request = &blocking_irecv_request;
+
+	if (source<0 || source >= RCCE_NP) 
+		return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ID));
+	else {
+		iRCCE_init_recv_request(privbuf, RCCE_buff_ptr, RCCE_chunk, 
+				&RCCE_ready_flag[RCCE_IAM], &RCCE_sent_flag[source], 
+				size, source, request);
+
+		if(iRCCE_irecv_queue[source] == NULL) {
+
+			if(iRCCE_push_recv_request(request) == iRCCE_SUCCESS) {
+				return(iRCCE_SUCCESS);
+			}
+			else {       
+				iRCCE_irecv_queue[source] = request;
+
+				if(request == &blocking_irecv_request) {
+					iRCCE_irecv_wait(request);
+					return(iRCCE_SUCCESS);
+				}
+
+				return(iRCCE_PENDING);
+			}
+		}
+		else {
+			if(iRCCE_irecv_queue[source]->next == NULL) {
+				iRCCE_irecv_queue[source]->next = request;
+			}
+			else {
+				iRCCE_RECV_REQUEST *run = iRCCE_irecv_queue[source];
+				while(run->next != NULL) run = run->next;      
+				run->next = request;   
+			}
+
+				if(request == &blocking_irecv_request) {
+					iRCCE_irecv_wait(request);
+					return(iRCCE_SUCCESS);
+				}
+
+			return(iRCCE_RESERVED);
+		}
+	}
+}
+
+//--------------------------------------------------------------------------------------
+// FUNCTION: iRCCE_irecv_test
+//--------------------------------------------------------------------------------------
+// test function for completion of the requested non-blocking recv operation
+// Just provide NULL instead of the testvar if you don't need it
+//--------------------------------------------------------------------------------------
+int iRCCE_irecv_test(iRCCE_RECV_REQUEST *request, int *test) {
+
+	int source;
+
+	if(request == NULL) {
+
+		if(iRCCE_irecv_push() == iRCCE_SUCCESS) {
+			if (test) (*test) = 1;
+			return(iRCCE_SUCCESS);
+		}
+		else {
+			if (test) (*test) = 0;
+			return(iRCCE_PENDING);
+		}    
+	}
+
+	source = request->source;
+
+	if(request->finished) {
+		if (test) (*test) = 1;
+		return(iRCCE_SUCCESS);
+	}
+
+	if(iRCCE_irecv_queue[source] != request) {
+		if (test) (*test) = 0;
+		return(iRCCE_RESERVED);
+	}
+
+	iRCCE_push_recv_request(request);
+
+	if(request->finished) {
+		iRCCE_irecv_queue[source] = request->next;
+
+		if (test) (*test) = 1;
+		return(iRCCE_SUCCESS);
+	}
+
+	if (test) (*test) = 0;
+	return(iRCCE_PENDING);
+}
+
+
+//--------------------------------------------------------------------------------------
+// FUNCTION: iRCCE_irecv_push
+//--------------------------------------------------------------------------------------
+// progress function for pending requests in the irecv queue 
+//--------------------------------------------------------------------------------------
+static int iRCCE_irecv_push_source(int source) {
+
+	iRCCE_RECV_REQUEST *request = iRCCE_irecv_queue[source];
+
+	if(request == NULL) {
+		return(iRCCE_SUCCESS);
+	}
+
+	if(request->finished) {
+		return(iRCCE_SUCCESS);
+	}
+
+	iRCCE_push_recv_request(request);   
+
+	if(request->finished) {    
+		iRCCE_irecv_queue[source] = request->next;
+		return(iRCCE_SUCCESS);
+	}
+
+	return(iRCCE_PENDING);
+}
+
+int iRCCE_irecv_push(void) {
+
+	int i, j; 
+	int retval = iRCCE_SUCCESS;
+
+	for(i=0; i<RCCE_NP; i++) {
+
+		j = iRCCE_irecv_push_source(i);
+
+		if(j != iRCCE_SUCCESS) {
+			retval = j;
+		}
+	}
+
+	return retval;
+}
+
+//--------------------------------------------------------------------------------------
+// FUNCTION: iRCCE_irecv_wait
+//--------------------------------------------------------------------------------------
+// just wait for completion of the requested non-blocking recv operation
+//--------------------------------------------------------------------------------------
+int iRCCE_irecv_wait(iRCCE_RECV_REQUEST *request) {
+
+	if(request != NULL) {
+		while(!request->finished) {
+			iRCCE_irecv_push();
+			iRCCE_isend_push();
+		}
+	}
+	else {
+		do {
+			iRCCE_isend_push();
+		}
+		while(  iRCCE_irecv_push() != iRCCE_SUCCESS );
+	}
+
+	return(iRCCE_SUCCESS);
+}
+
+
+//--------------------------------------------------------------------------------------
+// FUNCTION: iRCCE_irecv_cancel
+//--------------------------------------------------------------------------------------
+// try to cancel a pending non-blocking recv request
+//--------------------------------------------------------------------------------------
+int iRCCE_irecv_cancel(iRCCE_RECV_REQUEST *request, int *test) {
+  
+	int source;
+	iRCCE_RECV_REQUEST *run;
+  
+	if( (request == NULL) || (request->finished) ) {
+		if (test) (*test) = 0;
+		return iRCCE_NOT_ENQUEUED;
+	}
+
+	source = request->source;
+  
+	if(iRCCE_irecv_queue[source] == NULL) {
+		if (test) (*test) = 0;
+		return iRCCE_NOT_ENQUEUED;
+	}
+  
+	if(iRCCE_irecv_queue[source] == request) {
+
+		// have parts of the message already been received?
+		if(request->started) {
+			if (test) (*test) = 0;
+			return iRCCE_PENDING;
+		}
+		else {
+			// no, thus request can be canceled just in time:
+			iRCCE_irecv_queue[source] = request->next;
+			if (test) (*test) = 1;
+			return iRCCE_SUCCESS;
+		}
+	}
+ 
+	for(run = iRCCE_irecv_queue[source]; run->next != NULL; run = run->next) {
+    
+		// request found --> remove it from recv queue:
+		if(run->next == request) {
+      
+			run->next = run->next->next;
+      
+			if (test) (*test) = 1;
+			return iRCCE_SUCCESS;
+		}
+	}
+  
+	if (test) (*test) = 0;
+	return iRCCE_NOT_ENQUEUED;
+}
+
+#endif
diff --git a/arch/x86/scc/iRCCE_isend.c b/arch/x86/scc/iRCCE_isend.c
new file mode 100644
index 00000000..18c9dca0
--- /dev/null
+++ b/arch/x86/scc/iRCCE_isend.c
@@ -0,0 +1,355 @@
+//***************************************************************************************
+// Non-blocking send routines. 
+//***************************************************************************************
+//
+// Author: Rob F. Van der Wijngaart
+//         Intel Corporation
+// Date:   008/30/2010
+//
+//***************************************************************************************
+// 
+// Copyright 2010 Intel Corporation
+// 
+//    Licensed under the Apache License, Version 2.0 (the "License");
+//    you may not use this file except in compliance with the License.
+//    You may obtain a copy of the License at
+// 
+//        http://www.apache.org/licenses/LICENSE-2.0
+// 
+//    Unless required by applicable law or agreed to in writing, software
+//    distributed under the License is distributed on an "AS IS" BASIS,
+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//    See the License for the specific language governing permissions and
+//    limitations under the License.
+// 
+//    [2010-10-25] added support for non-blocking send/recv operations
+//                 - iRCCE_isend(), ..._test(), ..._wait(), ..._push()
+//                 - iRCCE_irecv(), ..._test(), ..._wait(), ..._push()
+//                 by Carsten Clauss, Chair for Operating Systems,
+//                                    RWTH Aachen University
+//
+//    [2010-11-12] extracted non-blocking code into separate library
+//                 by Carsten Scholtes
+//
+//    [2010-12-09] added cancel functions for non-blocking send/recv requests
+//                 by Carsten Clauss
+//
+
+#include <metalsvm/stdlib.h>
+#include <metalsvm/string.h>
+
+#ifdef CONFIG_ROCKCREEK
+
+#include <asm/iRCCE_lib.h>
+
+static int iRCCE_push_send_request(iRCCE_SEND_REQUEST *request) {
+	// Progress one send request as far as possible without blocking; all state is kept in *request.
+	char padline[RCCE_LINE_SIZE]; // copy buffer, used if message not multiple of line size
+	int   test;       // result of iRCCE_test_flag(): 1 if the ready flag has the tested value
+	// Returns iRCCE_SUCCESS once the request is finished, iRCCE_PENDING if a ready flag is not set yet.
+	if(request->finished) return(iRCCE_SUCCESS);
+	// Re-entry: continue at the wait point at which the previous call returned iRCCE_PENDING.
+	if(request->label == 1) goto label1;
+	if(request->label == 2) goto label2;
+	if(request->label == 3) goto label3;
+
+	// phase 1: send data in units of available chunk size of comm buffer
+	for (; request->wsize< (request->size / request->chunk) * request->chunk; request->wsize += request->chunk) {
+		request->bufptr = request->privbuf + request->wsize;
+		request->nbytes = request->chunk;
+		// copy private data to own comm buffer
+		iRCCE_put(request->combuf, (t_vcharp) request->bufptr, request->nbytes, RCCE_IAM);
+		RCCE_flag_write(request->sent, RCCE_FLAG_SET, request->dest);
+		// test the ready flag once; if it is not set, remember this wait point and return
+label1:
+		iRCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test);
+		if(!test) {
+			request->label = 1;
+			return(iRCCE_PENDING);
+		}
+		RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);
+	}
+
+	request->remainder = request->size % request->chunk; 
+	// if nothing is left over, we are done
+	if (!request->remainder) {
+		request->finished = 1;
+		return(iRCCE_SUCCESS);
+	}
+
+	// phase 2: send remainder of data--whole cache lines
+	request->bufptr = request->privbuf + (request->size / request->chunk) * request->chunk;
+	request->nbytes = request->remainder - request->remainder % RCCE_LINE_SIZE;
+	if (request->nbytes) {
+		// copy private data to own comm buffer
+		iRCCE_put(request->combuf, (t_vcharp)request->bufptr, request->nbytes, RCCE_IAM);
+		RCCE_flag_write(request->sent, RCCE_FLAG_SET, request->dest);
+		// test the ready flag once; if it is not set, remember this wait point and return
+label2:
+		iRCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test);
+		if(!test) {
+			request->label = 2;
+			return(iRCCE_PENDING);
+		}
+		RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);
+	}
+	// bytes still left after the whole-line part: (size % chunk) % RCCE_LINE_SIZE
+	request->remainder = request->size % request->chunk; 
+	request->remainder = request->remainder%RCCE_LINE_SIZE;
+	// if nothing is left over, we are done
+	if (!request->remainder)
+	{
+		request->finished = 1;
+		return(iRCCE_SUCCESS);
+	}
+
+	// phase 3: remainder is less than a cache line. This must be copied into appropriately
+	// sized intermediate space (padline) before it can be sent to the receiver
+	request->bufptr = request->privbuf + (request->size / request->chunk) * request->chunk + request->nbytes;
+	request->nbytes = RCCE_LINE_SIZE;
+	// a whole line is put; only the first "remainder" bytes of padline are written by memcpy()
+	memcpy(padline,request->bufptr,request->remainder);
+	iRCCE_put(request->combuf, (t_vcharp)padline, request->nbytes, RCCE_IAM);
+	RCCE_flag_write(request->sent, RCCE_FLAG_SET, request->dest);
+	// test the ready flag once; if it is not set, remember this wait point and return
+label3:
+	iRCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test);
+	if(!test) {
+		request->label = 3;
+		return(iRCCE_PENDING);
+	}
+	RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM);
+
+	request->finished = 1;
+	return(iRCCE_SUCCESS);
+}
+
+static void iRCCE_init_send_request(
+		char *privbuf,    // send buffer in the caller's private memory
+		t_vcharp combuf,  // staging buffer in the MPB
+		size_t chunk,     // number of MPB bytes usable for this message
+		RCCE_FLAG *ready, // flag signalling that the receiver is ready
+		RCCE_FLAG *sent,  // flag signalling that the source has sent data
+		size_t size,      // message length in bytes
+		int dest,         // UE the message is addressed to
+		iRCCE_SEND_REQUEST *request // request handle that gets (re)initialized
+		) {
+	// progress state: nothing transferred yet, no resume point recorded
+	request->finished  = 0;
+	request->label     = 0;
+	request->wsize     = 0;
+	request->nbytes    = 0;
+	request->remainder = 0;
+	request->bufptr    = NULL;
+
+	// not linked to any successor in the send queue
+	request->next      = NULL;
+
+	// message description as handed in by the caller
+	request->privbuf   = privbuf;
+	request->size      = size;
+	request->dest      = dest;
+
+	// communication resources used for the transfer
+	request->combuf    = combuf;
+	request->chunk     = chunk;
+	request->ready     = ready;
+	request->sent      = sent;
+}
+
+//--------------------------------------------------------------------------------------
+// FUNCTION: iRCCE_isend
+//--------------------------------------------------------------------------------------
+// non-blocking send function; returns a handle of type iRCCE_SEND_REQUEST
+//--------------------------------------------------------------------------------------
+static iRCCE_SEND_REQUEST blocking_isend_request;
+int iRCCE_isend(char *privbuf, size_t size, int dest, iRCCE_SEND_REQUEST *request) {
+
+	// Return values:
+	//   iRCCE_SUCCESS  - the message has been sent completely
+	//   iRCCE_PENDING  - the request is first in the send queue and in progress
+	//   iRCCE_RESERVED - the request has been queued behind other requests
+	//   for an invalid destination: the result of RCCE_error_return()
+	iRCCE_SEND_REQUEST *tail;
+	int status;
+
+	// without a caller-supplied handle the call behaves like a blocking send
+	// that works on the file-local request object
+	if(request == NULL) request = &blocking_isend_request;
+
+	if (dest<0 || dest >= RCCE_NP)
+		return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ID));
+
+	iRCCE_init_send_request(privbuf, RCCE_buff_ptr, RCCE_chunk,
+			&RCCE_ready_flag[dest], &RCCE_sent_flag[RCCE_IAM],
+			size, dest, request);
+
+	if(iRCCE_isend_queue == NULL) {
+		// queue is empty: try to send right away
+		if(iRCCE_push_send_request(request) == iRCCE_SUCCESS)
+			return(iRCCE_SUCCESS);
+
+		// not completed yet: the request becomes the head of the queue
+		iRCCE_isend_queue = request;
+		status = iRCCE_PENDING;
+	}
+	else {
+		// other requests are ahead: append to the end of the queue
+		for(tail = iRCCE_isend_queue; tail->next != NULL; tail = tail->next)
+			;
+		tail->next = request;
+		status = iRCCE_RESERVED;
+	}
+
+	// blocking mode: do not return before the request has completed
+	if(request == &blocking_isend_request) {
+		iRCCE_isend_wait(request);
+		status = iRCCE_SUCCESS;
+	}
+
+	return(status);
+}
+
+//--------------------------------------------------------------------------------------
+// FUNCTION: iRCCE_isend_test
+//--------------------------------------------------------------------------------------
+// test function for completion of the requested non-blocking send operation
+// Just provide NULL instead of testvar if you don't need it
+//--------------------------------------------------------------------------------------
+int iRCCE_isend_test(iRCCE_SEND_REQUEST *request, int *test) {
+
+	// Returns iRCCE_SUCCESS (done), iRCCE_PENDING (in progress) or
+	// iRCCE_RESERVED (request still waits behind others in the queue).
+	int status = iRCCE_PENDING;  // value returned to the caller
+	int done   = 0;              // value stored in *test (if provided)
+
+	if(request == NULL) {
+		// no handle: progress the queue, then report whether it has drained
+		iRCCE_isend_push();
+		done = (iRCCE_isend_queue == NULL);
+	}
+	else if(request->finished) {
+		// completed earlier, nothing left to do
+		done = 1;
+	}
+	else if(iRCCE_isend_queue != request) {
+		// queued behind other requests, so it cannot be progressed yet
+		status = iRCCE_RESERVED;
+	}
+	else {
+		// request is at the head of the queue: try to progress it
+		iRCCE_push_send_request(request);
+
+		if(request->finished) {
+			// finished just now: its successor becomes the new head
+			iRCCE_isend_queue = request->next;
+			done = 1;
+		}
+	}
+
+	if(done) status = iRCCE_SUCCESS;
+
+	// the test variable is optional
+	if (test) (*test) = done;
+
+	return(status);
+}
+
+
+//--------------------------------------------------------------------------------------
+// FUNCTION: iRCCE_isend_push
+//--------------------------------------------------------------------------------------
+// progress function for pending requests in the isend queue 
+//--------------------------------------------------------------------------------------
+int iRCCE_isend_push(void) {
+
+	// only the request at the head of the send queue is ever progressed
+	iRCCE_SEND_REQUEST *head = iRCCE_isend_queue;
+
+	// empty queue, or head already marked finished: nothing to push
+	if( (head == NULL) || (head->finished) ) {
+		return(iRCCE_SUCCESS);
+	}
+
+	iRCCE_push_send_request(head);
+
+	// still in flight after this push attempt
+	if(!head->finished) {
+		return(iRCCE_PENDING);
+	}
+
+	// completed: dequeue it so that its successor becomes the head
+	iRCCE_isend_queue = head->next;
+	return(iRCCE_SUCCESS);
+}
+
+
+//--------------------------------------------------------------------------------------
+// FUNCTION: iRCCE_isend_wait
+//--------------------------------------------------------------------------------------
+// just wait for completion of the requested non-blocking send operation
+//--------------------------------------------------------------------------------------
+int iRCCE_isend_wait(iRCCE_SEND_REQUEST *request) {
+
+	if(request == NULL) {
+		// no handle: wait until the whole send queue has been worked off
+		while(iRCCE_isend_queue != NULL) {
+			iRCCE_isend_push();
+			iRCCE_irecv_push();
+		}
+		return(iRCCE_SUCCESS);
+	}
+
+	// handle given: wait until exactly this request is marked finished;
+	// the recv queue is pushed as well on every round, after the send
+	// queue
+	while(!request->finished) {
+		iRCCE_isend_push();
+		iRCCE_irecv_push();
+	}
+
+	return(iRCCE_SUCCESS);
+}
+
+
+//--------------------------------------------------------------------------------------
+// FUNCTION: iRCCE_isend_cancel
+//--------------------------------------------------------------------------------------
+// try to cancel a pending non-blocking send request
+//--------------------------------------------------------------------------------------
+int iRCCE_isend_cancel(iRCCE_SEND_REQUEST *request, int *test) {
+
+	// Returns:
+	//   iRCCE_SUCCESS      - request removed from the send queue (*test = 1)
+	//   iRCCE_PENDING      - request is at the head of the queue (*test = 0)
+	//   iRCCE_NOT_ENQUEUED - request is NULL, finished or not queued (*test = 0)
+	int status    = iRCCE_NOT_ENQUEUED; // default: request not found in the queue
+	int cancelled = 0;                  // value stored in *test (if provided)
+	iRCCE_SEND_REQUEST *prev;
+
+	// a missing or already finished request cannot be cancelled
+	if( (request != NULL) && !(request->finished) ) {
+
+		if(iRCCE_isend_queue == request) {
+			// the head of the queue is never removed here
+			status = iRCCE_PENDING;
+		}
+		else if(iRCCE_isend_queue != NULL) {
+			// search for the predecessor and unlink the request behind it
+			for(prev = iRCCE_isend_queue; prev->next != NULL; prev = prev->next) {
+				if(prev->next == request) {
+					prev->next = request->next;
+					cancelled  = 1;
+					status     = iRCCE_SUCCESS;
+					break;
+				}
+			}
+		}
+	}
+
+	if (test) (*test) = cancelled;
+
+	return status;
+}
+
+#endif
diff --git a/arch/x86/scc/iRCCE_put.c b/arch/x86/scc/iRCCE_put.c
new file mode 100644
index 00000000..93cea070
--- /dev/null
+++ b/arch/x86/scc/iRCCE_put.c
@@ -0,0 +1,87 @@
+//***************************************************************************************
+// Put data into communication buffer. 
+//***************************************************************************************
+//
+// Author: Rob F. Van der Wijngaart
+//         Intel Corporation
+// Date:   008/30/2010
+//
+//***************************************************************************************
+// 
+// Copyright 2010 Intel Corporation
+// 
+//    Licensed under the Apache License, Version 2.0 (the "License");
+//    you may not use this file except in compliance with the License.
+//    You may obtain a copy of the License at
+// 
+//        http://www.apache.org/licenses/LICENSE-2.0
+// 
+//    Unless required by applicable law or agreed to in writing, software
+//    distributed under the License is distributed on an "AS IS" BASIS,
+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//    See the License for the specific language governing permissions and
+//    limitations under the License.
+// 
+//    [2010-11-03] switched to SCC-optimized memcpy() functions in scc_memcpy.h:
+//                 - memcpy_to_mpb()
+//                 - memcpy_from_mpb() 
+//                 by Stefan Lankes, Carsten Clauss, Chair for Operating Systems,
+//                                                   RWTH Aachen University
+//
+#include <metalsvm/stdlib.h>
+#include <metalsvm/string.h>
+
+#ifdef CONFIG_ROCKCREEK
+
+#include <asm/iRCCE_lib.h>
+
+#ifdef COPPERRIDGE
+#include "scc_memcpy.h"
+#endif
+
+void* iRCCE_memcpy_put(void *dest, const void *src, size_t count)
+{ // copy count bytes from src to dest; returns whatever the selected copy routine returns
+#ifdef COPPERRIDGE
+  return memcpy_to_mpb(dest, src, count); // from scc_memcpy.h, which is only included for COPPERRIDGE
+#else // !COPPERRIDGE
+  return memcpy(dest, src, count); // plain memcpy() otherwise
+#endif // COPPERRIDGE
+}
+
+//--------------------------------------------------------------------------------------
+// FUNCTION: iRCCE_put
+//--------------------------------------------------------------------------------------
+// copy data from address "source" in the local MPB or the calling UE's private memory 
+// to address "target" in the remote MPB. We do not test to see if a move from the 
+// calling UE's private memory stays within allocated memory                        
+//--------------------------------------------------------------------------------------
+int iRCCE_put(
+  t_vcharp target, // target buffer, MPB (given relative to the caller's own comm buffer)
+  t_vcharp source, // source buffer, MPB or private memory
+  int num_bytes, // number of bytes to copy
+  int ID // index into RCCE_comm_buffer[] selecting the comm buffer that is written
+  ) {
+  // Copies num_bytes from source into the comm buffer selected by ID; always returns iRCCE_SUCCESS.
+  // in non-GORY mode we only need to retain the MPB target shift; we
+  // already know the target is in the MPB, not private memory
+  target = RCCE_comm_buffer[ID]+(target-RCCE_comm_buffer[RCCE_IAM]);    
+
+  // make sure that any data that has been put in our MPB by another UE is visible
+#ifdef _OPENMP
+  #pragma omp flush
+#endif
+
+  // invalidate first (RC_cache_invalidate() is defined elsewhere), then do the actual copy
+  RC_cache_invalidate();
+
+  iRCCE_memcpy_put((void *)target, (void *)source, num_bytes);
+
+  // flush data to make it visible to all threads; cannot use flush list because it
+  // concerns malloced space
+#ifdef _OPENMP
+  #pragma omp flush
+#endif
+  return(iRCCE_SUCCESS);
+}
+
+#endif
diff --git a/arch/x86/scc/iRCCE_recv.c b/arch/x86/scc/iRCCE_recv.c
new file mode 100644
index 00000000..17beccfd
--- /dev/null
+++ b/arch/x86/scc/iRCCE_recv.c
@@ -0,0 +1,190 @@
+//***************************************************************************************
+// Non-blocking receive routines. 
+//***************************************************************************************
+//
+// Author: Rob F. Van der Wijngaart
+//         Intel Corporation
+// Date:   008/30/2010
+//
+//***************************************************************************************
+// 
+// Copyright 2010 Intel Corporation
+// 
+//    Licensed under the Apache License, Version 2.0 (the "License");
+//    you may not use this file except in compliance with the License.
+//    You may obtain a copy of the License at
+// 
+//        http://www.apache.org/licenses/LICENSE-2.0
+// 
+//    Unless required by applicable law or agreed to in writing, software
+//    distributed under the License is distributed on an "AS IS" BASIS,
+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//    See the License for the specific language governing permissions and
+//    limitations under the License.
+// 
+//    [2010-11-26] added a _pipelined_ version of blocking send/recv
+//                 by Carsten Clauss, Chair for Operating Systems,
+//                                    RWTH Aachen University
+//
+#include <metalsvm/stdlib.h>
+#include <metalsvm/string.h>
+
+#ifdef CONFIG_ROCKCREEK
+
+#include <asm/iRCCE_lib.h>
+
+//--------------------------------------------------------------------------------------
+// FUNCTION: iRCCE_recv_general
+//--------------------------------------------------------------------------------------
+// pipelined receive function
+//--------------------------------------------------------------------------------------
+static int iRCCE_recv_general(
+		char *privbuf,    // destination buffer in local private memory (receive buffer)
+		t_vcharp combuf,  // intermediate buffer in MPB
+		size_t chunk,     // size of MPB available for this message (bytes)
+		RCCE_FLAG *ready, // flag indicating whether receiver is ready
+		RCCE_FLAG *sent,  // flag indicating whether message has been sent by source
+		size_t size,      // size of message (bytes)
+		int source,       // UE that sent the message
+		int *test        // if non-zero upon entry, do nonblocking receive: set to the result
+		// of RCCE_probe(*sent), and return at once if that result is 0
+		) {
+	// Receives "size" bytes into privbuf: full chunks, then whole cache lines, then a padded tail.
+	char padline[RCCE_LINE_SIZE]; // copy buffer, used if message not multiple of line size
+	size_t wsize,   // offset within receive buffer when pulling in "chunk" bytes
+				 remainder, // bytes remaining to be received
+				 nbytes;    // number of bytes to be received in single iRCCE_get call
+	int first_test; // only use first chunk to determine if message has been received yet
+	char *bufptr;   // running pointer inside privbuf for current location
+
+	first_test = 1;
+	// the "#if 0" branch is the disabled, non-pipelined variant of the chunk loop
+#if 0
+	// receive data in units of available chunk size of MPB 
+	for (wsize=0; wsize< (size/chunk)*chunk; wsize+=chunk) {
+		bufptr = privbuf + wsize;
+		nbytes = chunk;
+		// if function is called in test mode, check if first chunk has been sent already. 
+		// If so, proceed as usual. If not, exit immediately 
+		if (*test && first_test) {
+			first_test = 0;
+			if (!(*test = RCCE_probe(*sent))) return(iRCCE_SUCCESS);
+		}
+		RCCE_wait_until(*sent, RCCE_FLAG_SET);
+		RCCE_flag_write(sent, RCCE_FLAG_UNSET, RCCE_IAM);
+		// copy data from local MPB space to private memory 
+		iRCCE_get((t_vcharp)bufptr, combuf, nbytes, source);
+
+		// tell the source I have moved data out of its comm buffer
+		RCCE_flag_write(ready, RCCE_FLAG_SET, source);
+	}
+#else
+	{ // pipelined version of send/recv:
+		// every chunk is transferred as two sub-chunks; the second one takes the odd byte, if any
+		size_t subchunk1 = chunk / 2;
+		size_t subchunk2 = chunk - subchunk1;
+
+		for (wsize=0; wsize < (size/chunk)*chunk; wsize+=chunk) {
+			// test mode: probe once, before the first chunk; if nothing was sent, return with *test == 0
+			if (*test && first_test) {
+				first_test = 0;
+				if (!(*test = RCCE_probe(*sent))) return(iRCCE_SUCCESS);
+			}    
+			// NOTE(review): the probe reads *sent, but the first sub-chunk is awaited on *ready - confirm
+			bufptr = privbuf + wsize;
+			nbytes = subchunk1;
+			// first sub-chunk: wait for the ready flag, clear it locally, fetch from combuf
+			RCCE_wait_until(*ready, RCCE_FLAG_SET);
+			RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
+			iRCCE_get((t_vcharp)bufptr, combuf, nbytes, source);
+			// acknowledge the first sub-chunk by setting the ready flag at the source
+			RCCE_flag_write(ready, RCCE_FLAG_SET, source);      
+
+			bufptr = privbuf + wsize + subchunk1;
+			nbytes = subchunk2;
+			// second sub-chunk: same handshake, but through the sent flag and combuf + subchunk1
+			RCCE_wait_until(*sent, RCCE_FLAG_SET);
+			RCCE_flag_write(sent, RCCE_FLAG_UNSET, RCCE_IAM);
+			iRCCE_get((t_vcharp)bufptr, combuf + subchunk1, nbytes, source);
+			// acknowledge the second sub-chunk by setting the sent flag at the source
+			RCCE_flag_write(sent, RCCE_FLAG_SET, source);     
+		}   
+	}
+#endif
+
+	remainder = size%chunk; 
+	// if nothing is left over, we are done 
+	if (!remainder) return(iRCCE_SUCCESS);
+
+	// receive remainder of data--whole cache lines               
+	bufptr = privbuf + (size/chunk)*chunk;
+	nbytes = remainder - remainder % RCCE_LINE_SIZE;
+	if (nbytes) {
+		// if function is called in test mode, check if first chunk has been sent already. 
+		// If so, proceed as usual. If not, exit immediately 
+		if (*test && first_test) {
+			first_test = 0;
+			if (!(*test = RCCE_probe(*sent))) return(iRCCE_SUCCESS);
+		}
+		RCCE_wait_until(*sent, RCCE_FLAG_SET);
+		RCCE_flag_write(sent, RCCE_FLAG_UNSET, RCCE_IAM);
+		// copy data from local MPB space to private memory 
+		iRCCE_get((t_vcharp)bufptr, combuf, nbytes, source);
+
+		// tell the source I have moved data out of its comm buffer
+		RCCE_flag_write(ready, RCCE_FLAG_SET, source);
+	}
+
+	remainder = remainder % RCCE_LINE_SIZE;
+	if (!remainder) return(iRCCE_SUCCESS);
+
+	// remainder is less than cache line. This must be copied into appropriately sized 
+	// intermediate space before exact number of bytes get copied to the final destination 
+	bufptr = privbuf + (size/chunk)*chunk + nbytes;
+	nbytes = RCCE_LINE_SIZE;
+
+	// if function is called in test mode, check if first chunk has been sent already. 
+	// If so, proceed as usual. If not, exit immediately 
+	if (*test && first_test) {
+		first_test = 0;
+		if (!(*test = RCCE_probe(*sent))) return(iRCCE_SUCCESS);
+	}
+	RCCE_wait_until(*sent, RCCE_FLAG_SET);
+	RCCE_flag_write(sent, RCCE_FLAG_UNSET, RCCE_IAM);
+
+	// fetch one whole line into padline, then copy only the "remainder" valid bytes to privbuf
+	iRCCE_get((t_vcharp)padline, combuf, nbytes, source);
+	memcpy(bufptr,padline,remainder);    
+
+	// tell the source I have moved data out of its comm buffer
+	RCCE_flag_write(ready, RCCE_FLAG_SET, source);
+
+	return(iRCCE_SUCCESS);
+}
+
+
+//--------------------------------------------------------------------------------------
+// FUNCTION: iRCCE_recv
+//--------------------------------------------------------------------------------------
+// pipelined recv function  (blocking!)
+//--------------------------------------------------------------------------------------
+int iRCCE_recv(char *privbuf, size_t size, int source) {
+	int ignore = 0; // 0: iRCCE_recv_general() is not called in test mode, i.e. it blocks
+
+	// Validate the source rank first: it is used as an array index below, so
+	// an invalid value must be rejected before iRCCE_irecv_queue[] is read.
+	if (source<0 || source >= RCCE_NP)
+		return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ID));
+
+	while(iRCCE_irecv_queue[source] != NULL) {
+		// wait for completion of pending non-blocking requests from this source
+		iRCCE_irecv_push();
+		iRCCE_isend_push();
+	}
+
+	return(iRCCE_recv_general(privbuf, RCCE_buff_ptr, RCCE_chunk,
+				&RCCE_ready_flag[RCCE_IAM], &RCCE_sent_flag[source],
+				size, source, &ignore));
+}
+
+#endif
diff --git a/arch/x86/scc/iRCCE_send.c b/arch/x86/scc/iRCCE_send.c
new file mode 100644
index 00000000..ad1582b3
--- /dev/null
+++ b/arch/x86/scc/iRCCE_send.c
@@ -0,0 +1,165 @@
+//***************************************************************************************
+// Synchronized send routines. 
+//***************************************************************************************
+//
+// Author: Rob F. Van der Wijngaart
+//         Intel Corporation
+// Date:   008/30/2010
+//
+//***************************************************************************************
+// 
+// Copyright 2010 Intel Corporation
+// 
+//    Licensed under the Apache License, Version 2.0 (the "License");
+//    you may not use this file except in compliance with the License.
+//    You may obtain a copy of the License at
+// 
+//        http://www.apache.org/licenses/LICENSE-2.0
+// 
+//    Unless required by applicable law or agreed to in writing, software
+//    distributed under the License is distributed on an "AS IS" BASIS,
+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//    See the License for the specific language governing permissions and
+//    limitations under the License.
+// 
+//    [2010-11-26] added a _pipelined_ version of blocking send/recv
+//                 by Carsten Clauss, Chair for Operating Systems,
+//                                    RWTH Aachen University
+//
+#include <metalsvm/stdlib.h>
+#include <metalsvm/string.h>
+
+#ifdef CONFIG_ROCKCREEK
+
+#include <asm/iRCCE_lib.h>
+
+//--------------------------------------------------------------------------------------
+// FUNCTION: iRCCE_send_general
+//--------------------------------------------------------------------------------------
+// pipelined send function
+//--------------------------------------------------------------------------------------
+static int iRCCE_send_general(
+		char *privbuf,    // source buffer in local private memory (send buffer)
+		t_vcharp combuf,  // intermediate buffer in MPB
+		size_t chunk,     // size of MPB available for this message (bytes)
+		RCCE_FLAG *ready, // flag indicating whether receiver is ready
+		RCCE_FLAG *sent,  // flag indicating whether message has been sent by source
+		size_t size,      // size of message (bytes)
+		int dest          // UE that will receive the message
+		) {
+	// Sends "size" bytes from privbuf: full chunks, then whole cache lines, then a padded tail.
+	char padline[RCCE_LINE_SIZE]; // copy buffer, used if message not multiple of line size
+	size_t wsize,    // offset within send buffer when putting in "chunk" bytes
+				 remainder, // bytes remaining to be sent
+				 nbytes;    // number of bytes to be sent in single iRCCE_put call
+	char *bufptr;    // running pointer inside privbuf for current location
+	// the "#if 0" branch is the disabled, non-pipelined variant of the chunk loop
+#if 0
+	// send data in units of available chunk size of comm buffer 
+	for (wsize=0; wsize< (size/chunk)*chunk; wsize+=chunk) {
+		bufptr = privbuf + wsize;
+		nbytes = chunk;
+		// copy private data to own comm buffer
+		iRCCE_put(combuf, (t_vcharp) bufptr, nbytes, RCCE_IAM);
+		RCCE_flag_write(sent, RCCE_FLAG_SET, dest);
+		// wait for the destination to be ready to receive a message          
+		RCCE_wait_until(*ready, RCCE_FLAG_SET);
+		RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
+	}
+#else
+	{ // pipelined version of send/recv:
+		size_t subchunk1 = chunk / 2;
+		size_t subchunk2 = chunk - subchunk1;
+		// every chunk is transferred as two sub-chunks; the second one takes the odd byte, if any
+		wsize = 0;
+
+		for (; wsize < (size/chunk)*chunk; wsize+=chunk) {
+			// first sub-chunk: put it at combuf and announce it by setting the ready flag at dest
+			bufptr = privbuf + wsize;
+			nbytes = subchunk1;
+
+			iRCCE_put(combuf, (t_vcharp) bufptr, nbytes, RCCE_IAM);      
+			RCCE_flag_write(ready, RCCE_FLAG_SET, dest);
+			// from the second chunk on: wait for (and clear) our sent flag before the upper half is reused
+			if(wsize>0)
+			{
+				RCCE_wait_until(*sent, RCCE_FLAG_SET);
+				RCCE_flag_write(sent, RCCE_FLAG_UNSET, RCCE_IAM);      
+			}
+			// second sub-chunk: put it at combuf + subchunk1 and announce it through the sent flag
+			bufptr = privbuf + wsize + subchunk1;
+			nbytes = subchunk2;
+
+			iRCCE_put(combuf + subchunk1, (t_vcharp) bufptr, nbytes, RCCE_IAM);
+			RCCE_flag_write(sent, RCCE_FLAG_SET, dest);
+			// wait for (and clear) our ready flag before the lower half is reused
+			RCCE_wait_until(*ready, RCCE_FLAG_SET);
+			RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);      
+		}
+		// after the last full chunk the sent flag is still outstanding: wait for it and clear it
+		if(wsize>0) {
+			RCCE_wait_until(*sent, RCCE_FLAG_SET);
+			RCCE_flag_write(sent, RCCE_FLAG_UNSET, RCCE_IAM);          
+		}
+	}
+#endif
+
+	remainder = size%chunk; 
+	// if nothing is left over, we are done 
+	if (!remainder) return(iRCCE_SUCCESS);
+
+	// send remainder of data--whole cache lines            
+	bufptr = privbuf + (size/chunk)*chunk;
+	nbytes = remainder - remainder%RCCE_LINE_SIZE;
+	if (nbytes) {
+		// copy private data to own comm buffer
+		iRCCE_put(combuf, (t_vcharp)bufptr, nbytes, RCCE_IAM);
+		RCCE_flag_write(sent, RCCE_FLAG_SET, dest);
+		// wait for the destination to be ready to receive a message          
+		RCCE_wait_until(*ready, RCCE_FLAG_SET);
+		RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
+	}
+
+	remainder = remainder%RCCE_LINE_SIZE;
+	if (!remainder) return(iRCCE_SUCCESS);
+
+	// remainder is less than a cache line. This must be copied into appropriately sized 
+	// intermediate space before it can be sent to the receiver 
+	bufptr = privbuf + (size/chunk)*chunk + nbytes;
+	nbytes = RCCE_LINE_SIZE;
+
+	// a whole line is put; only the first "remainder" bytes of padline are written by memcpy()
+	memcpy(padline,bufptr,remainder);
+	iRCCE_put(combuf, (t_vcharp)padline, nbytes, RCCE_IAM);
+	RCCE_flag_write(sent, RCCE_FLAG_SET, dest);
+
+	// wait for the destination to be ready to receive a message          
+	RCCE_wait_until(*ready, RCCE_FLAG_SET);
+	RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM);
+
+	return(iRCCE_SUCCESS);
+}
+
+
+//--------------------------------------------------------------------------------------
+// FUNCTION: iRCCE_send
+//--------------------------------------------------------------------------------------
+// pipelined send function (blocking!)
+//--------------------------------------------------------------------------------------
+int iRCCE_send(char *privbuf, size_t size, int dest) {
+
+	// Pending non-blocking sends go first: drive both progress functions
+	// (send, then recv) until the send queue is empty.
+	for (; iRCCE_isend_queue != NULL; iRCCE_irecv_push())
+		iRCCE_isend_push();
+
+	// only ranks 0 .. RCCE_NP-1 are accepted as destination
+	if ( (dest >= 0) && (dest < RCCE_NP) ) {
+		return(iRCCE_send_general(privbuf, RCCE_buff_ptr, RCCE_chunk,
+					&RCCE_ready_flag[dest], &RCCE_sent_flag[RCCE_IAM],
+					size, dest));
+	}
+	return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ID));
+}
+
+#endif
diff --git a/arch/x86/scc/iRCCE_synch.c b/arch/x86/scc/iRCCE_synch.c
new file mode 100644
index 00000000..5512728b
--- /dev/null
+++ b/arch/x86/scc/iRCCE_synch.c
@@ -0,0 +1,127 @@
+///*************************************************************************************
+// Synchronization functions. 
+// Single-bit and whole-cache-line flags are sufficiently different that we provide
+// separate implementations of the synchronization routines for each case
+//**************************************************************************************
+//
+// Author: Rob F. Van der Wijngaart
+//         Intel Corporation
+// Date:   008/30/2010
+//
+//**************************************************************************************
+// 
+// Copyright 2010 Intel Corporation
+// 
+//    Licensed under the Apache License, Version 2.0 (the "License");
+//    you may not use this file except in compliance with the License.
+//    You may obtain a copy of the License at
+// 
+//        http://www.apache.org/licenses/LICENSE-2.0
+// 
+//    Unless required by applicable law or agreed to in writing, software
+//    distributed under the License is distributed on an "AS IS" BASIS,
+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//    See the License for the specific language governing permissions and
+//    limitations under the License.
+// 
+//    [2010-10-25] added support for non-blocking send/recv operations
+//                 - iRCCE_isend(), ..._test(), ..._wait(), ..._push()
+//                 - iRCCE_irecv(), ..._test(), ..._wait(), ..._push()
+//                 by Carsten Clauss, Chair for Operating Systems,
+//                                    RWTH Aachen University
+//
+//    [2010-11-12] extracted non-blocking code into separate library
+//                 by Carsten Scholtes
+//
+//    [2011-01-21] updated the datatype of RCCE_FLAG according to the
+//                 recent version of RCCE
+//
+//    [2011-04-12] added macro test for rcce version
+//
+#include <metalsvm/stdlib.h>
+#include <metalsvm/string.h>
+
+#ifdef CONFIG_ROCKCREEK
+
+#include <asm/iRCCE_lib.h>
+
+#ifdef SINGLEBITFLAGS
+
+int iRCCE_test_flag(RCCE_FLAG flag, RCCE_FLAG_STATUS val, int *result) {
+	// Single-bit variant: reads the flag once; *result = 1 if it equals val, else 0 (result must not be NULL).
+	t_vcharp cflag;
+
+#ifdef RCCE_VERSION
+	// this is a newer version than V1.0.13
+	t_vcharp flaga;
+#endif
+
+	cflag = flag.line_address;
+
+#ifdef RCCE_VERSION
+	// this is a newer version than V1.0.13
+	flaga = flag.flag_addr;
+#endif
+
+	// always flush/invalidate to ensure we read the most recent value of *flag;
+	// the flag is read exactly once here, there is no waiting loop
+
+#ifdef _OPENMP
+#pragma omp flush  
+#endif
+	RC_cache_invalidate();
+
+#ifdef RCCE_VERSION
+	// this is a newer version than V1.0.13
+	if(RCCE_bit_value(flaga, (flag.location)%RCCE_FLAGS_PER_BYTE) != val) {
+#else
+	if(RCCE_bit_value(cflag, flag.location) != val) {
+#endif
+		(*result) = 0;
+	}    
+	else {
+		(*result) = 1;
+	}
+	// the outcome of the test is reported through *result only
+	return(iRCCE_SUCCESS);
+} 
+
+#else
+
+//////////////////////////////////////////////////////////////////
+// LOCKLESS SYNCHRONIZATION USING ONE WHOLE CACHE LINE PER FLAG //
+//////////////////////////////////////////////////////////////////
+
+int iRCCE_test_flag(RCCE_FLAG flag, RCCE_FLAG_STATUS val, int *result) {
+	// Cache-line variant: reads the flag once; *result = 1 if it equals val, else 0 (result must not be NULL).
+#ifdef RCCE_VERSION
+	// this is a newer version than V1.0.13
+	t_vcharp flaga = flag.flag_addr;
+#endif
+
+	// always flush/invalidate to ensure we read the most recent value of *flag;
+	// the flag is read exactly once here, there is no waiting loop. We only need
+	// to read the first int of the MPB cache line containing the flag
+#ifdef _OPENMP
+#pragma omp flush   
+#endif
+	RC_cache_invalidate();
+
+#ifdef RCCE_VERSION
+	// this is a newer version than V1.0.13
+	if((RCCE_FLAG_STATUS)(*flaga) != val) {
+#else
+	if((*flag) != val) {
+#endif
+		(*result) = 0;
+	}    
+	else {
+		(*result) = 1;
+	}
+	// the outcome of the test is reported through *result only
+	return(iRCCE_SUCCESS);
+}
+
+#endif
+
+#endif

From ed226780119caa1a64f3ff5089d17005eecd3df6 Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Tue, 19 Apr 2011 07:17:51 -0700
Subject: [PATCH 12/36] add code to initialize (i)RCCE and to test RCCE

---
 arch/x86/scc/scc_init.c           | 52 +++++++++++++++++++++++++------
 include/metalsvm/config.h.example |  2 +-
 2 files changed, 44 insertions(+), 10 deletions(-)

diff --git a/arch/x86/scc/scc_init.c b/arch/x86/scc/scc_init.c
index 3279c967..33645aaa 100644
--- a/arch/x86/scc/scc_init.c
+++ b/arch/x86/scc/scc_init.c
@@ -17,7 +17,8 @@
 #include <metalsvm/processor.h>
 #include <metalsvm/errno.h>
 #include <asm/io.h>
-#include <asm/RCCE_lib.h>
+#include <asm/RCCE.h>
+#include <asm/iRCCE.h>
 #include <asm/SCC_API.h>
 
 #ifdef CONFIG_ROCKCREEK
@@ -35,6 +36,9 @@ bootinfo_t* bootinfo = (bootinfo_t*) SCC_BOOTINFO;
 static int scc_clear(void)
 {
 	int tmp, x, y, z, offset;
+
+	// Initialize API
+	InitAPI(0);
  
 	// Find out who I am...
 	tmp=ReadConfigReg(CRB_OWN+MYTILEID); 
@@ -57,6 +61,9 @@ static int scc_clear(void)
 	// Clear test&set register write. Next read-access will read "1" (lock granted).
 	SetConfigReg(CRB_ADDR(x,y)+((z)?LOCK1:LOCK0), 1); 
 
+	// frees Message Passing Buffer
+	MPBunalloc(&MPB);
+
 	return 0;
 }
 
@@ -64,31 +71,58 @@ int scc_init(void)
 {
 	int num_ranks;
 	int i, my_rank;
+	uint64_t start, end, ticks, freq = 533;
+	uint32_t cr4, msg = 0;
+
+	kputs("Initialize Rock Creek!\n");
+
+	/* Enable Messagepassing in CR4 */
+	cr4 = read_cr4();
+	cr4 = cr4 | _CR4_MPE;
+	write_cr4(cr4);
 
-	kprintf("Initialize Rock Creek!\n");
 	kprintf("address of the initrd: 0x%x\n", bootinfo->addr);
 	kprintf("size of the initrd: %d\n", bootinfo->size);
 	kprintf("rcce argc = %d\n", bootinfo->argc);
 	for(i=0; i<bootinfo->argc; i++)
 		kprintf("rcce argv[%d] = %s\n", i, bootinfo->argv[i]);
 
+	if (bootinfo->argc >= 3)
+		freq = atoi(bootinfo->argv[2]);
+
+	kputs("Reset SCC!\n");
+	scc_clear();
+	kputs("Wait some time...\n");
+	mb();
+	start = rdtsc();
+	do {
+		mb();
+		end = rdtsc();
+		ticks = end > start ? end - start : start - end;
+	} while(ticks*TIMER_FREQ < 1000ULL*freq*1000000ULL);
+	kprintf("ticks %llu\n", ticks);
+
 	if (RCCE_init(&bootinfo->argc, &bootinfo->argv) != RCCE_SUCCESS)
 		return -ENODEV;
+	if (iRCCE_init() != iRCCE_SUCCESS)
+		return -ENODEV;
 
 	my_rank   = RCCE_ue();
 	num_ranks = RCCE_num_ues();
 	kprintf("Got rank %d of %d ranks\n", my_rank, num_ranks);
 
-	/* Enable Messagepassing in CR4 */
-	uint32_t cr4 = read_cr4();
-	cr4 = cr4 | _CR4_MPE;
-	write_cr4(cr4);
-
 	i = ReadConfigReg(CRB_OWN+GLCFG0);
 	kprintf("glcfg0 0x%x\n", i);
 
-	/* synchronize before starting MetalSVM: */
-	//RCCE_barrier(&RCCE_COMM_WORLD);
+	RCCE_barrier(&RCCE_COMM_WORLD);
+
+	kputs("RCCE test...\t");
+	if (my_rank == 0)
+		msg = 0x4711;
+	if (RCCE_bcast((char*) &msg, sizeof(msg), 0, RCCE_COMM_WORLD) == RCCE_SUCCESS)
+		kprintf("successfull! (0x%x)\n", msg);
+	else
+		kprintf("failed! (0x%x)\n", msg);
 
 	kputs("Now, the SCC is initialized!\n");
 
diff --git a/include/metalsvm/config.h.example b/include/metalsvm/config.h.example
index 31c02682..b5c0536e 100644
--- a/include/metalsvm/config.h.example
+++ b/include/metalsvm/config.h.example
@@ -58,7 +58,7 @@ extern "C" {
 // RCCE specific flags
 #define SCC
 #define MS_BAREMETAL
-#define GORY
+//#define GORY
 //#define SHMADD
 //#define SHMADD_CACHEABLE
 /* default values for 16 GB system */

From f02655ccda797af0db2a66738c8255e394b0c33b Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Tue, 19 Apr 2011 07:18:35 -0700
Subject: [PATCH 13/36] add fallback code for memcpy

---
 libkern/string.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libkern/string.c b/libkern/string.c
index acf76be9..7d6a5220 100644
--- a/libkern/string.c
+++ b/libkern/string.c
@@ -8,9 +8,9 @@
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  */
 
-#include <metalsvm/string.h>
+#include <metalsvm/stddef.h>
 
-#ifndef HAVE_ARCH_MEMCPY
+//#ifndef HAVE_ARCH_MEMCPY
 void *memcpy(void *dest, const void *src, size_t count)
 {
 	size_t i;
@@ -23,7 +23,7 @@ void *memcpy(void *dest, const void *src, size_t count)
 	
 	return dest;
 }
-#endif
+//#endif
 
 #ifndef HAVE_ARCH_MEMSET
 void *memset(void *dest, int val, size_t count)

From a070ac6c5ae6732c1dce38f54cf72055920e2c33 Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Tue, 19 Apr 2011 07:39:53 -0700
Subject: [PATCH 14/36] add Intel's bin2obj tool

- this is part of http://marcbug.scc-dc.com/svn/repository/trunk/linuxkernel/bin2obj
---
 tools/Makefile  |   9 ++-
 tools/bin2obj.c | 147 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 153 insertions(+), 3 deletions(-)
 create mode 100644 tools/bin2obj.c

diff --git a/tools/Makefile b/tools/Makefile
index e0375ae0..fb77c026 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -41,16 +41,19 @@ scc_bootinfo.asm: bootinfo.sh
 scc_bootinfo.bin: scc_bootinfo.asm
 	$(NASM) $(NASMFLAGS) -o $@ $<
 
-SCC: scc_bootinfo.bin scc_setup.bin reset_vector.bin initrd.img
+bin2obj: bin2obj.c
+	$(CC) $(CFLAGS) -o $@ $<
+
+SCC: scc_bootinfo.bin scc_setup.bin reset_vector.bin initrd.img bin2obj
 	cp ../metalsvm.elf .
 	$(CROSS_OBJCOPY) -j .mboot -j .text -j .data -j .rodata -j .bss -O binary metalsvm.elf metalsvm.bin
 	chmod a-x *.bin
 	. ./prepare.sh
-	/home/lankes/tools/bin2obj -m load.map -o metalsvm.obj
+	./bin2obj -m load.map -o metalsvm.obj
 	sccMerge -noimage -m 8 -n 12 -force ./metalsvm.mt
 
 clean:
-	$(RM) -rf *.o *~ make_initrd initrd.img *.bin *.obj *.hex *.elf obj 
+	$(RM) -rf *.o *~ bin2obj make_initrd initrd.img *.bin *.obj *.hex *.elf obj 
 
 depend:
 	$(CC) -MM $(CFLAGS) *.c > Makefile.dep
diff --git a/tools/bin2obj.c b/tools/bin2obj.c
new file mode 100644
index 00000000..45c07caa
--- /dev/null
+++ b/tools/bin2obj.c
@@ -0,0 +1,147 @@
+#include <stdio.h>
+#include <getopt.h>
+#include <string.h>
+
+
+const char BIN2OBJIDSTRING[] = "$Id: bin2obj.c 8016 2007-11-01 14:24:42Z tlehnig $";
+
+
+/* Convert the binary file fn into hex text on outfile: an "/origin" header
+ * carrying origin >> 2, followed by the data as 4-byte groups, each printed
+ * last byte first (a short final group is zero-padded).
+ * Returns the number of bytes read, or -1 if fn cannot be opened. */
+long long convertToHex(char *fn, unsigned long origin, FILE *outfile) {
+  FILE *datafile;
+  size_t got;
+  long long count = 0;
+
+  /* "rb": the data is raw binary, so no text-mode translation may occur */
+  datafile = fopen(fn, "rb");
+  if (!datafile) {
+    printf("Datafile >%s< could not be opened, not writing data for this file\n", fn);
+    return -1;
+  }
+
+  printf("Converting file >%s< to .32.obj format at origin 0x%08lx (0x%08lx) ... ",
+         fn, origin >> 2, origin);
+  fprintf(outfile, "/origin %08lx\n", origin >> 2);
+
+  for (;;) {
+    unsigned char word[4] = { 0, 0, 0, 0 };  /* zero padding for a short read */
+
+    /* fread gives exact byte counts; no format parsing is needed for bytes */
+    got = fread(word, 1, sizeof word, datafile);
+    if (got == 0)
+      break;
+    count += (long long)got;
+
+    fprintf(outfile, "%02x%02x%02x%02x", word[3], word[2], word[1], word[0]);
+
+    /* four groups (16 bytes) per output line, single spaces in between */
+    fputs((count % 16) == 0 ? "\n" : " ", outfile);
+  }
+
+  /* terminate a last line that was not completed by the loop */
+  if ((count % 16) != 0)
+    fputs("\n", outfile);
+
+  printf("done with %lli Bytes.\n", count);
+
+  fclose(datafile);
+
+  return count;
+}
+
+
+void print_help() {  /* writes the complete usage summary to stdout */
+  fputs("Usage: bin2obj [FLAGS] [OPTIONS]\n"
+        "\nFLAGS: -h, -v\n"
+        "-h            Print this help\n"
+        "-v            Print Version ID\n"
+        "\nOPTIONS: -m, -o\n"
+        "-m <mapfile>  Defines mapfile to use for bin2obj\n"
+        "-o <outfile>  Defines output file to use for bin2obj\n"
+        "\nbin2obj converts the binary files defined in the mapfile to a hex based textfile\n"
+        "used by MCEMU\n", stdout);
+}
+
+
+/* bin2obj entry point.  Reads "<hex origin> <file name>" pairs from the map
+ * file (-m, default load.map), appends the hex dump of every listed file to
+ * the output file (-o, default output.obj) and finishes it with "/eof".
+ * Returns 0 on success, -1 if the map or output file cannot be opened, a
+ * data file cannot be converted, or the output cannot be flushed. */
+int main(int argc, char **argv) {
+  FILE *mapfile = NULL, *outfile = NULL;
+  unsigned long origin;
+  char datafn[255];
+  char outfn[255] = "output.obj";
+  char mapfn[255] = "load.map";
+  int res = 0;
+  unsigned long long count = 0;
+  long long thiscount = 0;
+  int retval = 0;
+  int c;
+
+  while ((c = getopt(argc, argv, "m:o:hv")) != -1) {
+    switch (c) {
+    case 'h':
+      print_help();
+      return 0;
+    case 'v':
+      printf("%s %s\n", argv[0], BIN2OBJIDSTRING);
+      return 0;
+    case 'm':
+      printf("Mapfile: >%s<\n", optarg);
+      /* snprintf always NUL-terminates; strncpy did not for overlong names */
+      snprintf(mapfn, sizeof mapfn, "%s", optarg);
+      break;
+    case 'o':
+      printf("Outfile: >%s<\n", optarg);
+      snprintf(outfn, sizeof outfn, "%s", optarg);
+      break;
+    default:
+      /* unknown option: show the usage text (exit status kept at 0) */
+      print_help();
+      return 0;
+    }
+  }
+
+  mapfile = fopen(mapfn, "r");
+  if (!mapfile) {
+    printf("Mapfile >%s< not found, exiting.\n", mapfn);
+    return -1;
+  }
+
+  outfile = fopen(outfn, "w");
+  if (!outfile) {
+    printf("Outputfile >%s< could not be created, exiting\n", outfn);
+    fclose(mapfile);  /* do not leak the map file that is already open */
+    return -1;
+  }
+
+  /* %254s keeps the file name within datafn (255 bytes incl. terminator) */
+  while ((res = fscanf(mapfile, "%lx %254s\n", &origin, datafn)) == 2) {
+    thiscount = convertToHex(datafn, origin, outfile);
+    if (thiscount < 0)
+      retval = -1;  /* remember the failure, keep converting the rest */
+    else
+      count += (unsigned long long)thiscount;
+  }
+
+  /* anything but EOF means parsing stopped early at a malformed entry */
+  if (res != EOF)
+    printf("Mapfile >%s<: stopped at a malformed entry\n", mapfn);
+
+  fprintf(outfile, "/eof\n");
+  fclose(mapfile);
+  /* fclose flushes the buffered output, so its failure means lost data */
+  if (fclose(outfile) != 0) {
+    printf("Outputfile >%s< could not be written completely\n", outfn);
+    retval = -1;
+  }
+
+  printf("Total conversion: %llu Bytes\n", count);
+
+  return retval;
+}

From f25beaa8b0884d390204b1ac4284266638595462 Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Tue, 19 Apr 2011 08:36:00 -0700
Subject: [PATCH 15/36] define that we also load metalsvm.obj to pid 1

---
 tools/metalsvm.mt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/metalsvm.mt b/tools/metalsvm.mt
index 9d40eaab..e08b520d 100644
--- a/tools/metalsvm.mt
+++ b/tools/metalsvm.mt
@@ -1,2 +1,3 @@
 # pid mch-route mch-dest-id mch-offset-base testcase
 0x00 0x00 6 0x00 metalsvm.obj
+0x01 0x00 6 0x01 metalsvm.obj

From 145c2e91be9d6ba94349c1aa6975f2b4b01ee4f0 Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Tue, 19 Apr 2011 08:36:58 -0700
Subject: [PATCH 16/36] enables runtime debug messages for RCCE library calls

---
 arch/x86/scc/RCCE_admin.c | 2 +-
 arch/x86/scc/scc_init.c   | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/x86/scc/RCCE_admin.c b/arch/x86/scc/RCCE_admin.c
index 8b4cbccc..34f0e677 100644
--- a/arch/x86/scc/RCCE_admin.c
+++ b/arch/x86/scc/RCCE_admin.c
@@ -45,7 +45,7 @@
 //  #include <fcntl.h>
 
 // En-/ or disable debug prints...
-#define DEBUG 1
+#define DEBUG 0
 
 //......................................................................................
 // GLOBAL VARIABLES USED BY THE LIBRARY
diff --git a/arch/x86/scc/scc_init.c b/arch/x86/scc/scc_init.c
index 33645aaa..d9f2628a 100644
--- a/arch/x86/scc/scc_init.c
+++ b/arch/x86/scc/scc_init.c
@@ -99,14 +99,16 @@ int scc_init(void)
 		mb();
 		end = rdtsc();
 		ticks = end > start ? end - start : start - end;
-	} while(ticks*TIMER_FREQ < 1000ULL*freq*1000000ULL);
-	kprintf("ticks %llu\n", ticks);
+	} while(ticks*TIMER_FREQ < 300ULL*freq*1000000ULL);
 
 	if (RCCE_init(&bootinfo->argc, &bootinfo->argv) != RCCE_SUCCESS)
 		return -ENODEV;
 	if (iRCCE_init() != iRCCE_SUCCESS)
 		return -ENODEV;
 
+	// enable additional outputs	
+	RCCE_debug_set(RCCE_DEBUG_ALL);
+
 	my_rank   = RCCE_ue();
 	num_ranks = RCCE_num_ues();
 	kprintf("Got rank %d of %d ranks\n", my_rank, num_ranks);

From 23313590bef320908ecd0a09b78433bc717e99d0 Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Tue, 19 Apr 2011 20:01:18 +0200
Subject: [PATCH 17/36] use logical operations instead of / and % to increase
 the performance

---
 arch/x86/mm/page.c                | 16 ++++++++--------
 include/metalsvm/config.h.example |  1 +
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/arch/x86/mm/page.c b/arch/x86/mm/page.c
index cd3f210b..4e5c4f7c 100644
--- a/arch/x86/mm/page.c
+++ b/arch/x86/mm/page.c
@@ -640,8 +640,8 @@ int arch_paging_init(void)
 	 * Set the page table and page directory entries for the kernel. We map the kernel's physical address 
 	 * to the same virtual address.
 	 */
-	npages = ((size_t) &kernel_end - (size_t) &kernel_start) / PAGE_SIZE;
-	if ((size_t)&kernel_end % PAGE_SIZE)
+	npages = ((size_t) &kernel_end - (size_t) &kernel_start) >> PAGE_SHIFT;
+	if ((size_t)&kernel_end & (PAGE_SIZE-1))
 		npages++;
 	map_region((size_t)&kernel_start, (size_t)&kernel_start, npages, MAP_KERNEL_SPACE);
 
@@ -686,8 +686,8 @@ int arch_paging_init(void)
 
 		for(i=0; i<mb_info->mods_count; i++, mmodule++) {
 			// map physical address to the same virtual address
-			npages = (mmodule->mod_end - mmodule->mod_start) / PAGE_SIZE;
-			if (mmodule->mod_end % PAGE_SIZE)
+			npages = (mmodule->mod_end - mmodule->mod_start) >> PAGE_SHIFT;
+			if (mmodule->mod_end & (PAGE_SIZE-1))
 				npages++;
 			map_region((size_t) (mmodule->mod_start), (size_t) (mmodule->mod_start), npages, MAP_KERNEL_SPACE);
 		}
@@ -699,17 +699,17 @@ int arch_paging_init(void)
 	map_region(SCC_BOOTINFO, SCC_BOOTINFO, 1, MAP_KERNEL_SPACE);
 
 	// map the initial ramdisk
-	npages = bootinfo->size / PAGE_SIZE;
-	if (bootinfo->size % PAGE_SIZE)
+	npages = bootinfo->size >> PAGE_SHIFT;
+	if (bootinfo->size & (PAGE_SIZE-1))
 		npages++;
 	map_region(bootinfo->addr, bootinfo->addr, npages, MAP_KERNEL_SPACE);
 
 	// map SCC's configuration registers
-	viraddr = map_region(CRB_X0_Y0, CRB_X0_Y0, (CRB_OWN-CRB_X0_Y0+16*1024*1024)/PAGE_SIZE, MAP_KERNEL_SPACE|MAP_NO_CACHE);
+	viraddr = map_region(CRB_X0_Y0, CRB_X0_Y0, (CRB_OWN-CRB_X0_Y0+16*1024*1024) >> PAGE_SHIFT, MAP_KERNEL_SPACE|MAP_NO_CACHE);
 	kprintf("Map configuration registers at 0x%x\n", viraddr);
 
 	// map SCC's message passing buffers
-	viraddr = map_region(MPB_X0_Y0, MPB_X0_Y0, (MPB_OWN-MPB_X0_Y0+16*1024*1024)/PAGE_SIZE, MAP_KERNEL_SPACE|MAP_MPE);
+	viraddr = map_region(MPB_X0_Y0, MPB_X0_Y0, (MPB_OWN-MPB_X0_Y0+16*1024*1024) >> PAGE_SHIFT, MAP_KERNEL_SPACE|MAP_MPE);
 	kprintf("Map message passing buffers at 0x%x\n", viraddr);
 #endif
 
diff --git a/include/metalsvm/config.h.example b/include/metalsvm/config.h.example
index b5c0536e..bb405699 100644
--- a/include/metalsvm/config.h.example
+++ b/include/metalsvm/config.h.example
@@ -32,6 +32,7 @@ extern "C" {
 #define KERNEL_STACK_SIZE	8192
 #define KMSG_SIZE		(128*1024)
 #define PAGE_SIZE		4096
+#define PAGE_SHIFT		12
 #define CACHE_LINE		64
 #define MAILBOX_SIZE		8
 #define TIMER_FREQ		100	/* in HZ */

From 65d66171d7ecd9076843329c31e5e597bbec4b24 Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Tue, 19 Apr 2011 20:18:38 +0200
Subject: [PATCH 18/36] add jacobi solver as example program

---
 newlib/examples/Makefile |   9 +-
 newlib/examples/jacobi.c | 200 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 208 insertions(+), 1 deletion(-)
 create mode 100644 newlib/examples/jacobi.c

diff --git a/newlib/examples/Makefile b/newlib/examples/Makefile
index 38a21cc6..e1a373f9 100644
--- a/newlib/examples/Makefile
+++ b/newlib/examples/Makefile
@@ -3,6 +3,7 @@ NEWLIB = ../x86/i586-metalsvm-elf32
 MAKE = make
 STRIP_DEBUG = --strip-debug
 KEEP_DEBUG = --only-keep-debug
+LDFLAGS =
 
 # other implicit rules
 %.o : %.c
@@ -10,7 +11,13 @@ KEEP_DEBUG = --only-keep-debug
 
 default: all
 	
-all: hello tests
+all: hello tests jacobi
+
+jacobi: jacobi.o
+	$(CC_FOR_TARGET) -T link.ld -o $@ $(LDFLAGS)  $< -lm
+	$(OBJCOPY_FOR_TARGET) $(KEEP_DEBUG) $@ $@.sym
+	$(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $@
+	chmod a-x $@.sym
 	
 tests: tests.o
 	$(CC_FOR_TARGET) -T link.ld -o $@ $(LDFLAGS) $<
diff --git a/newlib/examples/jacobi.c b/newlib/examples/jacobi.c
new file mode 100644
index 00000000..a899d87e
--- /dev/null
+++ b/newlib/examples/jacobi.c
@@ -0,0 +1,200 @@
+/*
+ * Copyright 2011 Stefan Lankes, Alexander Pilz, Maximilian Marx, Michael Ober,
+ * 		  Chair for Operating Systems, RWTH Aachen University
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <time.h>
+#include <unistd.h>
+#include <errno.h>
+#undef errno
+extern int errno;
+
+#define MATRIX_SIZE 	256
+#define MAXVALUE 	1337
+#define PAGE_SIZE 	4096
+#define CACHE_SIZE      (256*1024)
+#define ALIGN(x,a)	(((x)+(a)-1)&~((a)-1))
+
+static int generate_empty_matrix(double*** A , unsigned int N) {
+	unsigned int iCnt;
+	int i,j;
+
+	*A = (double**) malloc((N+1)*sizeof(double*));
+
+	if (*A == NULL) 
+		return -2;	/* Error */
+
+	(*A)[0] = (double*) malloc((N+1)*N*sizeof(double));
+
+	if (**A == NULL)
+		return -2;	/* Error */
+
+	for(iCnt=1; iCnt<N; iCnt++) { /* Assign pointers in the first "real index"; Value from 1 to N (0 yet set, value N means N+1) */
+		(*A)[iCnt] = &((*A)[0][iCnt*(N+1)]);
+	}
+
+	memset(**A, 0, (N+1)*N*sizeof(double));      /* Fill matrix values with 0 */
+
+	srand( 42 /*(unsigned) time(NULL)*/ ) ; /* init random number generator */
+
+	/* 
+	 * initialize the system of linear equations
+	 * the result vector is one
+	 */
+	for (i = 0; i < N; i++) 
+	{
+		double sum = 0.0;
+
+		for (j = 0; j < N; j++) 
+		{
+			if (i != j) 
+			{
+				double c = ((double)rand()) / ((double)RAND_MAX) * MAXVALUE;
+
+				sum += fabs(c);
+				(*A)[i][j] = c;
+				(*A)[i][N] += c;
+			}
+		}
+
+		/*
+		 * The Jacobi method will always converge if the matrix A is strictly or irreducibly diagonally dominant. 
+		 * Strict row diagonal dominance means that for each row, the absolute value of the diagonal term is 
+		 * greater than the sum of absolute values of other terms: |A[i][i]| > Sum |A[i][j]| with (i != j)
+		 */
+
+		(*A)[i][i] = sum + 2.0;
+		(*A)[i][N] += sum + 2.0;
+	}
+
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	double*       temp;
+	unsigned int  i, j, iter_start, iter_end;
+	unsigned int  iterations = 0;
+	double        error, norm, norm_res, max = 0.0;
+	double** A=0;
+	double* X;
+	double* X_old, xi;
+	double start,stop;
+
+	if (generate_empty_matrix(&A,MATRIX_SIZE) < 0)
+	{
+		printf("generate_empty_matrix() failed...\n");
+		fflush(stdout);
+		exit(-1);
+
+	}
+
+	printf("generate_empty_matrix() done...\n");
+	fflush(stdout);
+
+	X=(double*) malloc(MATRIX_SIZE*sizeof(double));
+	X_old=(double*) malloc(MATRIX_SIZE*sizeof(double));
+	if(X == NULL || X_old == NULL)
+	{
+		printf("X or X_old is NULL...\n");
+		exit(-1);
+	}
+
+	for(i=0; i<MATRIX_SIZE; i++) 
+	{
+		X[i] = ((double)rand()) / ((double)RAND_MAX) * 10.0;
+		X_old[i] = 0.0;
+	}
+
+	printf("start calculation...\n");
+	fflush(stdout);
+
+	iter_start = 0;
+	iter_end = MATRIX_SIZE;
+
+	//start = RCCE_wtime();
+
+	while(1) 
+	{
+		iterations++;
+	
+		temp = X_old;
+		X_old = X;
+		X = temp;
+
+		for (i=iter_start; i<iter_end; i++) 
+		{	
+			for(j=0, xi=0.0; j<i; j++)
+				xi += A[i][j]* X_old[j];
+
+			for(j=i+1; j<MATRIX_SIZE; j++)
+				xi += A[i][j] * X_old[j];
+			X[i] = (A[i][MATRIX_SIZE] - xi) / A[i][i];
+		}
+
+		if (iterations % 5000 == 0 ) {/* calculate the Euclidean norm between X_old and X*/
+			norm_res = norm = 0.0;
+			for (i=iter_start; i<iter_end; i++)
+				norm += (X_old[i] - X[i]) * (X_old[i] - X[i]);
+
+			/* check the break condition */
+			norm_res /= (double) MATRIX_SIZE;
+			
+			if (norm_res < 0.0000001)
+				break;
+		}
+	}
+
+	//stop = RCCE_wtime();
+	
+	if (MATRIX_SIZE < 16) {
+		printf("Print the solution...\n");
+		/* print solution */
+		for(i=0; i<MATRIX_SIZE; i++) {
+			for(j=0; j<MATRIX_SIZE; j++) 
+				printf("%8.2f\t", A[i][j]);
+			printf("*\t%8.2f\t=\t%8.2f\n", X[i], A[i][MATRIX_SIZE]);
+		}
+	}
+	printf("Check the result...\n");
+
+	/* 
+	 * check the result 
+	 * X[i] have to be 1
+	 */
+	for(i=0; i<MATRIX_SIZE; i++) {
+		error = fabs(X[i] - 1.0f);
+
+		if (max < error)
+			max = error;
+			if (error > 0.01f)
+				printf("Result is on position %d wrong (%f != 1.0)\n", i, X[i]);
+	}
+	printf("maximal error is %f\n", max);
+
+	printf("\nmatrix size: %d x %d\n", MATRIX_SIZE, MATRIX_SIZE);
+	printf("number of iterations: %d\n", iterations);
+	//printf("calculation time: %f s\n", stop-start);
+
+	free((void*) X_old);
+	free((void*) X);
+
+	return 0;
+}

From a9c4b5ddc0a4e3fc363471295395984715bfb841 Mon Sep 17 00:00:00 2001
From: Jacek Galowicz <galowicz@lfbs.rwth-aachen.de>
Date: Tue, 19 Apr 2011 18:51:59 +0200
Subject: [PATCH 19/36] Encapsulated IDT-/GDT-descriptor configuring code into
 helper functions.

---
 arch/x86/include/asm/gdt.h | 10 ++++++++++
 arch/x86/include/asm/idt.h | 10 ++++++++++
 arch/x86/kernel/gdt.c      | 22 +++++++++++++++-------
 arch/x86/kernel/idt.c      | 19 ++++++++++++++-----
 4 files changed, 49 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/gdt.h b/arch/x86/include/asm/gdt.h
index cdb65d46..fb4bd4c1 100644
--- a/arch/x86/include/asm/gdt.h
+++ b/arch/x86/include/asm/gdt.h
@@ -115,6 +115,16 @@ typedef struct {
  */
 void gdt_install(void);
 
+/** @brief Configures and returns a GDT descriptor with chosen attributes
+ *
+ * Just feed this function with address, limit and the flags 
+ * you have seen in idt.h
+ *
+ * @return a preconfigured gdt descriptor
+ */
+gdt_entry_t configure_gdt_entry(unsigned long base, unsigned long limit,
+		unsigned char access, unsigned char gran);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/arch/x86/include/asm/idt.h b/arch/x86/include/asm/idt.h
index a43d4383..28fde65a 100644
--- a/arch/x86/include/asm/idt.h
+++ b/arch/x86/include/asm/idt.h
@@ -116,6 +116,16 @@ void idt_install(void);
 void idt_set_gate(unsigned char num, unsigned long base, unsigned short sel,
 		  unsigned char flags);
 
+/** @brief Configures and returns a IDT entry with chosen attributes
+ *
+ * Just feed this function with base, selector and the flags
+ * you have seen in idt.h
+ *
+ * @return a preconfigured idt descriptor
+ */
+idt_entry_t configure_idt_entry(unsigned long base, unsigned short sel,
+		  unsigned char flags);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/arch/x86/kernel/gdt.c b/arch/x86/kernel/gdt.c
index 33b136ee..5be409c6 100644
--- a/arch/x86/kernel/gdt.c
+++ b/arch/x86/kernel/gdt.c
@@ -157,19 +157,27 @@ int create_default_frame(task_t* task, entry_point_t ep, void* arg)
 static void gdt_set_gate(int num, unsigned long base, unsigned long limit,
 			  unsigned char access, unsigned char gran)
 {
+	gdt[num] = configure_gdt_entry(base, limit, access, gran);
+}
 
+gdt_entry_t configure_gdt_entry(unsigned long base, unsigned long limit,
+		unsigned char access, unsigned char gran)
+{
+	gdt_entry_t desc;
 	/* Setup the descriptor base address */
-	gdt[num].base_low = (base & 0xFFFF);
-	gdt[num].base_middle = (base >> 16) & 0xFF;
-	gdt[num].base_high = (base >> 24) & 0xFF;
+	desc.base_low = (base & 0xFFFF);
+	desc.base_middle = (base >> 16) & 0xFF;
+	desc.base_high = (base >> 24) & 0xFF;
 
 	/* Setup the descriptor limits */
-	gdt[num].limit_low = (limit & 0xFFFF);
-	gdt[num].granularity = ((limit >> 16) & 0x0F);
+	desc.limit_low = (limit & 0xFFFF);
+	desc.granularity = ((limit >> 16) & 0x0F);
 
 	/* Finally, set up the granularity and access flags */
-	gdt[num].granularity |= (gran & 0xF0);
-	gdt[num].access = access;
+	desc.granularity |= (gran & 0xF0);
+	desc.access = access;
+
+	return desc;
 }
 
 /* 
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index fd8156c4..fe3b2bf4 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -48,16 +48,25 @@ static idt_ptr_t idtp;
 void idt_set_gate(unsigned char num, unsigned long base, unsigned short sel,
 		  unsigned char flags)
 {
+	idt[num] = configure_idt_entry(base, sel, flags);
+}
+
+idt_entry_t configure_idt_entry(unsigned long base, unsigned short sel,
+		  unsigned char flags)
+{
+	idt_entry_t desc;
 
 	/* The interrupt routine's base address */
-	idt[num].base_lo = (base & 0xFFFF);
-	idt[num].base_hi = (base >> 16) & 0xFFFF;
+	desc.base_lo = (base & 0xFFFF);
+	desc.base_hi = (base >> 16) & 0xFFFF;
 
 	/* The segment or 'selector' that this IDT entry will use
 	 *  is set here, along with any access flags */
-	idt[num].sel = sel;
-	idt[num].always0 = 0;
-	idt[num].flags = flags;
+	desc.sel = sel;
+	desc.always0 = 0;
+	desc.flags = flags;
+
+	return desc;
 }
 
 extern void isrsyscall(void);

From d8ad0b8dce02f3713ede7addaef14c39c0397ad5 Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Wed, 20 Apr 2011 11:34:52 +0200
Subject: [PATCH 20/36] add space to store the FPU context

---
 arch/x86/include/asm/tasks_types.h | 67 ++++++++++++++++++++++++++++++
 include/metalsvm/tasks_types.h     |  7 +++-
 2 files changed, 72 insertions(+), 2 deletions(-)
 create mode 100644 arch/x86/include/asm/tasks_types.h

diff --git a/arch/x86/include/asm/tasks_types.h b/arch/x86/include/asm/tasks_types.h
new file mode 100644
index 00000000..f0095bf3
--- /dev/null
+++ b/arch/x86/include/asm/tasks_types.h
@@ -0,0 +1,67 @@
+/* 
+ * Copyright 2011 Stefan Lankes, Chair for Operating Systems,
+ *                               RWTH Aachen University
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This file is part of MetalSVM.
+ *
+ */
+
+#ifndef __ARCH_TASKS_TYPES__
+#define __ARCH_TASKS_TYPES__
+
+#include <metalsvm/stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct {
+	long	cwd;
+	long	swd;
+	long	twd;
+	long	fip;
+	long	fcs;
+	long	foo;
+	long	fos;
+	long	st_space[20];
+	long	status;
+} i387_fsave_t;
+
+typedef struct i387_fxsave_struct {
+	unsigned short	cwd;
+	unsigned short	swd;
+	unsigned short	twd;
+	unsigned short	fop;
+	long	fip;
+	long	fcs;
+	long	foo;
+	long	fos;
+	long	mxcsr;
+	long	reserved;
+	long	st_space[32];
+	long	xmm_space[32];
+	long	padding[56];
+} i387_fxsave_t __attribute__ ((aligned (16)));
+
+union fpu_union {
+	i387_fsave_t	fsave;
+	i387_fxsave_t	fxsave;
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/include/metalsvm/tasks_types.h b/include/metalsvm/tasks_types.h
index 73ca13d6..29fa5337 100644
--- a/include/metalsvm/tasks_types.h
+++ b/include/metalsvm/tasks_types.h
@@ -33,6 +33,7 @@
 #include <metalsvm/vma.h>
 #include <metalsvm/spinlock_types.h>
 #include <metalsvm/mailbox_types.h>
+#include <asm/tasks_types.h>
 #include <asm/atomic.h>
 
 #ifdef __cplusplus
@@ -57,9 +58,9 @@ typedef struct task {
 	uint32_t                status;
 	/// Usage in number of pages
 	atomic_int32_t   	user_usage;
-  /// Avoids concurrent access to the page directory
+	/// Avoids concurrent access to the page directory
 	spinlock_t		pgd_lock;	
-  /// pointer to the page directory
+	/// pointer to the page directory
 	struct page_dir*	pgd;            
 	/// Lock for the VMA_list
 	spinlock_t		vma_lock;
@@ -69,6 +70,8 @@ typedef struct task {
 	mailbox_wait_msg_t	inbox;	
 	/// Mail outbox array
 	mailbox_wait_msg_t*	outbox[MAX_TASKS];
+	/// FPU state
+	union fpu_union		fpu_state;
 } __attribute__((packed))  task_t;
 
 #ifdef __cplusplus

From 7cb05d8f4a8fdba02d8066460fe6b0f3ee383ac3 Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Wed, 20 Apr 2011 15:16:22 +0200
Subject: [PATCH 21/36] first version to support the FPU

- seems to work
- currently, we do not support SIMD instructions
---
 arch/x86/include/asm/tasks_types.h |  6 +++++-
 arch/x86/kernel/entry.asm          | 13 ++++++++-----
 arch/x86/kernel/isrs.c             | 20 +++++++++++++++++++-
 include/metalsvm/tasks_types.h     |  4 +++-
 kernel/tasks.c                     |  9 +++++++--
 5 files changed, 42 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/tasks_types.h b/arch/x86/include/asm/tasks_types.h
index f0095bf3..138fbd1c 100644
--- a/arch/x86/include/asm/tasks_types.h
+++ b/arch/x86/include/asm/tasks_types.h
@@ -55,11 +55,15 @@ typedef struct i387_fxsave_struct {
 	long	padding[56];
 } i387_fxsave_t __attribute__ ((aligned (16)));
 
-union fpu_union {
+union fpu_state {
 	i387_fsave_t	fsave;
 	i387_fxsave_t	fxsave;
 };
 
+static inline void save_fpu_state(union fpu_state* state) {
+	asm volatile ("fsave %0; fwait" : "=m"((*state).fsave));
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/arch/x86/kernel/entry.asm b/arch/x86/kernel/entry.asm
index 1a325dba..ca3afaa1 100644
--- a/arch/x86/kernel/entry.asm
+++ b/arch/x86/kernel/entry.asm
@@ -64,16 +64,19 @@ ALIGN 4
 stublet:
 ; initialize stack pointer.
     mov esp, default_stack_pointer
-; enable cache and turn on FPU exceptions
     mov eax, cr0
-    ; enable cache
-    and eax, 0x9fffffff
-    ; ...and turn on FPU exceptions
-    or eax, 0x20
+; enable cache, disable paging and fpu emulation
+    and eax, 0x3ffffffb
+; ...monitor coprocessor and turn on FPU exceptions
+    or eax, 0x22
     mov cr0, eax
 ; clears the current pgd entry
     xor eax, eax
     mov cr3, eax
+; disable SSE support (TODO)
+    mov eax, cr4
+    and eax, 0xfffbf9ff
+    mov cr4, eax
 ; interpret multiboot information
     extern multiboot_init
     push ebx
diff --git a/arch/x86/kernel/isrs.c b/arch/x86/kernel/isrs.c
index 63075d00..6df9526e 100644
--- a/arch/x86/kernel/isrs.c
+++ b/arch/x86/kernel/isrs.c
@@ -74,6 +74,7 @@ extern void isr30(void);
 extern void isr31(void);
 
 static void fault_handler(struct state *s);
+static void fpu_handler(struct state *s);
 
 /* 
  * This is a very repetitive function... it's not hard, it's
@@ -158,6 +159,23 @@ void isrs_install(void)
 	// install the default handler
 	for(i=0; i<32; i++)
 		irq_install_handler(i, fault_handler);
+
+	// set hanlder for fpu exceptions
+	irq_uninstall_handler(7);
+	irq_install_handler(7, fpu_handler);
+}
+
+static void fpu_handler(struct state *s)
+{
+	task_t* task = per_core(current_task);
+
+	kputs("got FPU exception\n");
+	asm volatile ("clts"); // clear the TS flag of cr0
+	if (!task->fpu_used)  {
+		task->fpu_used = 1;
+		asm volatile ("finit");
+	} else 
+		asm volatile ("frstor %0" :: "m"(task->fpu.fsave)); // restore fpu state
 }
 
 /** @brief Exception messages
@@ -189,7 +207,7 @@ static void fault_handler(struct state *s)
 {
 	if (s->int_no < 32) {
 		kputs(exception_messages[s->int_no]);
-		kputs(" Exception.\n");
+		kprintf(" Exception. (%d)\n", s->int_no);
 		
 		/* Now, we signalize that we have handled the interrupt */
 		if (apic_is_enabled())
diff --git a/include/metalsvm/tasks_types.h b/include/metalsvm/tasks_types.h
index 29fa5337..c46ec4b4 100644
--- a/include/metalsvm/tasks_types.h
+++ b/include/metalsvm/tasks_types.h
@@ -66,12 +66,14 @@ typedef struct task {
 	spinlock_t		vma_lock;
 	/// List of VMAs
 	vma_t*			vma_list;
+	/// Is set, when the FPU is used
+	uint32_t		fpu_used;
 	/// Mail inbox
 	mailbox_wait_msg_t	inbox;	
 	/// Mail outbox array
 	mailbox_wait_msg_t*	outbox[MAX_TASKS];
 	/// FPU state
-	union fpu_union		fpu_state;
+	union fpu_state		fpu;
 } __attribute__((packed))  task_t;
 
 #ifdef __cplusplus
diff --git a/kernel/tasks.c b/kernel/tasks.c
index 6be430ce..3ffb49b9 100644
--- a/kernel/tasks.c
+++ b/kernel/tasks.c
@@ -48,7 +48,7 @@ DEFINE_PER_CORE(task_t*, current_task, NULL);
  * A task's id will be its position in this array.
  */
 static task_t task_table[MAX_TASKS] = {[0 ... MAX_TASKS-1] = {0, TASK_INVALID, ATOMIC_INIT(0), \
-			 SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL}};
+			 SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, 0}};
 static spinlock_irqsave_t table_lock = SPINLOCK_IRQSAVE_INIT;
 
 /** @brief helper function for the assembly code to determine the current task
@@ -67,6 +67,7 @@ int multitasking_init(void) {
 		memset(task_table[0].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
 		per_core(current_task) = task_table+0;
 		per_core(current_task)->pgd = get_boot_pgd();
+		task_table[0].fpu_used = 0;
 		return 0;
 	}
 
@@ -189,6 +190,7 @@ static int create_task(tid_t* id, entry_point_t ep, void* arg)
 
 			ret = create_default_frame(task_table+i, ep, arg);
 
+			task_table[i].fpu_used = 0;
 			task_table[i].status = TASK_READY;
 			break;
 		}
@@ -250,6 +252,7 @@ int sys_fork(void)
 			mailbox_wait_msg_init(&task_table[i].inbox);
 			memset(task_table[i].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
 			task_table[i].outbox[per_core(current_task)->id] = &per_core(current_task)->inbox; 
+			task_table[i].fpu_used = 0x00;
 
 			ret = arch_fork(task_table+i);
 
@@ -707,7 +710,9 @@ void scheduler(void)
 			if (per_core(current_task)->status == TASK_RUNNING)
 				per_core(current_task)->status = TASK_READY;
 			task_table[new_id].status = TASK_RUNNING;
-	
+
+			if (per_core(current_task)->fpu_used)
+				save_fpu_state(&(per_core(current_task)->fpu));	
 			per_core(current_task) = task_table+new_id;
 			goto get_task_out;
 		}

From a3176aac0b9d8df31a89dd3a8bbf94e985a0995e Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Wed, 20 Apr 2011 20:41:51 +0200
Subject: [PATCH 22/36] remove bug in the calculation of the break condition

---
 newlib/examples/jacobi.c | 44 ++++++++++++++++++----------------------
 1 file changed, 20 insertions(+), 24 deletions(-)

diff --git a/newlib/examples/jacobi.c b/newlib/examples/jacobi.c
index a899d87e..6c0e7a20 100644
--- a/newlib/examples/jacobi.c
+++ b/newlib/examples/jacobi.c
@@ -1,6 +1,6 @@
 /*
- * Copyright 2011 Stefan Lankes, Alexander Pilz, Maximilian Marx, Michael Ober,
- * 		  Chair for Operating Systems, RWTH Aachen University
+ * Copyright 2010-2011 Stefan Lankes
+ *                     Chair for Operating Systems, RWTH Aachen University
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -26,10 +26,10 @@
 #undef errno
 extern int errno;
 
-#define MATRIX_SIZE 	256
-#define MAXVALUE 	1337
-#define PAGE_SIZE 	4096
-#define CACHE_SIZE      (256*1024)
+#define MATRIX_SIZE 	128
+#define MAXVALUE	1337
+#define PAGE_SIZE	4096
+#define CACHE_SIZE	(256*1024)
 #define ALIGN(x,a)	(((x)+(a)-1)&~((a)-1))
 
 static int generate_empty_matrix(double*** A , unsigned int N) {
@@ -89,28 +89,26 @@ static int generate_empty_matrix(double*** A , unsigned int N) {
 
 int main(int argc, char **argv)
 {
-	double*       temp;
-	unsigned int  i, j, iter_start, iter_end;
-	unsigned int  iterations = 0;
-	double        error, norm, norm_res, max = 0.0;
-	double** A=0;
-	double* X;
-	double* X_old, xi;
-	double start,stop;
+	double*		temp;
+	unsigned int	i, j, iter_start, iter_end;
+	unsigned int	iterations = 0;
+	double		error, norm, max = 0.0;
+	double**	A=0;
+	double*		X;
+	double*		X_old, xi;
+	double		start,stop;
 
 	if (generate_empty_matrix(&A,MATRIX_SIZE) < 0)
 	{
 		printf("generate_empty_matrix() failed...\n");
-		fflush(stdout);
 		exit(-1);
 
 	}
 
 	printf("generate_empty_matrix() done...\n");
-	fflush(stdout);
 
-	X=(double*) malloc(MATRIX_SIZE*sizeof(double));
-	X_old=(double*) malloc(MATRIX_SIZE*sizeof(double));
+	X = (double*) malloc(MATRIX_SIZE*sizeof(double));
+	X_old = (double*) malloc(MATRIX_SIZE*sizeof(double));
 	if(X == NULL || X_old == NULL)
 	{
 		printf("X or X_old is NULL...\n");
@@ -124,7 +122,6 @@ int main(int argc, char **argv)
 	}
 
 	printf("start calculation...\n");
-	fflush(stdout);
 
 	iter_start = 0;
 	iter_end = MATRIX_SIZE;
@@ -142,7 +139,7 @@ int main(int argc, char **argv)
 		for (i=iter_start; i<iter_end; i++) 
 		{	
 			for(j=0, xi=0.0; j<i; j++)
-				xi += A[i][j]* X_old[j];
+				xi += A[i][j] * X_old[j];
 
 			for(j=i+1; j<MATRIX_SIZE; j++)
 				xi += A[i][j] * X_old[j];
@@ -150,14 +147,13 @@ int main(int argc, char **argv)
 		}
 
 		if (iterations % 5000 == 0 ) {/* calculate the Euclidean norm between X_old and X*/
-			norm_res = norm = 0.0;
+			norm = 0.0;
 			for (i=iter_start; i<iter_end; i++)
 				norm += (X_old[i] - X[i]) * (X_old[i] - X[i]);
 
 			/* check the break condition */
-			norm_res /= (double) MATRIX_SIZE;
-			
-			if (norm_res < 0.0000001)
+			norm /= (double) MATRIX_SIZE;		
+			if (norm < 0.0000001)
 				break;
 		}
 	}

From 0c411fd702c15c005c25d05720c8b3919453ba06 Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Wed, 20 Apr 2011 21:23:22 +0200
Subject: [PATCH 23/36] add basic support of sbrk and fix bug in saving of the
 fpu context

---
 arch/x86/kernel/isrs.c              | 11 ++++++---
 arch/x86/mm/page.c                  | 23 ++++++++++++++++-
 include/metalsvm/tasks_types.h      | 12 +++++++--
 kernel/syscall.c                    | 33 +++++++++++++++++++++++++
 kernel/tasks.c                      | 38 ++++++++++++++++++++---------
 kernel/tests.c                      |  1 +
 newlib/src/libgloss/metalsvm/sbrk.c | 29 ++++++++++------------
 7 files changed, 113 insertions(+), 34 deletions(-)

diff --git a/arch/x86/kernel/isrs.c b/arch/x86/kernel/isrs.c
index 6df9526e..cfb45389 100644
--- a/arch/x86/kernel/isrs.c
+++ b/arch/x86/kernel/isrs.c
@@ -169,13 +169,16 @@ static void fpu_handler(struct state *s)
 {
 	task_t* task = per_core(current_task);
 
-	kputs("got FPU exception\n");
 	asm volatile ("clts"); // clear the TS flag of cr0
-	if (!task->fpu_used)  {
-		task->fpu_used = 1;
+	if (!(task->flags & TASK_FPU_INIT))  {
+		// use the FPU at the first time => Initialize FPU
 		asm volatile ("finit");
-	} else 
+		task->flags = task->flags|TASK_FPU_INIT|TASK_FPU_USED;
+	} else {
+		// restore the FPU context 
 		asm volatile ("frstor %0" :: "m"(task->fpu.fsave)); // restore fpu state
+		task->flags |= TASK_FPU_USED;
+	}
 }
 
 /** @brief Exception messages
diff --git a/arch/x86/mm/page.c b/arch/x86/mm/page.c
index 4e5c4f7c..75f34015 100644
--- a/arch/x86/mm/page.c
+++ b/arch/x86/mm/page.c
@@ -588,7 +588,28 @@ int print_paging_tree(size_t viraddr)
 
 static void pagefault_handler(struct state *s)
 {
-	kprintf("PAGE FAULT: Task %u got page fault at %p (irq %d)\n", per_core(current_task)->id, read_cr2(), s->int_no);
+	task_t* task = per_core(current_task);
+	size_t viraddr = read_cr2();
+	size_t phyaddr;
+
+	if ((viraddr >= task->start_heap) && (viraddr <= task->end_heap) && (viraddr > KERNEL_SPACE)) {
+		viraddr = viraddr & 0xFFFFF000;
+
+		phyaddr = get_page();
+		if (BUILTIN_EXPECT(!phyaddr, 0))
+			goto default_handler;
+
+		if (map_region(viraddr, phyaddr, 1, MAP_USER_SPACE|MAP_HEAP) == viraddr) {
+			memset((void*) viraddr, 0x00, PAGE_SIZE);
+			return;
+		}
+		
+		kprintf("Could not map 0x%x at 0x%x\n", phyaddr, viraddr);
+		put_page(phyaddr);
+	}
+
+default_handler:
+	kprintf("PAGE FAULT: Task %u got page fault at %p (irq %d)\n", task->id, viraddr, s->int_no);
 	kprintf("Register state: eax = 0x%x, ebx = 0x%x, ecx = 0x%x, edx = 0x%x, edi = 0x%x, esi = 0x%x, ebp = 0x%x, esp = 0x%x\n", 
 		s->eax, s->ebx, s->ecx, s->edx, s->edi, s->esi, s->ebp, s->esp);
 
diff --git a/include/metalsvm/tasks_types.h b/include/metalsvm/tasks_types.h
index c46ec4b4..58f65cf2 100644
--- a/include/metalsvm/tasks_types.h
+++ b/include/metalsvm/tasks_types.h
@@ -47,6 +47,10 @@ extern "C" {
 #define TASK_FINISHED	4
 #define TASK_IDLE	5
 
+#define TASK_DEFAULT_FLAGS	0
+#define TASK_FPU_INIT		(1 << 0)
+#define TASK_FPU_USED		(1 << 1)
+
 typedef int (STDCALL *entry_point_t)(void*);
 struct page_dir;
 
@@ -66,8 +70,12 @@ typedef struct task {
 	spinlock_t		vma_lock;
 	/// List of VMAs
 	vma_t*			vma_list;
-	/// Is set, when the FPU is used
-	uint32_t		fpu_used;
+	/// Additional status flags. For instance, to signalize the using of the FPU
+	uint32_t		flags;
+	/// Start address of the heap
+	uint32_t		start_heap;
+	/// End address of the heap
+	uint32_t		end_heap;
 	/// Mail inbox
 	mailbox_wait_msg_t	inbox;	
 	/// Mail outbox array
diff --git a/kernel/syscall.c b/kernel/syscall.c
index 76ce8e10..95a39fc6 100644
--- a/kernel/syscall.c
+++ b/kernel/syscall.c
@@ -22,6 +22,7 @@
 #include <metalsvm/syscall.h>
 #include <metalsvm/tasks.h>
 #include <metalsvm/errno.h>
+#include <metalsvm/spinlock.h>
 
 static int sys_write(int fildes, const char *buf, size_t len)
 {
@@ -37,6 +38,32 @@ static int sys_write(int fildes, const char *buf, size_t len)
 	return len;
 }
 
+static int sys_sbrk(int incr)
+{
+	task_t* task = per_core(current_task);
+	vma_t* tmp = NULL;
+	int ret;
+
+	spinlock_lock(&task->vma_lock);
+
+	tmp = task->vma_list;
+        while(tmp && !((task->end_heap >= tmp->start) && (task->end_heap <= tmp->end)))
+		tmp = tmp->next;
+
+	ret = (int) task->end_heap;
+	task->end_heap += incr;
+	if (task->end_heap < task->start_heap)
+		task->end_heap = task->start_heap;
+	
+	// resize virtual memory area
+	if (tmp && (tmp->end <= task->end_heap))
+		tmp->end = task->end_heap;
+
+	spinlock_unlock(&task->vma_lock);
+
+	return ret;
+}
+
 int syscall_handler(uint32_t sys_nr, ...)
 {
 	int ret = -EINVAL;
@@ -64,6 +91,12 @@ int syscall_handler(uint32_t sys_nr, ...)
 	case __NR_close:
 		ret = 0;
 		break;
+	case __NR_sbrk: {
+			int incr = va_arg(vl, int);
+
+			ret = sys_sbrk(incr);
+			break;
+		}
 	case __NR_getpid:
 		ret = per_core(current_task)->id;
 		break;
diff --git a/kernel/tasks.c b/kernel/tasks.c
index 3ffb49b9..ae699687 100644
--- a/kernel/tasks.c
+++ b/kernel/tasks.c
@@ -48,7 +48,7 @@ DEFINE_PER_CORE(task_t*, current_task, NULL);
  * A task's id will be its position in this array.
  */
 static task_t task_table[MAX_TASKS] = {[0 ... MAX_TASKS-1] = {0, TASK_INVALID, ATOMIC_INIT(0), \
-			 SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, 0}};
+			 SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, 0, 0, 0}};
 static spinlock_irqsave_t table_lock = SPINLOCK_IRQSAVE_INIT;
 
 /** @brief helper function for the assembly code to determine the current task
@@ -67,7 +67,7 @@ int multitasking_init(void) {
 		memset(task_table[0].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
 		per_core(current_task) = task_table+0;
 		per_core(current_task)->pgd = get_boot_pgd();
-		task_table[0].fpu_used = 0;
+		task_table[0].flags = TASK_DEFAULT_FLAGS;
 		return 0;
 	}
 
@@ -190,7 +190,9 @@ static int create_task(tid_t* id, entry_point_t ep, void* arg)
 
 			ret = create_default_frame(task_table+i, ep, arg);
 
-			task_table[i].fpu_used = 0;
+			task_table[i].flags = TASK_DEFAULT_FLAGS;
+			task_table[i].start_heap = 0;
+			task_table[i].end_heap = 0;
 			task_table[i].status = TASK_READY;
 			break;
 		}
@@ -252,7 +254,10 @@ int sys_fork(void)
 			mailbox_wait_msg_init(&task_table[i].inbox);
 			memset(task_table[i].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
 			task_table[i].outbox[per_core(current_task)->id] = &per_core(current_task)->inbox; 
-			task_table[i].fpu_used = 0x00;
+			task_table[i].flags = per_core(current_task)->flags;
+			memcpy(&(task_table[i].fpu), &(per_core(current_task)->fpu), sizeof(union fpu_state));
+			task_table[i].start_heap = 0;
+			task_table[i].end_heap = 0;
 
 			ret = arch_fork(task_table+i);
 
@@ -352,8 +357,8 @@ static int load_task(load_args_t* largs)
 			if (!prog_header.virt_addr)
 				continue;
 
-			npages = (prog_header.mem_size / PAGE_SIZE);
-			if (prog_header.mem_size % PAGE_SIZE)
+			npages = (prog_header.mem_size >> PAGE_SHIFT);
+			if (prog_header.mem_size & (PAGE_SIZE-1))
 				npages++;
 
 			addr = get_pages(npages);
@@ -369,6 +374,10 @@ static int load_task(load_args_t* largs)
 			// clear pages
 			memset((void*) prog_header.virt_addr, 0, npages*PAGE_SIZE);
 
+			// set starting point of the heap
+			if (per_core(current_task)->start_heap < prog_header.virt_addr+prog_header.mem_size)
+				per_core(current_task)->start_heap = per_core(current_task)->end_heap = prog_header.virt_addr+prog_header.mem_size;
+
 			// load program
 			read_fs(node, (uint8_t*)prog_header.virt_addr, prog_header.file_size, prog_header.offset);
 
@@ -387,8 +396,8 @@ static int load_task(load_args_t* largs)
 
 		case ELF_PT_GNU_STACK: // Indicates stack executability
 			// create user-level stack
-			npages = DEFAULT_STACK_SIZE / PAGE_SIZE;
-			if (DEFAULT_STACK_SIZE % PAGE_SIZE)
+			npages = DEFAULT_STACK_SIZE >> PAGE_SHIFT;
+			if (DEFAULT_STACK_SIZE & (PAGE_SIZE-1))
 				npages++;
 
 			addr = get_pages(npages); 
@@ -475,6 +484,9 @@ static int load_task(load_args_t* largs)
 
 	kfree(largs, sizeof(load_args_t));
 
+	// clear fpu state
+	per_core(current_task)->flags &= ~(TASK_FPU_USED|TASK_FPU_INIT);
+
 	jump_to_user_code(header.entry, stack+offset);
 
 	return 0;
@@ -703,6 +715,12 @@ void scheduler(void)
 	if (per_core(current_task)->status == TASK_FINISHED)
 		per_core(current_task)->status = TASK_INVALID; 
 
+	/* if the task is using the FPU, we need to save the FPU context */
+	if (per_core(current_task)->flags & TASK_FPU_USED) {
+		save_fpu_state(&(per_core(current_task)->fpu));
+		per_core(current_task)->flags &= ~TASK_FPU_USED;
+	}
+
 	for(i=1, new_id=(per_core(current_task)->id + 1) % MAX_TASKS; 
 		i<MAX_TASKS; i++, new_id=(new_id+1) % MAX_TASKS) 
 	{
@@ -710,10 +728,8 @@ void scheduler(void)
 			if (per_core(current_task)->status == TASK_RUNNING)
 				per_core(current_task)->status = TASK_READY;
 			task_table[new_id].status = TASK_RUNNING;
-
-			if (per_core(current_task)->fpu_used)
-				save_fpu_state(&(per_core(current_task)->fpu));	
 			per_core(current_task) = task_table+new_id;
+
 			goto get_task_out;
 		}
 	}
diff --git a/kernel/tests.c b/kernel/tests.c
index fe26ffc6..08f3d45f 100644
--- a/kernel/tests.c
+++ b/kernel/tests.c
@@ -114,6 +114,7 @@ int test_init(void)
 	//create_kernel_task(NULL, consumer, NULL);
 	//create_user_task(NULL, "/bin/hello", argv);
 	create_user_task(NULL, "/bin/tests", argv);
+	//create_user_task(NULL, "/bin/jacobi", argv);
 
 	return 0;
 }
diff --git a/newlib/src/libgloss/metalsvm/sbrk.c b/newlib/src/libgloss/metalsvm/sbrk.c
index 356c5ee7..78002469 100644
--- a/newlib/src/libgloss/metalsvm/sbrk.c
+++ b/newlib/src/libgloss/metalsvm/sbrk.c
@@ -22,23 +22,20 @@
 #include <errno.h>
 #undef errno
 extern int errno;
+#include "warning.h"
+#include "syscall.h"
 
-#ifndef NULL
-#define NULL	((void*) 0)
-#endif
+void*
+_DEFUN (sbrk, (incr),
+        int incr)
+{
+	int ret;
 
-void *
-sbrk (incr)
-     int incr;
-{ 
-	extern char _end; // set by linker
-	static char *heap_end = NULL;
-	char *prev_heap_end;
+	ret = SYSCALL1(__NR_sbrk, incr);
+	if (ret < 0x1000) {
+		errno = -ret;
+		ret = -1;
+	}
 
-	if (!heap_end)
-		heap_end = &_end;
-	prev_heap_end = heap_end;
-	heap_end += incr;
-
-	return (void *) prev_heap_end;
+	return (void*) ret;
 } 

From f3b620a9be3473f429acd6b9f9ab05942a9a185d Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Thu, 21 Apr 2011 10:13:58 +0200
Subject: [PATCH 24/36] add rudimentary support for the times system call

- no full support of the POSIX API
- however, the libc function clock works correctly
---
 arch/x86/kernel/timer.c                | 14 ++++++++++++++
 include/metalsvm/syscall.h             |  1 +
 include/metalsvm/tasks_types.h         |  2 ++
 include/metalsvm/time.h                | 20 ++++++++++++++++++++
 kernel/syscall.c                       | 10 +++++++++-
 kernel/tasks.c                         |  5 ++++-
 kernel/tests.c                         |  4 ++--
 newlib/examples/jacobi.c               |  8 ++++----
 newlib/src/libgloss/metalsvm/syscall.h |  1 +
 newlib/src/libgloss/metalsvm/times.c   | 16 +++++++++++-----
 10 files changed, 68 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kernel/timer.c b/arch/x86/kernel/timer.c
index b66646ba..617d47d9 100644
--- a/arch/x86/kernel/timer.c
+++ b/arch/x86/kernel/timer.c
@@ -22,6 +22,7 @@
 #include <metalsvm/tasks.h>
 #include <metalsvm/time.h>
 #include <metalsvm/processor.h>
+#include <metalsvm/errno.h>
 #include <asm/irq.h>
 #include <asm/irqflags.h>
 #include <asm/gdt.h>
@@ -39,6 +40,19 @@ uint64_t get_clock_tick(void)
 	return timer_ticks;
 }
 
+int sys_times(struct tms* buffer, clock_t* clock)
+{
+	if (BUILTIN_EXPECT(!buffer, 0))
+		return -EINVAL;
+	if (BUILTIN_EXPECT(!clock, 0))
+		return -EINVAL;
+
+	memset(buffer, 0x00, sizeof(struct tms));
+	*clock = buffer->tms_utime = (clock_t) ((timer_ticks - per_core(current_task)->start_tick) * CLOCKS_PER_SEC / TIMER_FREQ);
+
+	return 0;
+}
+
 /* 
  * Handles the timer. In this case, it's very simple: We
  * increment the 'timer_ticks' variable every time the
diff --git a/include/metalsvm/syscall.h b/include/metalsvm/syscall.h
index 97785247..b47fc284 100644
--- a/include/metalsvm/syscall.h
+++ b/include/metalsvm/syscall.h
@@ -49,6 +49,7 @@ extern "C" {
 #define __NR_fork		12
 #define __NR_wait		13
 #define __NR_execve		14
+#define __NR_times		15
 
 #ifdef __cplusplus
 }
diff --git a/include/metalsvm/tasks_types.h b/include/metalsvm/tasks_types.h
index 58f65cf2..e607dc3c 100644
--- a/include/metalsvm/tasks_types.h
+++ b/include/metalsvm/tasks_types.h
@@ -72,6 +72,8 @@ typedef struct task {
 	vma_t*			vma_list;
 	/// Additional status flags. For instance, to signalize the using of the FPU
 	uint32_t		flags;
+	/// starting time/tick of the task
+	uint64_t		start_tick;
 	/// Start address of the heap
 	uint32_t		start_heap;
 	/// End address of the heap
diff --git a/include/metalsvm/time.h b/include/metalsvm/time.h
index 4c2cdf51..702207b9 100644
--- a/include/metalsvm/time.h
+++ b/include/metalsvm/time.h
@@ -30,6 +30,26 @@
 extern "C" {
 #endif
 
+typedef uint32_t clock_t;
+
+struct tms {
+	clock_t tms_utime;
+	clock_t tms_stime;
+	clock_t tms_cutime;
+	clock_t tms_cstime;
+};
+
+#ifndef CLOCKS_PER_SEC
+// newlib's default value
+#define CLOCKS_PER_SEC 1000
+#endif
+
+/** @brief Determines the time in CLK_TCK's
+ *
+ * System call, which returns the value of time in CLK_TCK's
+ */
+int sys_times(struct tms*, clock_t* clock);
+
 /** @brief Initialize Timer interrupts 
  *
  * This procedure installs IRQ handlers for timer interrupts
diff --git a/kernel/syscall.c b/kernel/syscall.c
index 95a39fc6..d3ed8a41 100644
--- a/kernel/syscall.c
+++ b/kernel/syscall.c
@@ -23,6 +23,7 @@
 #include <metalsvm/tasks.h>
 #include <metalsvm/errno.h>
 #include <metalsvm/spinlock.h>
+#include <metalsvm/time.h>
 
 static int sys_write(int fildes, const char *buf, size_t len)
 {
@@ -105,7 +106,7 @@ int syscall_handler(uint32_t sys_nr, ...)
 		break;
 	case __NR_wait: {
 			int32_t* status = va_arg(vl, int32_t*);
-		
+	
 			ret = wait(status);
 			break;
 		}
@@ -117,6 +118,13 @@ int syscall_handler(uint32_t sys_nr, ...)
 			ret = sys_execve(name, argv, env);
 			break;
 		}
+	case __NR_times: {
+			struct tms* buffer = va_arg(vl, struct tms*);
+			clock_t* clock = va_arg(vl, clock_t*);
+
+			ret = sys_times(buffer, clock);
+			break;
+		}
 	default:
 		kputs("invalid system call\n");
 		ret = -ENOSYS;
diff --git a/kernel/tasks.c b/kernel/tasks.c
index ae699687..26774de7 100644
--- a/kernel/tasks.c
+++ b/kernel/tasks.c
@@ -38,6 +38,7 @@
 #include <metalsvm/mailbox.h>
 #include <metalsvm/syscall.h>
 #include <metalsvm/fs.h>
+#include <metalsvm/time.h>
 #include <asm/apic.h>
 #include <asm/elf.h>
 
@@ -48,7 +49,7 @@ DEFINE_PER_CORE(task_t*, current_task, NULL);
  * A task's id will be its position in this array.
  */
 static task_t task_table[MAX_TASKS] = {[0 ... MAX_TASKS-1] = {0, TASK_INVALID, ATOMIC_INIT(0), \
-			 SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, 0, 0, 0}};
+			 SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, 0, 0, 0, 0}};
 static spinlock_irqsave_t table_lock = SPINLOCK_IRQSAVE_INIT;
 
 /** @brief helper function for the assembly code to determine the current task
@@ -193,6 +194,7 @@ static int create_task(tid_t* id, entry_point_t ep, void* arg)
 			task_table[i].flags = TASK_DEFAULT_FLAGS;
 			task_table[i].start_heap = 0;
 			task_table[i].end_heap = 0;
+			task_table[i].start_tick = get_clock_tick();
 			task_table[i].status = TASK_READY;
 			break;
 		}
@@ -256,6 +258,7 @@ int sys_fork(void)
 			task_table[i].outbox[per_core(current_task)->id] = &per_core(current_task)->inbox; 
 			task_table[i].flags = per_core(current_task)->flags;
 			memcpy(&(task_table[i].fpu), &(per_core(current_task)->fpu), sizeof(union fpu_state));
+			task_table[i].start_tick = get_clock_tick();
 			task_table[i].start_heap = 0;
 			task_table[i].end_heap = 0;
 
diff --git a/kernel/tests.c b/kernel/tests.c
index 08f3d45f..fd1c200f 100644
--- a/kernel/tests.c
+++ b/kernel/tests.c
@@ -113,8 +113,8 @@ int test_init(void)
 	//create_kernel_task(NULL, producer, NULL);
 	//create_kernel_task(NULL, consumer, NULL);
 	//create_user_task(NULL, "/bin/hello", argv);
-	create_user_task(NULL, "/bin/tests", argv);
-	//create_user_task(NULL, "/bin/jacobi", argv);
+	//create_user_task(NULL, "/bin/tests", argv);
+	create_user_task(NULL, "/bin/jacobi", argv);
 
 	return 0;
 }
diff --git a/newlib/examples/jacobi.c b/newlib/examples/jacobi.c
index 6c0e7a20..f4804aa3 100644
--- a/newlib/examples/jacobi.c
+++ b/newlib/examples/jacobi.c
@@ -96,7 +96,7 @@ int main(int argc, char **argv)
 	double**	A=0;
 	double*		X;
 	double*		X_old, xi;
-	double		start,stop;
+	clock_t		start, end;
 
 	if (generate_empty_matrix(&A,MATRIX_SIZE) < 0)
 	{
@@ -126,7 +126,7 @@ int main(int argc, char **argv)
 	iter_start = 0;
 	iter_end = MATRIX_SIZE;
 
-	//start = RCCE_wtime();
+	start = clock();
 
 	while(1) 
 	{
@@ -158,7 +158,7 @@ int main(int argc, char **argv)
 		}
 	}
 
-	//stop = RCCE_wtime();
+	end = clock();
 	
 	if (MATRIX_SIZE < 16) {
 		printf("Print the solution...\n");
@@ -187,7 +187,7 @@ int main(int argc, char **argv)
 
 	printf("\nmatrix size: %d x %d\n", MATRIX_SIZE, MATRIX_SIZE);
 	printf("number of iterations: %d\n", iterations);
-	//printf("calculation time: %f s\n", stop-start);
+	printf("calculation time: %f s\n", (float) (end-start) / (float) CLOCKS_PER_SEC);
 
 	free((void*) X_old);
 	free((void*) X);
diff --git a/newlib/src/libgloss/metalsvm/syscall.h b/newlib/src/libgloss/metalsvm/syscall.h
index 5390a27b..6d137508 100644
--- a/newlib/src/libgloss/metalsvm/syscall.h
+++ b/newlib/src/libgloss/metalsvm/syscall.h
@@ -38,6 +38,7 @@ extern "C" {
 #define __NR_fork		12
 #define __NR_wait		13
 #define __NR_execve		14
+#define __NR_times		15
 
 #define _STR(token)             #token
 #define _SYSCALLSTR(x)          "int $" _STR(x) " "
diff --git a/newlib/src/libgloss/metalsvm/times.c b/newlib/src/libgloss/metalsvm/times.c
index ed7baa16..5c4ebcd7 100644
--- a/newlib/src/libgloss/metalsvm/times.c
+++ b/newlib/src/libgloss/metalsvm/times.c
@@ -24,14 +24,20 @@
 #include <errno.h>
 #undef errno
 extern int errno;
-#include "warning.h"
+#include "syscall.h"
 
 clock_t
 _DEFUN (_times, (buf),
         struct tms *buf)
 {
-  errno = ENOSYS;
-  return -1;
-}
+	clock_t clock = 0;
+	int ret;
 
-stub_warning(_times)
+	ret = SYSCALL2(__NR_times, buf, &clock);
+	if (ret < 0) {
+		errno = -ret;
+		return (clock_t) -1;
+	}
+
+	return clock;
+}

From 2a5411c987f3c6d5292bded64791cef9d6a3b900 Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Thu, 21 Apr 2011 09:28:56 -0700
Subject: [PATCH 25/36] fix bug in the initialization of caching strategy

---
 arch/x86/kernel/entry.asm | 8 ++++----
 kernel/tests.c            | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/entry.asm b/arch/x86/kernel/entry.asm
index ca3afaa1..29d8a506 100644
--- a/arch/x86/kernel/entry.asm
+++ b/arch/x86/kernel/entry.asm
@@ -65,10 +65,10 @@ stublet:
 ; initialize stack pointer.
     mov esp, default_stack_pointer
     mov eax, cr0
-; enable cache, disable paging and fpu emulation
-    and eax, 0x3ffffffb
-; ...monitor coprocessor and turn on FPU exceptions
-    or eax, 0x22
+; enable caching, disable paging and fpu emulation
+    and eax, 0x1ffffffb
+; ...and turn on FPU exceptions
+    or eax, 0x20
     mov cr0, eax
 ; clears the current pgd entry
     xor eax, eax
diff --git a/kernel/tests.c b/kernel/tests.c
index fd1c200f..08f3d45f 100644
--- a/kernel/tests.c
+++ b/kernel/tests.c
@@ -113,8 +113,8 @@ int test_init(void)
 	//create_kernel_task(NULL, producer, NULL);
 	//create_kernel_task(NULL, consumer, NULL);
 	//create_user_task(NULL, "/bin/hello", argv);
-	//create_user_task(NULL, "/bin/tests", argv);
-	create_user_task(NULL, "/bin/jacobi", argv);
+	create_user_task(NULL, "/bin/tests", argv);
+	//create_user_task(NULL, "/bin/jacobi", argv);
 
 	return 0;
 }

From a563c756eebefefc31d66d5c6f34b774669ca9ec Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Thu, 21 Apr 2011 09:28:56 -0700
Subject: [PATCH 26/36] fix bug in the setup of the caching strategy

---
 arch/x86/kernel/entry.asm | 8 ++++----
 kernel/tests.c            | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/entry.asm b/arch/x86/kernel/entry.asm
index ca3afaa1..29d8a506 100644
--- a/arch/x86/kernel/entry.asm
+++ b/arch/x86/kernel/entry.asm
@@ -65,10 +65,10 @@ stublet:
 ; initialize stack pointer.
     mov esp, default_stack_pointer
     mov eax, cr0
-; enable cache, disable paging and fpu emulation
-    and eax, 0x3ffffffb
-; ...monitor coprocessor and turn on FPU exceptions
-    or eax, 0x22
+; enable caching, disable paging and fpu emulation
+    and eax, 0x1ffffffb
+; ...and turn on FPU exceptions
+    or eax, 0x20
     mov cr0, eax
 ; clears the current pgd entry
     xor eax, eax
diff --git a/kernel/tests.c b/kernel/tests.c
index fd1c200f..08f3d45f 100644
--- a/kernel/tests.c
+++ b/kernel/tests.c
@@ -113,8 +113,8 @@ int test_init(void)
 	//create_kernel_task(NULL, producer, NULL);
 	//create_kernel_task(NULL, consumer, NULL);
 	//create_user_task(NULL, "/bin/hello", argv);
-	//create_user_task(NULL, "/bin/tests", argv);
-	create_user_task(NULL, "/bin/jacobi", argv);
+	create_user_task(NULL, "/bin/tests", argv);
+	//create_user_task(NULL, "/bin/jacobi", argv);
 
 	return 0;
 }

From ea4a7477c5f4e216439d9bd8180c541eff81edae Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Thu, 21 Apr 2011 19:46:55 +0200
Subject: [PATCH 27/36] set CR0's MP flag to monitor the coprocessor

---
 arch/x86/kernel/entry.asm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/entry.asm b/arch/x86/kernel/entry.asm
index 29d8a506..5610c6b8 100644
--- a/arch/x86/kernel/entry.asm
+++ b/arch/x86/kernel/entry.asm
@@ -68,7 +68,7 @@ stublet:
 ; enable caching, disable paging and fpu emulation
     and eax, 0x1ffffffb
 ; ...and turn on FPU exceptions
-    or eax, 0x20
+    or eax, 0x22
     mov cr0, eax
 ; clears the current pgd entry
     xor eax, eax

From 900c67c2a039d5b83e0eeb3eef6f8306bb360dad Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Fri, 22 Apr 2011 09:31:33 +0200
Subject: [PATCH 28/36] remove obsolete flags

---
 arch/x86/mm/page.c        | 2 +-
 drivers/net/rtl8139.c     | 4 ++--
 include/metalsvm/stdlib.h | 4 ++--
 mm/memory.c               | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/x86/mm/page.c b/arch/x86/mm/page.c
index 75f34015..fe44d95d 100644
--- a/arch/x86/mm/page.c
+++ b/arch/x86/mm/page.c
@@ -599,7 +599,7 @@ static void pagefault_handler(struct state *s)
 		if (BUILTIN_EXPECT(!phyaddr, 0))
 			goto default_handler;
 
-		if (map_region(viraddr, phyaddr, 1, MAP_USER_SPACE|MAP_HEAP) == viraddr) {
+		if (map_region(viraddr, phyaddr, 1, MAP_USER_SPACE) == viraddr) {
 			memset((void*) viraddr, 0x00, PAGE_SIZE);
 			return;
 		}
diff --git a/drivers/net/rtl8139.c b/drivers/net/rtl8139.c
index 848164e3..54f74906 100644
--- a/drivers/net/rtl8139.c
+++ b/drivers/net/rtl8139.c
@@ -327,7 +327,7 @@ err_t rtl8139if_init(struct netif* netif)
 	memset(rtl8139if, 0, sizeof(rtl1839if_t));
 
 	/* allocate the receive buffer */
-	rtl8139if->rx_buffer = mem_allocation(8192+16, MAP_KERNEL_SPACE|MAP_HEAP|MAP_NO_CACHE);
+	rtl8139if->rx_buffer = mem_allocation(8192+16, MAP_KERNEL_SPACE|MAP_NO_CACHE);
 	if (!(rtl8139if->rx_buffer)) {
 		LWIP_DEBUGF(NETIF_DEBUG, ("rtl8139if_init: out of memory\n"));
 		kfree(rtl8139if, sizeof(rtl1839if_t));
@@ -336,7 +336,7 @@ err_t rtl8139if_init(struct netif* netif)
 	memset(rtl8139if->rx_buffer, 0, 8192+16);
 
 	/* allocate the send buffers */
-	rtl8139if->tx_buffer[0] = mem_allocation(4*4096, MAP_KERNEL_SPACE|MAP_HEAP|MAP_NO_CACHE);
+	rtl8139if->tx_buffer[0] = mem_allocation(4*4096, MAP_KERNEL_SPACE|MAP_NO_CACHE);
 	if (!(rtl8139if->tx_buffer[0])) {
 		LWIP_DEBUGF(NETIF_DEBUG, ("rtl8139if_init: out of memory\n"));
 		kfree(rtl8139if->rx_buffer, 8192+16);
diff --git a/include/metalsvm/stdlib.h b/include/metalsvm/stdlib.h
index fe824207..ad1cecdb 100644
--- a/include/metalsvm/stdlib.h
+++ b/include/metalsvm/stdlib.h
@@ -41,8 +41,8 @@ extern "C" {
 #define MAP_USER_SPACE		(1 << 1)
 #define MAP_PAGE_TABLE		(1 << 2)
 #define MAP_NO_CACHE		(1 << 3)
-#define MAP_STACK		(1 << 4)
-#define MAP_HEAP		(1 << 5)
+//#define MAP_STACK		(1 << 4)
+//#define MAP_HEAP		(1 << 5)
 #define MAP_CODE		(1 << 6)
 #define MAP_READONLY		(1 << 7)
 #ifdef CONFIG_ROCKCREEK
diff --git a/mm/memory.c b/mm/memory.c
index ed6057e7..376bb81e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -293,7 +293,7 @@ void* mem_allocation(size_t sz, uint32_t flags)
 
 void* kmalloc(size_t sz)
 {
-	return mem_allocation(sz, MAP_KERNEL_SPACE|MAP_HEAP);
+	return mem_allocation(sz, MAP_KERNEL_SPACE);
 }
 
 void kfree(void* addr, size_t sz)

From eab148011382cc3998a0c958a2edb952ecb0bc4a Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Fri, 22 Apr 2011 09:55:27 +0200
Subject: [PATCH 29/36] remove typo

---
 drivers/net/rtl8139.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/rtl8139.h b/drivers/net/rtl8139.h
index e0b9de05..43bd6fe4 100644
--- a/drivers/net/rtl8139.h
+++ b/drivers/net/rtl8139.h
@@ -19,8 +19,8 @@
  * This code based mostly on the online manual http://www.lowlevel.eu/wiki/RTL8139
  */
 
-#ifndef __HAVE_RTL839_H__
-#define __HAVE_RL8139_H__
+#ifndef __HAVE_RTL8139_H__
+#define __HAVE_RTL8139_H__
 
 #include <metalsvm/stddef.h>
 #include <metalsvm/mailbox.h>

From 1157306567e6189080e4e9bbed0fe1e22bff4ac8 Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Fri, 22 Apr 2011 23:19:53 -0700
Subject: [PATCH 30/36] remove typo

---
 drivers/net/rtl8139.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/rtl8139.c b/drivers/net/rtl8139.c
index 54f74906..4fbf150a 100644
--- a/drivers/net/rtl8139.c
+++ b/drivers/net/rtl8139.c
@@ -482,7 +482,7 @@ err_t rtl8139if_init(struct netif* netif)
 	/* maximum transfer unit */
 	netif->mtu = 1500;
 	/* broadcast capability */
-	netif->flags = NETIF_FLAG_BROADCAST | NETIF_FLAG_ETHARP | NETIF_FLAG_LINK_UP;
+	netif->flags |= NETIF_FLAG_BROADCAST | NETIF_FLAG_ETHARP | NETIF_FLAG_LINK_UP;
 	/* hardware address length */
 	netif->hwaddr_len = 6;
 

From 29ab43f28932f663a320d84800e3c2416eb87317 Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Sat, 23 Apr 2011 00:13:12 -0700
Subject: [PATCH 31/36] First steps to realize Inter-Core-Communication via
 RCCE

---
 arch/x86/include/asm/icc.h         | 60 ++++++++++++++++++++++++++++++
 arch/x86/include/asm/processor.h   | 13 -------
 arch/x86/mm/page.c                 |  1 +
 arch/x86/scc/Makefile              |  2 +-
 arch/x86/scc/{scc_init.c => icc.c} | 22 ++++++-----
 fs/initrd.c                        |  3 ++
 include/metalsvm/tasks.h           | 15 ++++++++
 kernel/main.c                      |  5 ++-
 kernel/syscall.c                   |  2 +
 kernel/tasks.c                     |  2 +
 mm/memory.c                        |  3 ++
 11 files changed, 104 insertions(+), 24 deletions(-)
 create mode 100644 arch/x86/include/asm/icc.h
 rename arch/x86/scc/{scc_init.c => icc.c} (93%)

diff --git a/arch/x86/include/asm/icc.h b/arch/x86/include/asm/icc.h
new file mode 100644
index 00000000..0ce1e64f
--- /dev/null
+++ b/arch/x86/include/asm/icc.h
@@ -0,0 +1,60 @@
+/* 
+ * Copyright 2011 Stefan Lankes, Chair for Operating Systems,
+ *                               RWTH Aachen University
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This file is part of MetalSVM.
+ */
+
+/*
+ * Types and function prototypes for MetalSVM's inter-core communication
+ */
+
+#ifndef __ARCH_ICC_H__
+#define __ARCH_ICC_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef CONFIG_ROCKCREEK
+
+typedef struct {
+	uint32_t addr;  // address of the initrd
+	uint32_t size;  // size of the initrd
+	int32_t  argc;  // number of RCCE arguments
+	char**   argv;  // RCCE arguments
+} bootinfo_t;
+
+extern bootinfo_t* bootinfo;
+
+typedef struct {
+	uint8_t type;
+	uint8_t tag;
+	uint32_t length;
+} icc_header_t;
+
+#define ICC_TYPE_IP	(1 << 0)
+#define ICC_TYPE_SVM	(1 << 1)
+
+int icc_init(void);
+void check_icc(void);
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index faea9d96..a1f6246a 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -39,19 +39,6 @@
 extern "C" {
 #endif
 
-#ifdef CONFIG_ROCKCREEK
-typedef struct {
-	uint32_t addr;	// address of the initrd
-	uint32_t size;	// size of the initrd
-	int32_t  argc;	// number of RCCE arguments
-	char**   argv;	// RCCE arguments
-} bootinfo_t;
-
-extern bootinfo_t* bootinfo;
-
-int scc_init(void);
-#endif
-
 /** @brief Read out time stamp counter
  *
  * The rdtsc asm command puts a 64 bit time stamp value
diff --git a/arch/x86/mm/page.c b/arch/x86/mm/page.c
index fe44d95d..184f8865 100644
--- a/arch/x86/mm/page.c
+++ b/arch/x86/mm/page.c
@@ -33,6 +33,7 @@
 #ifdef CONFIG_ROCKCREEK
 #include <asm/RCCE_lib.h>
 #include <asm/SCC_API.h>
+#include <asm/icc.h>
 #endif
 
 /*
diff --git a/arch/x86/scc/Makefile b/arch/x86/scc/Makefile
index f32bc11f..2003a259 100644
--- a/arch/x86/scc/Makefile
+++ b/arch/x86/scc/Makefile
@@ -1,4 +1,4 @@
-C_source := scc_init.c SCC_API.c iRCCE_admin.c iRCCE_send.c iRCCE_isend.c iRCCE_irecv.c iRCCE_recv.c iRCCE_get.c iRCCE_put.c iRCCE_synch.c RCCE_malloc.c RCCE_shmalloc.c RCCE_debug.c RCCE_qsort.c RCCE_DCMflush.c RCCE_send.c RCCE_recv.c RCCE_flags.c RCCE_comm.c RCCE_put.c RCCE_get.c RCCE_synch.c RCCE_bcast.c RCCE_admin.c # RCCE_power_management.c
+C_source := icc.c SCC_API.c iRCCE_admin.c iRCCE_send.c iRCCE_isend.c iRCCE_irecv.c iRCCE_recv.c iRCCE_get.c iRCCE_put.c iRCCE_synch.c RCCE_malloc.c RCCE_shmalloc.c RCCE_debug.c RCCE_qsort.c RCCE_DCMflush.c RCCE_send.c RCCE_recv.c RCCE_flags.c RCCE_comm.c RCCE_put.c RCCE_get.c RCCE_synch.c RCCE_bcast.c RCCE_admin.c # RCCE_power_management.c
 ASM_source := 
 MODULE := arch_x86_scc
 
diff --git a/arch/x86/scc/scc_init.c b/arch/x86/scc/icc.c
similarity index 93%
rename from arch/x86/scc/scc_init.c
rename to arch/x86/scc/icc.c
index d9f2628a..adcd26fb 100644
--- a/arch/x86/scc/scc_init.c
+++ b/arch/x86/scc/icc.c
@@ -17,13 +17,14 @@
 #include <metalsvm/processor.h>
 #include <metalsvm/errno.h>
 #include <asm/io.h>
+#ifdef CONFIG_ROCKCREEK
 #include <asm/RCCE.h>
 #include <asm/iRCCE.h>
 #include <asm/SCC_API.h>
-
-#ifdef CONFIG_ROCKCREEK
+#include <asm/icc.h>
 
 bootinfo_t* bootinfo = (bootinfo_t*) SCC_BOOTINFO;
+static int num_ues, my_ue;
 
 /* PSE bit for Pentium+ equals MPE (message buffer enable) flag in RCK! So, use it to create _PAGE_MPB symbol... */
 #define _CR4_MPE 0x00000800
@@ -67,10 +68,9 @@ static int scc_clear(void)
 	return 0;
 }
 
-int scc_init(void)
+int icc_init(void)
 {
-	int num_ranks;
-	int i, my_rank;
+	int i;
 	uint64_t start, end, ticks, freq = 533;
 	uint32_t cr4, msg = 0;
 
@@ -109,9 +109,9 @@ int scc_init(void)
 	// enable additional outputs	
 	RCCE_debug_set(RCCE_DEBUG_ALL);
 
-	my_rank   = RCCE_ue();
-	num_ranks = RCCE_num_ues();
-	kprintf("Got rank %d of %d ranks\n", my_rank, num_ranks);
+	my_ue   = RCCE_ue();
+	num_ues = RCCE_num_ues();
+	kprintf("Got rank %d of %d ranks\n", my_ue, num_ues);
 
 	i = ReadConfigReg(CRB_OWN+GLCFG0);
 	kprintf("glcfg0 0x%x\n", i);
@@ -119,7 +119,7 @@ int scc_init(void)
 	RCCE_barrier(&RCCE_COMM_WORLD);
 
 	kputs("RCCE test...\t");
-	if (my_rank == 0)
+	if (my_ue == 0)
 		msg = 0x4711;
 	if (RCCE_bcast((char*) &msg, sizeof(msg), 0, RCCE_COMM_WORLD) == RCCE_SUCCESS)
 		kprintf("successfull! (0x%x)\n", msg);
@@ -131,4 +131,8 @@ int scc_init(void)
 	return 0;
 }
 
+void check_icc(void)
+{
+}
+
 #endif
diff --git a/fs/initrd.c b/fs/initrd.c
index c3094b44..a0932eff 100644
--- a/fs/initrd.c
+++ b/fs/initrd.c
@@ -24,6 +24,9 @@
 #include <metalsvm/errno.h>
 #include <asm/multiboot.h>
 #include <asm/processor.h>
+#ifdef CONFIG_ROCKCREEK
+#include <asm/icc.h>
+#endif
 
 static vfs_node_t initrd_root;
 
diff --git a/include/metalsvm/tasks.h b/include/metalsvm/tasks.h
index 904f737a..36c9fb9a 100644
--- a/include/metalsvm/tasks.h
+++ b/include/metalsvm/tasks.h
@@ -32,6 +32,10 @@
 #include <metalsvm/stddef.h>
 #include <metalsvm/tasks_types.h>
 #include <asm/tasks.h>
+#include <asm/irqflags.h>
+#ifdef CONFIG_ROCKCREEK
+#include <asm/icc.h>
+#endif
 
 #ifdef __cplusplus
 extern "C" {
@@ -134,6 +138,17 @@ int sys_fork(void);
  */
 int sys_execve(const char* fname, char** argv, char** env);
 
+static inline void check_workqueues(void)
+{
+	uint32_t flags = irq_nested_disable();
+
+#ifdef CONFIG_ROCKCREEK
+	check_icc();
+#endif
+
+	irq_nested_enable(flags);
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/kernel/main.c b/kernel/main.c
index 8be29f22..0b7ef87f 100644
--- a/kernel/main.c
+++ b/kernel/main.c
@@ -30,6 +30,9 @@
 #include <asm/irq.h>
 #include <asm/irqflags.h>
 #include <asm/kb.h>
+#ifdef CONFIG_ROCKCREEK
+#include <asm/icc.h>
+#endif
 
 extern int test_init(void);
 
@@ -90,7 +93,7 @@ int main(void)
 	multitasking_init();
 	mmu_init();
 #ifdef CONFIG_ROCKCREEK
-	scc_init();
+	icc_init();
 #endif
    	initrd_init();
 
diff --git a/kernel/syscall.c b/kernel/syscall.c
index d3ed8a41..d3eba333 100644
--- a/kernel/syscall.c
+++ b/kernel/syscall.c
@@ -70,6 +70,8 @@ int syscall_handler(uint32_t sys_nr, ...)
 	int ret = -EINVAL;
 	va_list vl;
 
+	check_workqueues();
+
 	va_start(vl, sys_nr);
 
 	switch(sys_nr) 
diff --git a/kernel/tasks.c b/kernel/tasks.c
index 26774de7..b85f3cd1 100644
--- a/kernel/tasks.c
+++ b/kernel/tasks.c
@@ -710,6 +710,8 @@ void scheduler(void)
 	unsigned int i;
 	unsigned int new_id;
 
+	check_workqueues();
+
 #if MAX_CORES > 1
 	spinlock_irqsave_lock(&table_lock);
 #endif
diff --git a/mm/memory.c b/mm/memory.c
index 376bb81e..f70a84ee 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -29,6 +29,9 @@
 #ifdef CONFIG_MULTIBOOT
 #include <asm/multiboot.h>
 #endif
+#ifdef CONFIG_ROCKCREEK
+#include <asm/icc.h>
+#endif
 
 /* 
  * 0 => free

From 02f7fecc617dd221aade2921821c1876e1ffe300 Mon Sep 17 00:00:00 2001
From: Marian Ohligs <marian.ohligs@rwth-aachen.de>
Date: Sat, 23 Apr 2011 11:04:32 +0200
Subject: [PATCH 32/36] first stdin, stdout, stderr implementation (still buggy)

---
 drivers/stdin/stdin.c   |  4 +++-
 drivers/stdout/stdout.c |  2 +-
 kernel/syscall.c        | 22 ++++++++--------------
 kernel/tasks.c          |  6 +++++-
 kernel/tests.c          |  2 +-
 newlib/examples/hello.c | 29 +++++++++++++++++++++--------
 newlib/examples/test    |  1 +
 7 files changed, 40 insertions(+), 26 deletions(-)

diff --git a/drivers/stdin/stdin.c b/drivers/stdin/stdin.c
index 6b848082..7904f62a 100644
--- a/drivers/stdin/stdin.c
+++ b/drivers/stdin/stdin.c
@@ -28,7 +28,9 @@
 
 static ssize_t stdin_read(vfs_node_t* node, uint8_t* buffer, size_t size, off_t offset)
 {
-	kprintf("Keine Eingabe implementiert");
+	while(size) {
+		size = kputs((char*)buffer);
+	}
 	return size;
 }
 
diff --git a/drivers/stdout/stdout.c b/drivers/stdout/stdout.c
index 268a4009..29fbb035 100644
--- a/drivers/stdout/stdout.c
+++ b/drivers/stdout/stdout.c
@@ -33,7 +33,7 @@ static ssize_t stdout_read(vfs_node_t* node, uint8_t* buffer, size_t size, off_t
 
 static ssize_t stdout_write(vfs_node_t* node, uint8_t* buffer, size_t size,  off_t offset)
 {
-	kprintf("%s", buffer);
+	size = kprintf("%s\0", buffer);
 	return size;
 }
 
diff --git a/kernel/syscall.c b/kernel/syscall.c
index 809ea4ec..ca0651f5 100644
--- a/kernel/syscall.c
+++ b/kernel/syscall.c
@@ -32,7 +32,7 @@ static int sys_read(int fd, const char *buf, size_t len)
 			(uint8_t*)buf, len, 
 			per_core(current_task)->fildes_table[fd].offset);
 	per_core(current_task)->fildes_table[fd].offset += readbytes;
-	/*kprintf("fd:%i, Dateilaenge:%i, Dateiinhalt: %s \n", fd, len, buf);*/
+	//kprintf("fd:%i, Dateilaenge:%i, Dateiinhalt: %s \n", fd, len, buf);
 	/* Beware: still reading above file limit! */
 	return readbytes; 
 }
@@ -40,12 +40,8 @@ static int sys_read(int fd, const char *buf, size_t len)
 static int sys_write(int fd, const char *buf, size_t len)
 {
 	unsigned int wrotebytes;
-	/* per_core(current_task)->fildes_table[fd].node->write = 1; */
-	wrotebytes = write_fs(
-			per_core(current_task)->fildes_table[fd].node, 
-			(uint8_t*)buf, len, 0);
-	/* per_core(current_task)->fildes_table[fd].offset); */
-	/* kprintf("ins Dateis. geschr. -- fd:%i, Dateilaenge:%i, Dateiinhalt: %s \n", fd, len, buf); */
+	wrotebytes = write_fs(per_core(current_task)->fildes_table[fd].node, (uint8_t*)buf, len, per_core(current_task)->fildes_table[fd].offset);
+	//kprintf("ins Dateis. geschr. -- fd:%i, Dateilaenge:%i, Dateiinhalt: %s \n", fd, wrotebytes, buf);
 	per_core(current_task)->fildes_table[fd].offset += wrotebytes;
 
 	return wrotebytes;
@@ -54,7 +50,7 @@ static int sys_write(int fd, const char *buf, size_t len)
 static int sys_open(const char* file, int flags, int mode)
 {
 	int fd;
-	for (fd = 0; fd < MAX_FILDES; fd++) {
+	for (fd = 3; fd < MAX_FILDES; fd++) {
 		if (per_core(current_task)->fildes_table[fd].node == NULL) {
 			per_core(current_task)->fildes_table[fd].node = findnode_fs((char*) file);
 			return fd;
@@ -118,28 +114,26 @@ int syscall_handler(uint32_t sys_nr, ...)
 			int fd = va_arg(vl, int);
 			const char* buf = va_arg(vl, const char*);
  			size_t len = va_arg(vl, size_t);
-			kprintf("%s", buf);
-			//ret = sys_read(fd, buf, len);
+			ret = sys_read(fd, buf, len);
 			break;
 		}
 	case __NR_write: {
 			int fd = va_arg(vl, int);
 			const char* buf = va_arg(vl, const char*);
 			size_t len = va_arg(vl, size_t);
-			kprintf("%s", buf);
-			//ret = sys_write(fd, buf, len);
+			ret = sys_write(fd, buf, len);
 			break;
 		}
 	case __NR_open: {
 			const char* file = va_arg(vl, const char*);
 			int flags = va_arg(vl, int);
 			int mode = va_arg(vl, int);
-			//ret = sys_open(file, flags, mode);
+			ret = sys_open(file, flags, mode);
 			break;
 		}
 	case __NR_close: {
 			int fd = va_arg(vl, int);
-			//ret = sys_close(fd);
+			ret = sys_close(fd);
  			break;
  		}
 	case __NR_sbrk: {
diff --git a/kernel/tasks.c b/kernel/tasks.c
index e9341558..5414c1b9 100644
--- a/kernel/tasks.c
+++ b/kernel/tasks.c
@@ -184,6 +184,10 @@ static int create_task(tid_t* id, entry_point_t ep, void* arg)
 			mailbox_wait_msg_init(&task_table[i].inbox);
 			memset(task_table[i].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
 			task_table[i].outbox[per_core(current_task)->id] = &per_core(current_task)->inbox; 
+			task_table[i].fildes_table[0].node = findnode_fs("/dev/stdin");
+			task_table[i].fildes_table[1].node = findnode_fs("/dev/stdout");
+			task_table[i].fildes_table[2].node = findnode_fs("/dev/stderr");
+
 
 			if (id)
 				*id = i;	
@@ -533,7 +537,7 @@ int create_user_task(tid_t* id, const char* fname, char** argv)
 		}
 	}
 
-	if (argc < 0)
+	if (argc <= 0)
 		return -EINVAL;
 	if (buffer_size >= MAX_ARGS)
 		return -EINVAL;
diff --git a/kernel/tests.c b/kernel/tests.c
index c2d9a47b..71b91e20 100644
--- a/kernel/tests.c
+++ b/kernel/tests.c
@@ -102,7 +102,7 @@ static int STDCALL join_test(void* arg)
 
 int test_init(void)
 {
-	char* argv[] = {"/bin/tests", NULL};
+	char* argv[] = {"/bin/hello", NULL};
 
 	sem_init(&producing, 1);
 	sem_init(&consuming, 0);
diff --git a/newlib/examples/hello.c b/newlib/examples/hello.c
index 32382d08..861cb6f6 100644
--- a/newlib/examples/hello.c
+++ b/newlib/examples/hello.c
@@ -28,14 +28,27 @@ extern int errno;
 int main(int argc, char** argv)
 {
 	//int i;
-	//char* str = (char *)malloc(40 * sizeof(char));
-	//FILE* testfile;
- 	printf("hallo");
-	//testfile = fopen("/bin/test", "w+r");
-	//setbuf(testfile, NULL);
-	//fread(str, 1, 10, testfile);
-	//fwrite("wtest1\n", 1, 7, testfile);
-	//fread(str, 1, 10, testfile);
+	char* str = (char *)malloc(20 * sizeof(char));
+	char* str2 = (char *)malloc(20 * sizeof(char));
+
+	FILE* testfile;
+	testfile = fopen("/bin/test", "w+r");
+	setbuf(testfile, NULL);
+	fflush(NULL);
+	fread(str2, 1, 10, testfile);
+	fflush(NULL);
+	printf("Datei gelesen (/bin/test):%s\n", str2);
+	fflush(NULL);
+
+	setbuf(testfile, NULL);
+	fflush(NULL);
+	fwrite("wtest1", 1, 7, testfile);
+	setbuf(testfile, NULL);
+
+	fread(str, 1, 10, testfile);
+	fflush(NULL);
+
+	//printf("Aus Datei gelesen (/bin/test):%s\n", str);
 
 	return errno;
 }
diff --git a/newlib/examples/test b/newlib/examples/test
index 51d96d29..3d6ab3f2 100644
--- a/newlib/examples/test
+++ b/newlib/examples/test
@@ -1 +1,2 @@
 HalloXA!!
+

From 3a0e42eef1a3bdc365940872628782d23841dc0d Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Sun, 24 Apr 2011 07:27:27 -0700
Subject: [PATCH 33/36] add ping pong test and some bug fixes in the ICC part

---
 arch/x86/include/asm/icc.h |   9 ++--
 arch/x86/scc/icc.c         | 100 ++++++++++++++++++++++++++++++++++++-
 include/metalsvm/tasks.h   |   2 +-
 include/metalsvm/time.h    |   8 ++-
 kernel/tests.c             |  18 +++++++
 5 files changed, 131 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/icc.h b/arch/x86/include/asm/icc.h
index 0ce1e64f..31fdb2ef 100644
--- a/arch/x86/include/asm/icc.h
+++ b/arch/x86/include/asm/icc.h
@@ -45,11 +45,14 @@ typedef struct {
 	uint32_t length;
 } icc_header_t;
 
-#define ICC_TYPE_IP	(1 << 0)
-#define ICC_TYPE_SVM	(1 << 1)
+#define ICC_TYPE_IP		(1 << 0)
+#define ICC_TYPE_SVM		(1 << 1)
+#define ICC_TYPE_PINGREQUEST	(1 << 2)
+#define ICC_TYPE_PINGRESPONSE	(1 << 3)
 
 int icc_init(void);
-void check_icc(void);
+int icc_ping();
+void icc_check(void);
 
 #endif
 
diff --git a/arch/x86/scc/icc.c b/arch/x86/scc/icc.c
index adcd26fb..58a89be5 100644
--- a/arch/x86/scc/icc.c
+++ b/arch/x86/scc/icc.c
@@ -17,6 +17,7 @@
 #include <metalsvm/processor.h>
 #include <metalsvm/errno.h>
 #include <asm/io.h>
+#include <asm/irqflags.h>
 #ifdef CONFIG_ROCKCREEK
 #include <asm/RCCE.h>
 #include <asm/iRCCE.h>
@@ -29,6 +30,9 @@ static int num_ues, my_ue;
 /* PSE bit for Pentium+ equals MPE (message buffer enable) flag in RCK! So, use it to create _PAGE_MPB symbol... */
 #define _CR4_MPE 0x00000800
 
+/* maximum number of cores on the SCC */
+#define MAX_SCC_CORES	(NUM_ROWS*NUM_COLS*NUM_CORES)
+
 /*
  * This is the modified MPB program, which is part of the RCCE distribution (src/mpb.c).
  *
@@ -131,8 +135,102 @@ int icc_init(void)
 	return 0;
 }
 
-void check_icc(void)
+int icc_ping(int ue)
 {
+	icc_header_t ping_header = {ICC_TYPE_PINGREQUEST, 0, sizeof(uint64_t)};
+	uint64_t tsc;
+	uint32_t flags;
+	iRCCE_SEND_REQUEST send_request;
+
+	if (BUILTIN_EXPECT(ue == my_ue, 0))
+		return -EINVAL;
+	if (BUILTIN_EXPECT((ue < 0) || (ue >= num_ues), 0))
+		return -EINVAL;
+
+	tsc = rdtsc();
+
+	// iRCCE is not thread safe => disable interrupts
+	flags = irq_nested_disable();
+
+	iRCCE_isend((char*) &ping_header, sizeof(icc_header_t), ue, NULL);
+	iRCCE_isend((char*) &tsc, sizeof(uint64_t), ue, &send_request);
+
+	// waiting for the completion
+	while(iRCCE_isend_test(&send_request, NULL) != iRCCE_SUCCESS)
+		icc_check(); // oh, we have time to check incoming requests
+
+	irq_nested_enable(flags);
+
+	return 0;
+}
+
+static void interpret_header(icc_header_t* header, int recv_ue)
+{
+	//kprintf("Got ICC message %d from %d\n", header->type, recv_ue);
+
+	switch(header->type)
+	{
+	case ICC_TYPE_PINGREQUEST: {
+			icc_header_t response = {ICC_TYPE_PINGRESPONSE, 0, sizeof(uint64_t)};
+			iRCCE_RECV_REQUEST recv_req;
+			uint64_t tsc;
+
+			iRCCE_isend((char*) &response, sizeof(icc_header_t), recv_ue, NULL);
+			if (iRCCE_irecv((char*) &tsc, sizeof(uint64_t), recv_ue, &recv_req) != iRCCE_SUCCESS) 
+				iRCCE_irecv_wait(&recv_req);
+
+			iRCCE_isend((char*) &tsc, sizeof(uint64_t), recv_ue, NULL);
+			iRCCE_isend_push();
+		}
+		break;
+	case ICC_TYPE_PINGRESPONSE: {
+			uint64_t start, end;
+			iRCCE_RECV_REQUEST recv_req;
+
+			if (iRCCE_irecv((char*) &start, sizeof(uint64_t), recv_ue, &recv_req) != iRCCE_SUCCESS)
+				iRCCE_irecv_wait(&recv_req);
+			end = rdtsc();
+			kprintf("Receive ping response. Ticks: %d\n", end-start);
+		}
+		break;
+	default:
+		kprintf("Receive unknown ICC message (%d)\n", header->type);
+	}
+}
+
+/*
+ * On entry to this function, interrupts are already disabled
+ * => No race by using the static variables
+ */
+void icc_check(void)
+{
+	static icc_header_t header[MAX_SCC_CORES];
+	static iRCCE_RECV_REQUEST request[MAX_SCC_CORES];
+	static int8_t pending[MAX_SCC_CORES] = {[0 ... MAX_SCC_CORES-1] = 0};
+	int i, ret;
+
+	// pushes the progress of non-blocking communication requests
+	iRCCE_isend_push();
+	iRCCE_irecv_push();
+
+	for(i=0; i<num_ues; i++) {
+		if (i == my_ue)
+			continue;
+
+		if (!pending[i]) {
+			ret = iRCCE_irecv((char*) (header+i), sizeof(icc_header_t), i, request+i);
+			if (ret == iRCCE_SUCCESS)
+				interpret_header(header+i, i);
+			else
+				pending[i] = 1;
+		} else {
+			ret = iRCCE_irecv_test(request+i, NULL);
+			if (ret == iRCCE_SUCCESS) {
+				interpret_header(header+i, i);
+				pending[i] = 0;
+			}
+		}
+	}	
 }
 
 #endif
diff --git a/include/metalsvm/tasks.h b/include/metalsvm/tasks.h
index 36c9fb9a..5329fc61 100644
--- a/include/metalsvm/tasks.h
+++ b/include/metalsvm/tasks.h
@@ -143,7 +143,7 @@ static inline void check_workqueues(void)
 	uint32_t flags = irq_nested_disable();
 
 #ifdef CONFIG_ROCKCREEK
-	check_icc();
+	icc_check();
 #endif
 
 	irq_nested_enable(flags);
diff --git a/include/metalsvm/time.h b/include/metalsvm/time.h
index 702207b9..aa47e47a 100644
--- a/include/metalsvm/time.h
+++ b/include/metalsvm/time.h
@@ -69,7 +69,13 @@ void timer_wait(unsigned int ticks);
  */
 uint64_t get_clock_tick(void);
 
-static inline void sleep(unsigned int i) { timer_wait(i*TIMER_FREQ); }
+/** @brief sleep some seconds
+ *
+ * This function sleeps some seconds
+ *
+ * @param sec Number of seconds to wait
+ */
+static inline void sleep(unsigned int sec) { timer_wait(sec*TIMER_FREQ); }
 
 #ifdef __cplusplus
 }
diff --git a/kernel/tests.c b/kernel/tests.c
index 08f3d45f..72869600 100644
--- a/kernel/tests.c
+++ b/kernel/tests.c
@@ -24,6 +24,9 @@
 #include <metalsvm/semaphore.h>
 #include <metalsvm/mailbox.h>
 #include <metalsvm/syscall.h>
+#ifdef CONFIG_ROCKCREEK
+#include <asm/icc.h>
+#endif
 
 static sem_t 		consuming, producing;
 static mailbox_int32_t	mbox;
@@ -83,6 +86,20 @@ static int STDCALL foo(void* arg)
 	return 42;
 }
 
+#ifdef CONFIG_ROCKCREEK
+static int STDCALL ping(void* arg)
+{
+	int i;
+
+	for(i=0; i<20; i++) {
+		icc_ping(1);
+		sleep(1);
+	}
+
+	return 0;
+}
+#endif
+
 static int STDCALL join_test(void* arg)
 {
 	tid_t 	id, ret;
@@ -112,6 +129,7 @@ int test_init(void)
 	//create_kernel_task(NULL, join_test, NULL);
 	//create_kernel_task(NULL, producer, NULL);
 	//create_kernel_task(NULL, consumer, NULL);
+	//create_kernel_task(NULL, ping, NULL);
 	//create_user_task(NULL, "/bin/hello", argv);
 	create_user_task(NULL, "/bin/tests", argv);
 	//create_user_task(NULL, "/bin/jacobi", argv);

From 4c370ad3774128917ff1f2f9b73248fbe5a4bf19 Mon Sep 17 00:00:00 2001
From: Marian Ohligs <marian.ohligs@rwth-aachen.de>
Date: Tue, 26 Apr 2011 16:43:57 +0200
Subject: [PATCH 34/36] Bug fixed in stdout: printf is working

some test changes in initrd_write
---
 drivers/stdout/stdout.c |  5 ++-
 fs/initrd.c             | 80 ++++++++++++++++++++++++++++++++++++++---
 include/metalsvm/fs.h   |  3 ++
 kernel/syscall.c        |  2 +-
 newlib/examples/hello.c | 18 +++-------
 newlib/examples/test    |  3 +-
 6 files changed, 90 insertions(+), 21 deletions(-)

diff --git a/drivers/stdout/stdout.c b/drivers/stdout/stdout.c
index 29fbb035..99d82ad7 100644
--- a/drivers/stdout/stdout.c
+++ b/drivers/stdout/stdout.c
@@ -33,7 +33,10 @@ static ssize_t stdout_read(vfs_node_t* node, uint8_t* buffer, size_t size, off_t
 
 static ssize_t stdout_write(vfs_node_t* node, uint8_t* buffer, size_t size,  off_t offset)
 {
-	size = kprintf("%s\0", buffer);
+	int i;
+	for (i = 0; i<size; i++, buffer++) {
+		kputchar(*buffer);
+	}
 	return size;
 }
 
diff --git a/fs/initrd.c b/fs/initrd.c
index 9f3f17da..747b2e9a 100644
--- a/fs/initrd.c
+++ b/fs/initrd.c
@@ -95,13 +95,85 @@ static ssize_t initrd_read(vfs_node_t* node, uint8_t* buffer, size_t size, off_t
  
 static ssize_t initrd_write(vfs_node_t* node, uint8_t* buffer, size_t size, off_t offset)
 {
+	uint32_t i, writtenbytes = 0, writebytes = 0;
+	char* data = NULL;
 	block_list_t* blist = &node->block_list;
-	char* data = (char*) blist->data[0];
 
-	//////////ACHTUNG keine Überprüfung der Blcoklängen etc.
-	memcpy(data, buffer, size);
+	do {
+		data = (char*) blist->data[0];
+		if ((size - writtenbytes) >= MAX_DATABLOCKS) 
+			writebytes = MAX_DATABLOCKS;
+		else
+			writebytes = size - writtenbytes;
+		
+		memcpy(data, buffer, writebytes);
+		writtenbytes += writebytes;
+		//kprintf("geschrieben: %i", writtenbytes);
 
-	return size;
+		if (!blist->next) {
+			blist->next = (block_list_t*) kmalloc(sizeof(block_list_t));
+			if (blist->next) {
+				memset(blist->next, 0x00, sizeof(block_list_t));
+			}
+		}
+		
+		blist = blist->next;
+	} while(size > writtenbytes);
+
+	return writtenbytes;
+
+/*
+	uint32_t i, pos = 0, found = 0;
+	char* data = NULL;
+	block_list_t* blist = &node->block_list;
+
+	kprintf("tatsachen offset %i\n", offset);	
+
+	// searching for the valid data block 
+	if (offset) {
+		pos = offset / node->block_size;
+		offset = offset % node->block_size;
+	}
+	kprintf("Pos: %i, Offset: %i, %i", pos, offset, node->block_size);
+
+
+
+	do {
+		for(i=0; i<MAX_DATABLOCKS && !data; i++) {
+			if (blist->data[i]) {
+				found++;
+				if (found > pos)
+					data = (char*) blist->data[i];
+					break;
+			}	
+		}
+		  if all blocks have already been used, we have  to allocate a new one 
+		if (!blist->next) {
+			blist->next = (block_list_t*) kmalloc(sizeof(block_list_t));
+			if (blist->next) {
+				kprintf("?");
+				memset(blist->next, 0x00, sizeof(block_list_t));
+			}
+		}
+
+		blist = blist->next;
+	} while(blist && !data);
+
+	if (BUILTIN_EXPECT(!data, 0))
+		return 0;
+*/
+	/* 
+	 * If the data block is not large enough,
+	 * we copy only the rest of the current block.
+	 * The user has to restart the write operation
+	 * for the next block.
+         */
+/*	if (offset+size >= node->block_size)
+		size = node->block_size - offset;
+
+	memcpy(data + offset, buffer, size);
+
+*/	//return size;
 }
 
 
diff --git a/include/metalsvm/fs.h b/include/metalsvm/fs.h
index b9dbfb2e..58897aa2 100644
--- a/include/metalsvm/fs.h
+++ b/include/metalsvm/fs.h
@@ -196,6 +196,9 @@ vfs_node_t* findnode_fs(const char* name);
 /* @} */
 
 int null_init(vfs_node_t* node, const char* name);
+int stdin_init(vfs_node_t* node, const char* name);
+int stdout_init(vfs_node_t* node, const char* name);
+int stderr_init(vfs_node_t* node, const char* name);
 int initrd_init(void);
 
 #endif
diff --git a/kernel/syscall.c b/kernel/syscall.c
index 9fafcef0..85290fca 100644
--- a/kernel/syscall.c
+++ b/kernel/syscall.c
@@ -42,7 +42,7 @@ static int sys_write(int fd, const char *buf, size_t len)
 {
 	unsigned int wrotebytes;
 	wrotebytes = write_fs(per_core(current_task)->fildes_table[fd].node, (uint8_t*)buf, len, per_core(current_task)->fildes_table[fd].offset);
-	//kprintf("ins Dateis. geschr. -- fd:%i, Dateilaenge:%i, Dateiinhalt: %s \n", fd, wrotebytes, buf);
+	//kprintf("ins Dateis. geschr. -- fd:%i, Dateilaenge:%i, Schreiblaenge: %i, Dateiinhalt: %s \n", fd, len, wrotebytes, buf);
 	per_core(current_task)->fildes_table[fd].offset += wrotebytes;
 
 	return wrotebytes;
diff --git a/newlib/examples/hello.c b/newlib/examples/hello.c
index 861cb6f6..61f9efc5 100644
--- a/newlib/examples/hello.c
+++ b/newlib/examples/hello.c
@@ -29,26 +29,18 @@ int main(int argc, char** argv)
 {
 	//int i;
 	char* str = (char *)malloc(20 * sizeof(char));
-	char* str2 = (char *)malloc(20 * sizeof(char));
-
 	FILE* testfile;
 	testfile = fopen("/bin/test", "w+r");
 	setbuf(testfile, NULL);
 	fflush(NULL);
-	fread(str2, 1, 10, testfile);
-	fflush(NULL);
-	printf("Datei gelesen (/bin/test):%s\n", str2);
-	fflush(NULL);
+	fwrite("wsblablaxxxyyyyzzzzzz", 1, 19, testfile);
+	fclose(testfile);
 
+	testfile = fopen("/bin/test", "w+r");
 	setbuf(testfile, NULL);
+	fread(str, 1, 20, testfile);
 	fflush(NULL);
-	fwrite("wtest1", 1, 7, testfile);
-	setbuf(testfile, NULL);
-
-	fread(str, 1, 10, testfile);
-	fflush(NULL);
-
-	//printf("Aus Datei gelesen (/bin/test):%s\n", str);
+	printf("Aus Datei gelesen (/bin/test):%s\n", str);
 
 	return errno;
 }
diff --git a/newlib/examples/test b/newlib/examples/test
index 3d6ab3f2..d00491fd 100644
--- a/newlib/examples/test
+++ b/newlib/examples/test
@@ -1,2 +1 @@
-HalloXA!!
-
+1

From f785bcf9a4fd293952f2b25be090987c23f28096 Mon Sep 17 00:00:00 2001
From: Marian Ohligs <marian.ohligs@rwth-aachen.de>
Date: Tue, 26 Apr 2011 16:49:10 +0200
Subject: [PATCH 35/36] Fix Bug in stderr (same as stdout)

---
 drivers/stderr/stderr.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/stderr/stderr.c b/drivers/stderr/stderr.c
index effc5942..80b83300 100644
--- a/drivers/stderr/stderr.c
+++ b/drivers/stderr/stderr.c
@@ -33,7 +33,11 @@ static ssize_t stderr_read(vfs_node_t* node, uint8_t* buffer, size_t size, off_t
 
 static ssize_t stderr_write(vfs_node_t* node, uint8_t* buffer, size_t size,  off_t offset)
 {
-	kprintf("\nFehler: %s", buffer);
+	kprintf("\nFehler: ");
+	int i;
+	for (i = 0; i<size; i++, buffer++) {
+		kputchar(*buffer);
+	}
 	return size;
 }
 

From 2f6bf8219f61400a8afc0012af9e4d5e371500b8 Mon Sep 17 00:00:00 2001
From: Marian Ohligs <marian.ohligs@rwth-aachen.de>
Date: Tue, 26 Apr 2011 17:01:09 +0200
Subject: [PATCH 36/36] add some drivers Makefiles

---
 drivers/stderr/Makefile | 4 ++++
 drivers/stdin/Makefile  | 4 ++++
 drivers/stdout/Makefile | 4 ++++
 3 files changed, 12 insertions(+)
 create mode 100644 drivers/stderr/Makefile
 create mode 100644 drivers/stdin/Makefile
 create mode 100644 drivers/stdout/Makefile

diff --git a/drivers/stderr/Makefile b/drivers/stderr/Makefile
new file mode 100644
index 00000000..05d06d5c
--- /dev/null
+++ b/drivers/stderr/Makefile
@@ -0,0 +1,4 @@
+C_source := stderr.c
+MODULE := drivers_stderr
+
+include $(TOPDIR)/Makefile.inc
diff --git a/drivers/stdin/Makefile b/drivers/stdin/Makefile
new file mode 100644
index 00000000..f2429096
--- /dev/null
+++ b/drivers/stdin/Makefile
@@ -0,0 +1,4 @@
+C_source := stdin.c
+MODULE := drivers_stdin
+
+include $(TOPDIR)/Makefile.inc
diff --git a/drivers/stdout/Makefile b/drivers/stdout/Makefile
new file mode 100644
index 00000000..a7053f06
--- /dev/null
+++ b/drivers/stdout/Makefile
@@ -0,0 +1,4 @@
+C_source := stdout.c
+MODULE := drivers_stdout
+
+include $(TOPDIR)/Makefile.inc