From e2c546a782e89be59df0091648b062a325d03992 Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Tue, 2 Aug 2011 07:17:49 -0700
Subject: [PATCH 01/37] first steps to realize an SVM subsystem

---
 arch/x86/include/asm/svm.h | 48 ++++++++++++++++++++++++++++++++++++++
 arch/x86/mm/Makefile       |  2 +-
 arch/x86/mm/svm.c          | 38 ++++++++++++++++++++++++++++++
 arch/x86/scc/RCCE_admin.c  |  2 +-
 4 files changed, 88 insertions(+), 2 deletions(-)
 create mode 100644 arch/x86/include/asm/svm.h
 create mode 100644 arch/x86/mm/svm.c

diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
new file mode 100644
index 00000000..090d1072
--- /dev/null
+++ b/arch/x86/include/asm/svm.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2011 Stefan Lankes, Chair for Operating Systems,
+ *                               RWTH Aachen University
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This file is part of MetalSVM. 
+ */
+
+#ifndef __ARCH_SVM_H__
+#define __ARCH_SVM_H__
+
+#include <metalsvm/stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** @brief Memory allocator of the SVM subsystem.
+ *
+ * Like RCCE function, belongs svmmalloc to the synchronous 
+ * function. 
+ *
+ * @return Pointer to the new memory range
+ */
+void* svmmalloc(size_t size);
+
+/** @brief Frees memory, which is managed by the SVM subsystem
+ *
+ * Like RCCE function, belongs svmfree to the synchronous function. 
+ */
+void svmfree(void* addr);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 2f9b1834..09392539 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,4 +1,4 @@
-C_source := page.c
+C_source := page.c svm.c
 MODULE := arch_x86_mm
 
 include $(TOPDIR)/Makefile.inc
diff --git a/arch/x86/mm/svm.c b/arch/x86/mm/svm.c
new file mode 100644
index 00000000..06e4d3d4
--- /dev/null
+++ b/arch/x86/mm/svm.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2011 Stefan Lankes, Chair for Operating Systems,
+ *                               RWTH Aachen University
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This file is part of MetalSVM. 
+ */
+
+#include <metalsvm/stddef.h>
+#include <metalsvm/stdio.h>
+#include <metalsvm/stdlib.h>
+#include <asm/svm.h>
+#ifdef CONFIG_ROCKCREEK
+#include <asm/RCCE_lib.h>
+#include <asm/SCC_API.h>
+#include <asm/icc.h>
+#endif
+
+void* svmmalloc(size_t size)
+{
+	return (void*) RCCE_shmalloc(size);
+}
+
+void svmfree(void* addr)
+{
+	RCCE_free((t_vcharp) addr);
+}
diff --git a/arch/x86/scc/RCCE_admin.c b/arch/x86/scc/RCCE_admin.c
index dc7db27d..0c9f9303 100644
--- a/arch/x86/scc/RCCE_admin.c
+++ b/arch/x86/scc/RCCE_admin.c
@@ -336,7 +336,7 @@ int RCCE_init(
   RCCE_malloc_init(RCCE_comm_buffer[RCCE_IAM],RCCE_BUFF_SIZE);
 #ifdef SHMADD
 
-  RCCE_shmalloc_init(map_region(NULL, RC_SHM_BUFFER_START()+RCCE_SHM_BUFFER_offset, RCCE_SHM_SIZE_MAX/PAGE_SIZE, MAP_KERNEL_SPACE|MAP_NO_CACHE), RCCE_SHM_SIZE_MAX);
+  RCCE_shmalloc_init(map_region(NULL, RC_SHM_BUFFER_START()+RCCE_SHM_BUFFER_offset, RCCE_SHM_SIZE_MAX/PAGE_SIZE, MAP_KERNEL_SPACE|MAP_MPE), RCCE_SHM_SIZE_MAX);
 #ifdef SHMDBG
   kprintf("\n%d:%s:%d: RCCE_SHM_BUFFER_offset, RCCE_SHM_SIZE_MAX: %x %x\n", RCCE_IAM, 
     __FILE__,__LINE__,RCCE_SHM_BUFFER_offset ,RCCE_SHM_SIZE_MAX);

From 86b874250ddbca879009d9a97bc4d4be9ac8872a Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Mon, 15 Aug 2011 01:08:19 -0700
Subject: [PATCH 02/37] remove compiling errors by disabling the LwIP stack

---
 kernel/tasks.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/kernel/tasks.c b/kernel/tasks.c
index 2dec1348..413ce994 100644
--- a/kernel/tasks.c
+++ b/kernel/tasks.c
@@ -243,7 +243,9 @@ static int create_task(tid_t* id, internal_entry_point_t ep, void* arg)
 
 			task_table[i].start_heap = 0;
 			task_table[i].end_heap = 0;
+#ifdef CONFIG_LWIP
 			task_table[i].lwip_err = 0;
+#endif
 			task_table[i].start_tick = get_clock_tick();
 			break;
 		}
@@ -309,7 +311,9 @@ int sys_fork(void)
 			task_table[i].start_tick = get_clock_tick();
 			task_table[i].start_heap = 0;
 			task_table[i].end_heap = 0;
+#ifdef CONFIG_LWIP
 			task_table[i].lwip_err = 0;
+#endif
 
 			ret = arch_fork(task_table+i);
 

From d641f5a2117e48fbc074babf771e316af0b52868 Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Mon, 15 Aug 2011 02:53:01 -0700
Subject: [PATCH 03/37] fix bug in reading the FPGA registers to determine the
 size of the private memory

---
 mm/memory.c | 19 +++++--------------
 1 file changed, 5 insertions(+), 14 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 8b12f4f3..efb1a0ae 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -152,8 +152,8 @@ int mmu_init(void)
 		}
 	} 
 #elif defined(CONFIG_ROCKCREEK)
-	/* of course, the first twenty slots belong to the private memory */
-	for(addr=0x00; addr<20*0x1000000; addr+=PAGE_SIZE) {
+	/* of course, the first slots belong to the private memory */
+	for(addr=0x00; addr<1*0x1000000; addr+=PAGE_SIZE) {
 		page_clear_mark(addr >> PAGE_SHIFT);
 		if (addr > addr + PAGE_SIZE)
 			break;
@@ -161,15 +161,6 @@ int mmu_init(void)
 		atomic_int32_inc(&total_available_pages);	
 	}
 
-	// Note: The last slot belongs always to the private memory.
-	for(addr=0xFF000000; addr<0xFFFFFFFF; addr+=PAGE_SIZE) {
-		page_clear_mark(addr >> PAGE_SHIFT);
-		if (addr > addr + PAGE_SIZE)
-			break;
-		atomic_int32_inc(&total_pages);
-		atomic_int32_inc(&total_available_pages);
-	}
-
 	/*
 	 * Mark the bootinfo as used.
 	 */
@@ -219,14 +210,14 @@ int mmu_init(void)
 	 * Now, we are able to read the FPGA registers and to
 	 * determine the number of slots for private memory.
 	 */
-	uint32_t slots = *((volatile uint32_t*) (FPGA_BASE + 0x8244));
+	uint32_t slots = *((volatile uint8_t*) (FPGA_BASE + 0x8244));
 	if (slots == 0)
-		slots = 21;
+		slots = 1;
 
 	kprintf("MetalSVM use %d slots for private memory\n", slots);
 
 	// define the residual private slots as free
-	for(addr=20*0x1000000; addr<(slots-1)*0x1000000; addr+=PAGE_SIZE) {
+	for(addr=1*0x1000000; addr<slots*0x1000000; addr+=PAGE_SIZE) {
 		page_clear_mark(addr >> PAGE_SHIFT);
 		if (addr > addr + PAGE_SIZE)
 			break;

From 329acebfcd7d644652267e24d19fcc89c7f174f6 Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Mon, 15 Aug 2011 02:56:26 -0700
Subject: [PATCH 04/37] Merge Slots 0x00 and 0xff to a single 16MB slot

---
 tools/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/Makefile b/tools/Makefile
index baf8febe..624860a1 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -50,7 +50,7 @@ SCC: scc_bootinfo.bin scc_setup.bin reset_vector.bin initrd.img bin2obj
 	chmod a-x *.bin
 	. ./prepare.sh
 	./bin2obj -m load.map -o metalsvm.obj
-	sccMerge -noimage -m 8 -n 12 -force ./metalsvm.mt
+	sccMerge -noimage -1slot -m 8 -n 12 -force ./metalsvm.mt
 
 clean:
 	$(RM) -rf *.o *~ bin2obj make_initrd initrd.img *.bin *.obj *.hex *.elf obj 

From aacfcd31ed8c125bd8dd8d7d5af12278d3527b05 Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Mon, 15 Aug 2011 04:49:38 -0700
Subject: [PATCH 05/37] no longer support the sccMerge flag "-1slot"

---
 mm/memory.c    | 9 +++++++++
 tools/Makefile | 2 +-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/mm/memory.c b/mm/memory.c
index efb1a0ae..6cad2520 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -161,6 +161,15 @@ int mmu_init(void)
 		atomic_int32_inc(&total_available_pages);	
 	}
 
+	// Note: The last slot belongs always to the private memory.
+	for(addr=0xFF000000; addr<0xFFFFFFFF; addr+=PAGE_SIZE) {
+		page_clear_mark(addr >> PAGE_SHIFT);
+		if (addr > addr + PAGE_SIZE)
+			break;
+		atomic_int32_inc(&total_pages);
+		atomic_int32_inc(&total_available_pages);
+	}
+
 	/*
 	 * Mark the bootinfo as used.
 	 */
diff --git a/tools/Makefile b/tools/Makefile
index 624860a1..baf8febe 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -50,7 +50,7 @@ SCC: scc_bootinfo.bin scc_setup.bin reset_vector.bin initrd.img bin2obj
 	chmod a-x *.bin
 	. ./prepare.sh
 	./bin2obj -m load.map -o metalsvm.obj
-	sccMerge -noimage -1slot -m 8 -n 12 -force ./metalsvm.mt
+	sccMerge -noimage -m 8 -n 12 -force ./metalsvm.mt
 
 clean:
 	$(RM) -rf *.o *~ bin2obj make_initrd initrd.img *.bin *.obj *.hex *.elf obj 

From c1cb54ae90c71245644ec30ecb7b0a88a88edd6d Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Mon, 15 Aug 2011 06:36:38 -0700
Subject: [PATCH 06/37] cosmetic changes

---
 arch/x86/include/asm/page.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index 0874259a..343362b3 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -41,7 +41,7 @@
 #define _PAGE_BIT_PSE		7	/* 4 MB (or 2MB) page */
 #define _PAGE_BIT_PAT		7	/* on 4KB pages */
 #define _PAGE_BIT_GLOBAL	8	/* Global TLB entry PPro+ */
-#define _PAGE_BIT_RESERVED	9	/* mark a virtual address range as reserved */
+#define _PAGE_BIT_SVM		9	/* mark a virtual address range as used by the SVM system */
 #define _PAGE_BIT_PAT_LARGE	12	/* On 2MB or 1GB pages */
 
 /// Page is present
@@ -64,10 +64,10 @@
 #define PG_MPE		PG_PSE
 /// Global TLB entry (Pentium Pro and later)
 #define PG_GLOBAL	(1 << _PAGE_BIT_GLOBAL)
-/// This virtual address range is reserved as marked
-#define PG_RESERVED	(1 << _PAGE_BIT_RESERVED)
 /// Pattern flag
 #define PG_PAT		(1 << _PAGE_BIT_PAT)
+/// This virtual address range is used by SVM system as marked
+#define PG_SVM		(1 << _PAGE_BIT_SVM)
 /// Large page pattern flag
 #define PG_PAT_LARGE	(1 << _PAGE_BIT_PAT_LARGE)
 
@@ -152,7 +152,7 @@ int unmap_region(size_t viraddr, uint32_t npages);
  *
  * @param viraddr Desired virtual address
  * @param phyaddr Physical address to map from
- * @param npages The Region's size in pages
+ * @param npages The region's size in number of pages
  * @param flags Further page flags
  *
  * @return

From 7980fd5bd7e26e0ad9e6fe2c9c1cf679ba2098c6 Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Mon, 15 Aug 2011 07:13:31 -0700
Subject: [PATCH 07/37] RCCE_shmalloc manages only physical memory regions

=> users have to map these regions in the virtual address space by their own
=> temporary disabling of mmnif
---
 arch/x86/scc/RCCE_admin.c | 4 ++--
 kernel/init.c             | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/scc/RCCE_admin.c b/arch/x86/scc/RCCE_admin.c
index 0c9f9303..c2f85b09 100644
--- a/arch/x86/scc/RCCE_admin.c
+++ b/arch/x86/scc/RCCE_admin.c
@@ -336,13 +336,13 @@ int RCCE_init(
   RCCE_malloc_init(RCCE_comm_buffer[RCCE_IAM],RCCE_BUFF_SIZE);
 #ifdef SHMADD
 
-  RCCE_shmalloc_init(map_region(NULL, RC_SHM_BUFFER_START()+RCCE_SHM_BUFFER_offset, RCCE_SHM_SIZE_MAX/PAGE_SIZE, MAP_KERNEL_SPACE|MAP_MPE), RCCE_SHM_SIZE_MAX);
+  RCCE_shmalloc_init(RC_SHM_BUFFER_START()+RCCE_SHM_BUFFER_offset, RCCE_SHM_SIZE_MAX);
 #ifdef SHMDBG
   kprintf("\n%d:%s:%d: RCCE_SHM_BUFFER_offset, RCCE_SHM_SIZE_MAX: %x %x\n", RCCE_IAM, 
     __FILE__,__LINE__,RCCE_SHM_BUFFER_offset ,RCCE_SHM_SIZE_MAX);
 #endif
 #else
-  RCCE_shmalloc_init(map_region(NULL, RC_SHM_BUFFER_START(), RCCE_SHM_SIZE_MAX/PAGE_SIZE, MAP_KERNEL_SPACE|MAP_NO_CACHE), RCCE_SHM_SIZE_MAX);
+  RCCE_shmalloc_init(RC_SHM_BUFFER_START(), RCCE_SHM_SIZE_MAX);
 #endif
 
   // initialize the (global) flag bookkeeping data structure
diff --git a/kernel/init.c b/kernel/init.c
index a29807ac..462578e1 100644
--- a/kernel/init.c
+++ b/kernel/init.c
@@ -153,7 +153,7 @@ int network_init(void)
 		}
 	}
 #else
-	mmnif_open();
+	//mmnif_open();
 #endif
 
 	// start echo and ping server
@@ -169,7 +169,7 @@ int network_shutdown(void)
 {
 #ifdef CONFIG_LWIP
 #ifdef CONFIG_ROCKCREEK
-	mmnif_close();
+	//mmnif_close();
 #elif defined(CONFIG_PCI)
 	dhcp_release(default_netif);
 	dhcp_stop(default_netif);

From 62e7ead997b4bcbd9bafe212ea08a12e80999447 Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Mon, 15 Aug 2011 07:16:12 -0700
Subject: [PATCH 08/37] add SVM demo example and redesign of the SVM system

---
 arch/x86/include/asm/svm.h |  6 ++-
 arch/x86/mm/page.c         |  7 ++++
 arch/x86/mm/svm.c          | 52 ++++++++++++++++++++++---
 include/metalsvm/stdlib.h  |  3 ++
 kernel/tests.c             | 80 ++++++++++++++++++++++++++++++++++++--
 5 files changed, 138 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 090d1072..6c1e2d61 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -26,6 +26,8 @@
 extern "C" {
 #endif
 
+#ifdef CONFIG_ROCKCREEK
+
 /** @brief Memory allocator of the SVM subsystem.
  *
  * Like RCCE function, belongs svmmalloc to the synchronous 
@@ -39,7 +41,9 @@ void* svmmalloc(size_t size);
  *
  * Like RCCE function, belongs svmfree to the synchronous function. 
  */
-void svmfree(void* addr);
+void svmfree(void* addr, size_t size);
+
+#endif
 
 #ifdef __cplusplus
 }
diff --git a/arch/x86/mm/page.c b/arch/x86/mm/page.c
index 6eadd3d1..353f0626 100644
--- a/arch/x86/mm/page.c
+++ b/arch/x86/mm/page.c
@@ -354,6 +354,10 @@ size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flag
 		if (flags & MAP_MPE)
 			pgt->entries[index] |= PG_MPE;
 #endif
+		if (flags & MAP_SVM)
+			pgt->entries[index] |= PG_SVM;
+		if (flags & MAP_NO_ACCESS)
+			pgt->entries[index] &= ~(PG_PRESENT|PG_RW);
 
 		if (flags & MAP_USER_SPACE)
 			atomic_int32_inc(&task->user_usage);
@@ -395,6 +399,9 @@ int change_page_permissions(size_t start, size_t end, uint32_t flags)
 				phyaddr = pgt->entries[index2] & 0xFFFFF000;
 				newflags = pgt->entries[index2] & 0xFFF;  // get old flags
 
+				if ((newflags & PG_SVM) && !(newflags & PG_PRESENT) && (flags & (VMA_WRITE|VMA_READ)))
+					newflags |= PG_PRESENT;
+
 				// update flags
 				if (!(flags & VMA_WRITE))
 					newflags &= ~PG_RW;
diff --git a/arch/x86/mm/svm.c b/arch/x86/mm/svm.c
index 06e4d3d4..2fecbba7 100644
--- a/arch/x86/mm/svm.c
+++ b/arch/x86/mm/svm.c
@@ -20,19 +20,61 @@
 #include <metalsvm/stddef.h>
 #include <metalsvm/stdio.h>
 #include <metalsvm/stdlib.h>
-#include <asm/svm.h>
+#include <metalsvm/mmu.h>
+#include <metalsvm/page.h>
+#include <asm/irqflags.h>
 #ifdef CONFIG_ROCKCREEK
 #include <asm/RCCE_lib.h>
 #include <asm/SCC_API.h>
 #include <asm/icc.h>
-#endif
+#include <asm/svm.h>
 
 void* svmmalloc(size_t size)
 {
-	return (void*) RCCE_shmalloc(size);
+	size_t phyaddr;
+	size_t viraddr;
+	uint32_t flags;
+	uint32_t map_flags = MAP_KERNEL_SPACE|MAP_MPE|MAP_SVM;
+
+	// currently, we allocate memory in page size granulation
+	size = (size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
+
+	// iRCCE is not thread save => disable interrupts
+	flags = irq_nested_disable();
+	phyaddr = (size_t) RCCE_shmalloc(size);
+
+	if (RCCE_ue())
+		map_flags |= MAP_NO_ACCESS;
+	irq_nested_enable(flags);
+
+	viraddr = map_region(0, phyaddr, size >> PAGE_SHIFT, map_flags);
+
+	//kprintf("shmmalloc: phyaddr 0x%x, viraddr 0x%x, size 0x%x\n", phyaddr, viraddr, size);
+
+	return (void*) viraddr;
 }
 
-void svmfree(void* addr)
+void svmfree(void* addr, size_t size)
 {
-	RCCE_free((t_vcharp) addr);
+	size_t phyaddr;
+	uint32_t flags;
+
+	if (BUILTIN_EXPECT(!addr || !size, 0))
+		return;
+
+	phyaddr = virt_to_phys((size_t) addr);
+
+	// currently, we allocate memory in page size granulation
+	size = (size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
+
+	//kprintf("shmmalloc: phyaddr 0x%x, viraddr 0x%x, size 0x%x\n", phyaddr, addr, size);
+
+	unmap_region((size_t) addr, size >> PAGE_SHIFT);
+
+	// iRCCE is not thread save => disable interrupts
+	flags = irq_nested_disable();
+	RCCE_shfree((t_vcharp) phyaddr);
+	irq_nested_enable(flags);
 }
+
+#endif
diff --git a/include/metalsvm/stdlib.h b/include/metalsvm/stdlib.h
index ad1cecdb..3849745e 100644
--- a/include/metalsvm/stdlib.h
+++ b/include/metalsvm/stdlib.h
@@ -48,6 +48,9 @@ extern "C" {
 #ifdef CONFIG_ROCKCREEK
 #define MAP_MPE			(1 << 8)
 #endif
+#define MAP_SVM			(1 << 9)
+#define MAP_NO_ACCESS		(1 << 10)
+
 void NORETURN abort(void);
 
 /** @brief Kernel's memory allocator function.
diff --git a/kernel/tests.c b/kernel/tests.c
index 5b67a06d..a5113c09 100644
--- a/kernel/tests.c
+++ b/kernel/tests.c
@@ -24,12 +24,14 @@
 #include <metalsvm/semaphore.h>
 #include <metalsvm/mailbox.h>
 #include <metalsvm/syscall.h>
+#include <metalsvm/vma.h>
 #ifdef CONFIG_ROCKCREEK
 #include <asm/icc.h>
 #include <asm/RCCE.h>
 #include <asm/RCCE_lib.h>
 #include <asm/iRCCE.h>
 #include <asm/iRCCE_lib.h>
+#include <asm/svm.h>
 
 #include <asm/SCC_API.h>
 #include <lwip/sockets.h>
@@ -111,6 +113,75 @@ int mail_ping(void* arg) {
 
 	return 0;
 }
+
+static inline void cache_invalidate(void) 
+{
+	asm volatile ( ".byte 0x0f; .byte 0x0a;\n" ); // CL1FLUSHMB
+}
+
+#define N	1024
+
+volatile static int* A[N];
+volatile static int* B[N];
+volatile static int* C[N];
+
+static int svm_test(void *arg)
+{
+	uint32_t i, j, k, flags;
+	int my_ue, num_ues;
+
+	// iRCCE is not thread save => disable interrupts
+	flags = irq_nested_disable();
+	RCCE_barrier(&RCCE_COMM_WORLD);
+	my_ue = RCCE_ue();
+	num_ues = RCCE_num_ues();
+	irq_nested_enable(flags);
+
+	// allocate and initialize SVM region
+	A[0] = (int*) svmmalloc(3*N*N*sizeof(int));
+	if (!my_ue)
+		memset((void*) A[0], 0x00, 3*N*N*sizeof(int));
+
+	// initialize matrices
+	for(i=0; i<N; i++) {
+		A[i] = A[0] + i*N;
+		B[i] = A[0] + i*N + N*N;
+		C[i] = A[0] + i*N + 2*N*N;
+	}
+	if (!my_ue) {
+		for(i=0; i<N; i++) {
+			A[i][i] = 1;
+			for(j=0; j<N; j++)
+				B[i][j] = j;
+		}
+	}
+
+	// CL1FLUSH
+	cache_invalidate();
+
+	// Now, we need only read access on A and B
+	change_page_permissions(A[0], A[0]+2*N*N, VMA_CACHEABLE|VMA_READ);
+
+	// iRCCE is not thread save => disable interrupts
+	flags = irq_nested_disable();
+	RCCE_barrier(&RCCE_COMM_WORLD);
+	irq_nested_enable(flags);
+
+	// start calculation
+	for(i=my_ue*(N/num_ues); i<(my_ue+1)*(N/num_ues); i++)
+		for(j=0; j<N; j++)
+			for(k=0; k<N; k++)
+				; //C[i][j] = A[i][k] * B[k][j];
+
+	// iRCCE is not thread save => disable interrupts
+	flags = irq_nested_disable();
+	RCCE_barrier(&RCCE_COMM_WORLD);
+	irq_nested_enable(flags);
+
+	svmfree((void*) A[0], 3*N*N*sizeof(int)); // release exactly the size allocated above
+
+	return 0;
+}
 #endif
 
 static int join_test(void* arg)
@@ -273,16 +344,17 @@ int test_init(void)
 //		create_kernel_task(NULL,client_task,NULL);
 #endif
 
-	create_kernel_task(NULL, foo, "Hello from foo1");
-	create_kernel_task(NULL, join_test, NULL);
+	//create_kernel_task(NULL, foo, "Hello from foo1");
+	//create_kernel_task(NULL, join_test, NULL);
 	//create_kernel_task(NULL, producer, NULL);
 	//create_kernel_task(NULL, consumer, NULL);
 	//create_kernel_task(NULL, mail_ping, NULL);
+	create_kernel_task(NULL, svm_test, NULL);
 	//create_user_task(NULL, "/bin/hello", argv);
-	create_user_task(NULL, "/bin/tests", argv);
+	//create_user_task(NULL, "/bin/tests", argv);
 	//create_user_task(NULL, "/bin/jacobi", argv);
 	//create_user_task(NULL, "/bin/jacobi", argv);
-	create_user_task(NULL, "/bin/server", server_argv);
+	//create_user_task(NULL, "/bin/server", server_argv);
 	//sleep(5);
 	//create_user_task(NULL, "/bin/client", client_argv);
 

From c8e22fe08e19ae2618548f15d5d26223a00e5a87 Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Mon, 15 Aug 2011 08:01:47 -0700
Subject: [PATCH 09/37] add faster memcpy function for the SCC

---
 arch/x86/include/asm/string.h | 55 +++++++++++++++++++++++++++++++++++
 arch/x86/scc/scc_memcpy.h     |  9 +++++-
 kernel/tests.c                |  6 ++--
 3 files changed, 66 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/string.h b/arch/x86/include/asm/string.h
index 51d288a5..fd6480e0 100644
--- a/arch/x86/include/asm/string.h
+++ b/arch/x86/include/asm/string.h
@@ -35,6 +35,59 @@ void copy_page_physical(void* dest, const void * src);
 
 #ifdef HAVE_ARCH_MEMCPY
 
+#ifdef CONFIG_ROCKCREEK
+/** @brief Fast procedure to get a byte range from RAM into on-die memory.
+ *
+ * On the current SCC architecture, a write access to a cache line that is
+ * not present does not trigger a cache line fill. In this case, the core
+ * writes directly to memory.
+ *
+ * The following function prefetches its destination before copying and
+ * thereby avoids the bad behavior of a "write miss".
+ *
+ * @param dest Destination address
+ * @param src Source address
+ * @param count Range size in bytes
+ */ 
+inline static void *memcpy(void *dest, const void *src, size_t count)
+{
+	int32_t h, i, j, k, l, m;
+
+	asm volatile   ("cld;\n\t"
+			"1: cmpl $0, %%eax ; je 3f\n\t"
+			"movl (%%esi), %%ecx\n\t"
+			"movl (%%edi), %%edx\n\t"
+			"cmpl $1, %%eax ; je 2f\n\t"
+			"movl 32(%%esi), %%ecx\n\t"
+			"movl 32(%%edi), %%edx\n\t"
+			"2: movl 0(%%esi), %%ecx\n\t"
+			"movl 4(%%esi), %%edx\n\t"
+			"movl %%ecx, 0(%%edi)\n\t"
+			"movl %%edx, 4(%%edi)\n\t"
+			"movl 8(%%esi), %%ecx\n\t"
+			"movl 12(%%esi), %%edx\n\t"
+			"movl %%ecx, 8(%%edi)\n\t"
+			"movl %%edx, 12(%%edi)\n\t"
+			"movl 16(%%esi), %%ecx\n\t"
+			"movl 20(%%esi), %%edx\n\t"
+			"movl %%ecx, 16(%%edi)\n\t"
+			"movl %%edx, 20(%%edi)\n\t"
+			"movl 24(%%esi), %%ecx\n\t"
+			"movl 28(%%esi), %%edx\n\t"
+			"movl %%ecx, 24(%%edi)\n\t"
+			"movl %%edx, 28(%%edi)\n\t"
+			"addl $32, %%esi\n\t"
+			"addl $32, %%edi\n\t"
+			"dec %%eax ; jmp 1b\n\t"
+			"3: movl %%ebx, %%ecx\n\t"
+			"movl (%%edi), %%edx\n\t"
+			"andl $31, %%ecx\n\t"
+			"rep ; movsb\n\t":"=&a" (h), "=&D"(i), "=&S"(j), "=&b"(k), "=&c"(l), "=&d"(m)
+			: "0"(count / 32), "1"(dest), "2"(src), "3"(count) : "memory");
+
+        return dest;
+}
+#else
 /** @brief Copy a byte range from source to dest
  *
  * @param dest Destination address
@@ -60,6 +113,8 @@ inline static void *memcpy(void* dest, const void *src, size_t count)
 }
 #endif
 
+#endif
+
 #ifdef HAVE_ARCH_MEMSET
 
 /** @brief Repeated write of a value to a whole range of bytes
diff --git a/arch/x86/scc/scc_memcpy.h b/arch/x86/scc/scc_memcpy.h
index 31eb47c1..8748a496 100644
--- a/arch/x86/scc/scc_memcpy.h
+++ b/arch/x86/scc/scc_memcpy.h
@@ -85,7 +85,13 @@ inline static void *memcpy_get(void *dest, const void *src, size_t count)
 	return dest;
 }
 
-
+#if 1
+/* 
+ * In our kernel, we didn't want to use FPU registers.
+ * Therefore, we use standard memcpy routine
+ */
+#define memcpy_put 	memcpy
+#else
 /** @brief Fast procedure to get a byte range from on-die memory into RAM.
  *
  * If the destination is located on on-die memory (MPB), classical prefetching 
@@ -166,6 +172,7 @@ inline static void *memcpy_put(void *dest, const void *src, size_t count)
 
 	return dest;
 }
+#endif
 
 #endif
 
diff --git a/kernel/tests.c b/kernel/tests.c
index 5b67a06d..67c3a430 100644
--- a/kernel/tests.c
+++ b/kernel/tests.c
@@ -274,15 +274,15 @@ int test_init(void)
 #endif
 
 	create_kernel_task(NULL, foo, "Hello from foo1");
-	create_kernel_task(NULL, join_test, NULL);
+	//create_kernel_task(NULL, join_test, NULL);
 	//create_kernel_task(NULL, producer, NULL);
 	//create_kernel_task(NULL, consumer, NULL);
 	//create_kernel_task(NULL, mail_ping, NULL);
 	//create_user_task(NULL, "/bin/hello", argv);
-	create_user_task(NULL, "/bin/tests", argv);
+	//create_user_task(NULL, "/bin/tests", argv);
 	//create_user_task(NULL, "/bin/jacobi", argv);
 	//create_user_task(NULL, "/bin/jacobi", argv);
-	create_user_task(NULL, "/bin/server", server_argv);
+	//create_user_task(NULL, "/bin/server", server_argv);
 	//sleep(5);
 	//create_user_task(NULL, "/bin/client", client_argv);
 

From a0bae35a884091d22c26bd7f7e47b057e59fec67 Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Tue, 16 Aug 2011 03:29:54 -0700
Subject: [PATCH 10/37] add untested prototype of an SVM system

---
 arch/x86/include/asm/icc.h       |   2 +-
 arch/x86/include/asm/processor.h |   9 +-
 arch/x86/include/asm/svm.h       |  22 +++++
 arch/x86/mm/page.c               |  22 ++++-
 arch/x86/mm/svm.c                | 149 +++++++++++++++++++++++++++++++
 arch/x86/scc/icc.c               |   6 +-
 include/metalsvm/vma.h           |   9 +-
 kernel/main.c                    |   2 +
 kernel/tests.c                   |  12 ++-
 9 files changed, 220 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/icc.h b/arch/x86/include/asm/icc.h
index 9cca3390..33316d2f 100644
--- a/arch/x86/include/asm/icc.h
+++ b/arch/x86/include/asm/icc.h
@@ -40,7 +40,7 @@ typedef struct {
 extern bootinfo_t* bootinfo;
 
 #define ICC_TAG_IP		0
-#define ICC_TAG_SVM		1
+#define ICC_TAG_SVMREQUEST	1
 #define ICC_TAG_PINGREQUEST	2
 #define ICC_TAG_PINGRESPONSE	3
 
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index da3b3556..5d79e4ac 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -106,7 +106,7 @@ inline static void flush_cache(void) {
  * The invd asm instruction which invalidates cache without writing back
  * is used here
  */
-inline static void invalid_cache(void) {
+inline static void invalidate_cache(void) {
 	asm volatile ("invd");
 }
 
@@ -123,6 +123,13 @@ inline static int get_return_value(void) {
 	return ret;
 }
 
+#ifdef CONFIG_ROCKCREEK
+static inline void invalidate_cl1(void)
+{
+	asm volatile ( ".byte 0x0f; .byte 0x0a;\n" ); // CL1FLUSHMB
+}
+#endif
+
 /* Force strict CPU ordering */
 #ifdef CONFIG_ROCKCREEK
 inline static void mb(void) { asm volatile ("lock; addl $0,0(%%esp)" ::: "memory"); }
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 6c1e2d61..667d10a2 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -28,6 +28,14 @@ extern "C" {
 
 #ifdef CONFIG_ROCKCREEK
 
+/** @brief Init routine of the SVM subsystem 
+ *
+ * @return
+ * - 0 on success
+ * - -ENOMEM not enough memory
+ */
+int svm_init(void);
+
 /** @brief Memory allocator of the SVM subsystem.
  *
  * Like RCCE function, belongs svmmalloc to the synchronous 
@@ -43,6 +51,20 @@ void* svmmalloc(size_t size);
  */
 void svmfree(void* addr, size_t size);
 
+/** @brief Request for exclusive access
+ *
+ * @return
+ * - 0 on success
+ */
+int svm_access_request(size_t addr);
+
+/** @brief emit page to core ue
+ *
+ * @return 
+ * - 0 on success
+ */
+int svm_emit_page(size_t addr, int ue);
+
 #endif
 
 #ifdef __cplusplus
diff --git a/arch/x86/mm/page.c b/arch/x86/mm/page.c
index 353f0626..5577e808 100644
--- a/arch/x86/mm/page.c
+++ b/arch/x86/mm/page.c
@@ -34,6 +34,7 @@
 #ifdef CONFIG_ROCKCREEK
 #include <asm/RCCE_lib.h>
 #include <asm/SCC_API.h>
+#include <asm/svm.h>
 #include <asm/icc.h>
 #endif
 
@@ -357,7 +358,7 @@ size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flag
 		if (flags & MAP_SVM)
 			pgt->entries[index] |= PG_SVM;
 		if (flags & MAP_NO_ACCESS)
-			pgt->entries[index] &= ~(PG_PRESENT|PG_RW);
+			pgt->entries[index] &= ~PG_PRESENT;
 
 		if (flags & MAP_USER_SPACE)
 			atomic_int32_inc(&task->user_usage);
@@ -399,8 +400,10 @@ int change_page_permissions(size_t start, size_t end, uint32_t flags)
 				phyaddr = pgt->entries[index2] & 0xFFFFF000;
 				newflags = pgt->entries[index2] & 0xFFF;  // get old flags
 
-				if ((newflags & PG_SVM) && !(newflags & PG_PRESENT) && (flags & (VMA_WRITE|VMA_READ)))
+				if ((newflags & PG_SVM) && !(newflags & PG_PRESENT) && (flags & (VMA_READ|VMA_WRITE) && !(flags & VMA_NOACCESS)))
 					newflags |= PG_PRESENT;
+				if ((newflags & PG_SVM) && (newflags & PG_PRESENT) && (flags & VMA_NOACCESS))
+					newflags &= ~PG_PRESENT;
 
 				// update flags
 				if (!(flags & VMA_WRITE))
@@ -598,8 +601,11 @@ int print_paging_tree(size_t viraddr)
 static void pagefault_handler(struct state *s)
 {
 	task_t* task = per_core(current_task);
+	page_dir_t* pgd = task->pgd;
+	page_table_t* pgt = NULL;
 	size_t viraddr = read_cr2();
 	size_t phyaddr;
+	uint32_t index1, index2;
 
 	if ((viraddr >= task->start_heap) && (viraddr <= task->end_heap) && (viraddr > KERNEL_SPACE)) {
 		viraddr = viraddr & 0xFFFFF000;
@@ -617,6 +623,18 @@ static void pagefault_handler(struct state *s)
 		put_page(phyaddr);
 	}
 
+	index1 = viraddr >> 22;
+	index2 = (viraddr >> 12) & 0x3FF;
+
+	if (pgd)
+                pgt = (page_table_t*) (pgd->entries[index1] & 0xFFFFF000);
+	if (!pgt)
+		goto default_handler;
+
+	if (pgt->entries[index2] & PG_SVM)
+		if (!svm_access_request(viraddr))
+			return;
+
 default_handler:
 	kprintf("PAGE FAULT: Task %u got page fault at %p (irq %d, cs:eip 0x%x:0x%x)\n", task->id, viraddr, s->int_no, s->cs, s->eip);
 	kprintf("Register state: eax = 0x%x, ebx = 0x%x, ecx = 0x%x, edx = 0x%x, edi = 0x%x, esi = 0x%x, ebp = 0x%x, esp = 0x%x\n", 
diff --git a/arch/x86/mm/svm.c b/arch/x86/mm/svm.c
index 2fecbba7..0b5e0ae0 100644
--- a/arch/x86/mm/svm.c
+++ b/arch/x86/mm/svm.c
@@ -22,13 +22,113 @@
 #include <metalsvm/stdlib.h>
 #include <metalsvm/mmu.h>
 #include <metalsvm/page.h>
+#include <metalsvm/errno.h>
 #include <asm/irqflags.h>
 #ifdef CONFIG_ROCKCREEK
+#include <asm/RCCE.h>
 #include <asm/RCCE_lib.h>
+#include <asm/iRCCE.h>
 #include <asm/SCC_API.h>
 #include <asm/icc.h>
 #include <asm/svm.h>
 
+#define SHARED_PAGES		(RCCE_SHM_SIZE_MAX >> PAGE_SHIFT)
+#define OWNER_SIZE		((SHARED_PAGES * sizeof(uint8_t) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
+
+t_vcharp RC_SHM_BUFFER_START();
+
+/*
+ * This array records the owner of each shared page.
+ * Only the current owner of a page may hand over its ownership.
+ * => No lock is needed.
+ */
+static volatile uint8_t*	page_owner = NULL;
+
+// helper array to convert a physical to a virtual address
+static size_t phys2virt[SHARED_PAGES] = {[0 ... SHARED_PAGES-1] = 0};
+static size_t shmbegin = 0;
+static int my_ue = 0;
+
+int svm_init(void)
+{
+	size_t phyaddr;
+	uint32_t flags;
+
+	// iRCCE is not thread safe => disable interrupts
+	flags = irq_nested_disable();
+	my_ue = RCCE_ue();
+	shmbegin = (size_t)RC_SHM_BUFFER_START();
+	phyaddr = (size_t) RCCE_shmalloc(OWNER_SIZE);
+	irq_nested_enable(flags);
+	if (BUILTIN_EXPECT(!phyaddr, 0))
+		return -ENOMEM;
+
+	page_owner = (uint8_t*) map_region(0, phyaddr, OWNER_SIZE >> PAGE_SHIFT, MAP_KERNEL_SPACE|MAP_NO_CACHE/*MAP_MPE*/|MAP_SVM);
+	if (BUILTIN_EXPECT(!page_owner, 0)) {
+		flags = irq_nested_disable();
+		RCCE_shfree((t_vcharp) phyaddr);
+		irq_nested_enable(flags);
+		return -ENOMEM;
+	}
+
+	// by default, core 0 owns every page: core 0 zeroes the owner table
+	if (!my_ue)
+		memset((void*)page_owner, 0x00, OWNER_SIZE);
+
+	// iRCCE is not thread safe => disable interrupts
+	flags = irq_nested_disable();
+	RCCE_barrier(&RCCE_COMM_WORLD);
+	irq_nested_enable(flags);
+	
+	return 0;
+}
+
+/*
+ * Called by the page fault handler to request ownership of the SVM page
+ * that contains addr => the interrupt flag is already cleared.
+ */
+int svm_access_request(size_t addr)
+{
+	size_t phyaddr = virt_to_phys(addr);
+	uint32_t pageid;
+	int remote_rank;
+	uint8_t payload[iRCCE_MAIL_HEADER_PAYLOAD];
+
+	if (phyaddr < shmbegin)
+		return -EINVAL;
+	if (phyaddr >= shmbegin + RCCE_SHM_SIZE_MAX)
+		return -EINVAL;
+	pageid = (phyaddr-shmbegin) >> PAGE_SHIFT;
+
+	invalidate_cl1();
+	if (page_owner[pageid] == my_ue)
+		return 0;
+
+	// determine the current owner before logging and contacting it
+	remote_rank = page_owner[pageid];
+	kprintf("send access request to %d of 0x%x\n", remote_rank, phyaddr);
+	((size_t*) payload)[0] = my_ue;
+	((size_t*) payload)[1] = phyaddr;
+
+	/* send access request; the payload holds two size_t values (requester, page) */
+	iRCCE_mail_send(2*sizeof(size_t), ICC_TAG_SVMREQUEST, 0, payload, remote_rank);
+
+        NOP8;
+        icc_send_irq(remote_rank);
+
+	/* check for incoming messages */
+	icc_mail_check();
+
+	invalidate_cl1();
+	while (page_owner[pageid] != my_ue)
+	{
+		NOP4;
+		invalidate_cl1();
+	};
+
+	return change_page_permissions(addr, addr+PAGE_SIZE, VMA_READ|VMA_WRITE|VMA_CACHEABLE);
+}
+
 void* svmmalloc(size_t size)
 {
 	size_t phyaddr;
@@ -46,8 +146,11 @@ void* svmmalloc(size_t size)
 	if (RCCE_ue())
 		map_flags |= MAP_NO_ACCESS;
 	irq_nested_enable(flags);
+	if (BUILTIN_EXPECT(!phyaddr, 0))
+		return NULL;
 
 	viraddr = map_region(0, phyaddr, size >> PAGE_SHIFT, map_flags);
+	phys2virt[(phyaddr - shmbegin) >> PAGE_SHIFT] = viraddr;
 
 	//kprintf("shmmalloc: phyaddr 0x%x, viraddr 0x%x, size 0x%x\n", phyaddr, viraddr, size);
 
@@ -70,6 +173,7 @@ void svmfree(void* addr, size_t size)
 	//kprintf("shmmalloc: phyaddr 0x%x, viraddr 0x%x, size 0x%x\n", phyaddr, addr, size);
 
 	unmap_region((size_t) addr, size >> PAGE_SHIFT);
+	phys2virt[(phyaddr - shmbegin) >> PAGE_SHIFT] = 0;
 
 	// iRCCE is not thread save => disable interrupts
 	flags = irq_nested_disable();
@@ -77,4 +181,49 @@ void svmfree(void* addr, size_t size)
 	irq_nested_enable(flags);
 }
 
+/*
+ * This function is called by icc_mail_check.
+ * => Interrupt flag is alread cleared.
+ */
+int svm_emit_page(size_t phyaddr, int ue)
+{	
+	uint32_t pageid;
+
+	kprintf("Try to emit page 0x%x to %d\n", phyaddr, ue);
+
+	if (phyaddr < shmbegin)
+		return -EINVAL;
+	if (phyaddr >= shmbegin + RCCE_SHM_SIZE_MAX)
+		return -EINVAL;
+	pageid = (phyaddr-shmbegin) >> PAGE_SHIFT;
+
+	invalidate_cl1();
+	if (page_owner[pageid] != my_ue) {
+		// Core is nor owner => forward request to new owner
+		int remote_rank;
+		uint8_t payload[iRCCE_MAIL_HEADER_PAYLOAD];
+
+		kprintf("Ups, core %d is not owner of page 0x%x\n", my_ue, phyaddr);
+
+		remote_rank = page_owner[pageid];
+		((size_t*) payload)[0] = ue;
+		((size_t*) payload)[1] = phyaddr;
+
+		/* send ping request */
+		iRCCE_mail_send(sizeof(size_t), ICC_TAG_SVMREQUEST, 0, payload, remote_rank);
+
+		 NOP8;
+		icc_send_irq(remote_rank);
+	} else {
+		size_t viraddr = phys2virt[(phyaddr - shmbegin) >> PAGE_SHIFT];
+
+		change_page_permissions(viraddr, viraddr+PAGE_SIZE, VMA_NOACCESS|VMA_READ|VMA_CACHEABLE);
+
+		page_owner[pageid] = ue;
+		invalidate_cl1();
+	}
+
+	return 0;
+}
+
 #endif
diff --git a/arch/x86/scc/icc.c b/arch/x86/scc/icc.c
index 75bfdc78..2785f6d6 100644
--- a/arch/x86/scc/icc.c
+++ b/arch/x86/scc/icc.c
@@ -25,6 +25,7 @@
 #include <asm/iRCCE.h>
 #include <asm/SCC_API.h>
 #include <asm/icc.h>
+#include <asm/svm.h>
 
 #include <net/mmnif.h>
 
@@ -83,7 +84,7 @@ static void intr_handler(struct state *s)
 	int tmp, z;
 
 #ifdef CONFIG_LWIP
-	mmnif_irqhandler();
+//	mmnif_irqhandler();
 #endif
 
 	z = Z_PID(RC_COREID[my_ue]);
@@ -279,6 +280,9 @@ void icc_mail_check(void)
 			timer = rdtsc() - *((uint64_t*) header->payload);
 			kprintf( "Response received in %d ticks!\n", timer );   
 			break;
+		case ICC_TAG_SVMREQUEST:
+			svm_emit_page(((size_t*) header->payload)[1], ((size_t*) header->payload)[0]);
+			break;
 		default:
 			kprintf("Invalid mail: tag = %d\n", header->tag);
 			break;
diff --git a/include/metalsvm/vma.h b/include/metalsvm/vma.h
index 449e81da..74c63233 100644
--- a/include/metalsvm/vma.h
+++ b/include/metalsvm/vma.h
@@ -32,10 +32,11 @@
 extern "C" {
 #endif
 
-#define VMA_READ	0x01
-#define VMA_WRITE	0x02
-#define VMA_EXECUTE	0x04
-#define VMA_CACHEABLE	0x08
+#define VMA_READ	(1 << 0)
+#define VMA_WRITE	(1 << 1)
+#define VMA_EXECUTE	(1 << 2)
+#define VMA_CACHEABLE	(1 << 3)
+#define VMA_NOACCESS	(1 << 4)
 
 struct vma;
 
diff --git a/kernel/main.c b/kernel/main.c
index f89d2bc7..78594caa 100644
--- a/kernel/main.c
+++ b/kernel/main.c
@@ -32,6 +32,7 @@
 #include <asm/kb.h>
 #ifdef CONFIG_ROCKCREEK
 #include <asm/icc.h>
+#include <asm/svm.h>
 #endif
 
 /* 
@@ -75,6 +76,7 @@ int main(void)
 	mmu_init();
 #ifdef CONFIG_ROCKCREEK
 	icc_init();
+	svm_init();
 #endif
    	initrd_init();
 
diff --git a/kernel/tests.c b/kernel/tests.c
index a5113c09..bce1c56f 100644
--- a/kernel/tests.c
+++ b/kernel/tests.c
@@ -25,6 +25,7 @@
 #include <metalsvm/mailbox.h>
 #include <metalsvm/syscall.h>
 #include <metalsvm/vma.h>
+#include <metalsvm/page.h>
 #ifdef CONFIG_ROCKCREEK
 #include <asm/icc.h>
 #include <asm/RCCE.h>
@@ -157,28 +158,31 @@ static int svm_test(void *arg)
 	}
 
 	// CL1FLUSH
-	cache_invalidate();
+	invalidate_cl1();
 
 	// Now, we need only read access on A and B
-	change_page_permissions(A[0], A[0]+2*N*N, VMA_CACHEABLE|VMA_READ);
+	change_page_permissions((size_t) A[0], (size_t) (A[0]+2*N*N), VMA_CACHEABLE|VMA_READ);
 
 	// iRCCE is not thread save => disable interrupts
 	flags = irq_nested_disable();
 	RCCE_barrier(&RCCE_COMM_WORLD);
 	irq_nested_enable(flags);
 
+	if (!my_ue)
+		kprintf("Start calculation...\n");
+
 	// start calculation
 	for(i=my_ue*(N/num_ues); i<(my_ue+1)*(N/num_ues); i++)
 		for(j=0; j<N; j++)
 			for(k=0; k<N; k++)
-				; //C[i][j] = A[i][k] * B[k][j];
+				C[i][j] = A[i][k] * B[k][j];
 
 	// iRCCE is not thread save => disable interrupts
 	flags = irq_nested_disable();
 	RCCE_barrier(&RCCE_COMM_WORLD);
 	irq_nested_enable(flags);
 
-	svmfree(A[0], 3*N*sizeof(int));
+	svmfree((void*) A[0], 3*N*sizeof(int));
 
 	return 0;
 }

From 6e55a3a8747bf959c3f378b09e4001fbed938bc9 Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Tue, 16 Aug 2011 16:08:04 -0700
Subject: [PATCH 11/37] add some debug messages and remove obsolete macros

---
 arch/x86/include/asm/page.h       |  3 ---
 arch/x86/mm/page.c                |  3 +++
 arch/x86/mm/svm.c                 | 15 ++++++++-----
 arch/x86/scc/iRCCE_get.c          |  3 ++-
 arch/x86/scc/iRCCE_put.c          |  3 ++-
 arch/x86/scc/icc.c                |  5 +++--
 arch/x86/scc/scc_memcpy.h         | 36 +++++--------------------------
 include/metalsvm/config.h.example |  1 +
 kernel/tests.c                    | 11 +++++-----
 9 files changed, 32 insertions(+), 48 deletions(-)

diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index 343362b3..5a07a76d 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -42,7 +42,6 @@
 #define _PAGE_BIT_PAT		7	/* on 4KB pages */
 #define _PAGE_BIT_GLOBAL	8	/* Global TLB entry PPro+ */
 #define _PAGE_BIT_SVM		9	/* mark a virtual address range as used by the SVM system */
-#define _PAGE_BIT_PAT_LARGE	12	/* On 2MB or 1GB pages */
 
 /// Page is present
 #define PG_PRESENT	(1 << _PAGE_BIT_PRESENT)
@@ -68,8 +67,6 @@
 #define PG_PAT		(1 << _PAGE_BIT_PAT)
 /// This virtual address range is used by SVM system as marked
 #define PG_SVM		(1 << _PAGE_BIT_SVM)
-/// Large page pattern flag
-#define PG_PAT_LARGE	(1 << _PAGE_BIT_PAT_LARGE)
 
 /// This is a whole set of flags (PRESENT,RW,ACCESSED,DIRTY) for kernelspace tables
 #define KERN_TABLE	(PG_PRESENT|PG_RW|PG_ACCESSED|PG_DIRTY)
diff --git a/arch/x86/mm/page.c b/arch/x86/mm/page.c
index 5577e808..2cb7ff5f 100644
--- a/arch/x86/mm/page.c
+++ b/arch/x86/mm/page.c
@@ -626,14 +626,17 @@ static void pagefault_handler(struct state *s)
 	index1 = viraddr >> 22;
 	index2 = (viraddr >> 12) & 0x3FF;
 
+	kprintf("page fault: pgd 0x%p\n", pgd);
 	if (pgd)
                 pgt = (page_table_t*) (pgd->entries[index1] & 0xFFFFF000);
+	kprintf("page fault: pgt 0x%p\n", pgt);
 	if (!pgt)
 		goto default_handler;
 
 	if (pgt->entries[index2] & PG_SVM)
 		if (!svm_access_request(viraddr))
 			return;
+	kprintf("pgt->entries[%d] = 0x%x\n", index2, pgt->entries[index2]);
 
 default_handler:
 	kprintf("PAGE FAULT: Task %u got page fault at %p (irq %d, cs:eip 0x%x:0x%x)\n", task->id, viraddr, s->int_no, s->cs, s->eip);
diff --git a/arch/x86/mm/svm.c b/arch/x86/mm/svm.c
index 0b5e0ae0..3c3f8458 100644
--- a/arch/x86/mm/svm.c
+++ b/arch/x86/mm/svm.c
@@ -63,7 +63,9 @@ int svm_init(void)
 	if (BUILTIN_EXPECT(!phyaddr, 0))
 		return -ENOMEM;
 
-	page_owner = (uint8_t*) map_region(0, phyaddr, OWNER_SIZE >> PAGE_SHIFT, MAP_KERNEL_SPACE|MAP_NO_CACHE/*MAP_MPE*/|MAP_SVM);
+	kprintf("Shared memory starts at the physical address 0x%x\n", shmbegin);
+ 
+	page_owner = (uint8_t*) map_region(0, phyaddr, OWNER_SIZE >> PAGE_SHIFT, MAP_SVM|MAP_KERNEL_SPACE|MAP_NO_CACHE/*MAP_MPE*/);
 	if (BUILTIN_EXPECT(!page_owner, 0)) {
 		flags = irq_nested_disable();
 		RCCE_shfree((t_vcharp) phyaddr);
@@ -94,6 +96,8 @@ int svm_access_request(size_t addr)
 	int remote_rank;
 	uint8_t payload[iRCCE_MAIL_HEADER_PAYLOAD];
 
+	kprintf("enter svm_access_request\n");
+
 	if (phyaddr < shmbegin)
 		return -EINVAL;
 	if (phyaddr >= shmbegin + RCCE_SHM_SIZE_MAX)
@@ -104,14 +108,13 @@ int svm_access_request(size_t addr)
 	if (page_owner[pageid] == my_ue)
 		return 0;
 
-	kprintf("send access request to %d of 0x%x\n", remote_rank, phyaddr);
-
 	remote_rank = page_owner[pageid];
 	((size_t*) payload)[0] = my_ue;
 	((size_t*) payload)[1] = phyaddr;
 
+	kprintf("send access request to %d of 0x%x\n", remote_rank, phyaddr);
 	/* send ping request */
-	iRCCE_mail_send(sizeof(size_t), ICC_TAG_SVMREQUEST, 0, payload, remote_rank);
+	iRCCE_mail_send(2*sizeof(size_t), ICC_TAG_SVMREQUEST, 0, payload, remote_rank);
 
         NOP8;
         icc_send_irq(remote_rank);
@@ -210,7 +213,7 @@ int svm_emit_page(size_t phyaddr, int ue)
 		((size_t*) payload)[1] = phyaddr;
 
 		/* send ping request */
-		iRCCE_mail_send(sizeof(size_t), ICC_TAG_SVMREQUEST, 0, payload, remote_rank);
+		iRCCE_mail_send(2*sizeof(size_t), ICC_TAG_SVMREQUEST, 0, payload, remote_rank);
 
 		 NOP8;
 		icc_send_irq(remote_rank);
@@ -219,7 +222,9 @@ int svm_emit_page(size_t phyaddr, int ue)
 
 		change_page_permissions(viraddr, viraddr+PAGE_SIZE, VMA_NOACCESS|VMA_READ|VMA_CACHEABLE);
 
+		invalidate_cl1();
 		page_owner[pageid] = ue;
+		mb();
 		invalidate_cl1();
 	}
 
diff --git a/arch/x86/scc/iRCCE_get.c b/arch/x86/scc/iRCCE_get.c
index 1b2efb83..cd7a5b39 100644
--- a/arch/x86/scc/iRCCE_get.c
+++ b/arch/x86/scc/iRCCE_get.c
@@ -42,7 +42,8 @@
 void* iRCCE_memcpy_get(void *dest, const void *src, size_t count)
 {
 #ifdef COPPERRIDGE
-  return memcpy_from_mpb(dest, src, count);
+  return memcpy_get(dest, src, count);
+  //return memcpy_from_mpb(dest, src, count);
 #else
   return memcpy(dest, src, count);
 #endif
diff --git a/arch/x86/scc/iRCCE_put.c b/arch/x86/scc/iRCCE_put.c
index 93cea070..a97e2ee1 100644
--- a/arch/x86/scc/iRCCE_put.c
+++ b/arch/x86/scc/iRCCE_put.c
@@ -42,7 +42,8 @@
 void* iRCCE_memcpy_put(void *dest, const void *src, size_t count)
 {
 #ifdef COPPERRIDGE
-  return memcpy_to_mpb(dest, src, count);
+  return memcpy_put(dest, src, count);
+  //return memcpy_to_mpb(dest, src, count);
 #else
   return memcpy(dest, src, count);
 #endif
diff --git a/arch/x86/scc/icc.c b/arch/x86/scc/icc.c
index 2785f6d6..865e86c6 100644
--- a/arch/x86/scc/icc.c
+++ b/arch/x86/scc/icc.c
@@ -263,12 +263,13 @@ int icc_mail_ping( void )
 void icc_mail_check(void)
 {
 	iRCCE_MAIL_HEADER* header = NULL;
-	int res;
 	uint64_t timer;
 	//char* recv_buffer;
 
 	// empty mailbox and interpret headers
-	while( 	(res = iRCCE_mail_recv( &header )) == iRCCE_SUCCESS ) {
+	while( 	iRCCE_mail_recv( &header ) == iRCCE_SUCCESS ) {
+		iRCCE_mailbox_print_header(header);
+
 		switch(header->tag)
 		{
 		case ICC_TAG_PINGREQUEST:
diff --git a/arch/x86/scc/scc_memcpy.h b/arch/x86/scc/scc_memcpy.h
index 8748a496..0386b26e 100644
--- a/arch/x86/scc/scc_memcpy.h
+++ b/arch/x86/scc/scc_memcpy.h
@@ -17,25 +17,10 @@
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  */ 
 
-/**
- * @author Stefan Lankey, Carsten Clauss
- * @file arch/x86/scc/scc_memcpy.h
- * @brief Special memcpy related implementations for the Intel SCC
- *
- * This file contains special SCC-efficient memcpy implementations 
- * to get memory from the RAM into the on-die memory or from the 
- * on-die memory into the RAM.
- */
-
 #ifndef __SCC_MEMCPY_H_
 #define __SCC_MEMPCY_H_
 
-#include <metalsvm/stddef.h>
-
-#ifdef CONFIG_ROCKCREEK
-
-/** @brief Fast procedure to get a byte range from RAM into on-die memory.
- *
+/*
  * A write access, which cache line is not present, doesn't perform (on the
  * current SCC architecture) a cache line fill. Therefore, the core writes 
  * in this case directly to the memory. 
@@ -43,14 +28,10 @@
  * The following function copies from the on-die  memory (MPB) to the off-die
  * memory and prefetchs its destintation. Therefore, the  function avoids the 
  * bad behavior of a "write miss".
- *
- * @param dest Destination address
- * @param src Source address
- * @param count Range size in bytes
  */ 
 inline static void *memcpy_get(void *dest, const void *src, size_t count)
 {
-	int32_t h, i, j, k, l, m;
+	int h, i, j, k, l, m;
 
 	asm volatile ("cld;\n\t"
 		      "1: cmpl $0, %%eax ; je 2f\n\t"
@@ -90,20 +71,15 @@ inline static void *memcpy_get(void *dest, const void *src, size_t count)
  * In our kernel, we didn't want to use FPU registers.
  * Therefore, we use standard memcpy routine
  */
-#define memcpy_put 	memcpy
+#define memcpy_put	memcpy
 #else
-/** @brief Fast procedure to get a byte range from on-die memory into RAM.
- *
+/*
  * If the destination is located on on-die memory (MPB), classical prefetching 
  * techniques will be used to increase the performance.
- *
- * @param dest Destination address
- * @param src Source address
- * @param count range size in bytes
  */
 inline static void *memcpy_put(void *dest, const void *src, size_t count)
 {
-	int32_t i, j, k, l;
+	int i, j, k, l;
 
 	/* 
 	 * We use the floating point registers to
@@ -175,5 +151,3 @@ inline static void *memcpy_put(void *dest, const void *src, size_t count)
 #endif
 
 #endif
-
-#endif
diff --git a/include/metalsvm/config.h.example b/include/metalsvm/config.h.example
index bd4c0b73..90f8b7c8 100644
--- a/include/metalsvm/config.h.example
+++ b/include/metalsvm/config.h.example
@@ -60,6 +60,7 @@ extern "C" {
 
 // RCCE specific flags
 #define SCC
+#define COPPERRIDGE
 #define MS_BAREMETAL
 //#define GORY
 #define SHMADD
diff --git a/kernel/tests.c b/kernel/tests.c
index bce1c56f..e5e1a8fe 100644
--- a/kernel/tests.c
+++ b/kernel/tests.c
@@ -161,15 +161,14 @@ static int svm_test(void *arg)
 	invalidate_cl1();
 
 	// Now, we need only read access on A and B
-	change_page_permissions((size_t) A[0], (size_t) (A[0]+2*N*N), VMA_CACHEABLE|VMA_READ);
+	//change_page_permissions((size_t) A[0], (size_t) (A[0]+2*N*N), VMA_CACHEABLE|VMA_READ);
 
 	// iRCCE is not thread save => disable interrupts
 	flags = irq_nested_disable();
 	RCCE_barrier(&RCCE_COMM_WORLD);
 	irq_nested_enable(flags);
 
-	if (!my_ue)
-		kprintf("Start calculation...\n");
+	kputs("Start calculation...\n");
 
 	// start calculation
 	for(i=my_ue*(N/num_ues); i<(my_ue+1)*(N/num_ues); i++)
@@ -182,6 +181,8 @@ static int svm_test(void *arg)
 	RCCE_barrier(&RCCE_COMM_WORLD);
 	irq_nested_enable(flags);
 
+	kputs("Calculation finished...\n");
+
 	svmfree((void*) A[0], 3*N*sizeof(int));
 
 	return 0;
@@ -352,8 +353,8 @@ int test_init(void)
 	//create_kernel_task(NULL, join_test, NULL);
 	//create_kernel_task(NULL, producer, NULL);
 	//create_kernel_task(NULL, consumer, NULL);
-	//create_kernel_task(NULL, mail_ping, NULL);
-	create_kernel_task(NULL, svm_test, NULL);
+	create_kernel_task(NULL, mail_ping, NULL);
+	//create_kernel_task(NULL, svm_test, NULL);
 	//create_user_task(NULL, "/bin/hello", argv);
 	//create_user_task(NULL, "/bin/tests", argv);
 	//create_user_task(NULL, "/bin/jacobi", argv);

From 4c9855c83a773efd5ea88c6474ece5cca52df916 Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Wed, 17 Aug 2011 13:51:19 +0200
Subject: [PATCH 12/37] redesign of the scheduler, defining of a runqueue per
 core

=> Currently, work stealing isn't supported
---
 arch/x86/include/asm/processor.h |  16 ++
 arch/x86/kernel/timer.c          |   7 +-
 include/metalsvm/fs.h            |   6 +-
 include/metalsvm/semaphore.h     |   7 +-
 include/metalsvm/tasks.h         |  21 +-
 include/metalsvm/tasks_types.h   |  45 +++-
 kernel/client.c                  |   2 +-
 kernel/main.c                    |   3 +-
 kernel/server.c                  |   4 +-
 kernel/tasks.c                   | 400 +++++++++++++++++++++++--------
 kernel/tests.c                   |  14 +-
 lwip/src/arch/sys_arch.c         |   2 +-
 lwip/src/include/lwipopts.h      |  14 ++
 13 files changed, 400 insertions(+), 141 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index da3b3556..182db412 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -272,6 +272,22 @@ static inline uint32_t read_eflags(void)
 	return result;
 }
 
+/** @brief search the most significant bit, which is set
+ *
+ * @param i source operand
+ * @return index of the most significant set bit of i (0 if i is zero)
+ */
+static inline uint32_t last_set(uint32_t i)
+{
+	uint32_t ret;
+
+	if (!i)
+		return 0;
+	asm volatile ("bsr %1, %0" : "=r"(ret) : "r"(i) : "cc");
+
+	return ret;
+}
+
 /** @brief Read extended instruction pointer
  * @return The EIP's value
  */
diff --git a/arch/x86/kernel/timer.c b/arch/x86/kernel/timer.c
index 8a806eb6..ffc7bf00 100644
--- a/arch/x86/kernel/timer.c
+++ b/arch/x86/kernel/timer.c
@@ -104,12 +104,7 @@ int timer_wait(unsigned int ticks)
 		check_workqueues();
 
 		if (timer_ticks < eticks) {
-			uint32_t flags = irq_nested_disable();
-			curr_task->timeout = eticks;
-			curr_task->flags |= TASK_TIMER_USED;
-			curr_task->status = TASK_BLOCKED;
-			irq_nested_enable(flags);
-
+			set_timer(eticks);
 			reschedule();
 		}
 	}
diff --git a/include/metalsvm/fs.h b/include/metalsvm/fs.h
index 56eba752..10b8412a 100644
--- a/include/metalsvm/fs.h
+++ b/include/metalsvm/fs.h
@@ -76,11 +76,11 @@ typedef struct block_list {
 } block_list_t;
 
 typedef struct vfs_node {
-  /// The permissions mask.
+	/// The permissions mask.
 	uint32_t mask;		
-  /// The owning user.
+	/// The owning user.
 	uint32_t uid;		
-  /// The owning group.
+	/// The owning group.
 	uint32_t gid;		
 	/// Includes the node type. See #defines above.
 	uint32_t type;		
diff --git a/include/metalsvm/semaphore.h b/include/metalsvm/semaphore.h
index 19ad8e38..c0dc0f81 100644
--- a/include/metalsvm/semaphore.h
+++ b/include/metalsvm/semaphore.h
@@ -124,7 +124,7 @@ next_try1:
 		} else {
 			s->queue[s->pos] = curr_task->id;
 			s->pos = (s->pos + 1) % MAX_TASKS;
-			curr_task->status = TASK_BLOCKED;
+			block_current_task();
 			spinlock_irqsave_unlock(&s->lock);
 			reschedule();
 			NOP2;
@@ -152,11 +152,10 @@ next_try2:
 				}
 				s->queue[s->pos] = curr_task->id;
 				s->pos = (s->pos + 1) % MAX_TASKS;
-				curr_task->timeout = deadline;
-				curr_task->flags |= TASK_TIMER_USED;
-				curr_task->status = TASK_BLOCKED;
+				set_timer(deadline);
 				spinlock_irqsave_unlock(&s->lock);
 				reschedule();
+				NOP2;
 				goto next_try2;
 			}
 		}
diff --git a/include/metalsvm/tasks.h b/include/metalsvm/tasks.h
index 0b0bcdac..3be44077 100644
--- a/include/metalsvm/tasks.h
+++ b/include/metalsvm/tasks.h
@@ -65,7 +65,7 @@ int multitasking_init(void);
  * - 0 on success
  * - -EINVAL (-22) on failure
  */
-int create_kernel_task(tid_t* id, entry_point_t ep, void* arg);
+int create_kernel_task(tid_t* id, entry_point_t ep, void* arg, uint8_t prio);
 
 /** @brief Create a user level task. 
  *
@@ -100,6 +100,25 @@ void scheduler(void);
  */
 int wakeup_task(tid_t);
 
+/** @brief Block current task
+ *
+ * The current task's status will be changed to TASK_BLOCKED
+ *
+ * @return
+ * - 0 on success
+ * - -EINVAL (-22) on failure
+ */
+int block_current_task(void);
+
+/** @brief Block current task until timer expires
+ *
+ * @param deadline Clock tick, when the timer expires
+ * @return
+ *  - 0 on success
+ *  - -EINVAL (-22) on failure
+ */
+int set_timer(uint64_t deadline);
+
 /** @brief Abort current task */
 void NORETURN abort(void);
 
diff --git a/include/metalsvm/tasks_types.h b/include/metalsvm/tasks_types.h
index 3379fccb..214df8e7 100644
--- a/include/metalsvm/tasks_types.h
+++ b/include/metalsvm/tasks_types.h
@@ -40,6 +40,13 @@
 extern "C" {
 #endif
 
+#define MAX_PRIO 	32
+#define REALTIME_PRIO	31
+#define HIGH_PRIO	16
+#define NORMAL_PRIO	8
+#define LOW_PRIO	1
+#define IDLE_PRIO	0
+
 #define TASK_INVALID	0
 #define TASK_READY	1
 #define TASK_RUNNING	2
@@ -50,8 +57,6 @@ extern "C" {
 #define TASK_DEFAULT_FLAGS		0
 #define TASK_FPU_INIT			(1 << 0)
 #define TASK_FPU_USED			(1 << 1)
-#define TASK_TIMER_USED			(1 << 2)
-#define TASK_SWITCH_IN_PROGRESS		(1 << 3)
 
 typedef int (*entry_point_t)(void*);
 typedef int (STDCALL *internal_entry_point_t)(void*);
@@ -64,11 +69,15 @@ typedef struct task {
 	/// Task status (INVALID, READY, RUNNING, ...)
 	uint32_t		status;
 	/// Additional status flags. For instance, to signalize the using of the FPU
-	uint32_t		flags;
-	/// Number of used time slices
-	uint32_t		time_slices;
+	uint8_t			flags;
+	/// Task priority
+	uint8_t			prio;
 	/// timeout for a blocked task
 	uint64_t		timeout;
+	/// next task in the queue
+	struct task*		next;
+	/// previous task in the queue
+	struct task*		prev;
 	/// Usage in number of pages
 	atomic_int32_t		user_usage;
 	/// Avoids concurrent access to the page directory
@@ -82,13 +91,11 @@ typedef struct task {
 	/// starting time/tick of the task
 	uint64_t		start_tick;
 	/// Start address of the heap
-	uint32_t		start_heap;
+	size_t			start_heap;
 	/// End address of the heap
-	uint32_t		end_heap;
-#ifdef CONFIG_LWIP
+	size_t			end_heap;
 	/// LwIP error code
 	int			lwip_err;
-#endif
 	/// Mail inbox
 	mailbox_wait_msg_t	inbox;	
 	/// Mail outbox array
@@ -97,6 +104,26 @@ typedef struct task {
 	union fpu_state		fpu;
 } task_t;
 
+typedef struct {
+	task_t*	first;
+	task_t* last;
+} task_list_t;
+
+typedef struct {
+	/// idle task
+	task_t* 	idle __attribute__ ((aligned (CACHE_LINE)));
+	/// previous task
+	task_t*		old_task;
+	/// indicates the used priority queues
+	uint32_t	prio_bitmap;
+	/// a queue for each priority
+	task_list_t 	queue[MAX_PRIO];
+	/// a queue for timers
+	task_list_t	timers;
+	/// lock for this runqueue
+	spinlock_t lock;
+} runqueue_t;
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/kernel/client.c b/kernel/client.c
index 386e69a7..0c3982fb 100644
--- a/kernel/client.c
+++ b/kernel/client.c
@@ -42,7 +42,7 @@ int cli_ConnectTo(Client* cli,char * pAdresse,unsigned short Port,int webAdresse
         if (connect(cli->sSocket,(const struct sockaddr*)&cli->adAddr, sizeof(cli->adAddr))==0)
         {
 
-                create_kernel_task(&cli->bThread,cli_WaitForPacket,cli);
+                create_kernel_task(&cli->bThread,cli_WaitForPacket,cli, NORMAL_PRIO);
 	
                 if (cli->_OnConnect != 0)
                 {
diff --git a/kernel/main.c b/kernel/main.c
index f89d2bc7..b35c947b 100644
--- a/kernel/main.c
+++ b/kernel/main.c
@@ -89,8 +89,7 @@ int main(void)
 	kprintf("Current available memory: %u MBytes\n", atomic_int32_read(&total_available_pages)/((1024*1024)/PAGE_SIZE));
 
 	sleep(5);
-	create_kernel_task(NULL, initd, NULL);
-	per_core(current_task)->time_slices = 0; // reset the number of time slices
+	create_kernel_task(NULL, initd, NULL, NORMAL_PRIO);
 	reschedule();
 
 	while(1) { 
diff --git a/kernel/server.c b/kernel/server.c
index bd5c441a..1d988eaa 100644
--- a/kernel/server.c
+++ b/kernel/server.c
@@ -78,7 +78,7 @@ void* srv_WaitForConnection(Server* srv)
                                                 t = (ServerThreadArgs*) kmalloc(sizeof(ServerThreadArgs));
 						t->ID = i;
 						t->srv = srv;
-                                                create_kernel_task(&srv->bThreads[i],srv_WaitForPacket,t);
+                                                create_kernel_task(&srv->bThreads[i],srv_WaitForPacket,t, NORMAL_PRIO);
 
 						break;
 					}
@@ -175,7 +175,7 @@ int server_init(Server* srv, unsigned short Port, unsigned int dwMaxConnections)
 	bind( srv->sSocket,(const struct sockaddr *) &srv->adAddr, sizeof(srv->adAddr));	// Der Server an die Adresse binden;
 	listen(srv->sSocket,srv->dwMaximumConnections);							// Den Server in listenig State versetzen
 
-        create_kernel_task(&srv->bThread_listen,srv_WaitForConnection,srv);
+        create_kernel_task(&srv->bThread_listen,srv_WaitForConnection,srv, NORMAL_PRIO);
 //	sConnections[0] = accept(sSocket,(struct sockaddr*)&tmpAddr,&tmpAddrLen);
 //	t.ID = 0;
 //	bthread_create(&bThreads[0],NULL,(start_routine) srv_WaitForPacket,&t);
diff --git a/kernel/tasks.c b/kernel/tasks.c
index 2dec1348..cf16ef40 100644
--- a/kernel/tasks.c
+++ b/kernel/tasks.c
@@ -47,14 +47,14 @@
  * A task's id will be its position in this array.
  */
 static task_t task_table[MAX_TASKS] = { \
-		[0]                 = {0, TASK_IDLE, 0, 0, 0, ATOMIC_INIT(0), SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, 0, 0, 0, 0}, \
-		[1 ... MAX_TASKS-1] = {0, TASK_INVALID, 0, 0, 0, ATOMIC_INIT(0), SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, 0, 0, 0, 0}};
+		[0]                 = {0, TASK_IDLE,    0, 0, 0, NULL, NULL, ATOMIC_INIT(0), SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, 0, 0, 0, 0}, \
+		[1 ... MAX_TASKS-1] = {0, TASK_INVALID, 0, 0, 0, NULL, NULL, ATOMIC_INIT(0), SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, 0, 0, 0, 0}};
 static spinlock_irqsave_t table_lock = SPINLOCK_IRQSAVE_INIT;
+static runqueue_t runqueues[MAX_CORES] = { \
+		[0]                 = {task_table+0, NULL, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_INIT}, \
+		[1 ... MAX_CORES-1] = {NULL,         NULL, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_INIT}};
 
 DEFINE_PER_CORE(task_t*, current_task, task_table+0);
-#if MAX_CORES > 1
-DEFINE_PER_CORE_STATIC(task_t*, old_task, NULL);
-#endif
 
 /** @brief helper function for the assembly code to determine the current task
  * @return Pointer to the task_t structure of current task
@@ -65,6 +65,7 @@ task_t* get_current_task(void) {
 
 int dump_scheduling_statistics(void)
 {
+#if 0
 	uint32_t i;
 	uint32_t id = 0;
 
@@ -77,7 +78,7 @@ int dump_scheduling_statistics(void)
 			id++;
 		}
 	}
-
+#endif
 	return 0;
 }
 
@@ -91,6 +92,7 @@ int multitasking_init(void) {
 	memset(task_table[0].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
 	task_table[0].pgd = get_boot_pgd();
 	task_table[0].flags = TASK_DEFAULT_FLAGS;
+	task_table[0].prio = IDLE_PRIO;
 
 	return 0;
 }
@@ -103,13 +105,14 @@ size_t get_idle_task(uint32_t id)
 
 	task_table[id].id = id;
 	task_table[id].status = TASK_IDLE;
+	task_table[id].prio = IDLE_PRIO;
 	task_table[id].flags = TASK_DEFAULT_FLAGS;
-	task_table[id].time_slices = 0;
 	atomic_int32_set(&task_table[id].user_usage, 0);
 	mailbox_wait_msg_init(&task_table[id].inbox);
 	memset(task_table[id].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
 	task_table[id].pgd = get_boot_pgd();
 	current_task[id].var = task_table+id;
+	runqueues[id].idle = task_table+id;
 
 	return get_stack(id);
 #else
@@ -117,6 +120,32 @@ size_t get_idle_task(uint32_t id)
 #endif
 }
 
+static void finish_task_switch(void)
+{
+	uint8_t prio;
+	uint32_t core_id = CORE_ID;
+	task_t* old;
+
+	spinlock_lock(&runqueues[core_id].lock);
+	if ((old = runqueues[core_id].old_task) != NULL) {
+		prio = old->prio;
+		if (!runqueues[core_id].queue[prio].first) {
+			old->prev = NULL;
+			runqueues[core_id].queue[prio].first = runqueues[core_id].queue[prio].last = old;
+		} else {
+			old->prev = runqueues[core_id].queue[prio].last;
+			runqueues[core_id].queue[prio].last->next = old;
+			runqueues[core_id].queue[prio].last = old;
+		}
+		runqueues[core_id].old_task = NULL;
+		runqueues[core_id].prio_bitmap |= (1 << prio);
+		old->next = NULL;
+	}
+	spinlock_unlock(&runqueues[core_id].lock);
+
+	irq_enable();
+}
+
 /** @brief Wakeup tasks which are waiting for a message from the current one
  *
  * @param result Current task's resulting return value 
@@ -203,14 +232,18 @@ void NORETURN abort(void) {
  * - 0 on success
  * - -ENOMEM (-12) or -EINVAL (-22) on failure
  */
-static int create_task(tid_t* id, internal_entry_point_t ep, void* arg)
+static int create_task(tid_t* id, internal_entry_point_t ep, void* arg, uint8_t prio)
 {
 	task_t* curr_task;
 	int ret = -ENOMEM;
-	unsigned int i;
+	unsigned int i, core_id = CORE_ID;
 
 	if (BUILTIN_EXPECT(!ep, 0))
 		return -EINVAL;
+	if (BUILTIN_EXPECT(prio == IDLE_PRIO, 0))
+		return -EINVAL;
+	if (BUILTIN_EXPECT(prio >= MAX_PRIO, 0))
+		return -EINVAL;
 
 	spinlock_irqsave_lock(&table_lock);
 
@@ -229,7 +262,7 @@ static int create_task(tid_t* id, internal_entry_point_t ep, void* arg)
 			task_table[i].id = i;
 			task_table[i].status = TASK_READY;
 			task_table[i].flags = TASK_DEFAULT_FLAGS;
-			task_table[i].time_slices = 0;
+			task_table[i].prio = prio;
 			spinlock_init(&task_table[i].vma_lock);
 			task_table[i].vma_list = NULL;
 			mailbox_wait_msg_init(&task_table[i].inbox);
@@ -245,6 +278,22 @@ static int create_task(tid_t* id, internal_entry_point_t ep, void* arg)
 			task_table[i].end_heap = 0;
 			task_table[i].lwip_err = 0;
 			task_table[i].start_tick = get_clock_tick();
+
+			// add task in the runqueue
+			spinlock_lock(&runqueues[core_id].lock);
+			runqueues[core_id].prio_bitmap |= (1 << prio);
+			if (!runqueues[core_id].queue[prio].first) {
+				task_table[i].prev = NULL;
+				runqueues[core_id].queue[prio].first = task_table+i;
+				runqueues[core_id].queue[prio].last = task_table+i;
+				task_table[i].next = NULL;
+			} else {
+				task_table[i].prev = runqueues[core_id].queue[prio].last;
+				runqueues[core_id].queue[prio].last->next = task_table+i;
+				runqueues[core_id].queue[prio].last = task_table+i;
+				task_table[i].next = NULL;
+			}
+			spinlock_unlock(&runqueues[core_id].lock);
 			break;
 		}
 	}
@@ -258,7 +307,7 @@ create_task_out:
 int sys_fork(void)
 {
 	int ret = -ENOMEM;
-	unsigned int i;
+	unsigned int i, core_id = CORE_ID;
 	task_t* parent_task = per_core(current_task);
 	vma_t** child;
 	vma_t* parent;
@@ -304,12 +353,29 @@ int sys_fork(void)
 			mailbox_wait_msg_init(&task_table[i].inbox);
 			memset(task_table[i].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
 			task_table[i].outbox[parent_task->id] = &parent_task->inbox; 
-			task_table[i].flags = parent_task->flags & ~TASK_SWITCH_IN_PROGRESS;
+			task_table[i].flags = parent_task->flags;
 			memcpy(&(task_table[i].fpu), &(parent_task->fpu), sizeof(union fpu_state));
 			task_table[i].start_tick = get_clock_tick();
 			task_table[i].start_heap = 0;
 			task_table[i].end_heap = 0;
 			task_table[i].lwip_err = 0;
+			task_table[i].prio = parent_task->prio;
+
+			// add task in the runqueue
+			spinlock_lock(&runqueues[core_id].lock);
+			runqueues[core_id].prio_bitmap |= (1 << parent_task->prio);
+			if (!runqueues[core_id].queue[parent_task->prio].first) {
+				task_table[i].prev = NULL;
+				runqueues[core_id].queue[parent_task->prio].first = task_table+i;
+				runqueues[core_id].queue[parent_task->prio].last = task_table+i;
+				task_table[i].next = NULL;
+			} else {
+				task_table[i].prev = runqueues[core_id].queue[parent_task->prio].last;
+				runqueues[core_id].queue[parent_task->prio].last->next = task_table+i;
+				runqueues[core_id].queue[parent_task->prio].last = task_table+i;
+				task_table[i].next = NULL;
+			}
+			spinlock_unlock(&runqueues[core_id].lock);
 
 			ret = arch_fork(task_table+i);
 
@@ -318,13 +384,7 @@ int sys_fork(void)
 				// Leave the function without releasing the locks
 				// because the locks are already released 
 				// by the parent task!
-#if MAX_CORES > 1
-				task_t* old = per_core(old_task);
-
-				if (old)
-					old->flags &= ~TASK_SWITCH_IN_PROGRESS;
-#endif
-				irq_enable();
+				finish_task_switch();
 				return 0; 
 			}
 
@@ -358,13 +418,8 @@ static int STDCALL kernel_entry(void* args)
 {
 	int ret;
 	kernel_args_t* kernel_args = (kernel_args_t*) args;
-#if MAX_CORES > 1
-	task_t* old = per_core(old_task);
 
-	if (old)
-		old->flags &= ~TASK_SWITCH_IN_PROGRESS;
-#endif
-	irq_enable();
+	finish_task_switch();
 
 	if (BUILTIN_EXPECT(!kernel_args, 0))
 		return -EINVAL;
@@ -376,7 +431,7 @@ static int STDCALL kernel_entry(void* args)
 	return ret;
 }
 
-int create_kernel_task(tid_t* id, entry_point_t ep, void* args)
+int create_kernel_task(tid_t* id, entry_point_t ep, void* args, uint8_t prio)
 {
 	kernel_args_t* kernel_args;
 
@@ -387,7 +442,10 @@ int create_kernel_task(tid_t* id, entry_point_t ep, void* args)
 	kernel_args->func = ep;
 	kernel_args->args = args;
 
-	return create_task(id, kernel_entry, kernel_args);
+	if (prio >= MAX_PRIO)
+		prio = NORMAL_PRIO;
+
+	return create_task(id, kernel_entry, kernel_args, prio);
 }
 
 #define MAX_ARGS	(PAGE_SIZE - 2*sizeof(int) - sizeof(vfs_node_t*))
@@ -616,13 +674,8 @@ invalid:
 static int STDCALL user_entry(void* arg)
 {
 	int ret;
-#if MAX_CORES > 1
-	task_t* old = per_core(old_task);
 
-	if (old)
-		old->flags &= ~TASK_SWITCH_IN_PROGRESS;
-#endif
-	irq_enable();
+	finish_task_switch();
 
 	if (BUILTIN_EXPECT(!arg, 0))
 		return -EINVAL;
@@ -680,7 +733,7 @@ int create_user_task(tid_t* id, const char* fname, char** argv)
 		while ((*dest++ = *src++) != 0);
 	}
 
-	return create_task(id, user_entry, load_args);
+	return create_task(id, user_entry, load_args, NORMAL_PRIO);
 }
 
 /** @brief Used by the execve-Systemcall */
@@ -791,54 +844,182 @@ tid_t wait(int32_t* result)
  */
 int wakeup_task(tid_t id)
 {
+	task_t* task;
+	uint32_t core_id, prio;
+	uint32_t flags;
 	int ret = -EINVAL;
 
-	spinlock_irqsave_lock(&table_lock);
+	flags = irq_nested_disable();
+
+	core_id = CORE_ID;
+	task = task_table + id;
+	prio = task->prio;
 
 	if (task_table[id].status == TASK_BLOCKED) {
 		task_table[id].status = TASK_READY;
 		ret = 0;
+
+		spinlock_lock(&runqueues[core_id].lock);
+		// add task to the runqueue
+		if (!runqueues[core_id].queue[prio].last) {
+			runqueues[core_id].queue[prio].last = runqueues[core_id].queue[prio].first = task;
+			task->next = task->prev = NULL;
+			runqueues[core_id].prio_bitmap |= (1 << prio);
+		} else {
+			task->prev = runqueues[core_id].queue[prio].last;
+			task->next = NULL;
+			runqueues[core_id].queue[prio].last->next = task;
+			runqueues[core_id].queue[prio].last = task;
+		}
+		spinlock_unlock(&runqueues[core_id].lock);
 	}
 
-	spinlock_irqsave_unlock(&table_lock);
+	irq_nested_enable(flags);
 
 	return ret;
 }
 
-/*
- * we use this struct to guarantee that the id
- * has its own cache line
- */
-typedef struct {
-	uint32_t	id __attribute__ ((aligned (CACHE_LINE)));
-	uint8_t		gap[CACHE_LINE-sizeof(uint32_t)];
-} last_id_t;
-
-/** @brief _The_ scheduler procedure
+/** @brief Block current task
  *
- * Manages scheduling - right now this is just a round robin scheduler.
+ * The current task's status will be changed to TASK_BLOCKED
+ *
+ * @return
+ * - 0 on success
+ * - -EINVAL (-22) on failure
  */
-void scheduler(void) 
+int block_current_task(void)
+{
+	task_t* curr_task;
+	tid_t id;
+	uint32_t core_id, prio;
+	uint32_t flags;
+	int ret = -EINVAL;
+
+	flags = irq_nested_disable();
+
+	curr_task = per_core(current_task);
+	id = curr_task->id;
+	prio = curr_task->prio;
+	core_id = CORE_ID;
+
+	if (task_table[id].status == TASK_RUNNING) {
+		task_table[id].status = TASK_BLOCKED;
+		ret = 0;
+
+		spinlock_lock(&runqueues[core_id].lock);
+
+		// remove task from queue
+		if (task_table[id].prev)
+			task_table[id].prev->next = task_table[id].next;
+		if (task_table[id].next)
+			task_table[id].next->prev = task_table[id].prev;
+		if (runqueues[core_id].queue[prio].first == task_table+id)
+			runqueues[core_id].queue[prio].first = task_table[id].next;
+		if (runqueues[core_id].queue[prio].last == task_table+id) {
+			runqueues[core_id].queue[prio].last = task_table[id].prev;
+			if (!runqueues[core_id].queue[prio].last)
+				runqueues[core_id].queue[prio].last = runqueues[core_id].queue[prio].first;
+		}
+
+		// No valid task in queue => update prio_bitmap
+		if (!runqueues[core_id].queue[prio].first)
+			runqueues[core_id].prio_bitmap &= ~(1 << prio);
+
+		spinlock_unlock(&runqueues[core_id].lock);
+	}
+
+	irq_nested_enable(flags);
+
+	return ret;
+}
+
+int set_timer(uint64_t deadline)
+{
+	task_t* curr_task;
+	task_t* tmp;
+	uint32_t core_id, prio;
+	uint32_t flags;
+	int ret = -EINVAL;
+
+	flags = irq_nested_disable();
+
+	curr_task = per_core(current_task);
+	prio = curr_task->prio;
+	core_id = CORE_ID;
+
+	if (curr_task->status == TASK_RUNNING) {
+		curr_task->status = TASK_BLOCKED;
+		curr_task->timeout = deadline;
+		ret = 0;
+
+		spinlock_lock(&runqueues[core_id].lock);
+
+		// remove task from queue
+		if (curr_task->prev)
+			curr_task->prev->next = curr_task->next;
+		if (curr_task->next)
+			curr_task->next->prev = curr_task->prev;
+		if (runqueues[core_id].queue[prio].first == curr_task)
+			runqueues[core_id].queue[prio].first = curr_task->next;
+		if (runqueues[core_id].queue[prio].last == curr_task) {
+			runqueues[core_id].queue[prio].last = curr_task->prev;
+			if (!runqueues[core_id].queue[prio].last)
+				runqueues[core_id].queue[prio].last = runqueues[core_id].queue[prio].first;
+		}
+
+		// No valid task in queue => update prio_bitmap
+		if (!runqueues[core_id].queue[prio].first)
+			runqueues[core_id].prio_bitmap &= ~(1 << prio);
+
+		// add task to the timer queue
+		tmp = runqueues[core_id].timers.first;
+		if (!tmp) {
+			runqueues[core_id].timers.first = runqueues[core_id].timers.last = curr_task;
+			curr_task->prev = curr_task->next = NULL;
+		} else {
+			while(tmp && (deadline >= tmp->timeout))
+				tmp = tmp->next;
+
+			if (!tmp) {
+				curr_task->next = NULL;
+				curr_task->prev = runqueues[core_id].timers.last;
+				if (runqueues[core_id].timers.last)
+					runqueues[core_id].timers.last->next = curr_task;
+				runqueues[core_id].timers.last = curr_task;
+				if (!runqueues[core_id].timers.first)
+					runqueues[core_id].timers.first = curr_task;
+			} else {
+				curr_task->prev = tmp->prev;
+				curr_task->next = tmp;
+				tmp->prev = curr_task;
+				if (curr_task->prev)
+					curr_task->prev->next = curr_task;
+				if (runqueues[core_id].timers.first == tmp)
+					runqueues[core_id].timers.first = curr_task;
+			}
+		}
+
+		spinlock_unlock(&runqueues[core_id].lock);
+	} else kprintf("Task is already blocked. No timer will be set!\n");
+
+	irq_nested_enable(flags);
+
+	return ret;
+}
+
+void scheduler(void)
 {
 	task_t* orig_task;
 	task_t* curr_task;
-	uint32_t i;
-	uint32_t new_id;
+	uint32_t core_id = CORE_ID;
+	uint32_t prio;
 	uint64_t current_tick;
-	static last_id_t last_id = { 0 };
 
-#if MAX_CORES > 1
-	spinlock_irqsave_lock(&table_lock);
-#endif
-	current_tick = get_clock_tick();
 	orig_task = curr_task = per_core(current_task);
 
-	/* increase the number of used time slices */
-	curr_task->time_slices++;
-
 	/* signalizes that this task could be reused */
 	if (curr_task->status == TASK_FINISHED)
-		curr_task->status = TASK_INVALID; 
+		curr_task->status = TASK_INVALID;
 
 	/* if the task is using the FPU, we need to save the FPU context */
 	if (curr_task->flags & TASK_FPU_USED) {
@@ -846,64 +1027,73 @@ void scheduler(void)
 		curr_task->flags &= ~TASK_FPU_USED;
 	}
 
-	for(i=0, new_id=(last_id.id + 1) % MAX_TASKS;
-		i<MAX_TASKS; i++, new_id=(new_id+1) % MAX_TASKS) 
+	spinlock_lock(&runqueues[core_id].lock);
+
+	// check timers
+	current_tick = get_clock_tick();
+	while (runqueues[core_id].timers.first && runqueues[core_id].timers.first->timeout <= current_tick)
 	{
-		if (task_table[new_id].flags & TASK_TIMER_USED) {
-			if (task_table[new_id].status != TASK_BLOCKED)
-				task_table[new_id].flags &= ~TASK_TIMER_USED;
-			if ((task_table[new_id].status == TASK_BLOCKED) && (current_tick >= task_table[new_id].timeout)) {
-				task_table[new_id].flags &= ~TASK_TIMER_USED;
-				task_table[new_id].status = TASK_READY;
-			}
-		}
+		task_t* task = runqueues[core_id].timers.first;
 
-		if ((task_table[new_id].status == TASK_READY) && !(task_table[new_id].flags & TASK_SWITCH_IN_PROGRESS)) {
-			if (curr_task->status == TASK_RUNNING) {
-				curr_task->status = TASK_READY;
-#if MAX_CORES > 1
-				curr_task->flags |= TASK_SWITCH_IN_PROGRESS;
-				per_core(old_task) = curr_task;
-#endif
-			}
-#if MAX_CORES > 1
-			else per_core(old_task) = NULL;
-#endif
-			task_table[new_id].status = TASK_RUNNING;
-			curr_task = per_core(current_task) = task_table+new_id;
-			last_id.id = new_id;
+		// remove timer from queue
+		runqueues[core_id].timers.first = runqueues[core_id].timers.first->next;
+		if (!runqueues[core_id].timers.first)
+			runqueues[core_id].timers.last = NULL;
 
-			goto get_task_out;
+		// wakeup task
+		if (task->status == TASK_BLOCKED) {
+			task->status = TASK_READY;
+			prio = task->prio;
+
+			// add task to the runqueue
+			if (!runqueues[core_id].queue[prio].first) {
+				runqueues[core_id].queue[prio].last = runqueues[core_id].queue[prio].first = task;
+				task->next = task->prev = NULL;
+				runqueues[core_id].prio_bitmap |= (1 << prio);
+			} else {
+				task->prev = runqueues[core_id].queue[prio].last;
+				task->next = NULL;
+				runqueues[core_id].queue[prio].last->next = task;
+				runqueues[core_id].queue[prio].last = task;
+			}
 		}
 	}
 
-#if MAX_CORES > 1
-	per_core(old_task) = NULL;
-#endif
+	runqueues[core_id].old_task = NULL; // reset old task
+	prio = last_set(runqueues[core_id].prio_bitmap); // determines highest priority
 
-	if ((curr_task->status == TASK_RUNNING) || (curr_task->status == TASK_IDLE))
-		goto get_task_out;
+	if (!prio) {
+		if ((curr_task->status == TASK_RUNNING) || (curr_task->status == TASK_IDLE))
+			goto get_task_out;
+		curr_task = per_core(current_task) = runqueues[core_id].idle;
+	} else {
+		// Does the current task have an higher priority? => no task switch
+		if ((curr_task->prio > prio) && (curr_task->status == TASK_RUNNING))
+			goto get_task_out;
 
-	/* 
-	 * we switch to the idle task, if the current task terminates 
-	 * and no other is ready
-	 */
-	new_id = CORE_ID;
-	curr_task = per_core(current_task) = task_table+CORE_ID;
+		if (curr_task->status == TASK_RUNNING) {
+			curr_task->status = TASK_READY;
+			runqueues[core_id].old_task = curr_task;
+		}
+
+		curr_task = per_core(current_task) = runqueues[core_id].queue[prio].first;
+		curr_task->status = TASK_RUNNING;
+
+		// remove new task from queue
+		runqueues[core_id].queue[prio].first = curr_task->next;
+		if (!curr_task->next) {
+			runqueues[core_id].queue[prio].last = NULL;
+			runqueues[core_id].prio_bitmap &= ~(1 << prio);
+		}
+	}
 
 get_task_out:
-#if MAX_CORES > 1
-	spinlock_irqsave_unlock(&table_lock);
-#endif
+	spinlock_unlock(&runqueues[core_id].lock);
 
 	if (curr_task != orig_task) {
-		//kprintf("schedule from %d to %d on core %d\n", orig_task->id, curr_task->id, smp_id());
-		switch_task(new_id);
-#if MAX_CORES > 1
-		orig_task= per_core(old_task);
-		if (orig_task)
-			orig_task->flags &= ~TASK_SWITCH_IN_PROGRESS;
-#endif
+		kprintf("schedule from %u to %u with prio %u on core %u\n",
+			orig_task->id, curr_task->id, (uint32_t)curr_task->prio, CORE_ID);
+		switch_task(curr_task->id);
 	}
 }
 
diff --git a/kernel/tests.c b/kernel/tests.c
index 5b67a06d..d04cf938 100644
--- a/kernel/tests.c
+++ b/kernel/tests.c
@@ -118,7 +118,7 @@ static int join_test(void* arg)
 	tid_t 	id, ret;
 	int 	result = -1234;
 
-	create_kernel_task(&id, foo, "Hello from foo2");
+	create_kernel_task(&id, foo, "Hello from foo2", HIGH_PRIO);
 
 	kprintf("Wait for child %u\n", id);
 	do {
@@ -273,16 +273,16 @@ int test_init(void)
 //		create_kernel_task(NULL,client_task,NULL);
 #endif
 
-	create_kernel_task(NULL, foo, "Hello from foo1");
-	create_kernel_task(NULL, join_test, NULL);
-	//create_kernel_task(NULL, producer, NULL);
-	//create_kernel_task(NULL, consumer, NULL);
-	//create_kernel_task(NULL, mail_ping, NULL);
+	create_kernel_task(NULL, foo, "Hello from foo1", NORMAL_PRIO);
+	create_kernel_task(NULL, join_test, NULL, NORMAL_PRIO);
+	//create_kernel_task(NULL, producer, , NORMAL_PRIO);
+	//create_kernel_task(NULL, consumer, NULL, NORMAL_PRIO);
+	//create_kernel_task(NULL, mail_ping, NULL, NORMAL_PRIO);
 	//create_user_task(NULL, "/bin/hello", argv);
 	create_user_task(NULL, "/bin/tests", argv);
 	//create_user_task(NULL, "/bin/jacobi", argv);
 	//create_user_task(NULL, "/bin/jacobi", argv);
-	create_user_task(NULL, "/bin/server", server_argv);
+	//create_user_task(NULL, "/bin/server", server_argv);
 	//sleep(5);
 	//create_user_task(NULL, "/bin/client", client_argv);
 
diff --git a/lwip/src/arch/sys_arch.c b/lwip/src/arch/sys_arch.c
index b46f0fd7..781e3bdd 100644
--- a/lwip/src/arch/sys_arch.c
+++ b/lwip/src/arch/sys_arch.c
@@ -85,7 +85,7 @@ sys_thread_t sys_thread_new(const char *name, lwip_thread_fn thread, void *arg,
 {
 	sys_thread_t tmp;
 
-	create_kernel_task(&tmp, thread, arg);
+	create_kernel_task(&tmp, thread, arg, prio);
 	kprintf("Created LWIP task %s with id %u\n", name, tmp);
 
 	return tmp;
diff --git a/lwip/src/include/lwipopts.h b/lwip/src/include/lwipopts.h
index fb9ebf6e..4ecdc944 100644
--- a/lwip/src/include/lwipopts.h
+++ b/lwip/src/include/lwipopts.h
@@ -104,6 +104,20 @@
  */
 #define IP_FORWARD		1
 
+/**
+ * TCPIP_THREAD_PRIO: The priority assigned to the main tcpip thread.
+ * The priority value itself is platform-dependent, but is passed to
+ * sys_thread_new() when the thread is created.
+ */
+#define TCPIP_THREAD_PRIO	HIGH_PRIO
+
+/**
+ * DEFAULT_THREAD_PRIO: The priority assigned to any other lwIP thread.
+ * The priority value itself is platform-dependent, but is passed to
+ * sys_thread_new() when the thread is created.
+ */
+#define DEFAULT_THREAD_PRIO	NORMAL_PRIO
+
 /* DEBUG options */
 #define LWIP_DEBUG		1
 #define DHCP_DEBUG		LWIP_DBG_OFF

From 296e8e98f4437cfd4c0665fc39c13344b92d48c0 Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Wed, 17 Aug 2011 14:49:12 +0200
Subject: [PATCH 13/37] the idle priority no longer possesses its own
 runqueue

only the idle task uses this priority class
---
 include/metalsvm/tasks_types.h |  2 +-
 kernel/tasks.c                 | 99 ++++++++++++++++++----------------
 2 files changed, 54 insertions(+), 47 deletions(-)

diff --git a/include/metalsvm/tasks_types.h b/include/metalsvm/tasks_types.h
index 214df8e7..784167a7 100644
--- a/include/metalsvm/tasks_types.h
+++ b/include/metalsvm/tasks_types.h
@@ -40,7 +40,7 @@
 extern "C" {
 #endif
 
-#define MAX_PRIO 	32
+#define MAX_PRIO 	31
 #define REALTIME_PRIO	31
 #define HIGH_PRIO	16
 #define NORMAL_PRIO	8
diff --git a/kernel/tasks.c b/kernel/tasks.c
index cf16ef40..115f5be9 100644
--- a/kernel/tasks.c
+++ b/kernel/tasks.c
@@ -129,13 +129,13 @@ static void finish_task_switch(void)
 	spinlock_lock(&runqueues[core_id].lock);
 	if ((old = runqueues[core_id].old_task) != NULL) {
 		prio = old->prio;
-		if (!runqueues[core_id].queue[prio].first) {
+		if (!runqueues[core_id].queue[prio-1].first) {
 			old->prev = NULL;
-			runqueues[core_id].queue[prio].first = runqueues[core_id].queue[prio].last = old;
+			runqueues[core_id].queue[prio-1].first = runqueues[core_id].queue[prio-1].last = old;
 		} else {
-			old->prev = runqueues[core_id].queue[prio].last;
-			runqueues[core_id].queue[prio].last->next = old;
-			runqueues[core_id].queue[prio].last = old;
+			old->prev = runqueues[core_id].queue[prio-1].last;
+			runqueues[core_id].queue[prio-1].last->next = old;
+			runqueues[core_id].queue[prio-1].last = old;
 		}
 		runqueues[core_id].old_task = NULL;
 		runqueues[core_id].prio_bitmap |= (1 << prio);
@@ -242,7 +242,7 @@ static int create_task(tid_t* id, internal_entry_point_t ep, void* arg, uint8_t
 		return -EINVAL;
 	if (BUILTIN_EXPECT(prio == IDLE_PRIO, 0))
 		return -EINVAL;
-	if (BUILTIN_EXPECT(prio >= MAX_PRIO, 0))
+	if (BUILTIN_EXPECT(prio > MAX_PRIO, 0))
 		return -EINVAL;
 
 	spinlock_irqsave_lock(&table_lock);
@@ -282,15 +282,22 @@ static int create_task(tid_t* id, internal_entry_point_t ep, void* arg, uint8_t
 			// add task in the runqueue
 			spinlock_lock(&runqueues[core_id].lock);
 			runqueues[core_id].prio_bitmap |= (1 << prio);
-			if (!runqueues[core_id].queue[prio].first) {
+			kprintf("prio %d %p\n", prio, runqueues[core_id].queue[prio-1].first);
+			if (!runqueues[core_id].queue[prio-1].first) {
 				task_table[i].prev = NULL;
-				runqueues[core_id].queue[prio].first = task_table+i;
-				runqueues[core_id].queue[prio].last = task_table+i;
+				kputs("A");
+				runqueues[core_id].queue[prio-1].first = task_table+i;
+				kputs("B");
+				runqueues[core_id].queue[prio-1].last = task_table+i;
+				kputs("C");
 				task_table[i].next = NULL;
 			} else {
-				task_table[i].prev = runqueues[core_id].queue[prio].last;
-				runqueues[core_id].queue[prio].last->next = task_table+i;
-				runqueues[core_id].queue[prio].last = task_table+i;
+				kputs("D");
+				task_table[i].prev = runqueues[core_id].queue[prio-1].last;
+				kputs("E");
+				runqueues[core_id].queue[prio-1].last->next = task_table+i;
+				kputs("F");
+				runqueues[core_id].queue[prio-1].last = task_table+i;
 				task_table[i].next = NULL;
 			}
 			spinlock_unlock(&runqueues[core_id].lock);
@@ -364,15 +371,15 @@ int sys_fork(void)
 			// add task in the runqueue
 			spinlock_lock(&runqueues[core_id].lock);
 			runqueues[core_id].prio_bitmap |= (1 << parent_task->prio);
-			if (!runqueues[core_id].queue[parent_task->prio].first) {
+			if (!runqueues[core_id].queue[parent_task->prio-1].first) {
 				task_table[i].prev = NULL;
-				runqueues[core_id].queue[parent_task->prio].first = task_table+i;
-				runqueues[core_id].queue[parent_task->prio].last = task_table+i;
+				runqueues[core_id].queue[parent_task->prio-1].first = task_table+i;
+				runqueues[core_id].queue[parent_task->prio-1].last = task_table+i;
 				task_table[i].next = NULL;
 			} else {
-				task_table[i].prev = runqueues[core_id].queue[parent_task->prio].last;
-				runqueues[core_id].queue[parent_task->prio].last->next = task_table+i;
-				runqueues[core_id].queue[parent_task->prio].last = task_table+i;
+				task_table[i].prev = runqueues[core_id].queue[parent_task->prio-1].last;
+				runqueues[core_id].queue[parent_task->prio-1].last->next = task_table+i;
+				runqueues[core_id].queue[parent_task->prio-1].last = task_table+i;
 				task_table[i].next = NULL;
 			}
 			spinlock_unlock(&runqueues[core_id].lock);
@@ -442,7 +449,7 @@ int create_kernel_task(tid_t* id, entry_point_t ep, void* args, uint8_t prio)
 	kernel_args->func = ep;
 	kernel_args->args = args;
 
-	if (prio >= MAX_PRIO)
+	if (prio > MAX_PRIO)
 		prio = NORMAL_PRIO;
 
 	return create_task(id, kernel_entry, kernel_args, prio);
@@ -861,15 +868,15 @@ int wakeup_task(tid_t id)
 
 		spinlock_lock(&runqueues[core_id].lock);
 		// add task to the runqueue
-		if (!runqueues[core_id].queue[prio].last) {
-			runqueues[core_id].queue[prio].last = runqueues[core_id].queue[prio].first = task;
+		if (!runqueues[core_id].queue[prio-1].last) {
+			runqueues[core_id].queue[prio-1].last = runqueues[core_id].queue[prio-1].first = task;
 			task->next = task->prev = NULL;
 			runqueues[core_id].prio_bitmap |= (1 << prio);
 		} else {
-			task->prev = runqueues[core_id].queue[prio].last;
+			task->prev = runqueues[core_id].queue[prio-1].last;
 			task->next = NULL;
-			runqueues[core_id].queue[prio].last->next = task;
-			runqueues[core_id].queue[prio].last = task;
+			runqueues[core_id].queue[prio-1].last->next = task;
+			runqueues[core_id].queue[prio-1].last = task;
 		}
 		spinlock_unlock(&runqueues[core_id].lock);
 	}
@@ -913,16 +920,16 @@ int block_current_task(void)
 			task_table[id].prev->next = task_table[id].next;
 		if (task_table[id].next)
 			task_table[id].next->prev = task_table[id].prev;
-		if (runqueues[core_id].queue[prio].first == task_table+id)
-			runqueues[core_id].queue[prio].first = task_table[id].next;
-		if (runqueues[core_id].queue[prio].last == task_table+id) {
-			runqueues[core_id].queue[prio].last = task_table[id].prev;
-			if (!runqueues[core_id].queue[prio].last)
-				runqueues[core_id].queue[prio].last = runqueues[core_id].queue[prio].first;
+		if (runqueues[core_id].queue[prio-1].first == task_table+id)
+			runqueues[core_id].queue[prio-1].first = task_table[id].next;
+		if (runqueues[core_id].queue[prio-1].last == task_table+id) {
+			runqueues[core_id].queue[prio-1].last = task_table[id].prev;
+			if (!runqueues[core_id].queue[prio-1].last)
+				runqueues[core_id].queue[prio-1].last = runqueues[core_id].queue[prio-1].first;
 		}
 
 		// No valid task in queue => update prio_bitmap
-		if (!runqueues[core_id].queue[prio].first)
+		if (!runqueues[core_id].queue[prio-1].first)
 			runqueues[core_id].prio_bitmap &= ~(1 << prio);
 
 		spinlock_unlock(&runqueues[core_id].lock);
@@ -959,16 +966,16 @@ int set_timer(uint64_t deadline)
 			curr_task->prev->next = curr_task->next;
 		if (curr_task->next)
 			curr_task->next->prev = curr_task->prev;
-		if (runqueues[core_id].queue[prio].first == curr_task)
-			runqueues[core_id].queue[prio].first = curr_task->next;
-		if (runqueues[core_id].queue[prio].last == curr_task) {
-			runqueues[core_id].queue[prio].last = curr_task->prev;
-			if (!runqueues[core_id].queue[prio].last)
-				runqueues[core_id].queue[prio].last = runqueues[core_id].queue[prio].first;
+		if (runqueues[core_id].queue[prio-1].first == curr_task)
+			runqueues[core_id].queue[prio-1].first = curr_task->next;
+		if (runqueues[core_id].queue[prio-1].last == curr_task) {
+			runqueues[core_id].queue[prio-1].last = curr_task->prev;
+			if (!runqueues[core_id].queue[prio-1].last)
+				runqueues[core_id].queue[prio-1].last = runqueues[core_id].queue[prio-1].first;
 		}
 
 		// No valid task in queue => update prio_bitmap
-		if (!runqueues[core_id].queue[prio].first)
+		if (!runqueues[core_id].queue[prio-1].first)
 			runqueues[core_id].prio_bitmap &= ~(1 << prio);
 
 		// add task to the timer queue
@@ -1046,15 +1053,15 @@ void scheduler(void)
 			prio = task->prio;
 
 			// add task to the runqueue
-			if (!runqueues[core_id].queue[prio].first) {
-				runqueues[core_id].queue[prio].last = runqueues[core_id].queue[prio].first = task;
+			if (!runqueues[core_id].queue[prio-1].first) {
+				runqueues[core_id].queue[prio-1].last = runqueues[core_id].queue[prio-1].first = task;
 				task->next = task->prev = NULL;
 				runqueues[core_id].prio_bitmap |= (1 << prio);
 			} else {
-				task->prev = runqueues[core_id].queue[prio].last;
+				task->prev = runqueues[core_id].queue[prio-1].last;
 				task->next = NULL;
-				runqueues[core_id].queue[prio].last->next = task;
-				runqueues[core_id].queue[prio].last = task;
+				runqueues[core_id].queue[prio-1].last->next = task;
+				runqueues[core_id].queue[prio-1].last = task;
 			}
 		}
 	}
@@ -1076,13 +1083,13 @@ void scheduler(void)
 			runqueues[core_id].old_task = curr_task;
 		}
 
-		curr_task = per_core(current_task) = runqueues[core_id].queue[prio].first;
+		curr_task = per_core(current_task) = runqueues[core_id].queue[prio-1].first;
 		curr_task->status = TASK_RUNNING;
 
 		// remove new task from queue
-		runqueues[core_id].queue[prio].first = curr_task->next;
+		runqueues[core_id].queue[prio-1].first = curr_task->next;
 		if (!curr_task->next) {
-			runqueues[core_id].queue[prio].last = NULL;
+			runqueues[core_id].queue[prio-1].last = NULL;
 			runqueues[core_id].prio_bitmap &= ~(1 << prio);
 		}
 	}

From 5661ee64e118095d37913d6e562022434c61d76f Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Wed, 17 Aug 2011 14:55:33 +0200
Subject: [PATCH 14/37] remove compiler warnings

---
 include/metalsvm/tasks_types.h | 2 --
 kernel/tasks.c                 | 4 ----
 2 files changed, 6 deletions(-)

diff --git a/include/metalsvm/tasks_types.h b/include/metalsvm/tasks_types.h
index 3379fccb..a3d0bd3a 100644
--- a/include/metalsvm/tasks_types.h
+++ b/include/metalsvm/tasks_types.h
@@ -85,10 +85,8 @@ typedef struct task {
 	uint32_t		start_heap;
 	/// End address of the heap
 	uint32_t		end_heap;
-#ifdef CONFIG_LWIP
 	/// LwIP error code
 	int			lwip_err;
-#endif
 	/// Mail inbox
 	mailbox_wait_msg_t	inbox;	
 	/// Mail outbox array
diff --git a/kernel/tasks.c b/kernel/tasks.c
index 413ce994..2dec1348 100644
--- a/kernel/tasks.c
+++ b/kernel/tasks.c
@@ -243,9 +243,7 @@ static int create_task(tid_t* id, internal_entry_point_t ep, void* arg)
 
 			task_table[i].start_heap = 0;
 			task_table[i].end_heap = 0;
-#ifdef CONFIG_LWIP
 			task_table[i].lwip_err = 0;
-#endif
 			task_table[i].start_tick = get_clock_tick();
 			break;
 		}
@@ -311,9 +309,7 @@ int sys_fork(void)
 			task_table[i].start_tick = get_clock_tick();
 			task_table[i].start_heap = 0;
 			task_table[i].end_heap = 0;
-#ifdef CONFIG_LWIP
 			task_table[i].lwip_err = 0;
-#endif
 
 			ret = arch_fork(task_table+i);
 

From 6c1553ce0ec1ecebf063781f79ac9942543e1455 Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Wed, 17 Aug 2011 14:58:51 +0200
Subject: [PATCH 15/37] fix conflict

---
 include/metalsvm/tasks_types.h | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/include/metalsvm/tasks_types.h b/include/metalsvm/tasks_types.h
index 926b66cd..784167a7 100644
--- a/include/metalsvm/tasks_types.h
+++ b/include/metalsvm/tasks_types.h
@@ -93,11 +93,7 @@ typedef struct task {
 	/// Start address of the heap
 	size_t			start_heap;
 	/// End address of the heap
-<<<<<<< HEAD
 	size_t			end_heap;
-=======
-	uint32_t		end_heap;
->>>>>>> master
 	/// LwIP error code
 	int			lwip_err;
 	/// Mail inbox

From 93257508ee71ee59e42c1e5f6ac0fe9388bf1dc4 Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Wed, 17 Aug 2011 15:09:59 +0200
Subject: [PATCH 16/37] wake up a blocked task on the core that the task used
 during its last time slice

---
 include/metalsvm/tasks_types.h |  2 ++
 kernel/tasks.c                 | 17 +++++++----------
 2 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/include/metalsvm/tasks_types.h b/include/metalsvm/tasks_types.h
index 784167a7..3981f15c 100644
--- a/include/metalsvm/tasks_types.h
+++ b/include/metalsvm/tasks_types.h
@@ -78,6 +78,8 @@ typedef struct task {
 	struct task*		next;
 	/// previous task in the queue
 	struct task*		prev;
+	/// last core id on which the task was running
+	uint32_t		last_core;
 	/// Usage in number of pages
 	atomic_int32_t		user_usage;
 	/// Avoids concurrent access to the page directory
diff --git a/kernel/tasks.c b/kernel/tasks.c
index 115f5be9..c6874153 100644
--- a/kernel/tasks.c
+++ b/kernel/tasks.c
@@ -47,8 +47,8 @@
  * A task's id will be its position in this array.
  */
 static task_t task_table[MAX_TASKS] = { \
-		[0]                 = {0, TASK_IDLE,    0, 0, 0, NULL, NULL, ATOMIC_INIT(0), SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, 0, 0, 0, 0}, \
-		[1 ... MAX_TASKS-1] = {0, TASK_INVALID, 0, 0, 0, NULL, NULL, ATOMIC_INIT(0), SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, 0, 0, 0, 0}};
+		[0]                 = {0, TASK_IDLE,    0, 0, 0, NULL, NULL, 0, ATOMIC_INIT(0), SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, 0, 0, 0, 0}, \
+		[1 ... MAX_TASKS-1] = {0, TASK_INVALID, 0, 0, 0, NULL, NULL, 0, ATOMIC_INIT(0), SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, 0, 0, 0, 0}};
 static spinlock_irqsave_t table_lock = SPINLOCK_IRQSAVE_INIT;
 static runqueue_t runqueues[MAX_CORES] = { \
 		[0]                 = {task_table+0, NULL, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_INIT}, \
@@ -107,6 +107,7 @@ size_t get_idle_task(uint32_t id)
 	task_table[id].status = TASK_IDLE;
 	task_table[id].prio = IDLE_PRIO;
 	task_table[id].flags = TASK_DEFAULT_FLAGS;
+	task_table[id].last_core = id;
 	atomic_int32_set(&task_table[id].user_usage, 0);
 	mailbox_wait_msg_init(&task_table[id].inbox);
 	memset(task_table[id].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
@@ -263,6 +264,7 @@ static int create_task(tid_t* id, internal_entry_point_t ep, void* arg, uint8_t
 			task_table[i].status = TASK_READY;
 			task_table[i].flags = TASK_DEFAULT_FLAGS;
 			task_table[i].prio = prio;
+			task_table[i].last_core = 0;
 			spinlock_init(&task_table[i].vma_lock);
 			task_table[i].vma_list = NULL;
 			mailbox_wait_msg_init(&task_table[i].inbox);
@@ -282,21 +284,14 @@ static int create_task(tid_t* id, internal_entry_point_t ep, void* arg, uint8_t
 			// add task in the runqueue
 			spinlock_lock(&runqueues[core_id].lock);
 			runqueues[core_id].prio_bitmap |= (1 << prio);
-			kprintf("prio %d %p\n", prio, runqueues[core_id].queue[prio-1].first);
 			if (!runqueues[core_id].queue[prio-1].first) {
 				task_table[i].prev = NULL;
-				kputs("A");
 				runqueues[core_id].queue[prio-1].first = task_table+i;
-				kputs("B");
 				runqueues[core_id].queue[prio-1].last = task_table+i;
-				kputs("C");
 				task_table[i].next = NULL;
 			} else {
-				kputs("D");
 				task_table[i].prev = runqueues[core_id].queue[prio-1].last;
-				kputs("E");
 				runqueues[core_id].queue[prio-1].last->next = task_table+i;
-				kputs("F");
 				runqueues[core_id].queue[prio-1].last = task_table+i;
 				task_table[i].next = NULL;
 			}
@@ -367,6 +362,7 @@ int sys_fork(void)
 			task_table[i].end_heap = 0;
 			task_table[i].lwip_err = 0;
 			task_table[i].prio = parent_task->prio;
+			task_table[i].last_core = parent_task->last_core;
 
 			// add task in the runqueue
 			spinlock_lock(&runqueues[core_id].lock);
@@ -858,9 +854,9 @@ int wakeup_task(tid_t id)
 
 	flags = irq_nested_disable();
 
-	core_id = CORE_ID;
 	task = task_table + id;
 	prio = task->prio;
+	core_id = task->last_core;
 
 	if (task_table[id].status == TASK_BLOCKED) {
 		task_table[id].status = TASK_READY;
@@ -1023,6 +1019,7 @@ void scheduler(void)
 	uint64_t current_tick;
 
 	orig_task = curr_task = per_core(current_task);
+	curr_task->last_core = core_id;
 
 	/* signalizes that this task could be reused */
 	if (curr_task->status == TASK_FINISHED)

From 0ba7e146abf895978916ab4ce6a0e46fd54790fc Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Thu, 18 Aug 2011 12:15:05 +0200
Subject: [PATCH 17/37] fix bug: use AT&T instead of Intel style

---
 arch/x86/include/asm/processor.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 182db412..fb2f93bb 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -283,7 +283,7 @@ static inline uint32_t last_set(uint32_t i)
 
 	if (!i)
 		return 0;
-	asm volatile ("bsr %0, %1" : "=r"(ret) : "r"(i));
+	asm volatile ("bsr %1, %0" : "=r"(ret) : "r"(i) : "flags");
 
 	return ret;
 }

From 35621d72d131f8cefb04c84bc0f2c663835cf562 Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Thu, 18 Aug 2011 12:16:31 +0200
Subject: [PATCH 18/37] first attempt to implement task stealing

---
 arch/x86/kernel/apic.c         |   2 +-
 arch/x86/kernel/timer.c        |  13 ++-
 arch/x86/mm/page.c             |   1 +
 include/metalsvm/tasks.h       |  16 +++
 include/metalsvm/tasks_types.h |   8 ++
 kernel/tasks.c                 | 184 ++++++++++++++++++++++++++++-----
 kernel/tests.c                 |   2 +-
 7 files changed, 197 insertions(+), 29 deletions(-)

diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 379a181b..da4c65f4 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -60,7 +60,7 @@ static uint32_t ncores = 1;
 static uint8_t irq_redirect[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF};
 #if MAX_CORES > 1
 static uint8_t boot_code[] = { 0xFA, 0x0F, 0x01, 0x16, 0x3B, 0x70, 0x0F, 0x20, 0xC0, 0x0C, 0x01, 0x0F, 0x22, 0xC0, 0x66, 0xEA, 0x16, 0x70, 0x00, 0x00, 0x08, 0x00, 0x31, 0xC0, 0x66, 0xB8, 0x10, 0x00, 0x8E, 0xD8, 0x8E, 0xC0, 0x8E, 0xE0, 0x8E, 0xE8, 0x8E, 0xD0, 0xBC, 0xEF, 0xBE, 0xAD, 0xDE, 0x68, 0xAD, 0xDE, 0xAD, 0xDE, 0x6A, 0x00, 0xEA, 0xDE, 0xC0, 0xAD, 0xDE, 0x08, 0x00, 0xEB, 0xFE, 0x17, 0x00, 0x41, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x9A, 0xCF, 0x00, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x92, 0xCF, 0x00};
-static atomic_int32_t cpu_online = ATOMIC_INIT(1);
+atomic_int32_t cpu_online = ATOMIC_INIT(1);
 #endif
 static uint8_t initialized = 0;
 spinlock_t bootlock = SPINLOCK_INIT;
diff --git a/arch/x86/kernel/timer.c b/arch/x86/kernel/timer.c
index ffc7bf00..af70abfd 100644
--- a/arch/x86/kernel/timer.c
+++ b/arch/x86/kernel/timer.c
@@ -36,6 +36,10 @@
  */
 static volatile uint64_t timer_ticks = 0;
 
+#if MAX_CORES > 1
+extern atomic_int32_t cpu_online;
+#endif
+
 uint64_t get_clock_tick(void)
 {
 	return timer_ticks;
@@ -61,8 +65,6 @@ int sys_times(struct tms* buffer, clock_t* clock)
  */
 static void timer_handler(struct state *s)
 {
-	uint32_t i;
-
 	/* Increment our 'tick counter' */
 #if MAX_CORES > 1
 	if (smp_id() == 0)
@@ -78,6 +80,13 @@ static void timer_handler(struct state *s)
 			vga_puts("One second has passed\n");
 		}*/
 	}
+
+	update_load();
+
+#if MAX_CORES > 1
+	if ((atomic_int32_read(&cpu_online) > 1) && (timer_ticks % (TIMER_FREQ/5) == 0))
+		load_balancing();
+#endif
 }
 
 int timer_wait(unsigned int ticks)
diff --git a/arch/x86/mm/page.c b/arch/x86/mm/page.c
index 6eadd3d1..a99a576a 100644
--- a/arch/x86/mm/page.c
+++ b/arch/x86/mm/page.c
@@ -615,6 +615,7 @@ default_handler:
 	kprintf("Register state: eax = 0x%x, ebx = 0x%x, ecx = 0x%x, edx = 0x%x, edi = 0x%x, esi = 0x%x, ebp = 0x%x, esp = 0x%x\n", 
 		s->eax, s->ebx, s->ecx, s->edx, s->edi, s->esi, s->ebp, s->esp);
 
+	while(1);
 	irq_enable();
 	abort();
 }
diff --git a/include/metalsvm/tasks.h b/include/metalsvm/tasks.h
index 3be44077..be3fdd07 100644
--- a/include/metalsvm/tasks.h
+++ b/include/metalsvm/tasks.h
@@ -85,6 +85,22 @@ int create_user_task(tid_t* id, const char* fame, char** argv);
  */
 tid_t wait(int32_t* result);
 
+/** @brief Update the load of the current core
+ *
+ * This function is called from the timer interrupt
+ * and updates the load of the current core
+ */
+void update_load(void);
+
+#if MAX_CORES > 1
+/** @brief Load balancer
+ *
+ * This load balancer is called from the timer interrupt
+ * and steals tasks from other cores
+ */
+void load_balancing(void);
+#endif
+
 /** @brief Task switcher
  *
  * Timer-interrupted use of this function for task switching */
diff --git a/include/metalsvm/tasks_types.h b/include/metalsvm/tasks_types.h
index 3981f15c..fc1072de 100644
--- a/include/metalsvm/tasks_types.h
+++ b/include/metalsvm/tasks_types.h
@@ -116,6 +116,14 @@ typedef struct {
 	task_t* 	idle __attribute__ ((aligned (CACHE_LINE)));
 	/// previous task
 	task_t*		old_task;
+	/// total number of tasks in the queue
+	uint32_t	nr_tasks;
+	// current load = average number of tasks in the queue (1-minute average)
+	uint32_t	load;
+	// help counter to determine the cpu load
+	int32_t 	load_counter;
+	// help counter to avoid "over balancing"
+	int32_t		balance_counter;
 	/// indicates the used priority queues
 	uint32_t	prio_bitmap;
 	/// a queue for each priority
diff --git a/kernel/tasks.c b/kernel/tasks.c
index c6874153..9939651a 100644
--- a/kernel/tasks.c
+++ b/kernel/tasks.c
@@ -51,8 +51,8 @@ static task_t task_table[MAX_TASKS] = { \
 		[1 ... MAX_TASKS-1] = {0, TASK_INVALID, 0, 0, 0, NULL, NULL, 0, ATOMIC_INIT(0), SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, 0, 0, 0, 0}};
 static spinlock_irqsave_t table_lock = SPINLOCK_IRQSAVE_INIT;
 static runqueue_t runqueues[MAX_CORES] = { \
-		[0]                 = {task_table+0, NULL, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_INIT}, \
-		[1 ... MAX_CORES-1] = {NULL,         NULL, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_INIT}};
+		[0]                 = {task_table+0, NULL, 0, 0, 0, 0, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_INIT}, \
+		[1 ... MAX_CORES-1] = {NULL,         NULL, 0, 0, 0, 0, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_INIT}};
 
 DEFINE_PER_CORE(task_t*, current_task, task_table+0);
 
@@ -63,25 +63,6 @@ task_t* get_current_task(void) {
 	return per_core(current_task);
 }
 
-int dump_scheduling_statistics(void)
-{
-#if 0
-	uint32_t i;
-	uint32_t id = 0;
-
-	kprintf("Scheduling statistics:\n");
-	kprintf("======================\n");
-	kprintf("total ticks:\t%llu\n", get_clock_tick());
-	for(i=0; i<MAX_CORES; i++) {
-		if (task_table[i].status == TASK_IDLE) {
-			kprintf("core %d    :\t%u idle slices\n", id, task_table[i].time_slices);
-			id++;
-		}
-	}
-#endif
-	return 0;
-}
-
 int multitasking_init(void) {
 	if (BUILTIN_EXPECT(task_table[0].status != TASK_IDLE, 0)) {
 		kputs("Task 0 is not an idle task\n");
@@ -175,6 +156,7 @@ static void wakeup_blocked_tasks(int result)
 static void NORETURN do_exit(int arg) {
 	vma_t* tmp;
 	task_t* curr_task = per_core(current_task);
+	uint32_t flags, core_id;
 
 	kprintf("Terminate task: %u, return value %d\n", curr_task->id, arg);
 
@@ -198,6 +180,15 @@ static void NORETURN do_exit(int arg) {
 		kprintf("Memory leak! Task %d did not release %d pages\n", 
 				curr_task->id, atomic_int32_read(&curr_task->user_usage));
 	curr_task->status = TASK_FINISHED;
+
+	// decrease the number of active tasks
+	flags = irq_nested_disable();
+	core_id = CORE_ID;
+	spinlock_lock(&runqueues[core_id].lock);
+	runqueues[core_id].nr_tasks--;
+	spinlock_unlock(&runqueues[core_id].lock);
+	irq_nested_enable(flags);
+
 	reschedule();
 	
 	kprintf("Kernel panic: scheduler on core %d found no valid task\n", CORE_ID);
@@ -237,7 +228,7 @@ static int create_task(tid_t* id, internal_entry_point_t ep, void* arg, uint8_t
 {
 	task_t* curr_task;
 	int ret = -ENOMEM;
-	unsigned int i, core_id = CORE_ID;
+	unsigned int i, core_id;
 
 	if (BUILTIN_EXPECT(!ep, 0))
 		return -EINVAL;
@@ -248,6 +239,7 @@ static int create_task(tid_t* id, internal_entry_point_t ep, void* arg, uint8_t
 
 	spinlock_irqsave_lock(&table_lock);
 
+	core_id = CORE_ID;
 	curr_task = per_core(current_task);
 
 	for(i=0; i<MAX_TASKS; i++) {
@@ -284,6 +276,7 @@ static int create_task(tid_t* id, internal_entry_point_t ep, void* arg, uint8_t
 			// add task in the runqueue
 			spinlock_lock(&runqueues[core_id].lock);
 			runqueues[core_id].prio_bitmap |= (1 << prio);
+			runqueues[core_id].nr_tasks++;
 			if (!runqueues[core_id].queue[prio-1].first) {
 				task_table[i].prev = NULL;
 				runqueues[core_id].queue[prio-1].first = task_table+i;
@@ -309,7 +302,7 @@ create_task_out:
 int sys_fork(void)
 {
 	int ret = -ENOMEM;
-	unsigned int i, core_id = CORE_ID;
+	unsigned int i, core_id;
 	task_t* parent_task = per_core(current_task);
 	vma_t** child;
 	vma_t* parent;
@@ -318,6 +311,8 @@ int sys_fork(void)
 	spinlock_lock(&parent_task->vma_lock);
 	spinlock_irqsave_lock(&table_lock);
 
+	core_id = CORE_ID;
+
 	for(i=0; i<MAX_TASKS; i++) {
 		if (task_table[i].status == TASK_INVALID) {
 			atomic_int32_set(&task_table[i].user_usage, 0);
@@ -367,6 +362,7 @@ int sys_fork(void)
 			// add task in the runqueue
 			spinlock_lock(&runqueues[core_id].lock);
 			runqueues[core_id].prio_bitmap |= (1 << parent_task->prio);
+			runqueues[core_id].nr_tasks++;
 			if (!runqueues[core_id].queue[parent_task->prio-1].first) {
 				task_table[i].prev = NULL;
 				runqueues[core_id].queue[parent_task->prio-1].first = task_table+i;
@@ -1010,6 +1006,133 @@ int set_timer(uint64_t deadline)
 	return ret;
 }
 
+#define FSHIFT	21		/* nr of bits of precision (e.g. 11); shift width used by update_load() */
+#define FIXED_1	(1<<FSHIFT)	/* 1.0 as fixed-point */
+#define EXP	1884		/* 1/exp(5sec/1min) as fixed-point -- NOTE(review): 1884 equals 0.92*2^11, i.e. it matches FSHIFT 11, not 21; confirm */
+
+/* Timer-tick bookkeeping for the calling core's runqueue: counts
+ * balance_counter down towards zero and, each time load_counter drops
+ * below zero, re-arms it with 5*TIMER_FREQ and recomputes the load. */
+void update_load(void)
+{
+	runqueue_t* rq = runqueues + CORE_ID;
+
+	if (rq->balance_counter > 0)
+		rq->balance_counter--;
+
+	if (--rq->load_counter >= 0)
+		return;	// re-sampling is not due yet
+	rq->load_counter += 5*TIMER_FREQ;
+
+	// weigh the old load with EXP and the task count with FIXED_1-EXP
+	spinlock_lock(&rq->lock);
+	rq->load = (rq->load*EXP + rq->nr_tasks*(FIXED_1-EXP)) >> FSHIFT;
+	spinlock_unlock(&rq->lock);
+}
+
+#if MAX_CORES > 1
+extern atomic_int32_t cpu_online;
+
+/* Steal one task from a core whose load exceeds that of the calling core.
+ * Nothing is scanned while balance_counter is positive, and the scan stops
+ * at core_id, so only cores with a lower index are ever considered. */
+void load_balancing(void)
+{
+	uint32_t i, prio, core_id = CORE_ID;
+	task_t* task;
+
+	spinlock_lock(&runqueues[core_id].lock);
+	for(i=0; (i<atomic_int32_read(&cpu_online)) && (runqueues[core_id].balance_counter <= 0); i++)
+	{
+		if (i == core_id)
+			break;
+		spinlock_lock(&runqueues[i].lock);
+		if (runqueues[i].load > runqueues[core_id].load) {
+			kprintf("Try to steal a task from core %u (load %u) to %u (load %u)\n", i, runqueues[i].load, core_id, runqueues[core_id].load);
+			kprintf("Task on core %u: %u, core %u, %u\n", i, runqueues[i].nr_tasks, core_id, runqueues[core_id].nr_tasks);
+			prio = last_set(runqueues[i].prio_bitmap);
+			if (prio) {
+				// steal a ready task
+				task = runqueues[i].queue[prio-1].last;
+				kprintf("Try to steal a ready task %d\n", task->id);
+
+				// remove last element from queue i; clear its bitmap bit if the queue runs empty
+				if (task->prev)
+					task->prev->next = NULL;
+				runqueues[i].queue[prio-1].last = task->prev;
+				if (!runqueues[i].queue[prio-1].last) {
+					runqueues[i].queue[prio-1].first = NULL;
+					runqueues[i].prio_bitmap &= ~(1 << prio);
+				}
+
+				// add task at the end of queue core_id and mark that priority as used
+				runqueues[core_id].prio_bitmap |= (1 << prio);
+				if (!runqueues[core_id].queue[prio-1].last) {
+					runqueues[core_id].queue[prio-1].first = runqueues[core_id].queue[prio-1].last = task;
+					task->next = task->prev = NULL;
+				} else {
+					runqueues[core_id].queue[prio-1].last->next = task;
+					task->prev = runqueues[core_id].queue[prio-1].last;
+					runqueues[core_id].queue[prio-1].last = task;
+					task->next = NULL;
+				}
+				// update task counters; re-arming balance_counter ends the scan
+				runqueues[core_id].nr_tasks++;
+				runqueues[i].nr_tasks--;
+				runqueues[core_id].balance_counter = 5*TIMER_FREQ;
+			} else {
+				task_t* tmp;
+				// steal a blocked task
+				task = runqueues[i].timers.first;
+				if (!task) // Oops, found no valid task to steal
+					goto no_task_found;
+				kprintf("Try to steal blocked task %d\n", task->id);
+
+				// remove first timer from queue i; the new head must not point back at it
+				if (runqueues[i].timers.first == runqueues[i].timers.last) {
+					runqueues[i].timers.first = runqueues[i].timers.last = NULL;
+				} else {
+					runqueues[i].timers.first = task->next;
+					runqueues[i].timers.first->prev = NULL;
+				}
+
+				// add timer to queue core_id, keeping the list sorted by ascending timeout
+				tmp = runqueues[core_id].timers.first;
+				while(tmp && (task->timeout >= tmp->timeout))
+					tmp = tmp->next;
+				if (!tmp) {
+					task->next = NULL;
+					task->prev = runqueues[core_id].timers.last;
+					if (runqueues[core_id].timers.last)
+						runqueues[core_id].timers.last->next = task;
+					runqueues[core_id].timers.last = task;
+					if (!runqueues[core_id].timers.first)
+						runqueues[core_id].timers.first = task;
+				} else {
+					task->prev = tmp->prev;
+					task->next = tmp;
+					tmp->prev = task;
+					if (task->prev)
+						task->prev->next = task;
+					if (runqueues[core_id].timers.first == tmp)
+						runqueues[core_id].timers.first = task;
+				}
+
+				// => reschedule on the new core
+				task->last_core = CORE_ID;
+				// update task counters; re-arming balance_counter ends the scan
+				runqueues[core_id].nr_tasks++;
+				runqueues[i].nr_tasks--;
+				runqueues[core_id].balance_counter = 5*TIMER_FREQ;
+			}
+		}
+no_task_found:
+		spinlock_unlock(&runqueues[i].lock);
+	}
+	spinlock_unlock(&runqueues[core_id].lock);
+}
+#endif
+
 void scheduler(void)
 {
 	task_t* orig_task;
@@ -1065,6 +1188,17 @@ void scheduler(void)
 
 	runqueues[core_id].old_task = NULL; // reset old task
 	prio = last_set(runqueues[core_id].prio_bitmap); // determines highest priority
+#if MAX_CORES > 1
+	/*if (!prio) {
+		load_balancing();
+		prio = last_set(runqueues[core_id].prio_bitmap); // retry...
+	}*/
+#endif
+
+	if (BUILTIN_EXPECT(prio > MAX_PRIO, 0)) {
+		kprintf("Invalid priority %u by bitmap 0x%x\n", prio, runqueues[core_id].prio_bitmap);
+		prio = 0;
+	}
 
 	if (!prio) {
 		if ((curr_task->status == TASK_RUNNING) || (curr_task->status == TASK_IDLE))
@@ -1095,8 +1229,8 @@ get_task_out:
 	spinlock_unlock(&runqueues[core_id].lock);
 
 	if (curr_task != orig_task) {
-		kprintf("schedule from %u to %u with prio %u on core %u\n",
-			orig_task->id, curr_task->id, (uint32_t)curr_task->prio, CORE_ID);
+		//kprintf("schedule from %u to %u with prio %u on core %u\n",
+		//	orig_task->id, curr_task->id, (uint32_t)curr_task->prio, CORE_ID);
 		switch_task(curr_task->id);
 	}
 }
diff --git a/kernel/tests.c b/kernel/tests.c
index 281329d5..8f8860a4 100644
--- a/kernel/tests.c
+++ b/kernel/tests.c
@@ -91,7 +91,7 @@ static int foo(void* arg)
 	if (!arg)
 		return 0;
 
-	for(i=0; i<5; i++) {
+	for(i=0; i<20; i++) {
 		kprintf("Message from core %d: %s\n", smp_id(), (char*) arg);
 		sleep(1);
 	}

From e9aa86ed519e9c6431b4a9a788576d2e277c914b Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Thu, 18 Aug 2011 05:38:23 -0700
Subject: [PATCH 19/37] fix bug in the MPB-specific memcpy functions

=> solves a problem in Simon's mailbox system
---
 arch/x86/scc/iRCCE_get.c          |  4 ++--
 arch/x86/scc/iRCCE_put.c          |  4 ++--
 arch/x86/scc/scc_memcpy.h         | 18 +++++++++++++++++-
 include/metalsvm/config.h.example |  1 +
 4 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/arch/x86/scc/iRCCE_get.c b/arch/x86/scc/iRCCE_get.c
index 1b2efb83..7916c268 100644
--- a/arch/x86/scc/iRCCE_get.c
+++ b/arch/x86/scc/iRCCE_get.c
@@ -41,8 +41,8 @@
 
 void* iRCCE_memcpy_get(void *dest, const void *src, size_t count)
 {
-#ifdef COPPERRIDGE
-  return memcpy_from_mpb(dest, src, count);
+#if defined(COPPERRIDGE) || defined(SCC)
+  return memcpy_get(dest, src, count);
 #else
   return memcpy(dest, src, count);
 #endif
diff --git a/arch/x86/scc/iRCCE_put.c b/arch/x86/scc/iRCCE_put.c
index 93cea070..e810057a 100644
--- a/arch/x86/scc/iRCCE_put.c
+++ b/arch/x86/scc/iRCCE_put.c
@@ -41,8 +41,8 @@
 
 void* iRCCE_memcpy_put(void *dest, const void *src, size_t count)
 {
-#ifdef COPPERRIDGE
-  return memcpy_to_mpb(dest, src, count);
+#if defined(COPPERRIDGE) || defined(SCC)
+  return memcpy_put(dest, src, count);
 #else
   return memcpy(dest, src, count);
 #endif
diff --git a/arch/x86/scc/scc_memcpy.h b/arch/x86/scc/scc_memcpy.h
index 8748a496..00662c41 100644
--- a/arch/x86/scc/scc_memcpy.h
+++ b/arch/x86/scc/scc_memcpy.h
@@ -90,7 +90,23 @@ inline static void *memcpy_get(void *dest, const void *src, size_t count)
  * In our kernel, we didn't want to use FPU registers.
  * Therefore, we use standard memcpy routine
  */
-#define memcpy_put 	memcpy
+inline static void *memcpy_put(void* dest, const void *src, size_t count)
+{
+	int32_t i, j, k;	// receive the final ecx/edi/esi values; never read afterwards
+	// a NULL source or destination copies nothing and returns dest as-is
+	if (BUILTIN_EXPECT(!dest || !src, 0))
+		return dest;
+	// copy count/4 dwords, then the remaining count&3 bytes
+	asm volatile (
+		"cld; rep movsl\n\t"	/* forward copy of ecx = count/4 dwords */
+		"movl %4, %%ecx\n\t"	/* reload the full byte count (operand 4) */
+		"andl $3, %%ecx\n\t"	/* keep only the 0..3 leftover bytes */
+		"rep movsb\n\t"	/* copy the leftover bytes */
+		: "=&c"(i), "=&D"(j), "=&S"(k) 
+		: "0"(count/4), "g"(count), "1"(dest), "2"(src) : "memory");
+	// callers get the original destination pointer back
+	return dest;
+}
 #else
 /** @brief Fast procedure to get a byte range from on-die memory into RAM.
  *
diff --git a/include/metalsvm/config.h.example b/include/metalsvm/config.h.example
index bd4c0b73..90f8b7c8 100644
--- a/include/metalsvm/config.h.example
+++ b/include/metalsvm/config.h.example
@@ -60,6 +60,7 @@ extern "C" {
 
 // RCCE specific flags
 #define SCC
+#define COPPERRIDGE
 #define MS_BAREMETAL
 //#define GORY
 #define SHMADD

From 01a4573b3643c711aba62a55c92d7b3a4cd55d18 Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Thu, 18 Aug 2011 05:50:54 -0700
Subject: [PATCH 20/37] remove some output messages

---
 arch/x86/scc/icc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/scc/icc.c b/arch/x86/scc/icc.c
index 865e86c6..06d7beb5 100644
--- a/arch/x86/scc/icc.c
+++ b/arch/x86/scc/icc.c
@@ -268,7 +268,7 @@ void icc_mail_check(void)
 
 	// empty mailbox and interpret headers
 	while( 	iRCCE_mail_recv( &header ) == iRCCE_SUCCESS ) {
-		iRCCE_mailbox_print_header(header);
+		//iRCCE_mailbox_print_header(header);
 
 		switch(header->tag)
 		{

From 1f178b99cc3189a580c7f665bb67effabd777e8f Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Thu, 18 Aug 2011 07:08:25 -0700
Subject: [PATCH 21/37] now, the page fault handler determines the PGT's
 address correctly

---
 arch/x86/mm/page.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/arch/x86/mm/page.c b/arch/x86/mm/page.c
index 2cb7ff5f..ee9e5f83 100644
--- a/arch/x86/mm/page.c
+++ b/arch/x86/mm/page.c
@@ -623,20 +623,18 @@ static void pagefault_handler(struct state *s)
 		put_page(phyaddr);
 	}
 
+
+	// does our SVM system need to handle this page fault?
 	index1 = viraddr >> 22;
 	index2 = (viraddr >> 12) & 0x3FF;
-
-	kprintf("page fault: pgd 0x%p\n", pgd);
-	if (pgd)
-                pgt = (page_table_t*) (pgd->entries[index1] & 0xFFFFF000);
-	kprintf("page fault: pgt 0x%p\n", pgt);
-	if (!pgt)
+	if (!pgd || !(pgd->entries[index1] & 0xFFFFF000))
+                goto default_handler;
+        pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & 0xFFFFF000);
+	if (!pgt || !(pgt->entries[index2]))
 		goto default_handler;
-
 	if (pgt->entries[index2] & PG_SVM)
 		if (!svm_access_request(viraddr))
 			return;
-	kprintf("pgt->entries[%d] = 0x%x\n", index2, pgt->entries[index2]);
 
 default_handler:
 	kprintf("PAGE FAULT: Task %u got page fault at %p (irq %d, cs:eip 0x%x:0x%x)\n", task->id, viraddr, s->int_no, s->cs, s->eip);

From 9c15383d2cf95181d4c3356dd153f44a738ca061 Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Fri, 19 Aug 2011 00:11:36 -0700
Subject: [PATCH 22/37] add first running version of our svm system

---
 arch/x86/include/asm/processor.h |  7 ---
 arch/x86/include/asm/svm.h       | 14 +++++
 arch/x86/mm/page.c               |  4 ++
 arch/x86/mm/svm.c                | 36 +++++++-----
 arch/x86/scc/icc.c               |  2 +-
 kernel/tests.c                   | 96 ++++++++++++++++++++++++--------
 6 files changed, 115 insertions(+), 44 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 5d79e4ac..82393d13 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -123,13 +123,6 @@ inline static int get_return_value(void) {
 	return ret;
 }
 
-#ifdef CONFIG_ROCKCREEK
-static inline void invalidate_cl1(void)
-{
-	asm volatile ( ".byte 0x0f; .byte 0x0a;\n" ); // CL1FLUSHMB
-}
-#endif
-
 /* Force strict CPU ordering */
 #ifdef CONFIG_ROCKCREEK
 inline static void mb(void) { asm volatile ("lock; addl $0,0(%%esp)" ::: "memory"); }
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 667d10a2..1b0b3c1d 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -65,6 +65,20 @@ int svm_access_request(size_t addr);
  */
 int svm_emit_page(size_t addr, int ue);
 
+static inline void svm_invalidate(void)
+{
+	asm volatile ( ".byte 0x0f; .byte 0x0a;\n" ); // CL1FLUSHMB, emitted as raw opcode bytes -- NOTE(review): SCC docs appear to name 0x0F 0x0A CL1INVMB; confirm
+}
+
+static inline void svm_flush(void)
+{
+	mb();	// memory barrier before any cache flush
+#ifndef SVM_WT
+	flush_cache();	// compiled out when SVM_WT is defined -- NOTE(review): presumably write-through SVM pages need no explicit flush; confirm
+#endif
+	//asm volatile ( ".byte 0x0f; .byte 0x0a;\n" ); // CL1FLUSHMB
+}
+
 #endif
 
 #ifdef __cplusplus
diff --git a/arch/x86/mm/page.c b/arch/x86/mm/page.c
index ee9e5f83..2bde433c 100644
--- a/arch/x86/mm/page.c
+++ b/arch/x86/mm/page.c
@@ -356,7 +356,11 @@ size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flag
 			pgt->entries[index] |= PG_MPE;
 #endif
 		if (flags & MAP_SVM)
+#ifdef SVM_WT
+			pgt->entries[index] |= PG_SVM|PG_PWT;
+#else
 			pgt->entries[index] |= PG_SVM;
+#endif
 		if (flags & MAP_NO_ACCESS)
 			pgt->entries[index] &= ~PG_PRESENT;
 
diff --git a/arch/x86/mm/svm.c b/arch/x86/mm/svm.c
index 3c3f8458..cf1868ad 100644
--- a/arch/x86/mm/svm.c
+++ b/arch/x86/mm/svm.c
@@ -60,8 +60,13 @@ int svm_init(void)
 	shmbegin = (size_t)RC_SHM_BUFFER_START();
 	phyaddr = (size_t) RCCE_shmalloc(OWNER_SIZE);
 	irq_nested_enable(flags);
+
 	if (BUILTIN_EXPECT(!phyaddr, 0))
 		return -ENOMEM;
+	if (BUILTIN_EXPECT(phyaddr & 0xFFF, 0)) {
+		kprintf("RCCE_shmalloc returns not a page aligned physiacl address: 0x%x\n", phyaddr);
+		return -ENOMEM;
+	}
 
 	kprintf("Shared memory starts at the physical address 0x%x\n", shmbegin);
  
@@ -96,15 +101,14 @@ int svm_access_request(size_t addr)
 	int remote_rank;
 	uint8_t payload[iRCCE_MAIL_HEADER_PAYLOAD];
 
-	kprintf("enter svm_access_request\n");
-
 	if (phyaddr < shmbegin)
 		return -EINVAL;
 	if (phyaddr >= shmbegin + RCCE_SHM_SIZE_MAX)
 		return -EINVAL;
 	pageid = (phyaddr-shmbegin) >> PAGE_SHIFT;
 
-	invalidate_cl1();
+	svm_flush();
+	svm_invalidate();
 	if (page_owner[pageid] == my_ue)
 		return 0;
 
@@ -112,7 +116,7 @@ int svm_access_request(size_t addr)
 	((size_t*) payload)[0] = my_ue;
 	((size_t*) payload)[1] = phyaddr;
 
-	kprintf("send access request to %d of 0x%x\n", remote_rank, phyaddr);
+	//kprintf("send access request to %d of 0x%x\n", remote_rank, phyaddr);
 	/* send ping request */
 	iRCCE_mail_send(2*sizeof(size_t), ICC_TAG_SVMREQUEST, 0, payload, remote_rank);
 
@@ -122,12 +126,12 @@ int svm_access_request(size_t addr)
 	/* check for incoming messages */
 	icc_mail_check();
 
-	invalidate_cl1();
+	svm_invalidate();
 	while (page_owner[pageid] != my_ue)
 	{
 		NOP4;
-		invalidate_cl1();
-	};
+		svm_invalidate();
+	}
 
 	return change_page_permissions(addr, addr+PAGE_SIZE, VMA_READ|VMA_WRITE|VMA_CACHEABLE);
 }
@@ -149,13 +153,18 @@ void* svmmalloc(size_t size)
 	if (RCCE_ue())
 		map_flags |= MAP_NO_ACCESS;
 	irq_nested_enable(flags);
+
 	if (BUILTIN_EXPECT(!phyaddr, 0))
 		return NULL;
+	if (BUILTIN_EXPECT(phyaddr & 0xFFF, 0)) {
+		kprintf("RCCE_shmalloc returns not a page aligned physiacl address: 0x%x\n", phyaddr);
+		return NULL;
+	}
 
 	viraddr = map_region(0, phyaddr, size >> PAGE_SHIFT, map_flags);
 	phys2virt[(phyaddr - shmbegin) >> PAGE_SHIFT] = viraddr;
 
-	//kprintf("shmmalloc: phyaddr 0x%x, viraddr 0x%x, size 0x%x\n", phyaddr, viraddr, size);
+	kprintf("svmmalloc: phyaddr 0x%x, viraddr 0x%x, size 0x%x\n", phyaddr, viraddr, size);
 
 	return (void*) viraddr;
 }
@@ -173,7 +182,7 @@ void svmfree(void* addr, size_t size)
 	// currently, we allocate memory in page size granulation
 	size = (size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
 
-	//kprintf("shmmalloc: phyaddr 0x%x, viraddr 0x%x, size 0x%x\n", phyaddr, addr, size);
+	kprintf("svmfree: phyaddr 0x%x, viraddr 0x%x, size 0x%x\n", phyaddr, addr, size);
 
 	unmap_region((size_t) addr, size >> PAGE_SHIFT);
 	phys2virt[(phyaddr - shmbegin) >> PAGE_SHIFT] = 0;
@@ -192,7 +201,7 @@ int svm_emit_page(size_t phyaddr, int ue)
 {	
 	uint32_t pageid;
 
-	kprintf("Try to emit page 0x%x to %d\n", phyaddr, ue);
+	//kprintf("Try to emit page 0x%x to %d\n", phyaddr, ue);
 
 	if (phyaddr < shmbegin)
 		return -EINVAL;
@@ -200,7 +209,6 @@ int svm_emit_page(size_t phyaddr, int ue)
 		return -EINVAL;
 	pageid = (phyaddr-shmbegin) >> PAGE_SHIFT;
 
-	invalidate_cl1();
 	if (page_owner[pageid] != my_ue) {
 		// Core is nor owner => forward request to new owner
 		int remote_rank;
@@ -220,12 +228,12 @@ int svm_emit_page(size_t phyaddr, int ue)
 	} else {
 		size_t viraddr = phys2virt[(phyaddr - shmbegin) >> PAGE_SHIFT];
 
+		svm_flush();
 		change_page_permissions(viraddr, viraddr+PAGE_SIZE, VMA_NOACCESS|VMA_READ|VMA_CACHEABLE);
 
-		invalidate_cl1();
+		svm_invalidate();
 		page_owner[pageid] = ue;
-		mb();
-		invalidate_cl1();
+		svm_flush();
 	}
 
 	return 0;
diff --git a/arch/x86/scc/icc.c b/arch/x86/scc/icc.c
index 06d7beb5..79ca128e 100644
--- a/arch/x86/scc/icc.c
+++ b/arch/x86/scc/icc.c
@@ -132,7 +132,7 @@ int icc_init(void)
 		return -ENODEV;
 
 	// enable additional outputs	
-	RCCE_debug_set(RCCE_DEBUG_ALL);
+	//RCCE_debug_set(RCCE_DEBUG_ALL);
 
 	my_ue   = RCCE_ue();
 	num_ues = RCCE_num_ues();
diff --git a/kernel/tests.c b/kernel/tests.c
index e5e1a8fe..61e1f38b 100644
--- a/kernel/tests.c
+++ b/kernel/tests.c
@@ -115,11 +115,6 @@ int mail_ping(void* arg) {
 	return 0;
 }
 
-static inline void cache_invalidate(void) 
-{
-	asm volatile ( ".byte 0x0f; .byte 0x0a;\n" ); // CL1FLUSHMB
-}
-
 #define N	1024
 
 volatile static int* A[N];
@@ -128,15 +123,16 @@ volatile static int* C[N];
 
 static int svm_test(void *arg)
 {
-	uint32_t i, j, k, flags;
+	uint64_t start, end;
+	uint32_t i, j, k;//, flags;
 	int my_ue, num_ues;
 
 	// iRCCE is not thread save => disable interrupts
-	flags = irq_nested_disable();
+	//flags = irq_nested_disable();
 	RCCE_barrier(&RCCE_COMM_WORLD);
 	my_ue = RCCE_ue();
 	num_ues = RCCE_num_ues();
-	irq_nested_enable(flags);
+	//irq_nested_enable(flags);
 
 	// allocate and initialize SVM region
 	A[0] = (int*) svmmalloc(3*N*N*sizeof(int));
@@ -146,42 +142,98 @@ static int svm_test(void *arg)
 	// initialize matrices
 	for(i=0; i<N; i++) {
 		A[i] = A[0] + i*N;
-		B[i] = A[0] + i*N + N*N;
-		C[i] = A[0] + i*N + 2*N*N;
+		B[i] = A[0] + (i*N + N*N);
+		C[i] = A[0] + (i*N + 2*N*N);
 	}
 	if (!my_ue) {
 		for(i=0; i<N; i++) {
 			A[i][i] = 1;
 			for(j=0; j<N; j++)
-				B[i][j] = j;
+				B[i][j] = i+j;
 		}
 	}
 
-	// CL1FLUSH
-	invalidate_cl1();
+	svm_flush();
 
 	// Now, we need only read access on A and B
 	//change_page_permissions((size_t) A[0], (size_t) (A[0]+2*N*N), VMA_CACHEABLE|VMA_READ);
 
 	// iRCCE is not thread save => disable interrupts
-	flags = irq_nested_disable();
+	//flags = irq_nested_disable();
 	RCCE_barrier(&RCCE_COMM_WORLD);
-	irq_nested_enable(flags);
+	//irq_nested_enable(flags);
 
-	kputs("Start calculation...\n");
+	kputs("Start sequentiell calculation...\n");
+
+	start = rdtsc();
+	start = rdtsc();
+
+	// start calculation
+	if (!my_ue) {
+		for(i=0; i<N; i++)
+			for(j=0; j<N; j++)
+				for(k=0; k<N; k++)
+					C[i][j] += A[i][k] * B[k][j];
+	}
+
+
+	end = rdtsc();
+
+	if (!my_ue) {
+		memset(C[0], 0x00, N*N*sizeof(int));
+		// CL1FLUSH
+		svm_flush();
+
+		kprintf("Calculation time (seq): %llu\n", end-start);
+	}
+
+	
+	// iRCCE is not thread save => disable interrupts
+	//flags = irq_nested_disable();
+	RCCE_barrier(&RCCE_COMM_WORLD);
+	//irq_nested_enable(flags);
+
+	kputs("Start parallel calculation...\n");
+
+	start = rdtsc();
+	start = rdtsc();
 
 	// start calculation
 	for(i=my_ue*(N/num_ues); i<(my_ue+1)*(N/num_ues); i++)
 		for(j=0; j<N; j++)
 			for(k=0; k<N; k++)
-				C[i][j] = A[i][k] * B[k][j];
+				C[i][j] += A[i][k] * B[k][j];
+
+	svm_flush();
+
+	end = rdtsc();
 
 	// iRCCE is not thread save => disable interrupts
-	flags = irq_nested_disable();
+	//flags = irq_nested_disable();
 	RCCE_barrier(&RCCE_COMM_WORLD);
-	irq_nested_enable(flags);
+	//irq_nested_enable(flags);
 
-	kputs("Calculation finished...\n");
+	kputs("Check results...\n");
+
+	if (!my_ue) {
+		uint32_t err = 0;
+
+		for(i=0; (i<N) && (err < 10); i++) {
+			for(j=0; (j<N) && (err < 10); j++) {
+				if (C[i][j] != i+j) {
+					err++;
+					kprintf("Wrong value at C[%u][%u] = %u, B[%u][%u] = %u\n", i, j, C[i][j], i, j, B[i][j]);
+				}
+			}
+		}
+	}
+
+	// iRCCE is not thread save => disable interrupts
+	//flags = irq_nested_disable();
+	RCCE_barrier(&RCCE_COMM_WORLD);
+	//irq_nested_enable(flags);
+
+	kprintf("Calculation time (par): %llu\n", end-start);
 
 	svmfree((void*) A[0], 3*N*sizeof(int));
 
@@ -353,8 +405,8 @@ int test_init(void)
 	//create_kernel_task(NULL, join_test, NULL);
 	//create_kernel_task(NULL, producer, NULL);
 	//create_kernel_task(NULL, consumer, NULL);
-	create_kernel_task(NULL, mail_ping, NULL);
-	//create_kernel_task(NULL, svm_test, NULL);
+	//create_kernel_task(NULL, mail_ping, NULL);
+	create_kernel_task(NULL, svm_test, NULL);
 	//create_user_task(NULL, "/bin/hello", argv);
 	//create_user_task(NULL, "/bin/tests", argv);
 	//create_user_task(NULL, "/bin/jacobi", argv);

From f721582a5c021dc28172dd478c1e4d0388f83d6c Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Fri, 19 Aug 2011 13:50:47 -0700
Subject: [PATCH 23/37] on the SCC, we always include scc_memcpy.h

---
 arch/x86/scc/iRCCE_get.c | 2 +-
 arch/x86/scc/iRCCE_put.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/scc/iRCCE_get.c b/arch/x86/scc/iRCCE_get.c
index 7916c268..8859bfed 100644
--- a/arch/x86/scc/iRCCE_get.c
+++ b/arch/x86/scc/iRCCE_get.c
@@ -35,7 +35,7 @@
 
 #include <asm/iRCCE_lib.h>
 
-#ifdef COPPERRIDGE
+#if defined(COPPERRIDGE) || defined(SCC)
 #include "scc_memcpy.h"
 #endif
 
diff --git a/arch/x86/scc/iRCCE_put.c b/arch/x86/scc/iRCCE_put.c
index e810057a..96060cae 100644
--- a/arch/x86/scc/iRCCE_put.c
+++ b/arch/x86/scc/iRCCE_put.c
@@ -35,7 +35,7 @@
 
 #include <asm/iRCCE_lib.h>
 
-#ifdef COPPERRIDGE
+#if defined(COPPERRIDGE) || defined(SCC)
 #include "scc_memcpy.h"
 #endif
 

From fcda5b0d9c32a0ef7e7fba2447149c52afeb5024 Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Fri, 19 Aug 2011 13:50:47 -0700
Subject: [PATCH 24/37] on the SCC, we include always the header scc_memcpy.h

---
 arch/x86/scc/iRCCE_get.c | 2 +-
 arch/x86/scc/iRCCE_put.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/scc/iRCCE_get.c b/arch/x86/scc/iRCCE_get.c
index 7916c268..8859bfed 100644
--- a/arch/x86/scc/iRCCE_get.c
+++ b/arch/x86/scc/iRCCE_get.c
@@ -35,7 +35,7 @@
 
 #include <asm/iRCCE_lib.h>
 
-#ifdef COPPERRIDGE
+#if defined(COPPERRIDGE) || defined(SCC)
 #include "scc_memcpy.h"
 #endif
 
diff --git a/arch/x86/scc/iRCCE_put.c b/arch/x86/scc/iRCCE_put.c
index e810057a..96060cae 100644
--- a/arch/x86/scc/iRCCE_put.c
+++ b/arch/x86/scc/iRCCE_put.c
@@ -35,7 +35,7 @@
 
 #include <asm/iRCCE_lib.h>
 
-#ifdef COPPERRIDGE
+#if defined(COPPERRIDGE) || defined(SCC)
 #include "scc_memcpy.h"
 #endif
 

From 6f1c07c0dc170d59e07c6c62a2fe378fb2343261 Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Sat, 20 Aug 2011 01:03:18 -0700
Subject: [PATCH 25/37] some code cleanups

---
 arch/x86/mm/svm.c | 13 ++++---------
 kernel/tests.c    | 20 +++-----------------
 2 files changed, 7 insertions(+), 26 deletions(-)

diff --git a/arch/x86/mm/svm.c b/arch/x86/mm/svm.c
index cf1868ad..f3245071 100644
--- a/arch/x86/mm/svm.c
+++ b/arch/x86/mm/svm.c
@@ -70,7 +70,7 @@ int svm_init(void)
 
 	kprintf("Shared memory starts at the physical address 0x%x\n", shmbegin);
  
-	page_owner = (uint8_t*) map_region(0, phyaddr, OWNER_SIZE >> PAGE_SHIFT, MAP_SVM|MAP_KERNEL_SPACE|MAP_NO_CACHE/*MAP_MPE*/);
+	page_owner = (uint8_t*) map_region(0, phyaddr, OWNER_SIZE >> PAGE_SHIFT, MAP_KERNEL_SPACE|MAP_NO_CACHE);
 	if (BUILTIN_EXPECT(!page_owner, 0)) {
 		flags = irq_nested_disable();
 		RCCE_shfree((t_vcharp) phyaddr);
@@ -107,8 +107,7 @@ int svm_access_request(size_t addr)
 		return -EINVAL;
 	pageid = (phyaddr-shmbegin) >> PAGE_SHIFT;
 
-	svm_flush();
-	svm_invalidate();
+	//svm_flush();
 	if (page_owner[pageid] == my_ue)
 		return 0;
 
@@ -126,11 +125,8 @@ int svm_access_request(size_t addr)
 	/* check for incoming messages */
 	icc_mail_check();
 
-	svm_invalidate();
-	while (page_owner[pageid] != my_ue)
-	{
+	while (page_owner[pageid] != my_ue) {
 		NOP4;
-		svm_invalidate();
 	}
 
 	return change_page_permissions(addr, addr+PAGE_SIZE, VMA_READ|VMA_WRITE|VMA_CACHEABLE);
@@ -231,9 +227,8 @@ int svm_emit_page(size_t phyaddr, int ue)
 		svm_flush();
 		change_page_permissions(viraddr, viraddr+PAGE_SIZE, VMA_NOACCESS|VMA_READ|VMA_CACHEABLE);
 
-		svm_invalidate();
 		page_owner[pageid] = ue;
-		svm_flush();
+		mb();
 	}
 
 	return 0;
diff --git a/kernel/tests.c b/kernel/tests.c
index 61e1f38b..7da29bdd 100644
--- a/kernel/tests.c
+++ b/kernel/tests.c
@@ -124,15 +124,12 @@ volatile static int* C[N];
 static int svm_test(void *arg)
 {
 	uint64_t start, end;
-	uint32_t i, j, k;//, flags;
+	uint32_t i, j, k;
 	int my_ue, num_ues;
 
-	// iRCCE is not thread save => disable interrupts
-	//flags = irq_nested_disable();
 	RCCE_barrier(&RCCE_COMM_WORLD);
 	my_ue = RCCE_ue();
 	num_ues = RCCE_num_ues();
-	//irq_nested_enable(flags);
 
 	// allocate and initialize SVM region
 	A[0] = (int*) svmmalloc(3*N*N*sizeof(int));
@@ -158,11 +155,9 @@ static int svm_test(void *arg)
 	// Now, we need only read access on A and B
 	//change_page_permissions((size_t) A[0], (size_t) (A[0]+2*N*N), VMA_CACHEABLE|VMA_READ);
 
-	// iRCCE is not thread save => disable interrupts
-	//flags = irq_nested_disable();
 	RCCE_barrier(&RCCE_COMM_WORLD);
-	//irq_nested_enable(flags);
 
+#if 0
 	kputs("Start sequentiell calculation...\n");
 
 	start = rdtsc();
@@ -188,11 +183,8 @@ static int svm_test(void *arg)
 	}
 
 	
-	// iRCCE is not thread save => disable interrupts
-	//flags = irq_nested_disable();
 	RCCE_barrier(&RCCE_COMM_WORLD);
-	//irq_nested_enable(flags);
-
+#endif
 	kputs("Start parallel calculation...\n");
 
 	start = rdtsc();
@@ -208,10 +200,7 @@ static int svm_test(void *arg)
 
 	end = rdtsc();
 
-	// iRCCE is not thread save => disable interrupts
-	//flags = irq_nested_disable();
 	RCCE_barrier(&RCCE_COMM_WORLD);
-	//irq_nested_enable(flags);
 
 	kputs("Check results...\n");
 
@@ -228,10 +217,7 @@ static int svm_test(void *arg)
 		}
 	}
 
-	// iRCCE is not thread save => disable interrupts
-	//flags = irq_nested_disable();
 	RCCE_barrier(&RCCE_COMM_WORLD);
-	//irq_nested_enable(flags);
 
 	kprintf("Calculation time (par): %llu\n", end-start);
 

From b0b9b0996e2ed74c421e4301780e9dc18b890590 Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Mon, 22 Aug 2011 13:15:49 -0700
Subject: [PATCH 26/37] redesign of the SVM benchmark

---
 kernel/tests.c | 74 ++++++++++++++++++++++++++++----------------------
 1 file changed, 42 insertions(+), 32 deletions(-)

diff --git a/kernel/tests.c b/kernel/tests.c
index 7da29bdd..b30cd4d7 100644
--- a/kernel/tests.c
+++ b/kernel/tests.c
@@ -131,6 +131,46 @@ static int svm_test(void *arg)
 	my_ue = RCCE_ue();
 	num_ues = RCCE_num_ues();
 
+#if 1
+	if (!my_ue) {
+		// allocate and initialize SVM region
+		A[0] = (int*) kmalloc(3*N*N*sizeof(int));
+		memset((void*) A[0], 0x00, 3*N*N*sizeof(int));
+
+		// initialize matrices
+		for(i=0; i<N; i++) {
+			A[i] = A[0] + i*N;
+			B[i] = A[0] + (i*N + N*N);
+			C[i] = A[0] + (i*N + 2*N*N);
+		}
+
+		for(i=0; i<N; i++) {
+			A[i][i] = 1;
+			for(j=0; j<N; j++)
+				B[i][j] = i+j;
+		}
+
+		kputs("Start sequentiell calculation...\n");
+
+		start = rdtsc();
+		start = rdtsc();
+
+		// start calculation
+		for(i=0; i<N; i++)
+			for(j=0; j<N; j++)
+				for(k=0; k<N; k++)
+					C[i][j] += A[i][k] * B[k][j];
+
+
+		end = rdtsc();
+
+		kprintf("Calculation time (seq): %llu\n", end-start);
+		kfree(A[0], 3*N*N*sizeof(int));
+	}
+
+	RCCE_barrier(&RCCE_COMM_WORLD);
+#endif
+
 	// allocate and initialize SVM region
 	A[0] = (int*) svmmalloc(3*N*N*sizeof(int));
 	if (!my_ue)
@@ -153,38 +193,9 @@ static int svm_test(void *arg)
 	svm_flush();
 
 	// Now, we need only read access on A and B
-	//change_page_permissions((size_t) A[0], (size_t) (A[0]+2*N*N), VMA_CACHEABLE|VMA_READ);
-
+	change_page_permissions((size_t) A[0], (size_t) (A[0]+2*N*N), VMA_CACHEABLE|VMA_READ);
 	RCCE_barrier(&RCCE_COMM_WORLD);
 
-#if 0
-	kputs("Start sequentiell calculation...\n");
-
-	start = rdtsc();
-	start = rdtsc();
-
-	// start calculation
-	if (!my_ue) {
-		for(i=0; i<N; i++)
-			for(j=0; j<N; j++)
-				for(k=0; k<N; k++)
-					C[i][j] += A[i][k] * B[k][j];
-	}
-
-
-	end = rdtsc();
-
-	if (!my_ue) {
-		memset(C[0], 0x00, N*N*sizeof(int));
-		// CL1FLUSH
-		svm_flush();
-
-		kprintf("Calculation time (seq): %llu\n", end-start);
-	}
-
-	
-	RCCE_barrier(&RCCE_COMM_WORLD);
-#endif
 	kputs("Start parallel calculation...\n");
 
 	start = rdtsc();
@@ -198,9 +209,8 @@ static int svm_test(void *arg)
 
 	svm_flush();
 
-	end = rdtsc();
-
 	RCCE_barrier(&RCCE_COMM_WORLD);
+	end = rdtsc();
 
 	kputs("Check results...\n");
 

From a4a4c5b9f0cd7e2a0395f7a53199f02ce2c683d1 Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Mon, 22 Aug 2011 13:16:54 -0700
Subject: [PATCH 27/37] use RCCE's method to flush the write combining buffers

---
 arch/x86/include/asm/svm.h | 12 +++++++++---
 arch/x86/mm/page.c         |  2 +-
 arch/x86/mm/svm.c          |  3 ++-
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 1b0b3c1d..91e008ec 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -21,6 +21,10 @@
 #define __ARCH_SVM_H__
 
 #include <metalsvm/stddef.h>
+#include <asm/processor.h>
+#ifdef CONFIG_ROCKCREEK
+#include <asm/RCCE_lib.h>
+#endif
 
 #ifdef __cplusplus
 extern "C" {
@@ -72,11 +76,13 @@ static inline void svm_invalidate(void)
 
 static inline void svm_flush(void)
 {
-	mb();
-#ifndef SVM_WT
+#ifdef CONFIG_ROCKCREEK
+	// need to write to another line to make sure the write combine buffer gets flushed
+	*(int *)RCCE_fool_write_combine_buffer = 1;
+#endif
+#ifdef SVM_WB
 	flush_cache();
 #endif
-	//asm volatile ( ".byte 0x0f; .byte 0x0a;\n" ); // CL1FLUSHMB
 }
 
 #endif
diff --git a/arch/x86/mm/page.c b/arch/x86/mm/page.c
index 2bde433c..62dab193 100644
--- a/arch/x86/mm/page.c
+++ b/arch/x86/mm/page.c
@@ -356,7 +356,7 @@ size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flag
 			pgt->entries[index] |= PG_MPE;
 #endif
 		if (flags & MAP_SVM)
-#ifdef SVM_WT
+#ifndef SVM_WB
 			pgt->entries[index] |= PG_SVM|PG_PWT;
 #else
 			pgt->entries[index] |= PG_SVM;
diff --git a/arch/x86/mm/svm.c b/arch/x86/mm/svm.c
index f3245071..2f09010a 100644
--- a/arch/x86/mm/svm.c
+++ b/arch/x86/mm/svm.c
@@ -228,7 +228,8 @@ int svm_emit_page(size_t phyaddr, int ue)
 		change_page_permissions(viraddr, viraddr+PAGE_SIZE, VMA_NOACCESS|VMA_READ|VMA_CACHEABLE);
 
 		page_owner[pageid] = ue;
-		mb();
+		// need to write to another line to make sure the write combine buffer gets flushed
+		*(int *)RCCE_fool_write_combine_buffer = 1;
 	}
 
 	return 0;

From 5472960a13ceedd80c7e19f561cf81c7e52d3f38 Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Mon, 22 Aug 2011 22:10:15 -0700
Subject: [PATCH 28/37] cosmetic changes

---
 arch/x86/mm/page.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/mm/page.c b/arch/x86/mm/page.c
index 62dab193..7321a413 100644
--- a/arch/x86/mm/page.c
+++ b/arch/x86/mm/page.c
@@ -406,7 +406,7 @@ int change_page_permissions(size_t start, size_t end, uint32_t flags)
 
 				if ((newflags & PG_SVM) && !(newflags & PG_PRESENT) && (flags & (VMA_READ|VMA_WRITE) && !(flags & VMA_NOACCESS)))
 					newflags |= PG_PRESENT;
-				if ((newflags & PG_SVM) && (newflags & PG_PRESENT) && (flags & VMA_NOACCESS))
+				else if ((newflags & PG_SVM) && (newflags & PG_PRESENT) && (flags & VMA_NOACCESS))
 					newflags &= ~PG_PRESENT;
 
 				// update flags

From af6ef23085650bc827695bc482aedeba26b66506 Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Mon, 22 Aug 2011 22:13:06 -0700
Subject: [PATCH 29/37] add some performance counters and remove bug in the
 physical to virtual address translation

---
 arch/x86/include/asm/svm.h |  2 ++
 arch/x86/mm/svm.c          | 39 ++++++++++++++++++++++++++++++++------
 kernel/tests.c             | 12 +++++++-----
 3 files changed, 42 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 91e008ec..f20e582d 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -85,6 +85,8 @@ static inline void svm_flush(void)
 #endif
 }
 
+int svm_statistics(void);
+
 #endif
 
 #ifdef __cplusplus
diff --git a/arch/x86/mm/svm.c b/arch/x86/mm/svm.c
index 2f09010a..713c50eb 100644
--- a/arch/x86/mm/svm.c
+++ b/arch/x86/mm/svm.c
@@ -48,6 +48,9 @@ static volatile uint8_t*	page_owner = NULL;
 static size_t phys2virt[SHARED_PAGES] = {[0 ... SHARED_PAGES-1] = 0};
 static size_t shmbegin = 0;
 static int my_ue = 0;
+static uint32_t emit[RCCE_MAXNP] = {[0 ... RCCE_MAXNP-1] = 0};
+static uint32_t request[RCCE_MAXNP] = {[0 ... RCCE_MAXNP-1] = 0};
+static uint32_t forward[RCCE_MAXNP] = {[0 ... RCCE_MAXNP-1] = 0};
 
 int svm_init(void)
 {
@@ -119,6 +122,7 @@ int svm_access_request(size_t addr)
 	/* send ping request */
 	iRCCE_mail_send(2*sizeof(size_t), ICC_TAG_SVMREQUEST, 0, payload, remote_rank);
 
+	request[remote_rank]++;
         NOP8;
         icc_send_irq(remote_rank);
 
@@ -134,8 +138,7 @@ int svm_access_request(size_t addr)
 
 void* svmmalloc(size_t size)
 {
-	size_t phyaddr;
-	size_t viraddr;
+	size_t phyaddr, viraddr, i;
 	uint32_t flags;
 	uint32_t map_flags = MAP_KERNEL_SPACE|MAP_MPE|MAP_SVM;
 
@@ -158,7 +161,8 @@ void* svmmalloc(size_t size)
 	}
 
 	viraddr = map_region(0, phyaddr, size >> PAGE_SHIFT, map_flags);
-	phys2virt[(phyaddr - shmbegin) >> PAGE_SHIFT] = viraddr;
+	for(i=0; i<size; i+=PAGE_SIZE)
+		phys2virt[(phyaddr + i - shmbegin) >> PAGE_SHIFT] = viraddr + i;
 
 	kprintf("svmmalloc: phyaddr 0x%x, viraddr 0x%x, size 0x%x\n", phyaddr, viraddr, size);
 
@@ -167,7 +171,7 @@ void* svmmalloc(size_t size)
 
 void svmfree(void* addr, size_t size)
 {
-	size_t phyaddr;
+	size_t phyaddr, i;
 	uint32_t flags;
 
 	if (BUILTIN_EXPECT(!addr || !size, 0))
@@ -181,7 +185,8 @@ void svmfree(void* addr, size_t size)
 	kprintf("svmfree: phyaddr 0x%x, viraddr 0x%x, size 0x%x\n", phyaddr, addr, size);
 
 	unmap_region((size_t) addr, size >> PAGE_SHIFT);
-	phys2virt[(phyaddr - shmbegin) >> PAGE_SHIFT] = 0;
+	for(i=0; i<size; i+=PAGE_SIZE)
+		phys2virt[(phyaddr + i - shmbegin) >> PAGE_SHIFT] = 0;
 
 	// iRCCE is not thread save => disable interrupts
 	flags = irq_nested_disable();
@@ -219,8 +224,10 @@ int svm_emit_page(size_t phyaddr, int ue)
 		/* send ping request */
 		iRCCE_mail_send(2*sizeof(size_t), ICC_TAG_SVMREQUEST, 0, payload, remote_rank);
 
-		 NOP8;
+		NOP8;
 		icc_send_irq(remote_rank);
+
+		forward[remote_rank]++;
 	} else {
 		size_t viraddr = phys2virt[(phyaddr - shmbegin) >> PAGE_SHIFT];
 
@@ -230,9 +237,29 @@ int svm_emit_page(size_t phyaddr, int ue)
 		page_owner[pageid] = ue;
 		// need to write to another line to make sure the write combine buffer gets flushed
 		*(int *)RCCE_fool_write_combine_buffer = 1;
+
+		emit[ue]++;
 	}
 
 	return 0;
 }
 
+int svm_statistics(void)
+{
+	uint32_t i;
+
+	kprintf("emit\t:");
+	for(i=0; i<RCCE_MAXNP; i++)
+		kprintf("\t%u", emit[i]);
+	kprintf("\nrequest\t:");
+	for(i=0; i<RCCE_MAXNP; i++) 
+		kprintf("\t%u", request[i]);
+	kprintf("\nforward\t:");
+	for(i=0; i<RCCE_MAXNP; i++)
+		kprintf("\t%u", forward[i]);
+	kputs("\n");
+
+	return 0;
+}
+
 #endif
diff --git a/kernel/tests.c b/kernel/tests.c
index b30cd4d7..f1b43576 100644
--- a/kernel/tests.c
+++ b/kernel/tests.c
@@ -131,7 +131,7 @@ static int svm_test(void *arg)
 	my_ue = RCCE_ue();
 	num_ues = RCCE_num_ues();
 
-#if 1
+#if 0
 	if (!my_ue) {
 		// allocate and initialize SVM region
 		A[0] = (int*) kmalloc(3*N*N*sizeof(int));
@@ -192,15 +192,15 @@ static int svm_test(void *arg)
 
 	svm_flush();
 
-	// Now, we need only read access on A and B
-	change_page_permissions((size_t) A[0], (size_t) (A[0]+2*N*N), VMA_CACHEABLE|VMA_READ);
-	RCCE_barrier(&RCCE_COMM_WORLD);
-
 	kputs("Start parallel calculation...\n");
 
 	start = rdtsc();
 	start = rdtsc();
 
+	// Now, we need only read access on A and B
+	change_page_permissions((size_t) A[0], (size_t) (A[0]+2*N*N), VMA_CACHEABLE|VMA_READ);
+	RCCE_barrier(&RCCE_COMM_WORLD);
+
 	// start calculation
 	for(i=my_ue*(N/num_ues); i<(my_ue+1)*(N/num_ues); i++)
 		for(j=0; j<N; j++)
@@ -233,6 +233,8 @@ static int svm_test(void *arg)
 
 	svmfree((void*) A[0], 3*N*sizeof(int));
 
+	svm_statistics();
+
 	return 0;
 }
 #endif

From 2605ce5b34f1d0ee88f24c29023d17255f09e2eb Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Tue, 23 Aug 2011 01:12:52 -0700
Subject: [PATCH 30/37] RCCE_shmalloc returns only a physical address => we
 have to map this region into the virtual address space

---
 arch/x86/scc/icc.c  |  2 +-
 drivers/net/mmnif.c | 14 +++++++++++++-
 kernel/init.c       |  4 ++--
 kernel/tests.c      | 10 +++++-----
 4 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/arch/x86/scc/icc.c b/arch/x86/scc/icc.c
index 79ca128e..a1254138 100644
--- a/arch/x86/scc/icc.c
+++ b/arch/x86/scc/icc.c
@@ -84,7 +84,7 @@ static void intr_handler(struct state *s)
 	int tmp, z;
 
 #ifdef CONFIG_LWIP
-//	mmnif_irqhandler();
+	mmnif_irqhandler();
 #endif
 
 	z = Z_PID(RC_COREID[my_ue]);
diff --git a/drivers/net/mmnif.c b/drivers/net/mmnif.c
index 6d805772..1cb8b779 100644
--- a/drivers/net/mmnif.c
+++ b/drivers/net/mmnif.c
@@ -47,6 +47,7 @@ extern HANDLE hProc;
 
 #include <metalsvm/semaphore.h>
 #include <metalsvm/spinlock.h>
+#include <metalsvm/page.h>
 
 #include <asm/RCCE.h>
 #include <asm/RCCE_lib.h>
@@ -715,7 +716,11 @@ err_t mmnif_init(struct netif* netif)
 	/* Alloc and clear shared memory for rx_buff
 	 */
         mpb_size = (sizeof(mm_rx_buffer_t) + MMNIF_RX_BUFFERLEN);
+	// align mpb size to the granularity of a page size
+	mpb_size = (mpb_size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
         mpb_start_address = RCCE_shmalloc(mpb_size*MMNIF_CORES);
+	// map physical address in the virtual address space
+	mpb_start_address = map_region(0, mpb_start_address, mpb_size >> PAGE_SHIFT, MAP_KERNEL_SPACE|MAP_NO_CACHE);
 
         mmnif->rx_buff = mpb_start_address + (mpb_size) * (own_ip_address - router_ip_address);
         if (!(mpb_start_address))
@@ -1103,6 +1108,8 @@ int mmnif_open(void)
  */
 int mmnif_close(void)
 {
+	size_t phyaddr;
+
 	mmnif_t* mmnif;
 
 	if (!mmnif_dev)
@@ -1119,7 +1126,12 @@ int mmnif_close(void)
 
 	kfree(mmnif->tx_buff[0],MMNIF_TX_QUEUELEN * MMNIF_TX_BUFFERLEN);
 	kfree(mmnif_dev,sizeof(mmnif_t));
-	RCCE_shfree(mpb_start_address);
+
+	// determine physical address
+	phyaddr = virt_to_phys(mpb_start_address);
+	// unmap shared memory regeion
+	unmap_region(mpb_start_address, mpb_size >> PAGE_SHIFT);
+	RCCE_shfree(phyaddr);
 
 	return NULL;
 }
diff --git a/kernel/init.c b/kernel/init.c
index 462578e1..a29807ac 100644
--- a/kernel/init.c
+++ b/kernel/init.c
@@ -153,7 +153,7 @@ int network_init(void)
 		}
 	}
 #else
-	//mmnif_open();
+	mmnif_open();
 #endif
 
 	// start echo and ping server
@@ -169,7 +169,7 @@ int network_shutdown(void)
 {
 #ifdef CONFIG_LWIP
 #ifdef CONFIG_ROCKCREEK
-	//mmnif_close();
+	mmnif_close();
 #elif defined(CONFIG_PCI)
 	dhcp_release(default_netif);
 	dhcp_stop(default_netif);
diff --git a/kernel/tests.c b/kernel/tests.c
index f1b43576..efa081b5 100644
--- a/kernel/tests.c
+++ b/kernel/tests.c
@@ -131,7 +131,7 @@ static int svm_test(void *arg)
 	my_ue = RCCE_ue();
 	num_ues = RCCE_num_ues();
 
-#if 0
+#if 1
 	if (!my_ue) {
 		// allocate and initialize SVM region
 		A[0] = (int*) kmalloc(3*N*N*sizeof(int));
@@ -399,14 +399,14 @@ int test_init(void)
 //		create_kernel_task(NULL,client_task,NULL);
 #endif
 
-	//create_kernel_task(NULL, foo, "Hello from foo1");
-	//create_kernel_task(NULL, join_test, NULL);
+	create_kernel_task(NULL, foo, "Hello from foo1");
+	create_kernel_task(NULL, join_test, NULL);
 	//create_kernel_task(NULL, producer, NULL);
 	//create_kernel_task(NULL, consumer, NULL);
 	//create_kernel_task(NULL, mail_ping, NULL);
-	create_kernel_task(NULL, svm_test, NULL);
+	//create_kernel_task(NULL, svm_test, NULL);
 	//create_user_task(NULL, "/bin/hello", argv);
-	//create_user_task(NULL, "/bin/tests", argv);
+	create_user_task(NULL, "/bin/tests", argv);
 	//create_user_task(NULL, "/bin/jacobi", argv);
 	//create_user_task(NULL, "/bin/jacobi", argv);
 	//create_user_task(NULL, "/bin/server", server_argv);

From df40d339137dbc00ad5e0ab6208ef3b7ee05ffdf Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Tue, 23 Aug 2011 03:21:02 -0700
Subject: [PATCH 31/37] add additional output messages

---
 drivers/net/rckemac.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/rckemac.c b/drivers/net/rckemac.c
index 1c8ae09d..2769b909 100644
--- a/drivers/net/rckemac.c
+++ b/drivers/net/rckemac.c
@@ -261,6 +261,7 @@ again:
 static void rckemacif_input(struct netif* netif, struct pbuf* p)
 {
 	struct eth_hdr *ethhdr;
+	err_t err;
  
 	/* points to packet payload, which starts with an Ethernet header */
 	ethhdr = p->payload;
@@ -275,8 +276,8 @@ static void rckemacif_input(struct netif* netif, struct pbuf* p)
 	case ETHTYPE_PPPOE:
 #endif /* PPPOE_SUPPORT */
 		/* full packet send to tcpip_thread to process */
-		if (mynetif->input(p, mynetif) != ERR_OK) {
-			LWIP_DEBUGF(NETIF_DEBUG, ("rckemacif_input: IP input error\n"));
+		if ((err = mynetif->input(p, mynetif)) != ERR_OK) {
+			LWIP_DEBUGF(NETIF_DEBUG, ("rckemacif_input: IP input error %u\n", err));
 			pbuf_free(p);
 		}
 		break;

From 0d74873fa929cd2da8c3dccc39ee36f1effe6d10 Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Tue, 23 Aug 2011 06:51:25 -0700
Subject: [PATCH 32/37] minor optimizations

---
 arch/x86/include/asm/svm.h | 21 +++++++++++++++------
 arch/x86/mm/svm.c          | 32 +++++++++++++++++++++++++++-----
 arch/x86/scc/icc.c         | 11 ++++++++++-
 kernel/tests.c             | 11 ++++++-----
 4 files changed, 58 insertions(+), 17 deletions(-)

diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index f20e582d..cd2737ea 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -21,7 +21,6 @@
 #define __ARCH_SVM_H__
 
 #include <metalsvm/stddef.h>
-#include <asm/processor.h>
 #ifdef CONFIG_ROCKCREEK
 #include <asm/RCCE_lib.h>
 #endif
@@ -69,22 +68,32 @@ int svm_access_request(size_t addr);
  */
 int svm_emit_page(size_t addr, int ue);
 
+/* @brief invalidate the cache entries for all SVM regions
+ */
 static inline void svm_invalidate(void)
 {
 	asm volatile ( ".byte 0x0f; .byte 0x0a;\n" ); // CL1FLUSHMB
 }
 
+/* @brief flushes the cache for all SVM regions
+ */
+#ifdef CONFIG_ROCKCREEK
+#ifndef SVM_WB
 static inline void svm_flush(void)
 {
-#ifdef CONFIG_ROCKCREEK
 	// need to write to another line to make sure the write combine buffer gets flushed
 	*(int *)RCCE_fool_write_combine_buffer = 1;
-#endif
-#ifdef SVM_WB
-	flush_cache();
-#endif
 }
+#else
+void svm_flush(void);
+#endif
+#endif
 
+/* @brief dumps some performance counters (e.g. numbers of page migrations)
+ * 
+ * @return
+ * - 0 on success
+ */
 int svm_statistics(void);
 
 #endif
diff --git a/arch/x86/mm/svm.c b/arch/x86/mm/svm.c
index 713c50eb..10a55d49 100644
--- a/arch/x86/mm/svm.c
+++ b/arch/x86/mm/svm.c
@@ -24,6 +24,7 @@
 #include <metalsvm/page.h>
 #include <metalsvm/errno.h>
 #include <asm/irqflags.h>
+#include <asm/processor.h>
 #ifdef CONFIG_ROCKCREEK
 #include <asm/RCCE.h>
 #include <asm/RCCE_lib.h>
@@ -229,21 +230,42 @@ int svm_emit_page(size_t phyaddr, int ue)
 
 		forward[remote_rank]++;
 	} else {
-		size_t viraddr = phys2virt[(phyaddr - shmbegin) >> PAGE_SHIFT];
+		size_t viraddr;
 
 		svm_flush();
-		change_page_permissions(viraddr, viraddr+PAGE_SIZE, VMA_NOACCESS|VMA_READ|VMA_CACHEABLE);
-
 		page_owner[pageid] = ue;
-		// need to write to another line to make sure the write combine buffer gets flushed
-		*(int *)RCCE_fool_write_combine_buffer = 1;
 
 		emit[ue]++;
+		viraddr = phys2virt[(phyaddr - shmbegin) >> PAGE_SHIFT];
+		change_page_permissions(viraddr, viraddr+PAGE_SIZE, VMA_NOACCESS|VMA_READ|VMA_CACHEABLE);
 	}
 
 	return 0;
 }
 
+#ifdef SVM_WB
+void svm_flush(void)
+{
+	int z, tmp;
+
+        // need to write to another line to make sure the write combine buffer gets flushed
+        *(int *)RCCE_fool_write_combine_buffer = 1;
+        flush_cache();
+
+#if 0
+	// try to flush L2 cache
+	z = Z_PID(RC_COREID[my_ue]);
+	tmp=ReadConfigReg(CRB_OWN + (z==0 ? GLCFG0 : GLCFG1));
+	tmp &= ~(1 << GLCFG_XFLSHNN_BIT);
+	SetConfigReg(CRB_OWN + (z==0 ? GLCFG0 : GLCFG1), tmp);
+
+	while(!(ReadConfigReg(CRB_OWN + (z==0 ? GLCFG0 : GLCFG1)) & (1 << GLCFG_XFLSHNN_BIT))) {
+		NOP8;
+	}
+#endif
+}
+#endif
+
 int svm_statistics(void)
 {
 	uint32_t i;
diff --git a/arch/x86/scc/icc.c b/arch/x86/scc/icc.c
index a1254138..1d3b4df7 100644
--- a/arch/x86/scc/icc.c
+++ b/arch/x86/scc/icc.c
@@ -164,9 +164,18 @@ int icc_init(void)
 	// reset INTR/LINT0 flag
 	z = Z_PID(RC_COREID[my_ue]);
 	tmp=ReadConfigReg(CRB_OWN + (z==0 ? GLCFG0 : GLCFG1));
-	tmp &= ~2;
+	tmp &= ~(1 << GLCFG_XINTR_BIT);
 	SetConfigReg(CRB_OWN + (z==0 ? GLCFG0 : GLCFG1), tmp);
 
+#if 0
+	// disable L2 cache
+	z = Z_PID(RC_COREID[my_ue]);
+	tmp=ReadConfigReg(CRB_OWN + (z==0 ? L2CFG0 : L2CFG1));
+	tmp |= (1 << L2CFG_WAYDISABLE_BIT);
+	SetConfigReg(CRB_OWN + (z==0 ? L2CFG0 : L2CFG1), tmp);
+	kprintf("set L2CFG to 0x%x\n", (uint32_t) tmp);
+#endif
+
 	// set interrupt handler (INTR/LINT0)
 	irq_install_handler(124, intr_handler);
 
diff --git a/kernel/tests.c b/kernel/tests.c
index efa081b5..7ad3c4f1 100644
--- a/kernel/tests.c
+++ b/kernel/tests.c
@@ -131,7 +131,7 @@ static int svm_test(void *arg)
 	my_ue = RCCE_ue();
 	num_ues = RCCE_num_ues();
 
-#if 1
+#if 0
 	if (!my_ue) {
 		// allocate and initialize SVM region
 		A[0] = (int*) kmalloc(3*N*N*sizeof(int));
@@ -192,6 +192,7 @@ static int svm_test(void *arg)
 
 	svm_flush();
 
+	RCCE_barrier(&RCCE_COMM_WORLD);
 	kputs("Start parallel calculation...\n");
 
 	start = rdtsc();
@@ -399,14 +400,14 @@ int test_init(void)
 //		create_kernel_task(NULL,client_task,NULL);
 #endif
 
-	create_kernel_task(NULL, foo, "Hello from foo1");
-	create_kernel_task(NULL, join_test, NULL);
+	//create_kernel_task(NULL, foo, "Hello from foo1");
+	//create_kernel_task(NULL, join_test, NULL);
 	//create_kernel_task(NULL, producer, NULL);
 	//create_kernel_task(NULL, consumer, NULL);
 	//create_kernel_task(NULL, mail_ping, NULL);
-	//create_kernel_task(NULL, svm_test, NULL);
+	create_kernel_task(NULL, svm_test, NULL);
 	//create_user_task(NULL, "/bin/hello", argv);
-	create_user_task(NULL, "/bin/tests", argv);
+	//create_user_task(NULL, "/bin/tests", argv);
 	//create_user_task(NULL, "/bin/jacobi", argv);
 	//create_user_task(NULL, "/bin/jacobi", argv);
 	//create_user_task(NULL, "/bin/server", server_argv);

From 0a0452b7a1aa19bdd1fc9c0bacc5d5a99c08d067 Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Tue, 23 Aug 2011 07:40:20 -0700
Subject: [PATCH 33/37] prepare SVM subsystem to also support other consistency
 models

---
 arch/x86/include/asm/page.h |  7 +++++--
 arch/x86/include/asm/svm.h  |  5 ++++-
 arch/x86/mm/page.c          | 10 +++++-----
 arch/x86/mm/svm.c           |  4 ++--
 include/metalsvm/stdlib.h   |  2 +-
 kernel/tests.c              |  5 +++--
 6 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index 5a07a76d..5d35ac53 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -41,7 +41,8 @@
 #define _PAGE_BIT_PSE		7	/* 4 MB (or 2MB) page */
 #define _PAGE_BIT_PAT		7	/* on 4KB pages */
 #define _PAGE_BIT_GLOBAL	8	/* Global TLB entry PPro+ */
-#define _PAGE_BIT_SVM		9	/* mark a virtual address range as used by the SVM system */
+#define _PAGE_BIT_SVM_STRONG	9	/* mark a virtual address range as used by the SVM system */
+#define _PAGE_BIT_SVM_LAZYRELEASE	10 /* mark a virtual address range as used by the SVM system */
 
 /// Page is present
 #define PG_PRESENT	(1 << _PAGE_BIT_PRESENT)
@@ -66,7 +67,9 @@
 /// Pattern flag
 #define PG_PAT		(1 << _PAGE_BIT_PAT)
 /// This virtual address range is used by SVM system as marked
-#define PG_SVM		(1 << _PAGE_BIT_SVM)
+#define PG_SVM_STRONG		(1 << _PAGE_BIT_SVM_STRONG)
+/// This virtual address range is used by SVM system as marked
+#define PG_SVM_LAZYRELEASE	(1 << _PAGE_BIT_SVM_LAZYRELEASE)
 
 /// This is a whole set of flags (PRESENT,RW,ACCESSED,DIRTY) for kernelspace tables
 #define KERN_TABLE	(PG_PRESENT|PG_RW|PG_ACCESSED|PG_DIRTY)
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index cd2737ea..301fda74 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -31,6 +31,9 @@ extern "C" {
 
 #ifdef CONFIG_ROCKCREEK
 
+#define SVM_STRONG		(1 << 0)
+#define SVM_LAZYRELEASE		(1 << 1)
+
 /** @brief Init routine of the SVM subsystem 
  *
  * @return
@@ -46,7 +49,7 @@ int svm_init(void);
  *
  * @return Pointer to the new memory range
  */
-void* svmmalloc(size_t size);
+void* svmmalloc(size_t sizei, uint32_t flags);
 
 /** @brief Frees memory, which is managed by the SVM subsystem
  *
diff --git a/arch/x86/mm/page.c b/arch/x86/mm/page.c
index 7321a413..d2c4d4bd 100644
--- a/arch/x86/mm/page.c
+++ b/arch/x86/mm/page.c
@@ -355,9 +355,9 @@ size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flag
 		if (flags & MAP_MPE)
 			pgt->entries[index] |= PG_MPE;
 #endif
-		if (flags & MAP_SVM)
+		if (flags & MAP_SVM_STRONG)
 #ifndef SVM_WB
-			pgt->entries[index] |= PG_SVM|PG_PWT;
+			pgt->entries[index] |= PG_SVM_STRONG|PG_PWT;
 #else
 			pgt->entries[index] |= PG_SVM;
 #endif
@@ -404,9 +404,9 @@ int change_page_permissions(size_t start, size_t end, uint32_t flags)
 				phyaddr = pgt->entries[index2] & 0xFFFFF000;
 				newflags = pgt->entries[index2] & 0xFFF;  // get old flags
 
-				if ((newflags & PG_SVM) && !(newflags & PG_PRESENT) && (flags & (VMA_READ|VMA_WRITE) && !(flags & VMA_NOACCESS)))
+				if ((newflags & PG_SVM_STRONG) && !(newflags & PG_PRESENT) && (flags & (VMA_READ|VMA_WRITE) && !(flags & VMA_NOACCESS)))
 					newflags |= PG_PRESENT;
-				else if ((newflags & PG_SVM) && (newflags & PG_PRESENT) && (flags & VMA_NOACCESS))
+				else if ((newflags & PG_SVM_STRONG) && (newflags & PG_PRESENT) && (flags & VMA_NOACCESS))
 					newflags &= ~PG_PRESENT;
 
 				// update flags
@@ -636,7 +636,7 @@ static void pagefault_handler(struct state *s)
         pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & 0xFFFFF000);
 	if (!pgt || !(pgt->entries[index2]))
 		goto default_handler;
-	if (pgt->entries[index2] & PG_SVM)
+	if (pgt->entries[index2] & PG_SVM_STRONG)
 		if (!svm_access_request(viraddr))
 			return;
 
diff --git a/arch/x86/mm/svm.c b/arch/x86/mm/svm.c
index 10a55d49..fbf26d2a 100644
--- a/arch/x86/mm/svm.c
+++ b/arch/x86/mm/svm.c
@@ -137,11 +137,11 @@ int svm_access_request(size_t addr)
 	return change_page_permissions(addr, addr+PAGE_SIZE, VMA_READ|VMA_WRITE|VMA_CACHEABLE);
 }
 
-void* svmmalloc(size_t size)
+void* svmmalloc(size_t size, uint32_t consitency)
 {
 	size_t phyaddr, viraddr, i;
 	uint32_t flags;
-	uint32_t map_flags = MAP_KERNEL_SPACE|MAP_MPE|MAP_SVM;
+	uint32_t map_flags = MAP_KERNEL_SPACE|MAP_MPE|MAP_SVM_STRONG;
 
 	// currently, we allocate memory in page size granulation
 	size = (size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
diff --git a/include/metalsvm/stdlib.h b/include/metalsvm/stdlib.h
index 3849745e..1c8eec47 100644
--- a/include/metalsvm/stdlib.h
+++ b/include/metalsvm/stdlib.h
@@ -48,7 +48,7 @@ extern "C" {
 #ifdef CONFIG_ROCKCREEK
 #define MAP_MPE			(1 << 8)
 #endif
-#define MAP_SVM			(1 << 9)
+#define MAP_SVM_STRONG		(1 << 9)
 #define MAP_NO_ACCESS		(1 << 10)
 
 void NORETURN abort(void);
diff --git a/kernel/tests.c b/kernel/tests.c
index 7ad3c4f1..6cff1758 100644
--- a/kernel/tests.c
+++ b/kernel/tests.c
@@ -115,7 +115,8 @@ int mail_ping(void* arg) {
 	return 0;
 }
 
-#define N	1024
+//#define N	1024
+#define N 	513
 
 volatile static int* A[N];
 volatile static int* B[N];
@@ -172,7 +173,7 @@ static int svm_test(void *arg)
 #endif
 
 	// allocate and initialize SVM region
-	A[0] = (int*) svmmalloc(3*N*N*sizeof(int));
+	A[0] = (int*) svmmalloc(3*N*N*sizeof(int), SVM_STRONG);
 	if (!my_ue)
 		memset((void*) A[0], 0x00, 3*N*N*sizeof(int));
 

From 637399c1e1f1e757225295bc81d1ab8935b32ecd Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Tue, 23 Aug 2011 07:58:35 -0700
Subject: [PATCH 34/37] Attention! N has to be divisible by the number of
 cores!

---
 kernel/tests.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/kernel/tests.c b/kernel/tests.c
index 6cff1758..e92253b5 100644
--- a/kernel/tests.c
+++ b/kernel/tests.c
@@ -116,7 +116,7 @@ int mail_ping(void* arg) {
 }
 
 //#define N	1024
-#define N 	513
+#define N 	514
 
 volatile static int* A[N];
 volatile static int* B[N];
@@ -219,8 +219,9 @@ static int svm_test(void *arg)
 	if (!my_ue) {
 		uint32_t err = 0;
 
-		for(i=0; (i<N) && (err < 10); i++) {
-			for(j=0; (j<N) && (err < 10); j++) {
+		svm_invalidate();
+		for(i=0; (i<N) && (err < 32); i++) {
+			for(j=0; (j<N) && (err < 32); j++) {
 				if (C[i][j] != i+j) {
 					err++;
 					kprintf("Wrong value at C[%u][%u] = %u, B[%u][%u] = %u\n", i, j, C[i][j], i, j, B[i][j]);

From 499f43bfaf0bd7ca9387091712c0b60273bfa5d2 Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Tue, 23 Aug 2011 14:03:34 -0700
Subject: [PATCH 35/37] add first version of the lazy release consistency

---
 arch/x86/mm/page.c        |  3 +++
 arch/x86/mm/svm.c         | 12 +++++++++---
 include/metalsvm/stdlib.h |  3 ++-
 kernel/tests.c            | 13 ++++++++++---
 4 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/arch/x86/mm/page.c b/arch/x86/mm/page.c
index d2c4d4bd..e8985010 100644
--- a/arch/x86/mm/page.c
+++ b/arch/x86/mm/page.c
@@ -361,6 +361,9 @@ size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flag
 #else
 			pgt->entries[index] |= PG_SVM;
 #endif
+		if (flags & MAP_SVM_LAZYRELEASE)
+			pgt->entries[index] |= PG_SVM_LAZYRELEASE|PG_PWT;
+
 		if (flags & MAP_NO_ACCESS)
 			pgt->entries[index] &= ~PG_PRESENT;
 
diff --git a/arch/x86/mm/svm.c b/arch/x86/mm/svm.c
index fbf26d2a..1dd2075f 100644
--- a/arch/x86/mm/svm.c
+++ b/arch/x86/mm/svm.c
@@ -137,11 +137,17 @@ int svm_access_request(size_t addr)
 	return change_page_permissions(addr, addr+PAGE_SIZE, VMA_READ|VMA_WRITE|VMA_CACHEABLE);
 }
 
-void* svmmalloc(size_t size, uint32_t consitency)
+void* svmmalloc(size_t size, uint32_t consistency)
 {
 	size_t phyaddr, viraddr, i;
 	uint32_t flags;
-	uint32_t map_flags = MAP_KERNEL_SPACE|MAP_MPE|MAP_SVM_STRONG;
+	uint32_t map_flags = MAP_KERNEL_SPACE|MAP_MPE;
+
+	if (consistency & SVM_STRONG)
+		map_flags |= MAP_SVM_STRONG;
+	else if (consistency & SVM_LAZYRELEASE)
+		map_flags |= MAP_SVM_LAZYRELEASE;
+	else return 0;
 
 	// currently, we allocate memory in page size granulation
 	size = (size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
@@ -150,7 +156,7 @@ void* svmmalloc(size_t size, uint32_t consitency)
 	flags = irq_nested_disable();
 	phyaddr = (size_t) RCCE_shmalloc(size);
 
-	if (RCCE_ue())
+	if (RCCE_ue() && (consistency & SVM_STRONG))
 		map_flags |= MAP_NO_ACCESS;
 	irq_nested_enable(flags);
 
diff --git a/include/metalsvm/stdlib.h b/include/metalsvm/stdlib.h
index 1c8eec47..15d0961e 100644
--- a/include/metalsvm/stdlib.h
+++ b/include/metalsvm/stdlib.h
@@ -49,7 +49,8 @@ extern "C" {
 #define MAP_MPE			(1 << 8)
 #endif
 #define MAP_SVM_STRONG		(1 << 9)
-#define MAP_NO_ACCESS		(1 << 10)
+#define MAP_SVM_LAZYRELEASE	(1 << 10)
+#define MAP_NO_ACCESS		(1 << 11)
 
 void NORETURN abort(void);
 
diff --git a/kernel/tests.c b/kernel/tests.c
index e92253b5..2c52b868 100644
--- a/kernel/tests.c
+++ b/kernel/tests.c
@@ -115,8 +115,9 @@ int mail_ping(void* arg) {
 	return 0;
 }
 
-//#define N	1024
-#define N 	514
+#define N	1024
+//#define N 	514
+#define LAZY
 
 volatile static int* A[N];
 volatile static int* B[N];
@@ -132,7 +133,7 @@ static int svm_test(void *arg)
 	my_ue = RCCE_ue();
 	num_ues = RCCE_num_ues();
 
-#if 0
+#if 1
 	if (!my_ue) {
 		// allocate and initialize SVM region
 		A[0] = (int*) kmalloc(3*N*N*sizeof(int));
@@ -173,7 +174,11 @@ static int svm_test(void *arg)
 #endif
 
 	// allocate and initialize SVM region
+#ifndef LAZY
 	A[0] = (int*) svmmalloc(3*N*N*sizeof(int), SVM_STRONG);
+#else
+	A[0] = (int*) svmmalloc(3*N*N*sizeof(int), SVM_LAZYRELEASE);
+#endif
 	if (!my_ue)
 		memset((void*) A[0], 0x00, 3*N*N*sizeof(int));
 
@@ -199,9 +204,11 @@ static int svm_test(void *arg)
 	start = rdtsc();
 	start = rdtsc();
 
+#ifndef LAZY
 	// Now, we need only read access on A and B
 	change_page_permissions((size_t) A[0], (size_t) (A[0]+2*N*N), VMA_CACHEABLE|VMA_READ);
 	RCCE_barrier(&RCCE_COMM_WORLD);
+#endif
 
 	// start calculation
 	for(i=my_ue*(N/num_ues); i<(my_ue+1)*(N/num_ues); i++)

From 1e1e77351a9c5e2b6a9ae6657d8964fe53f35cd5 Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Wed, 24 Aug 2011 09:32:36 +0200
Subject: [PATCH 36/37] first approach to determine the cpu load

---
 kernel/tasks.c | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/kernel/tasks.c b/kernel/tasks.c
index 9939651a..57fccdba 100644
--- a/kernel/tasks.c
+++ b/kernel/tasks.c
@@ -50,9 +50,14 @@ static task_t task_table[MAX_TASKS] = { \
 		[0]                 = {0, TASK_IDLE,    0, 0, 0, NULL, NULL, 0, ATOMIC_INIT(0), SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, 0, 0, 0, 0}, \
 		[1 ... MAX_TASKS-1] = {0, TASK_INVALID, 0, 0, 0, NULL, NULL, 0, ATOMIC_INIT(0), SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, 0, 0, 0, 0}};
 static spinlock_irqsave_t table_lock = SPINLOCK_IRQSAVE_INIT;
+#if MAX_CORES > 1
 static runqueue_t runqueues[MAX_CORES] = { \
 		[0]                 = {task_table+0, NULL, 0, 0, 0, 0, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_INIT}, \
 		[1 ... MAX_CORES-1] = {NULL,         NULL, 0, 0, 0, 0, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_INIT}};
+#else
+static runqueue_t runqueues[1] = { \
+		[0]                 = {task_table+0, NULL, 0, 0, 0, 0, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_INIT}};
+#endif
 
 DEFINE_PER_CORE(task_t*, current_task, task_table+0);
 
@@ -859,6 +864,9 @@ int wakeup_task(tid_t id)
 		ret = 0;
 
 		spinlock_lock(&runqueues[core_id].lock);
+		// increase the number of ready tasks
+		runqueues[core_id].nr_tasks++;
+
 		// add task to the runqueue
 		if (!runqueues[core_id].queue[prio-1].last) {
 			runqueues[core_id].queue[prio-1].last = runqueues[core_id].queue[prio-1].first = task;
@@ -906,6 +914,8 @@ int block_current_task(void)
 		ret = 0;
 
 		spinlock_lock(&runqueues[core_id].lock);
+		// reduce the number of ready tasks
+		runqueues[core_id].nr_tasks--;
 
 		// remove task from queue
 		if (task_table[id].prev)
@@ -953,6 +963,9 @@ int set_timer(uint64_t deadline)
 
 		spinlock_lock(&runqueues[core_id].lock);
 
+		// reduce the number of ready tasks
+		runqueues[core_id].nr_tasks--;
+
 		// remove task from queue
 		if (curr_task->prev)
 			curr_task->prev->next = curr_task->next;
@@ -1035,6 +1048,7 @@ extern atomic_int32_t cpu_online;
 
 void load_balancing(void)
 {
+#if 0
 	uint32_t i, core_id = CORE_ID;
 	uint32_t prio;
 	task_t* task;
@@ -1130,6 +1144,7 @@ no_task_found:
 		spinlock_unlock(&runqueues[i].lock);
 	}
 	spinlock_unlock(&runqueues[core_id].lock);
+#endif
 }
 #endif
 
@@ -1172,6 +1187,9 @@ void scheduler(void)
 			task->status = TASK_READY;
 			prio = task->prio;
 
+			// increase the number of ready tasks
+			runqueues[core_id].nr_tasks++;
+
 			// add task to the runqueue
 			if (!runqueues[core_id].queue[prio-1].first) {
 				runqueues[core_id].queue[prio-1].last = runqueues[core_id].queue[prio-1].first = task;
@@ -1189,10 +1207,10 @@ void scheduler(void)
 	runqueues[core_id].old_task = NULL; // reset old task
 	prio = last_set(runqueues[core_id].prio_bitmap); // determines highest priority
 #if MAX_CORES > 1
-	/*if (!prio) {
+	if (!prio) {
 		load_balancing();
 		prio = last_set(runqueues[core_id].prio_bitmap); // retry...
-	}*/
+	}
 #endif
 
 	if (BUILTIN_EXPECT(prio > MAX_PRIO, 0)) {

From 7b3da9e99866113298c921c219868e5d9b81790c Mon Sep 17 00:00:00 2001
From: Stefan Lankes <lankes@lfbs.rwth-aachen.de>
Date: Wed, 24 Aug 2011 00:55:43 -0700
Subject: [PATCH 37/37] remove obsolete endless loop before abort() in the page fault handler

---
 arch/x86/mm/page.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/x86/mm/page.c b/arch/x86/mm/page.c
index 33b5592b..cb22c6f3 100644
--- a/arch/x86/mm/page.c
+++ b/arch/x86/mm/page.c
@@ -651,7 +651,6 @@ default_handler:
 	kprintf("Register state: eax = 0x%x, ebx = 0x%x, ecx = 0x%x, edx = 0x%x, edi = 0x%x, esi = 0x%x, ebp = 0x%x, esp = 0x%x\n", 
 		s->eax, s->ebx, s->ecx, s->edx, s->edi, s->esi, s->ebp, s->esp);
 
-	while(1);
 	irq_enable();
 	abort();
 }