diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 379a181b..da4c65f4 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -60,7 +60,7 @@ static uint32_t ncores = 1;
 static uint8_t irq_redirect[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF};
 #if MAX_CORES > 1
 static uint8_t boot_code[] = { 0xFA, 0x0F, 0x01, 0x16, 0x3B, 0x70, 0x0F, 0x20, 0xC0, 0x0C, 0x01, 0x0F, 0x22, 0xC0, 0x66, 0xEA, 0x16, 0x70, 0x00, 0x00, 0x08, 0x00, 0x31, 0xC0, 0x66, 0xB8, 0x10, 0x00, 0x8E, 0xD8, 0x8E, 0xC0, 0x8E, 0xE0, 0x8E, 0xE8, 0x8E, 0xD0, 0xBC, 0xEF, 0xBE, 0xAD, 0xDE, 0x68, 0xAD, 0xDE, 0xAD, 0xDE, 0x6A, 0x00, 0xEA, 0xDE, 0xC0, 0xAD, 0xDE, 0x08, 0x00, 0xEB, 0xFE, 0x17, 0x00, 0x41, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x9A, 0xCF, 0x00, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x92, 0xCF, 0x00};
-static atomic_int32_t cpu_online = ATOMIC_INIT(1);
+atomic_int32_t cpu_online = ATOMIC_INIT(1);
 #endif
 static uint8_t initialized = 0;
 spinlock_t bootlock = SPINLOCK_INIT;
diff --git a/arch/x86/kernel/timer.c b/arch/x86/kernel/timer.c
index ffc7bf00..af70abfd 100644
--- a/arch/x86/kernel/timer.c
+++ b/arch/x86/kernel/timer.c
@@ -36,6 +36,10 @@
  */
 static volatile uint64_t timer_ticks = 0;
 
+#if MAX_CORES > 1
+extern atomic_int32_t cpu_online;
+#endif
+
 uint64_t get_clock_tick(void)
 {
 	return timer_ticks;
@@ -61,8 +65,6 @@ int sys_times(struct tms* buffer, clock_t* clock)
  */
 static void timer_handler(struct state *s)
 {
-	uint32_t i;
-
 	/* Increment our 'tick counter' */
 #if MAX_CORES > 1
 	if (smp_id() == 0)
@@ -78,6 +80,13 @@ static void timer_handler(struct state *s)
 			vga_puts("One second has passed\n");
 		}*/
 	}
+
+	update_load();
+
+#if MAX_CORES > 1
+	if ((atomic_int32_read(&cpu_online) > 1) && (timer_ticks % (TIMER_FREQ/5) == 0))
+		load_balancing();
+#endif
 }
 
 int timer_wait(unsigned int ticks)
diff --git a/arch/x86/mm/page.c b/arch/x86/mm/page.c
index 6eadd3d1..a99a576a 100644
--- a/arch/x86/mm/page.c
+++ b/arch/x86/mm/page.c
@@ -615,6 +615,7 @@ default_handler:
 	kprintf("Register state: eax = 0x%x, ebx = 0x%x, ecx = 0x%x, edx = 0x%x, edi = 0x%x, esi = 0x%x, ebp = 0x%x, esp = 0x%x\n",
 		s->eax, s->ebx, s->ecx, s->edx, s->edi, s->esi, s->ebp, s->esp);
 
+	while(1);
 	irq_enable();
 	abort();
 }
diff --git a/include/metalsvm/tasks.h b/include/metalsvm/tasks.h
index 3be44077..be3fdd07 100644
--- a/include/metalsvm/tasks.h
+++ b/include/metalsvm/tasks.h
@@ -85,6 +85,22 @@ int create_user_task(tid_t* id, const char* fame, char** argv);
  */
 tid_t wait(int32_t* result);
 
+/** @brief Update the load of the current core
+ *
+ * This function is called from the timer interrupt
+ * and updates the load of the current core.
+ */
+void update_load(void);
+
+#if MAX_CORES > 1
+/** @brief Load balancer
+ *
+ * The load balancer is called from the timer interrupt
+ * and steals tasks from overloaded cores.
+ */
+void load_balancing(void);
+#endif
+
 /** @brief Task switcher
  *
  * Timer-interrupted use of this function for task switching */
diff --git a/include/metalsvm/tasks_types.h b/include/metalsvm/tasks_types.h
index 3981f15c..fc1072de 100644
--- a/include/metalsvm/tasks_types.h
+++ b/include/metalsvm/tasks_types.h
@@ -116,6 +116,14 @@ typedef struct {
 	task_t* idle __attribute__ ((aligned (CACHE_LINE)));
 	/// previous task
 	task_t* old_task;
+	/// total number of tasks in the queue
+	uint32_t nr_tasks;
+	/// current load = average number of tasks in the queue (1-minute average)
+	uint32_t load;
+	/// help counter to determine the cpu load
+	int32_t load_counter;
+	/// help counter to avoid "over balancing"
+	int32_t balance_counter;
 	/// indicates the used priority queues
 	uint32_t prio_bitmap;
 	/// a queue for each priority
diff --git a/kernel/tasks.c b/kernel/tasks.c
index c6874153..9939651a 100644
--- a/kernel/tasks.c
+++ b/kernel/tasks.c
@@ -51,8 +51,8 @@ static task_t task_table[MAX_TASKS] = { \
 	[1 ... MAX_TASKS-1] = {0, TASK_INVALID, 0, 0, 0, NULL, NULL, 0, ATOMIC_INIT(0), SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, 0, 0, 0, 0}};
 static spinlock_irqsave_t table_lock = SPINLOCK_IRQSAVE_INIT;
 static runqueue_t runqueues[MAX_CORES] = { \
-	[0] = {task_table+0, NULL, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_INIT}, \
-	[1 ... MAX_CORES-1] = {NULL, NULL, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_INIT}};
+	[0] = {task_table+0, NULL, 0, 0, 0, 0, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_INIT}, \
+	[1 ... MAX_CORES-1] = {NULL, NULL, 0, 0, 0, 0, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_INIT}};
 
 DEFINE_PER_CORE(task_t*, current_task, task_table+0);
 
@@ -63,25 +63,6 @@ task_t* get_current_task(void) {
 	return per_core(current_task);
 }
 
-int dump_scheduling_statistics(void)
-{
-#if 0
-	uint32_t i;
-	uint32_t id = 0;
-
-	kprintf("Scheduling statistics:\n");
-	kprintf("======================\n");
-	kprintf("total ticks:\t%llu\n", get_clock_tick());
-	for(i=0; i<MAX_CORES; i++) {
-		if (runqueues[i].idle) {
-			kprintf("core %u:\t%llu idle ticks\n", id, runqueues[i].idle->idle_ticks);
-			id++;
-		}
-	}
-#endif
-
-	return 0;
-}
 
 int multitasking_init(void) {
 	if (BUILTIN_EXPECT(task_table[0].status != TASK_IDLE, 0)) {
@@ -172,3 +153,4 @@ static void NORETURN do_exit(int arg) {
 	task_t* curr_task = per_core(current_task);
+	uint32_t flags, core_id;
 
 	kprintf("Terminate task: %u, return value %d\n", curr_task->id, arg);
@@ -198,6 +180,15 @@ static void NORETURN do_exit(int arg) {
 		kprintf("Memory leak! Task %d did not release %d pages\n",
 			curr_task->id, atomic_int32_read(&curr_task->user_usage));
 	curr_task->status = TASK_FINISHED;
+
+	// decrease the number of active tasks
+	flags = irq_nested_disable();
+	core_id = CORE_ID;
+	spinlock_lock(&runqueues[core_id].lock);
+	runqueues[core_id].nr_tasks--;
+	spinlock_unlock(&runqueues[core_id].lock);
+	irq_nested_enable(flags);
+
 	reschedule();
 
 	kprintf("Kernel panic: scheduler on core %d found no valid task\n", CORE_ID);
@@ -237,7 +228,7 @@ static int create_task(tid_t* id, internal_entry_point_t ep, void* arg, uint8_t
 {
 	task_t* curr_task;
 	int ret = -ENOMEM;
-	unsigned int i, core_id = CORE_ID;
+	unsigned int i, core_id;
 
 	if (BUILTIN_EXPECT(!ep, 0))
 		return -EINVAL;
@@ -248,6 +239,7 @@ static int create_task(tid_t* id, internal_entry_point_t ep, void* arg, uint8_t
 
 	spinlock_irqsave_lock(&table_lock);
 
+	core_id = CORE_ID;
 	curr_task = per_core(current_task);
 
 	for(i=0; i<MAX_TASKS; i++) {
@@ -283,4 +275,6 @@ int sys_fork(void)
 	spinlock_lock(&parent_task->vma_lock);
 	spinlock_irqsave_lock(&table_lock);
 
+	core_id = CORE_ID;
+
 	for(i=0; i<MAX_TASKS; i++) {
@@ -309,6 +303,8 @@ int sys_fork(void)
 		spinlock_lock(&runqueues[core_id].lock);
+		// add the new task to the parent's runqueue
 		runqueues[core_id].prio_bitmap |= (1 << parent_task->prio);
+		runqueues[core_id].nr_tasks++;
 		if (!runqueues[core_id].queue[parent_task->prio-1].first) {
 			task_table[i].prev = NULL;
 			runqueues[core_id].queue[parent_task->prio-1].first = task_table+i;
@@ -1010,6 +1006,133 @@ int set_timer(uint64_t deadline)
 	return ret;
 }
 
+#define FSHIFT		21		/* nr of bits of precision (e.g. 11) */
+#define FIXED_1		(1<<FSHIFT)	/* 1.0 as fixed-point */
+#define EXP		...		/* 1/exp(5sec/1min) as fixed-point */
+
+void update_load(void)
+{
+	uint32_t core_id = CORE_ID;
+
+	runqueues[core_id].load_counter--;
+	if (runqueues[core_id].balance_counter > 0)
+		runqueues[core_id].balance_counter--;
+	if (runqueues[core_id].load_counter < 0) {
+		runqueues[core_id].load_counter += 5*TIMER_FREQ;
+
+		spinlock_lock(&runqueues[core_id].lock);
+		runqueues[core_id].load *= EXP;
+		runqueues[core_id].load += runqueues[core_id].nr_tasks*(FIXED_1-EXP);
+		runqueues[core_id].load >>= FSHIFT;
+		spinlock_unlock(&runqueues[core_id].lock);
+
+		//kprintf("load of core %u: %u, %u\n", core_id, runqueues[core_id].load, runqueues[core_id].nr_tasks);
+	}
+}
+
+#if MAX_CORES > 1
+extern atomic_int32_t cpu_online;
+
+void load_balancing(void)
+{
+	uint32_t i, core_id = CORE_ID;
+	uint32_t prio;
+	task_t* task;
+
+	spinlock_lock(&runqueues[core_id].lock);
+	for(i=0; (i<atomic_int32_read(&cpu_online)) && (i<MAX_CORES); i++) {
+		if (i == core_id)
+			continue;
+
+		spinlock_lock(&runqueues[i].lock);
+		if (runqueues[i].load > runqueues[core_id].load) {
+			kprintf("Try to steal a task from core %u (load %u) to %u (load %u)\n", i, runqueues[i].load, core_id, runqueues[core_id].load);
+			kprintf("Tasks on core %u: %u, on core %u: %u\n", i, runqueues[i].nr_tasks, core_id, runqueues[core_id].nr_tasks);
+
+			prio = last_set(runqueues[i].prio_bitmap);
+			if (prio) {
+				// steal a ready task
+				task = runqueues[i].queue[prio-1].last;
+				kprintf("Try to steal a ready task %d\n", task->id);
+
+				// remove the last element from queue i
+				if (task->prev)
+					task->prev->next = NULL;
+				runqueues[i].queue[prio-1].last = task->prev;
+				if (!runqueues[i].queue[prio-1].last)
+					runqueues[i].queue[prio-1].first = NULL;
+
+				// add the task at the end of queue core_id
+				if (!runqueues[core_id].queue[prio-1].last) {
+					runqueues[core_id].queue[prio-1].first = runqueues[core_id].queue[prio-1].last = task;
+					task->next = task->prev = NULL;
+				} else {
+					runqueues[core_id].queue[prio-1].last->next = task;
+					task->prev = runqueues[core_id].queue[prio-1].last;
+					runqueues[core_id].queue[prio-1].last = task;
+					task->next = NULL;
+				}
+
+				// keep the priority bitmaps in sync with the queues
+				runqueues[core_id].prio_bitmap |= (1 << prio);
+				if (!runqueues[i].queue[prio-1].first)
+					runqueues[i].prio_bitmap &= ~(1 << prio);
+
+				// update the task counters
+				runqueues[core_id].nr_tasks++;
+				runqueues[i].nr_tasks--;
+				runqueues[core_id].balance_counter = 5*TIMER_FREQ;
+			} else {
+				task_t* tmp;
+
+				// steal a blocked task
+				task = runqueues[i].timers.first;
+				if (!task)	// Oops, found no valid task to steal
+					goto no_task_found;
+
+				kprintf("Try to steal blocked task %d\n", task->id);
+
+				// remove the first timer from queue i
+				if (runqueues[i].timers.first == runqueues[i].timers.last)
+					runqueues[i].timers.first = runqueues[i].timers.last = NULL;
+				else
+					runqueues[i].timers.first = runqueues[i].timers.first->next;
+
+				// add the timer to queue core_id, sorted by timeout
+				tmp = runqueues[core_id].timers.first;
+				while(tmp && (task->timeout >= tmp->timeout))
+					tmp = tmp->next;
+
+				if (!tmp) {
+					task->next = NULL;
+					task->prev = runqueues[core_id].timers.last;
+					if (runqueues[core_id].timers.last)
+						runqueues[core_id].timers.last->next = task;
+					runqueues[core_id].timers.last = task;
+					if (!runqueues[core_id].timers.first)
+						runqueues[core_id].timers.first = task;
+				} else {
+					task->prev = tmp->prev;
+					task->next = tmp;
+					tmp->prev = task;
+					if (task->prev)
+						task->prev->next = task;
+					if (runqueues[core_id].timers.first == tmp)
+						runqueues[core_id].timers.first = task;
+				}
+
+				// => reschedule on the new core
+				task->last_core = CORE_ID;
+
+				// update the task counters
+				runqueues[core_id].nr_tasks++;
+				runqueues[i].nr_tasks--;
+				runqueues[core_id].balance_counter = 5*TIMER_FREQ;
+			}
+		}
+no_task_found:
+		spinlock_unlock(&runqueues[i].lock);
+	}
+	spinlock_unlock(&runqueues[core_id].lock);
+}
+#endif
+
 void scheduler(void)
 {
 	task_t* orig_task;
@@ -1065,6 +1188,17 @@ void scheduler(void)
 
 	runqueues[core_id].old_task = NULL; // reset old task
 	prio = last_set(runqueues[core_id].prio_bitmap); // determines highest priority
+#if MAX_CORES > 1
+	/*if (!prio) {
+		load_balancing();
+		prio = last_set(runqueues[core_id].prio_bitmap); // retry...
+	}*/
+#endif
+
+	if (BUILTIN_EXPECT(prio > MAX_PRIO, 0)) {
+		kprintf("Invalid priority %u by bitmap 0x%x\n", prio, runqueues[core_id].prio_bitmap);
+		prio = 0;
+	}
 
 	if (!prio) {
 		if ((curr_task->status == TASK_RUNNING) || (curr_task->status == TASK_IDLE))
@@ -1095,8 +1229,8 @@ get_task_out:
 	spinlock_unlock(&runqueues[core_id].lock);
 
 	if (curr_task != orig_task) {
-		kprintf("schedule from %u to %u with prio %u on core %u\n",
-			orig_task->id, curr_task->id, (uint32_t)curr_task->prio, CORE_ID);
+		//kprintf("schedule from %u to %u with prio %u on core %u\n",
+		//	orig_task->id, curr_task->id, (uint32_t)curr_task->prio, CORE_ID);
 
 		switch_task(curr_task->id);
 	}
diff --git a/kernel/tests.c b/kernel/tests.c
index 281329d5..8f8860a4 100644
--- a/kernel/tests.c
+++ b/kernel/tests.c
@@ -91,7 +91,7 @@ static int foo(void* arg)
 	if (!arg)
 		return 0;
 
-	for(i=0; i<5; i++) {
+	for(i=0; i<20; i++) {
 		kprintf("Message from core %d: %s\n", smp_id(), (char*) arg);
 		sleep(1);
 	}
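
---

Notes (sketches under stated assumptions; not part of the patch):

1) update_load() maintains a per-core 1-minute load average in fixed-point
arithmetic and refreshes it every 5 seconds (load_counter is reloaded with
5*TIMER_FREQ ticks). The standalone sketch below demonstrates the decay step.
Two assumptions: it uses Linux's classic loadavg constants (FSHIFT = 11,
EXP = 1884 ~= FIXED_1/e^(5s/60s)), because the patch's EXP value for
FSHIFT = 21 is not visible here, and it keeps the average scaled by FIXED_1
so the fractional part stays observable, whereas the patch stores a plain
task count.

	#include <stdint.h>
	#include <stdio.h>

	#define FSHIFT	11			/* bits of fixed-point precision */
	#define FIXED_1	(1 << FSHIFT)		/* 1.0 as fixed-point */
	#define EXP	1884			/* 1/exp(5sec/1min) as fixed-point */

	/* One 5-second step: load' = load*e + nr_tasks*(1-e), in fixed point.
	 * 'load' stays scaled by FIXED_1, so load >> FSHIFT is the average
	 * number of runnable tasks. */
	static uint32_t calc_load(uint32_t load, uint32_t nr_tasks)
	{
		load *= EXP;				/* decay the old average */
		load += nr_tasks * FIXED_1 * (FIXED_1 - EXP);	/* blend in the sample */
		return load >> FSHIFT;
	}

	int main(void)
	{
		uint32_t load = 0, step;

		/* three runnable tasks for one minute (12 samples, 5s apart) */
		for (step = 1; step <= 12; step++) {
			load = calc_load(load, 3);
			printf("after %2us: load = %u.%02u\n", 5 * step,
			       load >> FSHIFT,
			       ((load & (FIXED_1 - 1)) * 100) >> FSHIFT);
		}
		return 0;
	}

Because the patch shifts the scale back down on every step, its load field
holds whole tasks only; the comparison runqueues[i].load >
runqueues[core_id].load therefore reacts to differences of at least one
task, which appears to be the intended granularity for stealing.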
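2) The ready-task branch of load_balancing() is a tail-steal on a doubly
linked queue: unlink the victim's last task and append it behind the thief's
last task. The sketch below isolates just that pointer surgery; task_t and
task_queue_t are simplified stand-ins, not the kernel's types.

	#include <stddef.h>
	#include <stdio.h>

	typedef struct task {
		int id;
		struct task *prev, *next;
	} task_t;

	typedef struct {
		task_t *first, *last;
	} task_queue_t;

	/* Unlink the tail of 'victim' and append it to 'thief', mirroring
	 * the queue updates in load_balancing(). Returns the stolen task,
	 * or NULL when the victim's queue is empty. */
	static task_t* steal_last(task_queue_t* victim, task_queue_t* thief)
	{
		task_t* task = victim->last;

		if (!task)
			return NULL;

		/* remove the last element from the victim's queue */
		if (task->prev)
			task->prev->next = NULL;
		victim->last = task->prev;
		if (!victim->last)
			victim->first = NULL;

		/* append the task at the end of the thief's queue */
		task->prev = thief->last;
		task->next = NULL;
		if (thief->last)
			thief->last->next = task;
		else
			thief->first = task;
		thief->last = task;

		return task;
	}

	int main(void)
	{
		task_t a = { 1, NULL, NULL }, b = { 2, NULL, NULL };
		task_queue_t victim = { &a, &b }, thief = { NULL, NULL };

		a.next = &b;
		b.prev = &a;

		task_t* stolen = steal_last(&victim, &thief);
		printf("stole task %d; victim's last is now task %d\n",
		       stolen->id, victim.last->id);
		return 0;
	}

In the kernel both runqueue locks are held while this runs, and
balance_counter is reloaded with 5*TIMER_FREQ so the thief backs off for
about five seconds before stealing again. Note that the balancer always
locks runqueues[core_id] first and runqueues[i] second; two cores balancing
each other concurrently could therefore deadlock unless a global lock order
or a trylock is introduced.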
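3) Both the scheduler and the balancer pick a priority with last_set() on
prio_bitmap. The patch does not show that helper, so the following is only a
plausible x86 definition (an assumption, not MetalSVM's actual code): a
bsr-based most-significant-bit scan returning 0 for an empty bitmap, which
matches the queue[prio-1] indexing as long as priority bit 0 is never set.

	#include <stdint.h>

	static inline uint32_t last_set(uint32_t i)
	{
		uint32_t ret;

		if (!i)
			return 0;	/* empty bitmap: no runnable priority */

		/* index of the most-significant set bit */
		asm volatile ("bsr %1, %0" : "=r" (ret) : "r" (i) : "cc");

		return ret;
	}

Under that reading, the new BUILTIN_EXPECT(prio > MAX_PRIO, 0) guard in
scheduler() can only fire when prio_bitmap is corrupted (for example by an
unsynchronized update from another core), so falling back to prio = 0, i.e.
the idle path, is a safe default.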