diff --git a/.bintray_descriptor.json b/.bintray_descriptor.json
index 7afd423d9..5d244a4f7 100644
--- a/.bintray_descriptor.json
+++ b/.bintray_descriptor.json
@@ -5,7 +5,7 @@
         "subject": "rwth-os",
         "website_url": "http://www.hermitcore.org",
         "issue_tracker_url": "https://github.com/RWTH-OS/HermitCore/issues",
-        "vcs_url": "https://github.com/RWTH-OS/pthread-embedded.git",
+        "vcs_url": "https://github.com/RWTH-OS/HermitCore.git",
         "github_release_notes_file": "RELEASE",
         "licenses": ["Revised BSD"],
         "public_download_numbers": false,
@@ -13,7 +13,7 @@
     },
 
     "version": {
-        "name": "0.1",
+        "name": "0.2.1",
         "desc": "HermitCore's kernel as libOS",
         "gpgSign": false
     },
@@ -28,7 +28,7 @@
         "deb_architecture": "amd64",
 	  "override": 1}
     },
-    {"includePattern": "build/(libhermit[^/]*rpm$)", "uploadPattern": "$1", "override": 1}
+    {"includePattern": "build/(libhermit[^/]*rpm$)", "uploadPattern": "$1", "override": 1},
     {"includePattern": "build/(libhermit[^/]*tar.bz2$)", "uploadPattern": "$1", "override": 1}
     ],
     "publish": true
diff --git a/.travis.yml b/.travis.yml
index ffcf780fd..116a34b6e 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -17,6 +17,11 @@ script:
   - cd build
   - cmake ..
   - make -j1 package
+  - cd $TRAVIS_BUILD_DIR
+  - ./tests.sh
+
+notifications:
+  slack: hermitcore:UtcfeEXkbpx3WyIDK2Wm2beS
 
 deploy:
   on: master
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 970a1c753..65cdc03cd 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -198,10 +198,10 @@ set(CPACK_PACKAGE_NAME libhermit)
 set(CPACK_SYSTEM_NAME all)
 
 set(CPACK_PACKAGE_VERSION_MAJOR 0)
-set(CPACK_PACKAGE_VERSION_MINOR 1)
-set(CPACK_PACKAGE_VERSION_PATCH 0)
+set(CPACK_PACKAGE_VERSION_MINOR 2)
+set(CPACK_PACKAGE_VERSION_PATCH 1)
 
-set(CPACK_PACKAGE_CONTACT "Daniel Krebs <github@daniel-krebs.net>")
+set(CPACK_PACKAGE_CONTACT "Stefan Lankes <slankes@eonerc.rwth-aachen.de>")
 
 # build .deb, .rpm and .tar.bz2 packages
 set(CPACK_GENERATOR DEB;RPM;TBZ2)
diff --git a/README.md b/README.md
index 90f0b6532..12b358dce 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 # HermitCore - A lightweight unikernel for a scalable and predictable runtime behavior
 
-[![Join the chat at https://gitter.im/RWTH-OS/HermitCore](https://badges.gitter.im/RWTH-OS/HermitCore.svg)](https://gitter.im/RWTH-OS/HermitCore?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
 [![Build Status](https://travis-ci.org/RWTH-OS/HermitCore.svg?branch=devel)](https://travis-ci.org/RWTH-OS/HermitCore)
+[![Slack Status](https://radiant-ridge-95061.herokuapp.com/badge.svg)](https://radiant-ridge-95061.herokuapp.com)
 
 The project [HermitCore]( http://www.hermitcore.org ) is a new
 [unikernel](http://unikernel.org) targeting a scalable and predictable runtime
@@ -27,6 +27,10 @@ cloud computing applications. It is the result of a research project at RWTH
 Aachen University and is currently an experimental approach, i.e., not
 production ready. Please use it with caution.
 
+## Contributing
+
+HermitCore is being developed on [GitHub](https://github.com/RWTH-OS/HermitCore).
+Create your own fork, send us a pull request, and chat with us on [Slack](https://radiant-ridge-95061.herokuapp.com).
 
 ## Requirements
 
@@ -49,6 +53,20 @@ $ sudo apt-get -qq update
 $ sudo apt-get install binutils-hermit newlib-hermit pthread-embedded-hermit gcc-hermit libhermit
 ```
 
+For non-Debian based systems, a docker image with the complete toolchain is provided and can be installed as follows:
+
+```bash
+$ docker pull rwthos/hermitcore
+```
+
+The following command starts a shell within the new docker container and mounts the directory `~/src` from the host system to `/src`:
+
+```bash
+$ docker run -i -t -v ~/src:/src rwthos/hermitcore:latest
+```
+
+Within the shell, the cross toolchain can be used to build HermitCore applications.
+
 If you want to build the toolchain yourself, have a look at the repository [hermit-toolchain](https://github.com/RWTH-OS/hermit-toolchain), which contains scripts to build the whole toolchain.
 
 Depending on how you want to use HermitCore, you might need additional packages
@@ -56,8 +74,18 @@ such as:
 
  * QEMU (`apt-get install qemu-system-x86`)
 
+## Building HermitCore
 
-## CMake requirements
+### Preliminary work
+
+To build HermitCore from source (without compiler), the repository with its submodules has to be cloned.
+
+```bash
+$ git clone git@github.com:RWTH-OS/HermitCore.git
+$ cd HermitCore
+$ git submodule init
+$ git submodule update
+```
 
 We require a fairly recent version of CMake (`3.7`) which is not yet present in
 most Linux distributions. We therefore provide a helper script that fetches the
@@ -87,14 +115,16 @@ cmake-3.7.2-Linux-x86_64.tar.gz         100%[===================>]  29,26M  3,74
 So before you build HermitCore you have to source the `local-cmake.sh` script
 everytime you open a new terminal.
 
-	
-## Building HermitCore
+### Building the library operating system and its examples
+
+To build HermitCore go to the directory with the source code, create a `build` directory and call `cmake` followed by `make`.
 
 ```bash
 $ mkdir build
 $ cd build
 $ cmake ..
 $ make
+$ sudo make install
 ```
 
 If your toolchain is not located in `/opt/hermit/bin` then you have to supply
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index c613c11b6..b679a06e0 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -48,6 +48,8 @@ extern "C" {
 #define APIC_SVR		0x00F0
 /// Error Status Register
 #define APIC_ESR		0x0280
+/// Corrected Machine-Check Error Interrupt Register
+#define APIC_CMCI		0x02F0
 /// Interrupt Command Register [bits 0-31]
 #define APIC_ICR1		0x0300
 /// Interrupt Command Register [bits 32-63]
diff --git a/arch/x86/include/asm/atomic32.h b/arch/x86/include/asm/atomic32.h
index 25bb80e48..aa9a9cf28 100644
--- a/arch/x86/include/asm/atomic32.h
+++ b/arch/x86/include/asm/atomic32.h
@@ -52,8 +52,8 @@ typedef struct { volatile int32_t counter; } atomic_int32_t;
  * This function will atomically exchange the value of an atomic variable and
  * return its old value. Is used in locking-operations.\n
  * \n
- * Intel manuals: If a memory operand is referenced, the processor's locking 
- * protocol is automatically implemented for the duration of the exchange 
+ * Intel manuals: If a memory operand is referenced, the processor's locking
+ * protocol is automatically implemented for the duration of the exchange
  * operation, regardless of the presence or absence of the LOCK prefix.
  *
  * @param d Pointer to the atomic_int_32_t with the value you want to exchange
@@ -79,7 +79,7 @@ inline static int32_t atomic_int32_test_and_set(atomic_int32_t* d, int32_t ret)
 inline static int32_t atomic_int32_add(atomic_int32_t *d, int32_t i)
 {
 	int32_t res = i;
-	asm volatile(LOCK "xaddl %0, %1" : "=r"(i) : "m"(d->counter), "0"(i) : "memory", "cc");
+	asm volatile(LOCK "xaddl %0, %1" : "+r"(i), "+m"(d->counter) : : "memory", "cc");
 	return res+i;
 }
 
@@ -95,7 +95,7 @@ inline static int32_t atomic_int32_add(atomic_int32_t *d, int32_t i)
  */
 inline static int32_t atomic_int32_sub(atomic_int32_t *d, int32_t i)
 {
-        return atomic_int32_add(d, -i);
+    return atomic_int32_add(d, -i);
 }
 
 /** @brief Atomic increment by one
@@ -105,7 +105,9 @@ inline static int32_t atomic_int32_sub(atomic_int32_t *d, int32_t i)
  * @param d The atomic_int32_t var you want to increment
  */
 inline static int32_t atomic_int32_inc(atomic_int32_t* d) {
-	return atomic_int32_add(d, 1);
+	int32_t res = 1;
+	asm volatile(LOCK "xaddl %0, %1" : "+r"(res), "+m"(d->counter) : : "memory", "cc");
+	return ++res;
 }
 
 /** @brief Atomic decrement by one
@@ -115,7 +117,9 @@ inline static int32_t atomic_int32_inc(atomic_int32_t* d) {
  * @param d The atomic_int32_t var you want to decrement
  */
 inline static int32_t atomic_int32_dec(atomic_int32_t* d) {
-	return atomic_int32_add(d, -1);
+	int32_t res = -1;
+	asm volatile(LOCK "xaddl %0, %1" : "+r"(res), "+m"(d->counter) : : "memory", "cc");
+	return --res;
 }
 
 /** @brief Read out an atomic_int32_t var
@@ -132,7 +136,7 @@ inline static int32_t atomic_int32_read(atomic_int32_t *d) {
 
 /** @brief Set the value of an atomic_int32_t var
  *
- * This function is for convenience: It sets the internal value of 
+ * This function is for convenience: It sets the internal value of
  * an atomic_int32_t var for you.
  *
  * @param d Pointer to the atomic_int32_t var you want to set
diff --git a/arch/x86/include/asm/atomic64.h b/arch/x86/include/asm/atomic64.h
index 02bf98437..b741f0026 100644
--- a/arch/x86/include/asm/atomic64.h
+++ b/arch/x86/include/asm/atomic64.h
@@ -52,8 +52,8 @@ typedef struct { volatile int64_t counter; } atomic_int64_t;
  * This function will atomically exchange the value of an atomic variable and
  * return its old value. Is used in locking-operations.\n
  * \n
- * Intel manuals: If a memory operand is referenced, the processor's locking 
- * protocol is automatically implemented for the duration of the exchange 
+ * Intel manuals: If a memory operand is referenced, the processor's locking
+ * protocol is automatically implemented for the duration of the exchange
  * operation, regardless of the presence or absence of the LOCK prefix.
  *
  * @param d Pointer to the atomic_int_64_t with the value you want to exchange
@@ -79,7 +79,7 @@ inline static int64_t atomic_int64_test_and_set(atomic_int64_t* d, int64_t ret)
 inline static int64_t atomic_int64_add(atomic_int64_t *d, int64_t i)
 {
 	int64_t res = i;
-	asm volatile(LOCK "xaddq %0, %1" : "=r"(i) : "m"(d->counter), "0"(i) : "memory", "cc");
+	asm volatile(LOCK "xaddq %0, %1" : "+r"(i), "+m"(d->counter) : : "memory", "cc");
 	return res+i;
 }
 
@@ -95,7 +95,7 @@ inline static int64_t atomic_int64_add(atomic_int64_t *d, int64_t i)
  */
 inline static int64_t atomic_int64_sub(atomic_int64_t *d, int64_t i)
 {
-        return atomic_int64_add(d, -i);
+	return atomic_int64_add(d, -i);
 }
 
 /** @brief Atomic increment by one
@@ -105,7 +105,9 @@ inline static int64_t atomic_int64_sub(atomic_int64_t *d, int64_t i)
  * @param d The atomic_int64_t var you want to increment
  */
 inline static int64_t atomic_int64_inc(atomic_int64_t* d) {
-	return atomic_int64_add(d, 1);
+	int64_t res = 1;
+	asm volatile(LOCK "xaddq %0, %1" : "+r"(res), "+m"(d->counter) : : "memory", "cc");
+	return ++res;
 }
 
 /** @brief Atomic decrement by one
@@ -115,7 +117,9 @@ inline static int64_t atomic_int64_inc(atomic_int64_t* d) {
  * @param d The atomic_int64_t var you want to decrement
  */
 inline static int64_t atomic_int64_dec(atomic_int64_t* d) {
-	return atomic_int64_add(d, -1);
+	int64_t res = -1;
+	asm volatile(LOCK "xaddq %0, %1" : "+r"(res), "+m"(d->counter) : : "memory", "cc");
+	return --res;
 }
 
 /** @brief Read out an atomic_int64_t var
@@ -132,7 +136,7 @@ inline static int64_t atomic_int64_read(atomic_int64_t *d) {
 
 /** @brief Set the value of an atomic_int64_t var
  *
- * This function is for convenience: It sets the internal value of 
+ * This function is for convenience: It sets the internal value of
  * an atomic_int64_t var for you.
  *
  * @param d Pointer to the atomic_int64_t var you want to set
diff --git a/arch/x86/include/asm/multiboot.h b/arch/x86/include/asm/multiboot.h
index 5b8826a9a..1745f8db5 100644
--- a/arch/x86/include/asm/multiboot.h
+++ b/arch/x86/include/asm/multiboot.h
@@ -143,6 +143,8 @@ typedef struct multiboot_mod_list multiboot_module_t;
 
 /// Pointer to multiboot structure
 /// This pointer is declared at set by entry.asm
-extern multiboot_info_t* mb_info;
+extern const multiboot_info_t* const mb_info;
+extern char* cmdline;
+extern size_t cmdsize;
 
 #endif
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index cac2e633b..1c41dc58e 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -102,14 +102,14 @@ static inline size_t sign_extend(ssize_t addr, int bits)
 #define PAGE_MAP_ENTRIES	       (1L << PAGE_MAP_BITS)
 
 /// Align to next page
-#define PAGE_FLOOR(addr)        (((addr) + PAGE_SIZE - 1) & PAGE_MASK)
+#define PAGE_CEIL(addr)		(((addr) + PAGE_SIZE - 1) & PAGE_MASK)
 /// Align to page
-#define PAGE_CEIL(addr)         ( (addr)                  & PAGE_MASK)
+#define PAGE_FLOOR(addr)	( (addr)                  & PAGE_MASK)
 
 /// Align to next 2M boundary
-#define PAGE_2M_FLOOR(addr)	(((addr) + (1L << 21) - 1) & ((~0L) << 21))
+#define PAGE_2M_CEIL(addr)	(((addr) + (1L << 21) - 1) & ((~0L) << 21))
 /// Align to nex 2M boundary
-#define PAGE_2M_CEIL(addr)	( (addr)                   & ((~0L) << 21))
+#define PAGE_2M_FLOOR(addr)	( (addr)                   & ((~0L) << 21))
 
 /// Page is present
 #define PG_PRESENT		(1 << 0)
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 26d4b0860..211aa03bd 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -26,7 +26,7 @@
  */
 
 
-/** 
+/**
  * @author Stefan Lankes
  * @file arch/x86/include/asm/pci.h
  * @brief functions related to PCI initialization and information
@@ -48,6 +48,8 @@ typedef struct {
 	uint32_t irq;
 } pci_info_t;
 
+#define PCI_IGNORE_SUBID	(0)
+
 /** @brief Initialize the PCI environment
  */
 int pci_init(void);
@@ -55,15 +57,16 @@ int pci_init(void);
 /** @brief Determine the IObase address and the interrupt number of a specific device
  *
  * @param vendor_id The device's vendor ID
- * @param device_id the device's ID
+ * @param device_id The device's ID
+ * @param subsystem_id The subsystem ID
  * @param info Pointer to the record pci_info_t where among other the IObase address will be stored
  * @param enable_bus_master If true, the bus mastering will be enabled.
  *
- * @return 
+ * @return
  * - 0 on success
  * - -EINVAL (-22) on failure
  */
-int pci_get_device_info(uint32_t vendor_id, uint32_t device_id, pci_info_t* info, int8_t enble_bus_master);
+int pci_get_device_info(uint32_t vendor_id, uint32_t device_id, uint32_t subsystem_id, pci_info_t* info, int8_t enble_bus_master);
 
 /** @brief Print information of existing pci adapters
  *
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 24ae11b78..15aa50a70 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -52,6 +52,7 @@ extern "C" {
 #define CPU_FEATURE_PSE			(1 << 3)
 #define CPU_FEATURE_MSR			(1 << 5)
 #define CPU_FEATURE_PAE			(1 << 6)
+#define CPU_FEATURE_MCE			(1 << 7)
 #define CPU_FEATURE_APIC		(1 << 9)
 #define CPU_FEATURE_SEP			(1 << 11)
 #define CPU_FEATURE_PGE			(1 << 13)
@@ -308,6 +309,10 @@ inline static uint32_t has_msr(void) {
 	return (cpu_info.feature1 & CPU_FEATURE_MSR);
 }
 
+inline static uint32_t has_mce(void) {
+	return (cpu_info.feature1 & CPU_FEATURE_MCE);
+}
+
 inline static uint32_t has_apic(void) {
 	return (cpu_info.feature1 & CPU_FEATURE_APIC);
 }
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index aa64a6eae..1192490b8 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -176,7 +176,7 @@ static inline void lapic_timer_set_counter(uint32_t counter)
 
 static inline void lapic_timer_disable(void)
 {
-	lapic_write(APIC_LVT_TSR, 0x10000);
+	lapic_write(APIC_LVT_T, 0x10000);
 }
 
 static inline void lapic_timer_oneshot(void)
@@ -364,7 +364,7 @@ int apic_enable_timer(void)
 }
 
 static apic_mp_t* search_mptable(size_t base, size_t limit) {
-	size_t ptr=PAGE_CEIL(base), vptr=0;
+	size_t ptr=PAGE_FLOOR(base), vptr=0;
 	size_t flags = PG_GLOBAL | PG_RW | PG_PCD;
 	apic_mp_t* tmp;
 	uint32_t i;
@@ -410,7 +410,7 @@ static apic_mp_t* search_mptable(size_t base, size_t limit) {
 
 #if 0
 static size_t search_ebda(void) {
-	size_t ptr=PAGE_CEIL(0x400), vptr=0xF0000;
+	size_t ptr=PAGE_FLOOR(0x400), vptr=0xF0000;
 	size_t flags = PG_GLOBAL | PG_RW | PG_PCD;
 
 	// protec apic by the NX flags
@@ -456,8 +456,8 @@ static int lapic_reset(void)
 		lapic_write(APIC_LVT_TSR, 0x10000);	// disable thermal sensor interrupt
 	if (max_lvt >= 5)
 		lapic_write(APIC_LVT_PMC, 0x10000);	// disable performance counter interrupt
-	lapic_write(APIC_LINT0, 0x7C);	// connect LINT0 to idt entry 124
-	lapic_write(APIC_LINT1, 0x7D);	// connect LINT1 to idt entry 125
+	lapic_write(APIC_LINT0, 0x00010000);	// disable LINT0
+	lapic_write(APIC_LINT1, 0x00010000);	// disable LINT1
 	lapic_write(APIC_LVT_ER, 0x7E);	// connect error to idt entry 126
 
 	return 0;
@@ -580,8 +580,8 @@ int smp_init(void)
 	 * Wakeup the other cores via IPI. They start at this address
 	 * in real mode, switch to protected and finally they jump to smp_main.
 	 */
-	page_map(SMP_SETUP_ADDR, SMP_SETUP_ADDR, PAGE_FLOOR(sizeof(boot_code)) >> PAGE_BITS, PG_RW|PG_GLOBAL);
-	vma_add(SMP_SETUP_ADDR, SMP_SETUP_ADDR + PAGE_FLOOR(sizeof(boot_code)), VMA_READ|VMA_WRITE|VMA_CACHEABLE);
+	page_map(SMP_SETUP_ADDR, SMP_SETUP_ADDR, PAGE_CEIL(sizeof(boot_code)) >> PAGE_BITS, PG_RW|PG_GLOBAL);
+	vma_add(SMP_SETUP_ADDR, SMP_SETUP_ADDR + PAGE_CEIL(sizeof(boot_code)), VMA_READ|VMA_WRITE|VMA_CACHEABLE);
 	memcpy((void*)SMP_SETUP_ADDR, boot_code, sizeof(boot_code));
 
 	for(i=0; i<sizeof(boot_code); i++)
@@ -667,6 +667,7 @@ int apic_calibration(void)
 	atomic_int32_inc(&cpu_online);
 
 	if (is_single_kernel()) {
+		LOG_INFO("Disable PIC\n");
 		// Now, HermitCore is able to use the APIC => Therefore, we disable the PIC
 		outportb(0xA1, 0xFF);
 		outportb(0x21, 0xFF);
@@ -683,6 +684,7 @@ int apic_calibration(void)
 		}
 
 		// now, we don't longer need the IOAPIC timer and turn it off
+		LOG_INFO("Disable IOAPIC timer\n");
 		ioapic_intoff(2, apic_processors[boot_processor]->id);
 	}
 
@@ -721,7 +723,7 @@ static int apic_probe(void)
 
 found_mp:
 	if (!apic_mp) {
-		LOG_ERROR("Didn't find MP config table\n");
+		LOG_INFO("Didn't find MP config table\n");
 		goto no_mp;
 	}
 
@@ -916,12 +918,6 @@ int smp_start(void)
 	// install IDT
 	idt_install();
 
-	/*
-	 * we turned on paging
-	 * => now, we are able to register our task
-	 */
-	register_task();
-
 	// enable additional cpu features
 	cpu_detection();
 
@@ -936,6 +932,12 @@ int smp_start(void)
 
 	set_idle_task();
 
+	/*
+	 * TSS is set, paging is enabled
+	 * => now, we are able to register our task
+	 */
+	register_task();
+
 	irq_enable();
 
 	atomic_int32_inc(&cpu_online);
@@ -1039,6 +1041,7 @@ static void apic_err_handler(struct state *s)
 void shutdown_system(void)
 {
 	int if_bootprocessor = (boot_processor == apic_cpu_id());
+	uint32_t max_lvt;
 
 	irq_disable();
 
@@ -1061,8 +1064,11 @@ void shutdown_system(void)
 	if (if_bootprocessor)
 		LOG_INFO("Disable APIC\n");
 
-	lapic_write(APIC_LVT_TSR, 0x10000);	// disable thermal sensor interrupt
-	lapic_write(APIC_LVT_PMC, 0x10000);	// disable performance counter interrupt
+	max_lvt = apic_lvt_entries();
+	if (max_lvt >= 4)
+		lapic_write(APIC_LVT_TSR, 0x10000);	// disable thermal sensor interrupt
+	if (max_lvt >= 5)
+		lapic_write(APIC_LVT_PMC, 0x10000);	// disable performance counter interrupt
 	lapic_write(APIC_SVR, 0x00);	// disable the apic
 
 	// disable x2APIC
@@ -1082,17 +1088,16 @@ void shutdown_system(void)
 	}
 }
 
-static void apic_shutdown(struct state * s)
+static void apic_shutdown(struct state* s)
 {
 	go_down = 1;
 
 	LOG_DEBUG("Receive shutdown interrupt\n");
 }
 
-static void apic_lint0(struct state * s)
+static void apic_wakeup(struct state* s)
 {
-	// Currently nothing to do
-	LOG_INFO("Receive LINT0 interrupt\n");
+	LOG_DEBUG("Receive wakeup interrupt\n");
 }
 
 int apic_init(void)
@@ -1104,12 +1109,12 @@ int apic_init(void)
 		return ret;
 
 	// set APIC error handler
+	irq_install_handler(121, apic_wakeup);
 	irq_install_handler(126, apic_err_handler);
 #if MAX_CORES > 1
 	irq_install_handler(80+32, apic_tlb_handler);
 #endif
 	irq_install_handler(81+32, apic_shutdown);
-	irq_install_handler(124, apic_lint0);
 	if (apic_processors[boot_processor])
 		LOG_INFO("Boot processor %u (ID %u)\n", boot_processor, apic_processors[boot_processor]->id);
 	else
diff --git a/arch/x86/kernel/entry.asm b/arch/x86/kernel/entry.asm
index 0182d5294..6375533da 100644
--- a/arch/x86/kernel/entry.asm
+++ b/arch/x86/kernel/entry.asm
@@ -69,6 +69,9 @@ align 4
     global hbmem_size
     global uhyve
     global image_size
+    global uartport
+    global cmdline
+    global cmdsize
     base dq 0
     limit dq 0
     cpu_freq dd 0
@@ -93,6 +96,9 @@ align 4
     hbmem_base dq 0
     hbmem_size dq 0
     uhyve dd 0
+    uartport dq 0
+    cmdline dq 0
+    cmdsize dq 0
 
 ; Bootstrap page tables are used during the initialization.
 align 4096
@@ -114,9 +120,6 @@ boot_pgt:
 SECTION .ktext
 align 4
 start64:
-    ; store pointer to the multiboot information
-    mov [mb_info], QWORD rdx
-
     ; reset registers to kill any stale realmode selectors
     xor eax, eax
     mov ds, eax
@@ -134,6 +137,9 @@ start64:
     cmp eax, 0
     jne Lno_pml4_init
 
+    ; store pointer to the multiboot information
+    mov [mb_info], QWORD rdx
+
     ; relocate page tables
     mov rdi, boot_pml4
     mov rax, QWORD [rdi]
@@ -188,16 +194,17 @@ Lno_mbinfo:
     xor rcx, rcx
     mov rsi, 510*0x200000
     sub rsi, kernel_start
+    mov r11, QWORD [image_size]
 Lremap:
     mov QWORD [rdi], rax
     add rax, 0x200000
     add rcx, 0x200000
     add rdi, 8
-    ; note: the whole code segement muust fit in the first pgd
+    ; note: the whole code segment has to fit in the first pgd
     cmp rcx, rsi
-    jnb Lno_pml4_init
-    cmp rcx, QWORD [image_size]
-    jb Lremap
+    jnl Lno_pml4_init
+    cmp rcx, r11
+    jl Lremap
 
 Lno_pml4_init:
     ; Set CR3
@@ -272,9 +279,9 @@ gdt_flush:
     global isr%1
     align 64
     isr%1:
-        push byte 0 ; pseudo error code
-        push byte %1
-        jmp common_stub
+    push byte 0 ; pseudo error code
+    push byte %1
+    jmp common_stub
 %endmacro
 
 ; Similar to isrstub_pseudo_error, but without pushing
@@ -284,8 +291,8 @@ gdt_flush:
     global isr%1
     align 64
     isr%1:
-        push byte %1
-        jmp common_stub
+    push byte %1
+    jmp common_stub
 %endmacro
 
 ; Create isr entries, where the number after the
@@ -337,9 +344,9 @@ isrstub_pseudo_error 9
     global irq%1
     align 64
     irq%1:
-        push byte 0 ; pseudo error code
-        push byte 32+%1
-        jmp common_stub
+    push byte 0 ; pseudo error code
+    push byte 32+%1
+    jmp common_stub
 %endmacro
 
 ; Create entries for the interrupts 0 to 23
@@ -360,15 +367,15 @@ global wakeup
 align 64
 wakeup:
     push byte 0 ; pseudo error code
-	push byte 121
-	jmp common_stub
+    push byte 121
+    jmp common_stub
 
 global mmnif_irq
 align 64
 mmnif_irq:
     push byte 0 ; pseudo error code
-	push byte 122
-	jmp common_stub
+    push byte 122
+    jmp common_stub
 
 global apic_timer
 align 64
@@ -409,7 +416,6 @@ extern irq_handler
 extern get_current_stack
 extern finish_task_switch
 extern syscall_handler
-extern kernel_stack
 
 global getcontext
 align 64
diff --git a/arch/x86/kernel/gdt.c b/arch/x86/kernel/gdt.c
index b56f48381..2eca34564 100644
--- a/arch/x86/kernel/gdt.c
+++ b/arch/x86/kernel/gdt.c
@@ -41,7 +41,7 @@ gdt_ptr_t		gp;
 // currently, our kernel has full access to the ioports
 static gdt_entry_t	gdt[GDT_ENTRIES] = {[0 ... GDT_ENTRIES-1] = {0, 0, 0, 0, 0, 0}};
 static tss_t		task_state_segments[MAX_CORES] __attribute__ ((aligned (PAGE_SIZE)));
-static uint8_t		stack_table[MAX_CORES*KERNEL_STACK_SIZE*MAX_IST] __attribute__ ((aligned (PAGE_SIZE)));
+static uint8_t		stack_table[MAX_CORES][KERNEL_STACK_SIZE*MAX_IST] __attribute__ ((aligned (PAGE_SIZE)));
 
 extern const void boot_stack;
 
@@ -149,9 +149,9 @@ void gdt_install(void)
 	for(i=0; i<MAX_CORES; i++) {
 		task_state_segments[i].rsp0 = (size_t)&boot_stack + (i+1) * KERNEL_STACK_SIZE - 0x10;
 		task_state_segments[i].ist1 = 0; // ist will created per task
-		task_state_segments[i].ist2 = (size_t)stack_table + MAX_IST*i * KERNEL_STACK_SIZE + (2 /*IST number */ - 1) * KERNEL_STACK_SIZE - 0x10;
-		task_state_segments[i].ist3 = (size_t)stack_table + MAX_IST*i * KERNEL_STACK_SIZE + (3 /*IST number */ - 1) * KERNEL_STACK_SIZE - 0x10;
-		task_state_segments[i].ist4 = (size_t)stack_table + MAX_IST*i * KERNEL_STACK_SIZE + (4 /*IST number */ - 1) * KERNEL_STACK_SIZE - 0x10;
+		task_state_segments[i].ist2 = (size_t) stack_table[i] + (2 /*IST number */ - 1) * KERNEL_STACK_SIZE - 0x10;
+		task_state_segments[i].ist3 = (size_t) stack_table[i] + (3 /*IST number */ - 1) * KERNEL_STACK_SIZE - 0x10;
+		task_state_segments[i].ist4 = (size_t) stack_table[i] + (4 /*IST number */ - 1) * KERNEL_STACK_SIZE - 0x10;
 
 		gdt_set_gate(num+i*2, (unsigned long) (task_state_segments+i), sizeof(tss_t)-1,
 			GDT_FLAG_PRESENT | GDT_FLAG_TSS | GDT_FLAG_RING0, 0);
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index d308d6b81..de109c989 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -290,7 +290,7 @@ size_t** irq_handler(struct state *s)
 
 	size_t** ret = NULL;
 
-	if(BUILTIN_EXPECT(s->int_no >= MAX_HANDLERS, 0)) {
+	if (BUILTIN_EXPECT(s->int_no >= MAX_HANDLERS, 0)) {
 		LOG_ERROR("Invalid IRQ number %d\n", s->int_no);
 		return NULL;
 	}
diff --git a/arch/x86/kernel/isrs.c b/arch/x86/kernel/isrs.c
index dd2583f27..f79cdbd0b 100644
--- a/arch/x86/kernel/isrs.c
+++ b/arch/x86/kernel/isrs.c
@@ -225,7 +225,7 @@ static void arch_fault_handler(struct state *s)
 	else
 		LOG_WARNING("Unknown exception %d", s->int_no);
 
-	LOG_ERROR(" Exception (%d) on core %d at %#x:%#lx, fs = %#lx, gs = %#lx, error code = 0x%#lx, task id = %u, rflags = %#x\n",
+	LOG_ERROR(" Exception (%d) on core %d at %#x:%#lx, fs = %#lx, gs = %#lx, error code = %#lx, task id = %u, rflags = %#x\n",
 		s->int_no, CORE_ID, s->cs, s->rip, s->fs, s->gs, s->error, per_core(current_task)->id, s->rflags);
 	LOG_ERROR("rax %#lx, rbx %#lx, rcx %#lx, rdx %#lx, rbp, %#lx, rsp %#lx rdi %#lx, rsi %#lx, r8 %#lx, r9 %#lx, r10 %#lx, r11 %#lx, r12 %#lx, r13 %#lx, r14 %#lx, r15 %#lx\n",
 		s->rax, s->rbx, s->rcx, s->rdx, s->rbp, s->rsp, s->rdi, s->rsi, s->r8, s->r9, s->r10, s->r11, s->r12, s->r13, s->r14, s->r15);
diff --git a/arch/x86/kernel/pci.c b/arch/x86/kernel/pci.c
index 114ecf80e..d35e919fc 100644
--- a/arch/x86/kernel/pci.c
+++ b/arch/x86/kernel/pci.c
@@ -45,6 +45,7 @@
 #define	PCI_CFRV	0x08	/* Configuration Revision */
 #define	PCI_CFLT	0x0c	/* Configuration Latency Timer */
 #define	PCI_CBIO	0x10	/* Configuration Base IO Address */
+#define PCI_CSID	0x2C	/* Configuration Subsystem Id & Subsystem Vendor Id */
 #define	PCI_CFIT	0x3c	/* Configuration Interrupt */
 #define	PCI_CFDA	0x40	/* Configuration Driver Area */
 
@@ -101,6 +102,11 @@ static uint32_t pci_conf_read(uint32_t bus, uint32_t slot, uint32_t off)
 	return data;
 }
 
+static inline uint32_t pci_subid(uint32_t bus, uint32_t slot)
+{
+	return pci_conf_read(bus, slot, PCI_CSID);
+}
+
 static inline uint32_t pci_what_irq(uint32_t bus, uint32_t slot)
 {
 	return pci_conf_read(bus, slot, PCI_CFIT) & 0xFF;
@@ -139,15 +145,15 @@ static inline uint32_t pci_what_size(uint32_t bus, uint32_t slot, uint32_t nr)
 int pci_init(void)
 {
 	uint32_t slot, bus;
-	
+
 	for (bus = 0; bus < MAX_BUS; bus++)
 		for (slot = 0; slot < MAX_SLOTS; slot++)
 			adapters[bus][slot] = pci_conf_read(bus, slot, PCI_CFID);
-	
+
 	return 0;
 }
 
-int pci_get_device_info(uint32_t vendor_id, uint32_t device_id, pci_info_t* info, int8_t bus_master)
+int pci_get_device_info(uint32_t vendor_id, uint32_t device_id, uint32_t subsystem_id, pci_info_t* info, int8_t bus_master)
 {
 	uint32_t slot, bus, i;
 
@@ -160,8 +166,9 @@ int pci_get_device_info(uint32_t vendor_id, uint32_t device_id, pci_info_t* info
 	for (bus = 0; bus < MAX_BUS; bus++) {
 		for (slot = 0; slot < MAX_SLOTS; slot++) {
 			if (adapters[bus][slot] != -1) {
-				if (((adapters[bus][slot] & 0xffff) == vendor_id) && 
-				   (((adapters[bus][slot] & 0xffff0000) >> 16) == device_id)) {
+				if (((adapters[bus][slot] & 0xffff) == vendor_id) &&
+				   (((adapters[bus][slot] & 0xffff0000) >> 16) == device_id) &&
+				   (((pci_subid(bus, slot) >> 16) & subsystem_id) == subsystem_id)) {
 					for(i=0; i<6; i++) {
 						info->base[i] = pci_what_iobase(bus, slot, i);
 						info->size[i] = (info->base[i]) ? pci_what_size(bus, slot, i) : 0;
@@ -195,7 +202,7 @@ int print_pci_adapters(void)
 		if (adapters[bus][slot] != -1) {
 				counter++;
 				LOG_INFO("%d) Vendor ID: 0x%x  Device Id: 0x%x\n",
-					counter, adapters[bus][slot] & 0xffff, 
+					counter, adapters[bus][slot] & 0xffff,
 					(adapters[bus][slot] & 0xffff0000) >> 16);
 
 #ifdef WITH_PCI_IDS
diff --git a/arch/x86/kernel/processor.c b/arch/x86/kernel/processor.c
index 7b1cf0073..ca225dd47 100644
--- a/arch/x86/kernel/processor.c
+++ b/arch/x86/kernel/processor.c
@@ -172,10 +172,10 @@ static void fpu_init_xsave(union fpu_state* fpu)
 
 static uint32_t get_frequency_from_mbinfo(void)
 {
-	if (mb_info && (mb_info->flags & MULTIBOOT_INFO_CMDLINE))
+	if (mb_info && (mb_info->flags & MULTIBOOT_INFO_CMDLINE) && (cmdline))
 	{
 		// search in the command line for cpu frequency
-		char* found = strstr((char*) mb_info->cmdline, "-freq");
+		char* found = strstr((char*) (size_t)cmdline, "-freq");
 		if (!found)
 			return 0;
 
@@ -472,6 +472,8 @@ int cpu_detection(void) {
 		cr4 |= CR4_PGE;
 	if (has_fsgsbase())
 		cr4 |= CR4_FSGSBASE;
+	if (has_mce())
+		cr4 |= CR4_MCE;		// enable machine check exceptions
 	//if (has_vmx())
 	//	cr4 |= CR4_VMXE;
 	cr4 &= ~CR4_TSD;		// => every privilege level is able to use rdtsc
@@ -552,7 +554,7 @@ int cpu_detection(void) {
 		a = b = c = d = 0;
                 cpuid(1, &a, &b, &cpu_info.feature2, &cpu_info.feature1);
 
-		LOG_INFO("CPU features: %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
+		LOG_INFO("CPU features: %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
 			has_sse() ? "SSE " : "",
 			has_sse2() ? "SSE2 " : "",
 			has_sse3() ? "SSE3 " : "",
@@ -564,6 +566,7 @@ int cpu_detection(void) {
 			has_fma() ? "FMA " : "",
 			has_movbe() ? "MOVBE " : "",
 			has_x2apic() ? "X2APIC " : "",
+			has_mce() ? "MCE " : "",
 			has_fpu() ? "FPU " : "",
 			has_fxsr() ? "FXSR " : "",
 			has_xsave() ? "XSAVE " : "",
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 30528794d..b25240a0b 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -212,7 +212,7 @@ int hermit_kill(tid_t dest, int signum)
 	return 0;
 }
 
-void signal_init()
+void signal_init(void)
 {
 	// initialize per-core signal queue
 	for(int i = 0; i < MAX_CORES; i++) {
diff --git a/arch/x86/kernel/tasks.c b/arch/x86/kernel/tasks.c
index 13bebbee5..4a3a89045 100644
--- a/arch/x86/kernel/tasks.c
+++ b/arch/x86/kernel/tasks.c
@@ -39,7 +39,10 @@
 #include <asm/page.h>
 #include <asm/multiboot.h>
 
-#define TLS_OFFSET	0
+#define TLS_ALIGNBITS		5
+#define TLS_ALIGNSIZE		(1L << TLS_ALIGNBITS)
+#define TSL_ALIGNMASK		((~0L) << TLS_ALIGNBITS)
+#define TLS_FLOOR(addr)		((((size_t)addr) + TLS_ALIGNSIZE - 1) & TSL_ALIGNMASK)
 
 /*
  * Note that linker symbols are not variables, they have no memory allocated for
@@ -64,20 +67,20 @@ static int init_tls(void)
 		curr_task->tls_addr = (size_t) &tls_start;
 		curr_task->tls_size = (size_t) &tls_end - (size_t) &tls_start;
 
-		tls_addr = kmalloc(curr_task->tls_size + TLS_OFFSET + sizeof(size_t));
+		tls_addr = kmalloc(curr_task->tls_size + TLS_ALIGNSIZE + sizeof(size_t));
 		if (BUILTIN_EXPECT(!tls_addr, 0)) {
 			LOG_ERROR("load_task: heap is missing!\n");
 			return -ENOMEM;
 		}
 
-		memset(tls_addr, 0x00, TLS_OFFSET);
-		memcpy((void*) (tls_addr+TLS_OFFSET), (void*) curr_task->tls_addr, curr_task->tls_size);
-		fs = (size_t) tls_addr + curr_task->tls_size + TLS_OFFSET;
+		memset(tls_addr, 0x00, TLS_ALIGNSIZE);
+		memcpy((void*) TLS_FLOOR(tls_addr), (void*) curr_task->tls_addr, curr_task->tls_size);
+		fs = (size_t) TLS_FLOOR(tls_addr) + curr_task->tls_size;
 		*((size_t*)fs) = fs;
 
 		// set fs register to the TLS segment
 		set_tls(fs);
-		LOG_INFO("TLS of task %d on core %d starts at 0x%zx (size 0x%zx)\n", curr_task->id, CORE_ID, tls_addr + TLS_OFFSET, curr_task->tls_size);
+		LOG_INFO("TLS of task %d on core %d starts at 0x%zx (size 0x%zx)\n", curr_task->id, CORE_ID, TLS_FLOOR(tls_addr), curr_task->tls_size);
 	} else set_tls(0); // no TLS => clear fs register
 
 	return 0;
@@ -103,10 +106,10 @@ int is_proxy(void)
 		return 0;
 	if (!is_single_kernel())
 		return 1;
-	if (mb_info && (mb_info->flags & MULTIBOOT_INFO_CMDLINE))
+	if (mb_info && (mb_info->flags & MULTIBOOT_INFO_CMDLINE) && (cmdline))
 	{
 		// search in the command line for the "proxy" hint
-		char* found = strstr((char*) (size_t) mb_info->cmdline, "-proxy");
+		char* found = strstr((char*) (size_t) cmdline, "-proxy");
 		if (found)
 			return 1;
 	}
@@ -123,7 +126,6 @@ size_t* get_current_stack(void)
 	else
 		stptr = (stptr + DEFAULT_STACK_SIZE - sizeof(size_t)) & ~0x1F;
 
-	set_per_core(kernel_stack, stptr);
 	set_tss(stptr, (size_t) curr_task->ist_addr + KERNEL_STACK_SIZE - 0x10);
 
 	return curr_task->last_stack_pointer;
@@ -190,10 +192,15 @@ int create_default_frame(task_t* task, entry_point_t ep, void* arg, uint32_t cor
 	return 0;
 }
 
+#define USE_MWAIT
+
 void wait_for_task(void)
 {
+#ifndef USE_MWAIT
+	HALT;
+#else
 	if (!has_mwait()) {
-		PAUSE;
+		HALT;
 	} else {
 		void* queue = get_readyqueue();
 
@@ -203,4 +210,21 @@ void wait_for_task(void)
 		monitor(queue, 0, 0);
 		mwait(0x2 /* 0x2 = c3, 0xF = c0 */, 1 /* break on interrupt flag */);
 	}
+#endif
+}
+
+void wakeup_core(uint32_t core_id)
+{
+#ifdef USE_MWAIT
+	// if mwait is available, an IPI isn't required to wakeup the core
+	if (has_mwait())
+		return;
+#endif
+
+	// no self IPI required
+	if (core_id == CORE_ID)
+		return;
+
+	LOG_DEBUG("wakeup core %d\n", core_id);
+	apic_send_ipi(core_id, 121);
 }
diff --git a/arch/x86/kernel/timer.c b/arch/x86/kernel/timer.c
index f44b49f2e..819adcc26 100644
--- a/arch/x86/kernel/timer.c
+++ b/arch/x86/kernel/timer.c
@@ -70,10 +70,6 @@ void check_ticks(void)
 }
 #endif
 
-static void wakeup_handler(struct state *s)
-{
-}
-
 /*
  * Handles the timer. In this case, it's very simple: We
  * increment the 'timer_ticks' variable every time the
@@ -187,7 +183,6 @@ int timer_init(void)
 	 */
 	irq_install_handler(32, timer_handler);
 	irq_install_handler(123, timer_handler);
-	irq_install_handler(121, wakeup_handler);
 
 #ifdef DYNAMIC_TICKS
 	boot_tsc = has_rdtscp() ? rdtscp(NULL) : rdtsc();
diff --git a/arch/x86/kernel/uart.c b/arch/x86/kernel/uart.c
index e96f21920..38ce21427 100644
--- a/arch/x86/kernel/uart.c
+++ b/arch/x86/kernel/uart.c
@@ -99,28 +99,38 @@
 
 #define DEFAULT_UART_PORT 	0xc110
 
-static size_t	iobase = 0;
+extern size_t	uartport;
 
 static inline unsigned char read_from_uart(uint32_t off)
 {
 	uint8_t c = 0;
 
-	if (iobase)
-		c = inportb(iobase + off);
+	if (uartport)
+		c = inportb(uartport + off);
 
 	return c;
 }
 
+static inline int is_transmit_empty(void)
+{
+	if (uartport)
+		return inportb(uartport + UART_LSR) & 0x20;
+
+	return 1;
+}
+
 static inline void write_to_uart(uint32_t off, unsigned char c)
 {
-	if (iobase)
-		outportb(iobase + off, c);
+	while (is_transmit_empty() == 0) { PAUSE; }
+
+	if (uartport)
+		outportb(uartport + off, c);
 }
 
 /* Puts a single character on a serial device */
 int uart_putchar(unsigned char c)
 {
-	if (!iobase)
+	if (!uartport)
 		return 0;
 
 	write_to_uart(UART_TX, c);
@@ -133,7 +143,7 @@ int uart_puts(const char *text)
 {
 	size_t i, len = strlen(text);
 
-	if (!iobase)
+	if (!uartport)
 		return 0;
 
 	for (i = 0; i < len; i++)
@@ -144,19 +154,12 @@ int uart_puts(const char *text)
 
 static int uart_config(void)
 {
-	/*
-	 * enable FIFOs
-	 * clear RX and TX FIFO
-	 * set irq trigger to 8 bytes
-	 */
-	write_to_uart(UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT | UART_FCR_TRIGGER_1);
+	if (!uartport)
+		return 0;
 
 	/* disable interrupts */
 	write_to_uart(UART_IER, 0);
 
-	/* DTR + RTS */
-	write_to_uart(UART_MCR, UART_MCR_DTR|UART_MCR_RTS);
-
 	/*
 	 * 8bit word length
 	 * 1 stop bit
@@ -169,51 +172,59 @@ static int uart_config(void)
 	write_to_uart(UART_LCR, lcr);
 
 	/*
-	 * set baudrate to 9600
+	 * set baudrate to 38400
 	 */
-	uint32_t divisor = 1843200 / 9600; //115200;
-	write_to_uart(UART_DLL, divisor & 0xff);
-	write_to_uart(UART_DLM, (divisor >> 8) & 0xff);
+	write_to_uart(UART_DLL, 0x03);
+	write_to_uart(UART_DLM, 0x00);
 
 	/* set DLAB=0 */
 	write_to_uart(UART_LCR, lcr & (~UART_LCR_DLAB));
 
+	/*
+	 * enable FIFOs
+	 * clear RX and TX FIFO
+	 * set irq trigger to 8 bytes
+	 */
+	write_to_uart(UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT | UART_FCR_TRIGGER_1);
+
 	return 0;
 }
 
-extern const void kernel_start;
-
 int uart_init(void)
 {
 	if (is_uhyve())
 		return 0;
 
+	if (uartport)
+		return uart_config();
+
 	pci_info_t pci_info;
 	uint32_t bar = 0;
 
 	// Searching for Intel's UART device
-	if (pci_get_device_info(0x8086, 0x0936, &pci_info, 1) == 0)
+	if (pci_get_device_info(0x8086, 0x0936, PCI_IGNORE_SUBID, &pci_info, 1) == 0)
 		goto Lsuccess;
 	// Searching for Qemu's UART device
-	if (pci_get_device_info(0x1b36, 0x0002, &pci_info, 1) == 0)
+	if (pci_get_device_info(0x1b36, 0x0002, PCI_IGNORE_SUBID, &pci_info, 1) == 0)
 		goto Lsuccess;
 	// Searching for Qemu's 2x UART device (pci-serial-2x)
-	if (pci_get_device_info(0x1b36, 0x0003, &pci_info, 1) == 0)
+	if (pci_get_device_info(0x1b36, 0x0003, PCI_IGNORE_SUBID, &pci_info, 1) == 0)
 		goto Lsuccess;
 	// Searching for Qemu's 4x UART device (pci-serial-4x)
-	if (pci_get_device_info(0x1b36, 0x0004, &pci_info, 1) == 0)
+	if (pci_get_device_info(0x1b36, 0x0004, PCI_IGNORE_SUBID, &pci_info, 1) == 0)
 		goto Lsuccess;
 
 	// default value of our QEMU configuration
-	iobase = DEFAULT_UART_PORT;
+	uartport = DEFAULT_UART_PORT;
 
 	// configure uart
-	return uart_config();;
+	return uart_config();
 
 Lsuccess:
-	iobase = pci_info.base[bar];
+	uartport = pci_info.base[bar];
+
 	//irq_install_handler(32+pci_info.irq, uart_handler);
-	kprintf("UART uses io address 0x%x\n", iobase);
+	kprintf("UART uses io address 0x%x\n", uartport);
 
 	// configure uart
 	return uart_config();
diff --git a/arch/x86/loader/CMakeLists.txt b/arch/x86/loader/CMakeLists.txt
index 0d142c166..b9ea11d7e 100644
--- a/arch/x86/loader/CMakeLists.txt
+++ b/arch/x86/loader/CMakeLists.txt
@@ -24,7 +24,7 @@ target_link_libraries(arch_x86_loader
 	"-T ${CMAKE_CURRENT_LIST_DIR}/link.ld"
 	"-z max-page-size=4096"
 	-Wl,--build-id=none		# required because CMake links with gcc, not ld
-	-nostdlib)
+	-nostdlib -static)
 
 # tools/proxy looks for `ldhermit.elf`
 set_target_properties(arch_x86_loader PROPERTIES
diff --git a/arch/x86/loader/entry.asm b/arch/x86/loader/entry.asm
index 2d99a6f36..3ae3d7c1a 100644
--- a/arch/x86/loader/entry.asm
+++ b/arch/x86/loader/entry.asm
@@ -99,32 +99,24 @@ stublet:
     ; Interpret multiboot information
     mov DWORD [mb_info], ebx
 
-    ; Initialize CPU features
-    call cpu_init
-
-    pop ebx ; restore pointer to multiboot structure
-    lgdt [GDT64.Pointer] ; Load the 64-bit global descriptor table.
-    jmp GDT64.Code:start64 ; Set the code segment and enter 64-bit long mode.
-
 ; This will set up the x86 control registers:
 ; Caching and the floating point unit are enabled
 ; Bootstrap page tables are loaded and page size
 ; extensions (huge pages) enabled.
-global cpu_init
 cpu_init:
 
 ; initialize page tables
 
     ; map vga 1:1
-    push edi
-    mov eax, VIDEO_MEM_ADDR   ; map vga
-    and eax, 0xFFFFF000       ; page align lower half
-    mov edi, eax
-    shr edi, 9                ; (edi >> 12) * 8 (index for boot_pgt)
-    add edi, boot_pgt
-    or eax, 0x113             ; set present, global, writable and cache disable bits
-    mov DWORD [edi], eax
-    pop edi
+    ; push edi
+    ; mov eax, VIDEO_MEM_ADDR   ; map vga
+    ; and eax, 0xFFFFF000       ; page align lower half
+    ; mov edi, eax
+    ; shr edi, 9                ; (edi >> 12) * 8 (index for boot_pgt)
+    ; add edi, boot_pgt
+    ; or eax, 0x13             ; set present, writable and cache disable bits
+    ; mov DWORD [edi], eax
+    ; pop edi
 
     ; map multiboot info 1:1
     push edi
@@ -133,7 +125,7 @@ cpu_init:
     mov edi, eax
     shr edi, 9                ; (edi >> 12) * 8 (index for boot_pgt)
     add edi, boot_pgt
-    or eax, 0x101             ; set present and global bits
+    or eax, 0x3               ; set present and writable bits
     mov DWORD [edi], eax
     pop edi
 
@@ -151,7 +143,7 @@ L0: cmp ecx, ebx
     mov edi, eax
     shr edi, 9                ; (edi >> 12) * 8 (index for boot_pgt)
     add edi, boot_pgt
-    or eax, 0x103             ; set present, global and writable bits
+    or eax, 0x3               ; set present and writable bits
     mov DWORD [edi], eax
     add ecx, 0x1000
     jmp L0
@@ -188,23 +180,22 @@ L1:
     test edx, 1 << 29 ; Test if the LM-bit, which is bit 29, is set in the D-register.
     jz Linvalid ; They aren't, there is no long mode.
 
+    ; Set CR3
+    mov eax, boot_pml4
+    ;or eax, (1 << 0)        ; set present bit
+    mov cr3, eax
 
-    ; we need to enable PAE modus
+    ; we need to enable PAE mode
     mov eax, cr4
     or eax, 1 << 5
     mov cr4, eax
 
-    ; switch to the compatibility mode (which is part of long mode)
+	; switch to the compatibility mode (which is part of long mode)
     mov ecx, 0xC0000080
     rdmsr
     or eax, 1 << 8
     wrmsr
 
-    ; Set CR3
-    mov eax, boot_pml4
-    or eax, (1 << 0)        ; set present bit
-    mov cr3, eax
-
     ; Set CR4
     mov eax, cr4
     and eax, 0xfffbf9ff     ; disable SSE
@@ -221,7 +212,9 @@ L1:
     or eax, (1 << 31)       ; enable paging
     mov cr0, eax
 
-    ret
+	;pop ebx ; restore pointer to multiboot structure
+    lgdt [GDT64.Pointer] ; Load the 64-bit global descriptor table.
+    jmp GDT64.Code:start64 ; Set the code segment and enter 64-bit long mode.
 
 ; there is no long mode
 Linvalid:
@@ -244,7 +237,7 @@ start64:
 
     ; jump to the boot processors's C code
     extern main
-    call main
+    jmp main
     jmp $
 
 SECTION .data
@@ -262,17 +255,17 @@ boot_stack:
 ; Bootstrap page tables are used during the initialization.
 ALIGN 4096
 boot_pml4:
-    DQ boot_pdpt + 0x107 ; PG_PRESENT | PG_GLOBAL | PG_RW | PG_USER
-    times 510 DQ 0       ; PAGE_MAP_ENTRIES - 2
-    DQ boot_pml4 + 0x303 ; PG_PRESENT | PG_GLOBAL | PG_RW | PG_SELF (self-reference)
+    DQ boot_pdpt + 0x7  ; PG_PRESENT | PG_RW | PG_USER (global bit no longer set)
+    times 510 DQ 0      ; PAGE_MAP_ENTRIES - 2
+    DQ boot_pml4 + 0x3  ; PG_PRESENT | PG_RW (last entry points back at boot_pml4)
 boot_pdpt:
-    DQ boot_pgd + 0x107  ; PG_PRESENT | PG_GLOBAL | PG_RW | PG_USER
-    times 510 DQ 0       ; PAGE_MAP_ENTRIES - 2
-    DQ boot_pml4 + 0x303 ; PG_PRESENT | PG_GLOBAL | PG_RW | PG_SELF (self-reference)
+    DQ boot_pgd + 0x7   ; PG_PRESENT | PG_RW | PG_USER (global bit no longer set)
+    times 510 DQ 0      ; PAGE_MAP_ENTRIES - 2
+    DQ boot_pml4 + 0x3  ; PG_PRESENT | PG_RW (last entry points at boot_pml4)
 boot_pgd:
-    DQ boot_pgt + 0x107  ; PG_PRESENT | PG_GLOBAL | PG_RW | PG_USER
-    times 510 DQ 0       ; PAGE_MAP_ENTRIES - 2
-    DQ boot_pml4 + 0x303 ; PG_PRESENT | PG_GLOBAL | PG_RW | PG_SELF (self-reference)
+    DQ boot_pgt + 0x7   ; PG_PRESENT | PG_RW | PG_USER (global bit no longer set)
+    times 510 DQ 0      ; PAGE_MAP_ENTRIES - 2
+    DQ boot_pml4 + 0x3  ; PG_PRESENT | PG_RW (last entry points at boot_pml4)
 boot_pgt:
     times 512 DQ 0
 
diff --git a/arch/x86/loader/include/ctype.h b/arch/x86/loader/include/ctype.h
new file mode 100644
index 000000000..c5aa3915b
--- /dev/null
+++ b/arch/x86/loader/include/ctype.h
@@ -0,0 +1,129 @@
+/****************************************************************************************
+ *
+ * Author: Stefan Lankes
+ *         Chair for Operating Systems, RWTH Aachen University
+ * Date:   24/03/2011
+ *
+ ****************************************************************************************
+ * 
+ * Written by the Chair for Operating Systems, RWTH Aachen University
+ * 
+ * NO Copyright (C) 2010, Stefan Lankes,
+ * consider these trivial functions to be public domain.
+ * 
+ * These functions are distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ */ 
+
+/** 
+ * @author Stefan Lankes
+ * @file include/ctype.h
+ * @brief Functions related to alphanumerical character values
+ *
+ * This file contains functions helping to determine 
+ * the type of alphanumerical character values.
+ */
+
+#ifndef __CTYPE_H_
+#define __CTYPE_H_
+
+/** Returns true if the value of 'c' is an ASCII-character */
+static inline int isascii(int c) 
+{
+	return (((unsigned char)(c))<=0x7f);
+}
+
+/** Applies an and-operation to 
+ * push the value of 'c' into the ASCII-range */
+static inline int toascii(int c)
+{
+	return (((unsigned char)(c))&0x7f);
+}
+
+/** Returns true if the value of 'c' is the 
+ * space character or a control character */
+static inline int isspace(int c)
+{
+	if (!isascii(c))
+		return 0;
+
+	if (' ' == (unsigned char) c)
+		return 1;
+	if ('\n' == (unsigned char) c)
+		return 1;
+	if ('\r' == (unsigned char) c)
+		return 1;
+	if ('\t' == (unsigned char) c)
+		return 1;
+	if ('\v' == (unsigned char) c)
+		return 1;
+	if ('\f' == (unsigned char) c)
+		return 1;
+
+	return 0;
+}
+
+/** Returns true if the value of 'c' is a number */
+static inline int isdigit(int c)
+{
+	if (!isascii(c))
+		return 0;
+
+	if (((unsigned char) c >= '0') && ((unsigned char) c <= '9'))
+		return 1;
+
+	return 0;
+}
+
+/** Returns true if the value of 'c' is a lower case letter */
+static inline int islower(int c)
+{
+	if (!isascii(c))
+		return 0;
+
+	if (((unsigned char) c >= 'a') && ((unsigned char) c <= 'z'))
+		return 1;
+
+	return 0;
+}
+
+/** Returns true if the value of 'c' is an upper case letter */
+static inline int isupper(int c)
+{
+	if (!isascii(c))
+		return 0;
+
+	if (((unsigned char) c >= 'A') && ((unsigned char) c <= 'Z'))
+		return 1;
+
+	return 0;
+}
+
+/** Returns true if the value of 'c' is an alphabetic character */
+static inline int isalpha(int c)
+{
+	if (isupper(c) || islower(c))
+		return 1;
+
+	return 0;
+}
+
+/** Makes the input character lower case.\n Will do nothing if it 
+ * was something different than an upper case letter before. */
+static inline unsigned char tolower(unsigned char c)
+{
+	if (isupper(c))
+		c -= 'A'-'a';
+	return c;
+}
+
+/** Makes the input character upper case.\n Will do nothing if it 
+ * was something different than a lower case letter before. */
+static inline unsigned char toupper(unsigned char c)
+{
+	if (islower(c))
+		c -= 'a'-'A';
+	return c;
+}
+
+#endif
diff --git a/arch/x86/loader/include/limits.h b/arch/x86/loader/include/limits.h
new file mode 100644
index 000000000..4f311b8ad
--- /dev/null
+++ b/arch/x86/loader/include/limits.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2010, Stefan Lankes, RWTH Aachen University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of the University nor the names of its contributors
+ *      may be used to endorse or promote products derived from this
+ *      software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @author Stefan Lankes
+ * @file include/limits.h
+ * @brief Define constants related to numerical value-ranges of variable types
+ *
+ * This file contains define constants for the numerical
+ * ranges of the most typical variable types.
+ */
+
+#ifndef __ARCH_LIMITS_H__
+#define __ARCH_LIMITS_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Number of bits in a char */
+#define	CHAR_BIT	8
+
+/** Maximum value for a signed char */
+#define	SCHAR_MAX	0x7f
+/** Minimum value for a signed char */
+#define	SCHAR_MIN	(-0x7f - 1)
+
+/** Maximum value for an unsigned char */
+#define	UCHAR_MAX	0xff
+
+/** Maximum value for an unsigned short */
+#define	USHRT_MAX	0xffff
+/** Maximum value for a short */
+#define	SHRT_MAX	0x7fff
+/** Minimum value for a short */
+#define	SHRT_MIN	(-0x7fff - 1)
+
+/** Maximum value for an unsigned int */
+#define	UINT_MAX	0xffffffffU
+/** Maximum value for an int */
+#define	INT_MAX		0x7fffffff
+/** Minimum value for an int */
+#define	INT_MIN	(-0x7fffffff - 1)
+
+/** Maximum value for an unsigned long */
+#define	ULONG_MAX	0xffffffffUL
+/** Maximum value for a long */
+#define	LONG_MAX	0x7fffffffL
+/** Minimum value for a long */
+#define	LONG_MIN	(-0x7fffffffL - 1)
+
+/** Maximum value for an unsigned long long */
+#define	ULLONG_MAX	0xffffffffffffffffULL
+/** Maximum value for a long long */
+#define	LLONG_MAX	0x7fffffffffffffffLL
+/** Minimum value for a long long */
+#define	LLONG_MIN	(-0x7fffffffffffffffLL - 1)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/arch/x86/loader/include/page.h b/arch/x86/loader/include/page.h
index ddb158505..adf46fc72 100644
--- a/arch/x86/loader/include/page.h
+++ b/arch/x86/loader/include/page.h
@@ -86,12 +86,12 @@ static inline size_t sign_extend(ssize_t addr, int bits)
 #endif
 
 /// The number of entries in a page map table
-#define PAGE_MAP_ENTRIES	       (1L << PAGE_MAP_BITS)
+#define PAGE_MAP_ENTRIES	(1L << PAGE_MAP_BITS)
 
 /// Align to next page
-#define PAGE_FLOOR(addr)        (((addr) + PAGE_SIZE - 1) & PAGE_MASK)
+#define PAGE_CEIL(addr)		(((addr) + PAGE_SIZE - 1) & PAGE_MASK)
 /// Align to page
-#define PAGE_CEIL(addr)         ( (addr)                  & PAGE_MASK)
+#define PAGE_FLOOR(addr)	( (addr)                  & PAGE_MASK)
 
 /// Page is present
 #define PG_PRESENT		(1 << 0)
diff --git a/arch/x86/loader/include/stddef.h b/arch/x86/loader/include/stddef.h
index 36f95f25f..76c2add7b 100644
--- a/arch/x86/loader/include/stddef.h
+++ b/arch/x86/loader/include/stddef.h
@@ -85,6 +85,14 @@ typedef char int8_t;
 /// 16 bit wide char type
 typedef unsigned short wchar_t;
 
+/** @brief String to long
+ *
+ * @return Long value of the parsed numerical string
+ */
+long _strtol(const char* str, char** endptr, int base);
+
+#define strtol(str, endptr, base)	_strtol((str), (endptr), (base))
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/arch/x86/loader/include/string.h b/arch/x86/loader/include/string.h
index 84ef64a67..3d51a9492 100644
--- a/arch/x86/loader/include/string.h
+++ b/arch/x86/loader/include/string.h
@@ -41,7 +41,9 @@ char *strncpy(char *dest, const char *src, size_t n);
 char *strcpy(char *dest, const char *src);
 int strcmp(const char *s1, const char *s2);
 int strncmp(const char *s1, const char *s2, size_t n);
-char *strstr(const char *s, const char *find);
+char *_strstr(const char *s, const char *find);
+
+#define strstr(s, find) _strstr((s), (find))
 
 #ifdef __cplusplus
 }
diff --git a/arch/x86/loader/include/uart.h b/arch/x86/loader/include/uart.h
index 0567aafa5..52d041b9c 100644
--- a/arch/x86/loader/include/uart.h
+++ b/arch/x86/loader/include/uart.h
@@ -34,17 +34,11 @@
 extern "C" {
 #endif
 
-/** @brief Initialize UART output
- *
- * @return Returns 0 on success
- */
-int uart_init(void);
-
 /** @brief Initialize UART output without a device check
  *
  * @return Returns 0 on success
  */
-int uart_early_init(char*);
+int uart_init(const char*);
 
 /** @brief Simple string output on a serial device.
  *
@@ -56,7 +50,7 @@ int uart_puts(const char *text);
 
 /** @brief Simple character output on a serial device.
  *
- * @return The original input character casted to int 
+ * @return The original input character casted to int
  */
 int uart_putchar(unsigned char c);
 
diff --git a/arch/x86/loader/main.c b/arch/x86/loader/main.c
index 12cba0777..b0439de85 100644
--- a/arch/x86/loader/main.c
+++ b/arch/x86/loader/main.c
@@ -42,8 +42,9 @@ extern const void kernel_start;
 extern const void kernel_end;
 extern const void bss_start;
 extern const void bss_end;
+extern size_t uartport;
 
-static int load_code(size_t viraddr, size_t phyaddr, size_t limit, uint32_t file_size, size_t mem_size)
+static int load_code(size_t viraddr, size_t phyaddr, size_t limit, uint32_t file_size, size_t mem_size, size_t cmdline, size_t cmdsize)
 {
 	const size_t displacement = 0x200000ULL - (phyaddr & 0x1FFFFFULL);
 
@@ -65,6 +66,9 @@ static int load_code(size_t viraddr, size_t phyaddr, size_t limit, uint32_t file
 	*((uint32_t*) (viraddr + 0x30)) = 0; // apicid
 	*((uint64_t*) (viraddr + 0x38)) = mem_size;
 	*((uint32_t*) (viraddr + 0x60)) = 1; // numa nodes
+	*((uint64_t*) (viraddr + 0x98)) = uartport;
+	*((uint64_t*) (viraddr + 0xA0)) = cmdline;
+	*((uint64_t*) (viraddr + 0xA8)) = cmdsize;
 
 	// move file to a 2 MB boundary
 	for(size_t va = viraddr+(npages << PAGE_BITS)+displacement-sizeof(uint8_t); va >= viraddr+displacement; va-=sizeof(uint8_t))
@@ -86,6 +90,8 @@ void main(void)
 	elf_header_t* header = NULL;
 	uint32_t file_size = 0;
 	size_t mem_size = 0;
+	size_t cmdline_size = 0;
+	size_t cmdline = 0;
 
 	// initialize .bss section
 	memset((void*)&bss_start, 0x00, ((size_t) &bss_end - (size_t) &bss_start));
@@ -95,6 +101,12 @@ void main(void)
 	kprintf("Loader starts at %p and ends at %p\n", &kernel_start, &kernel_end);
 	kprintf("Found mb_info at %p\n", mb_info);
 
+	if (mb_info && mb_info->cmdline) {
+		cmdline = (size_t) mb_info->cmdline;
+		cmdline_size = strlen((char*)cmdline);
+	}
+
+	// enable paging
 	page_init();
 
 	if (mb_info) {
@@ -171,7 +183,7 @@ void main(void)
 					viraddr = prog_header->virt_addr;
 				if (!phyaddr)
 					phyaddr = prog_header->offset + (size_t)header;
-				file_size = prog_header->virt_addr + PAGE_FLOOR(prog_header->file_size) - viraddr;
+				file_size = prog_header->virt_addr + PAGE_CEIL(prog_header->file_size) - viraddr;
 				mem_size += prog_header->mem_size;
 			}
 			break;
@@ -184,7 +196,7 @@ void main(void)
 		}
 	}
 
-	if (BUILTIN_EXPECT(load_code(viraddr, phyaddr, limit, file_size, mem_size), 0))
+	if (BUILTIN_EXPECT(load_code(viraddr, phyaddr, limit, file_size, mem_size, cmdline, cmdline_size), 0))
 		goto failed;
 
 	kprintf("Entry point: 0x%zx\n", header->entry);
diff --git a/arch/x86/loader/page.c b/arch/x86/loader/page.c
index 3b88adbf8..0cab29437 100644
--- a/arch/x86/loader/page.c
+++ b/arch/x86/loader/page.c
@@ -45,7 +45,7 @@ extern const void kernel_start;
 extern const void kernel_end;
 
 /// This page is reserved for copying
-#define PAGE_TMP		(PAGE_FLOOR((size_t) &kernel_start) - PAGE_SIZE)
+#define PAGE_TMP		(PAGE_CEIL((size_t) &kernel_start) - PAGE_SIZE)
 
 /** This PGD table is initialized in entry.asm */
 extern size_t* boot_map;
@@ -188,12 +188,12 @@ int page_init(void)
 
 		// already mapped => entry.asm
 		//addr = (size_t) mb_info & PAGE_MASK;
-		//npages = PAGE_FLOOR(sizeof(*mb_info)) >> PAGE_BITS;
+		//npages = PAGE_CEIL(sizeof(*mb_info)) >> PAGE_BITS;
 		//page_map(addr, addr, npages, PG_GLOBAL);
 
 		if (mb_info->flags & MULTIBOOT_INFO_MODS) {
 			addr = mb_info->mods_addr;
-			npages = PAGE_FLOOR(mb_info->mods_count*sizeof(multiboot_module_t)) >> PAGE_BITS;
+			npages = PAGE_CEIL(mb_info->mods_count*sizeof(multiboot_module_t)) >> PAGE_BITS;
 			ret = page_map(addr, addr, npages, PG_GLOBAL);
 			kprintf("Map module info at 0x%lx (ret %d)\n", addr, ret);
 
@@ -202,14 +202,14 @@ int page_init(void)
 			// at first we determine the first free page
 			for(int i=0; i<mb_info->mods_count; i++) {
 				if (first_page < mmodule[i].mod_end)
-					first_page = PAGE_FLOOR(mmodule[i].mod_end);
+					first_page = PAGE_CEIL(mmodule[i].mod_end);
 			}
 
 			// we map only the first page of each module (= ELF file) because
 			// we need only the program header of the ELF file
 			for(int i=0; i<mb_info->mods_count; i++) {
 				addr = mmodule[i].mod_start;
-				npages = PAGE_FLOOR(mmodule[i].mod_end - mmodule[i].mod_start) >> PAGE_BITS;
+				npages = PAGE_CEIL(mmodule[i].mod_end - mmodule[i].mod_start) >> PAGE_BITS;
 				ret = page_map(addr, addr, 1 /*npages*/, PG_GLOBAL);
 				kprintf("Map first page of module %d at 0x%lx (ret %d)\n", i, addr, ret);
 				kprintf("Module %d consists %zd\n", i, npages);
diff --git a/arch/x86/loader/stdio.c b/arch/x86/loader/stdio.c
index c4ee533fb..9c3e5f422 100644
--- a/arch/x86/loader/stdio.c
+++ b/arch/x86/loader/stdio.c
@@ -31,7 +31,7 @@
 
 int koutput_init(void)
 {
-	uart_early_init((char*) mb_info->cmdline);
+	uart_init((const char*) (size_t)mb_info->cmdline);
 
 	return 0;
 }
diff --git a/arch/x86/loader/strstr.c b/arch/x86/loader/strstr.c
new file mode 100644
index 000000000..d2b687e86
--- /dev/null
+++ b/arch/x86/loader/strstr.c
@@ -0,0 +1,73 @@
+/*	$NetBSD: strstr.c,v 1.1 2005/12/20 19:28:52 christos Exp $	*/
+
+/*-
+ * Copyright (c) 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Chris Torek.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * The code has been taken from NetBSD (sys/libkern/strstr.c) and is consequently
+ * BSD-licensed. Unnecessary functions have been removed and all typedefs required
+ * have been added.
+ */
+
+/* HermitCore prelude */
+#include <stddef.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+
+/*
+ * Find the first occurrence of find in s.
+ * Returns NULL if s or find is NULL or if there is no match.
+ */
+char *
+_strstr(const char *s, const char *find)
+{
+	char c, sc;
+	size_t len;
+
+	if (BUILTIN_EXPECT(!s, 0))
+		return NULL;
+	if (BUILTIN_EXPECT(!find, 0))
+		return NULL;
+
+	if ((c = *find++) != 0) {	/* an empty needle matches at s itself */
+		len = strlen(find);	/* length of the needle after its first char */
+		do {
+			do {	/* scan for the needle's first character */
+				if ((sc = *s++) == 0)
+					return (NULL);
+			} while (sc != c);
+		} while (strncmp(s, find, len) != 0);	/* compare the remainder */
+		s--;	/* step back onto the first matched character */
+	}
+	return ((char *) s);
+}
diff --git a/arch/x86/loader/strtol.c b/arch/x86/loader/strtol.c
new file mode 100644
index 000000000..639e11e9f
--- /dev/null
+++ b/arch/x86/loader/strtol.c
@@ -0,0 +1,132 @@
+/*-
+ * Copyright (c) 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Chris Torek.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * From: @(#)strtol.c	8.1 (Berkeley) 6/4/93
+ */
+
+/*
+ * The code has been taken from FreeBSD (sys/libkern/strtol.c) and is consequently
+ * BSD-licensed. Unnecessary functions have been removed and all typedefs required
+ * have been added.
+ */
+
+/* HermitCore prelude */
+#include <stddef.h>
+#include <ctype.h>
+#include <limits.h>
+
+/*
+ * Convert a string to a long integer.
+ *
+ * Ignores `locale' stuff.  Assumes that the upper and lower case
+ * alphabets and digits are each contiguous.
+ *
+ * On overflow the result is clamped to LONG_MIN/LONG_MAX.  If endptr is
+ * non-NULL it receives the first unparsed character (nptr if no digits).
+ */
+long
+_strtol(const char *nptr, char **endptr, int base)
+{
+	const char *s = nptr;
+	unsigned long acc;
+	unsigned char c;
+	unsigned long cutoff;
+	int neg = 0, any, cutlim;
+
+	/*
+	 * Skip white space and pick up leading +/- sign if any.
+	 * If base is 0, allow 0x for hex and 0 for octal, else
+	 * assume decimal; if base is already 16, allow 0x.
+	 */
+	do {
+		c = *s++;
+	} while (isspace(c));
+	if (c == '-') {
+		neg = 1;
+		c = *s++;
+	} else if (c == '+')
+		c = *s++;
+	if ((base == 0 || base == 16) &&
+	    c == '0' && (*s == 'x' || *s == 'X')) {
+		c = s[1];
+		s += 2;
+		base = 16;
+	}
+	if (base == 0)
+		base = c == '0' ? 8 : 10;
+
+	/*
+	 * Compute the cutoff value between legal numbers and illegal
+	 * numbers.  That is the largest legal value, divided by the
+	 * base.  An input number that is greater than this value, if
+	 * followed by a legal input character, is too big.  One that
+	 * is equal to this value may be valid or not; the limit
+	 * between valid and invalid numbers is then based on the last
+	 * digit.  For instance, if the range for longs is
+	 * [-2147483648..2147483647] and the input base is 10,
+	 * cutoff will be set to 214748364 and cutlim to either
+	 * 7 (neg==0) or 8 (neg==1), meaning that if we have accumulated
+	 * a value > 214748364, or equal but the next digit is > 7 (or 8),
+	 * the number is too big, and we will return a range error.
+	 *
+	 * Set any if any `digits' consumed; make it negative to indicate
+	 * overflow.
+	 */
+	cutoff = neg ? -(unsigned long)LONG_MIN : LONG_MAX;
+	cutlim = cutoff % (unsigned long)base;
+	cutoff /= (unsigned long)base;
+	for (acc = 0, any = 0;; c = *s++) {
+		if (!isascii(c))
+			break;
+		if (isdigit(c))
+			c -= '0';
+		else if (isalpha(c))
+			c -= isupper(c) ? 'A' - 10 : 'a' - 10;
+		else
+			break;
+		if (c >= base)
+			break;
+		if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim))
+			any = -1;
+		else {
+			any = 1;
+			acc *= base;
+			acc += c;
+		}
+	}
+	if (any < 0) {
+		acc = neg ? LONG_MIN : LONG_MAX;
+	} else if (neg)
+		acc = -acc;
+	if (endptr != 0)
+		*((const char **)endptr) = any ? s - 1 : nptr;
+	return (acc);
+}
diff --git a/arch/x86/loader/uart.c b/arch/x86/loader/uart.c
index d6013dd61..5d0599843 100644
--- a/arch/x86/loader/uart.c
+++ b/arch/x86/loader/uart.c
@@ -92,28 +92,38 @@
 
 #define DEFAULT_UART_PORT	0 //0xc110
 
-static size_t	iobase = 0;
+size_t	uartport = 0;
 
 static inline unsigned char read_from_uart(uint32_t off)
 {
 	uint8_t c;
 
-	if (iobase)
-		c = inportb(iobase + off);
+	if (uartport)
+		c = inportb(uartport + off);
 
 	return c;
 }
 
+static inline int is_transmit_empty(void)
+{
+	if (uartport)
+		return inportb(uartport + UART_LSR) & 0x20;
+
+	return 1;
+}
+
 static void write_to_uart(uint32_t off, unsigned char c)
 {
-	if (iobase)
-		outportb(iobase + off, c);
+	while (is_transmit_empty() == 0) ;
+
+	if (uartport)
+		outportb(uartport + off, c);
 }
 
 /* Puts a single character on a serial device */
 int uart_putchar(unsigned char c)
 {
-	if (!iobase)
+	if (!uartport)
 		return 0;
 
 	write_to_uart(UART_TX, c);
@@ -126,7 +136,7 @@ int uart_puts(const char *text)
 {
 	size_t i, len = strlen(text);
 
-	if (!iobase)
+	if (!uartport)
 		return 0;
 
 	for (i = 0; i < len; i++)
@@ -137,22 +147,12 @@ int uart_puts(const char *text)
 
 static int uart_config(void)
 {
-	if (!iobase)
+	if (!uartport)
 		return 0;
 
-	/*
-	 * enable FIFOs
-	 * clear RX and TX FIFO
-	 * set irq trigger to 8 bytes
-	 */
-	write_to_uart(UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT | UART_FCR_TRIGGER_1);
-
 	/* disable interrupts */
 	write_to_uart(UART_IER, 0);
 
-	/* DTR + RTS */
-	write_to_uart(UART_MCR, UART_MCR_DTR|UART_MCR_RTS);
-
 	/*
 	 * 8bit word length
 	 * 1 stop bit
@@ -165,82 +165,39 @@ static int uart_config(void)
 	write_to_uart(UART_LCR, lcr);
 
 	/*
-	 * set baudrate to 9600
+	 * set baudrate to 38400
 	 */
-	uint32_t divisor = 1843200 / 9600; // 115200;
-	write_to_uart(UART_DLL, divisor & 0xff);
-	write_to_uart(UART_DLM, (divisor >> 8) & 0xff);
+	write_to_uart(UART_DLL, 0x03);
+	write_to_uart(UART_DLM, 0x00);
 
 	/* set DLAB=0 */
 	write_to_uart(UART_LCR, lcr & (~UART_LCR_DLAB));
 
+	/*
+	 * enable FIFOs
+	 * clear RX and TX FIFO
+	 * set irq trigger to 8 bytes
+	 */
+	write_to_uart(UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT | UART_FCR_TRIGGER_1);
+
 	return 0;
 }
 
-extern const void kernel_start;
-
-int uart_early_init(char* cmdline)
+int uart_init(const char* cmdline)
 {
-#if 1
-	// default value of our QEMU configuration
-	iobase = DEFAULT_UART_PORT;
-#else
-	if (BUILTIN_EXPECT(!cmdline, 0))
-		return -EINVAL;
+	char* str;
 
-	char* str = strstr(cmdline, "uart=");
-	if (!str)
-		return -EINVAL;
+	if (!uartport && cmdline && ((str = strstr(cmdline, "uart=io:")) != NULL))
+		uartport = strtol(str+8, (char **)NULL, 16);
 
-	if (strncmp(str, "uart=io:", 8) == 0) {
-		iobase = strtol(str+8, (char **)NULL, 16);
-		if (!iobase)
-			iobase = DEFAULT_UART_PORT;
-			return -EINVAL;
-	}
-#endif
+	if (!uartport)
+		uartport = DEFAULT_UART_PORT;
+
+	if (!uartport)
+		return 0;
 
 	// configure uart
 	return uart_config();
 }
 
-int uart_init(void)
-{
-#ifdef CONFIG_PCI
-	pci_info_t pci_info;
-	uint32_t bar = 0;
-
-	// Searching for Intel's UART device
-	if (pci_get_device_info(0x8086, 0x0936, &pci_info) == 0)
-		goto Lsuccess;
- 	// Searching for Qemu's UART device
-	if (pci_get_device_info(0x1b36, 0x0002, &pci_info) == 0)
-		goto Lsuccess;
-	// Searching for Qemu's 2x UART device (pci-serial-2x)
-	if (pci_get_device_info(0x1b36, 0x0003, &pci_info) == 0)
-		goto Lsuccess;
-	// Searching for Qemu's 4x UART device (pci-serial-4x)
-	if (pci_get_device_info(0x1b36, 0x0004, &pci_info) == 0)
-		goto Lsuccess;
-
-	iobase = DEFAULT_UART_PORT;
-
-	return uart_config();
-
-Lsuccess:
-	iobase = pci_info.base[bar];
-	//irq_install_handler(32+pci_info.irq, uart_handler);
-	kprintf("UART uses io address 0x%x\n", iobase);
-
-	// configure uart
-	return uart_config();
-#else
-	// default value of our QEMU configuration
-	iobase = DEFAULT_UART_PORT;
-
-	// configure uart
-	return uart_config();
-#endif
-}
-
 #endif
diff --git a/arch/x86/mm/memory.c b/arch/x86/mm/memory.c
index 8313ddb01..697c2b6ce 100644
--- a/arch/x86/mm/memory.c
+++ b/arch/x86/mm/memory.c
@@ -194,13 +194,13 @@ void* page_alloc(size_t sz, uint32_t flags)
 {
 	size_t viraddr = 0;
 	size_t phyaddr;
-	uint32_t npages = PAGE_FLOOR(sz) >> PAGE_BITS;
+	uint32_t npages = PAGE_CEIL(sz) >> PAGE_BITS;
 	size_t pflags = PG_PRESENT|PG_GLOBAL|PG_XD;
 
 	if (BUILTIN_EXPECT(!npages, 0))
 		goto oom;
 
-	viraddr = vma_alloc(PAGE_FLOOR(sz), flags);
+	viraddr = vma_alloc(PAGE_CEIL(sz), flags);
 	if (BUILTIN_EXPECT(!viraddr, 0))
 		goto oom;
 
@@ -238,10 +238,10 @@ void page_free(void* viraddr, size_t sz)
 
 	phyaddr = virt_to_phys((size_t)viraddr);
 
-	vma_free((size_t) viraddr, (size_t) viraddr + PAGE_FLOOR(sz));
+	vma_free((size_t) viraddr, (size_t) viraddr + PAGE_CEIL(sz));
 
 	if (phyaddr)
-		put_pages(phyaddr, PAGE_FLOOR(sz) >> PAGE_BITS);
+		put_pages(phyaddr, PAGE_CEIL(sz) >> PAGE_BITS);
 }
 
 int memory_init(void)
@@ -267,13 +267,13 @@ int memory_init(void)
 			// mark first available memory slot as free
 			for(; mmap < mmap_end; mmap = (multiboot_memory_map_t*) ((size_t) mmap + sizeof(uint32_t) + mmap->size)) {
 				if (mmap->type == MULTIBOOT_MEMORY_AVAILABLE) {
-					start_addr = PAGE_FLOOR(mmap->addr);
-					end_addr = PAGE_CEIL(mmap->addr + mmap->len);
+					start_addr = PAGE_CEIL(mmap->addr);
+					end_addr = PAGE_FLOOR(mmap->addr + mmap->len);
 
 					LOG_INFO("Free region 0x%zx - 0x%zx\n", start_addr, end_addr);
 
 					if ((start_addr <= base) && (end_addr >= PAGE_2M_FLOOR((size_t) &kernel_start + image_size))) {
-						init_list.start = PAGE_2M_FLOOR((size_t) &kernel_start + image_size);
+						init_list.start = PAGE_2M_CEIL((size_t) &kernel_start + image_size);
 						init_list.end = end_addr;
 
 						LOG_INFO("Add region 0x%zx - 0x%zx\n", init_list.start, init_list.end);
@@ -295,13 +295,13 @@ int memory_init(void)
 		atomic_int64_add(&total_pages, (limit-base) >> PAGE_BITS);
 		atomic_int64_add(&total_available_pages, (limit-base) >> PAGE_BITS);
 
-		init_list.start = PAGE_2M_FLOOR(base + image_size);
+		init_list.start = PAGE_2M_CEIL(base + image_size);
 		init_list.end = limit;
 	}
 
 	// determine allocated memory, we use 2MB pages to map the kernel
-	atomic_int64_add(&total_allocated_pages, PAGE_2M_FLOOR(image_size) >> PAGE_BITS);
-	atomic_int64_sub(&total_available_pages, PAGE_2M_FLOOR(image_size) >> PAGE_BITS);
+	atomic_int64_add(&total_allocated_pages, PAGE_2M_CEIL(image_size) >> PAGE_BITS);
+	atomic_int64_sub(&total_available_pages, PAGE_2M_CEIL(image_size) >> PAGE_BITS);
 
 	LOG_INFO("free list starts at 0x%zx, limit 0x%zx\n", init_list.start, init_list.end);
 
@@ -324,10 +324,10 @@ int memory_init(void)
 			for(; mmap < mmap_end; mmap = (multiboot_memory_map_t*) ((size_t) mmap + sizeof(uint32_t) + mmap->size))
 			{
 				if (mmap->type == MULTIBOOT_MEMORY_AVAILABLE) {
-					start_addr = PAGE_FLOOR(mmap->addr);
-					end_addr = PAGE_CEIL(mmap->addr + mmap->len);
+					start_addr = PAGE_CEIL(mmap->addr);
+					end_addr = PAGE_FLOOR(mmap->addr + mmap->len);
 
-					if ((start_addr <= base) && (end_addr >= PAGE_2M_FLOOR(base+image_size)))
+					if ((start_addr <= base) && (end_addr >= PAGE_2M_CEIL(base+image_size)))
 						end_addr = base;
 
 					// ignore everything below 1M => reserve for I/O devices
@@ -335,11 +335,11 @@ int memory_init(void)
 						start_addr = GAP_BELOW;
 
 					if (start_addr < (size_t)mb_info)
-						start_addr = PAGE_FLOOR((size_t)mb_info);
+						start_addr = PAGE_CEIL((size_t)mb_info);
 
-					if (mb_info->flags & MULTIBOOT_INFO_CMDLINE) {
-						if (start_addr < (size_t) mb_info->cmdline+2*PAGE_SIZE)
-							start_addr = PAGE_FLOOR((size_t) mb_info->cmdline+2*PAGE_SIZE);
+					if ((mb_info->flags & MULTIBOOT_INFO_CMDLINE) && cmdline) {
+						if (start_addr < (size_t) cmdline+cmdsize)
+							start_addr = PAGE_CEIL((size_t) cmdline+cmdsize);
 					}
 
 					if (start_addr >= end_addr)
diff --git a/arch/x86/mm/page.c b/arch/x86/mm/page.c
index af4b7c791..f381daf46 100644
--- a/arch/x86/mm/page.c
+++ b/arch/x86/mm/page.c
@@ -325,11 +325,16 @@ int page_init(void)
 		LOG_INFO("Detect Go runtime! Consequently, HermitCore zeroed heap.\n");
 	}
 
-	if (mb_info && ((mb_info->cmdline & PAGE_MASK) != ((size_t) mb_info & PAGE_MASK))) {
-		LOG_INFO("Map multiboot cmdline 0x%x into the virtual address space\n", mb_info->cmdline);
-		// reserve 2 pages for long cmdline strings
-		page_map(((size_t) mb_info->cmdline) & PAGE_MASK, ((size_t) mb_info->cmdline) & PAGE_MASK, 2, PG_GLOBAL|PG_RW|PG_PRESENT);
-	}
+	if (mb_info && (mb_info->flags & MULTIBOOT_INFO_CMDLINE) && (cmdline))
+	{
+		size_t i = 0;
+
+		while(((size_t) cmdline + i) <= ((size_t) cmdline + cmdsize))
+		{
+			page_map(((size_t) cmdline + i) & PAGE_MASK, ((size_t) cmdline + i) & PAGE_MASK, 1, PG_GLOBAL|PG_RW|PG_PRESENT);
+			i += PAGE_SIZE;
+		}
+	} else cmdline = 0;
 
 	/* Replace default pagefault handler */
 	irq_uninstall_handler(14);
diff --git a/arch/x86/mm/vma.c b/arch/x86/mm/vma.c
index cb56aacea..ad58d1b79 100644
--- a/arch/x86/mm/vma.c
+++ b/arch/x86/mm/vma.c
@@ -25,7 +25,9 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <hermit/stdio.h>
 #include <hermit/vma.h>
+#include <hermit/logging.h>
 #include <asm/multiboot.h>
 
 int vma_arch_init(void)
@@ -37,11 +39,20 @@ int vma_arch_init(void)
 		if (BUILTIN_EXPECT(ret, 0))
 			goto out;
 
-		if ((mb_info->cmdline & PAGE_MASK) != ((size_t) mb_info & PAGE_MASK)) {
-			// reserve 2 pages for long cmdline strings
-			ret = vma_add((size_t)mb_info->cmdline & PAGE_MASK, ((size_t)mb_info->cmdline & PAGE_MASK) + 2*PAGE_SIZE, VMA_READ|VMA_WRITE);
-			if (BUILTIN_EXPECT(ret, 0))
-				goto out;
+		if ((mb_info->flags & MULTIBOOT_INFO_CMDLINE) && cmdline) {
+			LOG_INFO("vma_arch_init: map cmdline %p (size 0x%zx)\n", cmdline, cmdsize);
+			// walk the cmdline in PAGE_SIZE steps and reserve each page, except the page that also holds mb_info
+			size_t i = 0;
+			while(((size_t) cmdline + i) < ((size_t) cmdline + cmdsize))
+			{
+				if ((((size_t)cmdline + i) & PAGE_MASK) != ((size_t) mb_info & PAGE_MASK)) {
+					ret = vma_add(((size_t)cmdline + i) & PAGE_MASK, (((size_t)cmdline + i) & PAGE_MASK) + PAGE_SIZE, VMA_READ|VMA_WRITE);
+					if (BUILTIN_EXPECT(ret, 0))
+						goto out;
+				}
+
+				i += PAGE_SIZE;
+			}
 		}
 	}
 
diff --git a/cmake/HermitCore-Configuration.cmake b/cmake/HermitCore-Configuration.cmake
index daebd24cb..bd5753eef 100644
--- a/cmake/HermitCore-Configuration.cmake
+++ b/cmake/HermitCore-Configuration.cmake
@@ -1,4 +1,4 @@
-set(PACKAGE_VERSION "0.1" CACHE STRING
+set(PACKAGE_VERSION "0.2.1" CACHE STRING
 	"HermitCore current version")
 
 set(MAX_CORES "512" CACHE STRING
diff --git a/config/bzImage b/config/bzImage
index 04f570ccc..14c60fd39 100644
Binary files a/config/bzImage and b/config/bzImage differ
diff --git a/config/linux_config b/config/linux_config
index 5613e1f79..f52a07bd5 100644
--- a/config/linux_config
+++ b/config/linux_config
@@ -139,7 +139,7 @@ CONFIG_ARCH_SUPPORTS_INT128=y
 # CONFIG_SYSFS_DEPRECATED is not set
 # CONFIG_RELAY is not set
 CONFIG_BLK_DEV_INITRD=y
-CONFIG_INITRAMFS_SOURCE="../config/initrd.cpio"
+CONFIG_INITRAMFS_SOURCE="/work/lankes/HermitCore/config/initrd.cpio"
 CONFIG_INITRAMFS_ROOT_UID=0
 CONFIG_INITRAMFS_ROOT_GID=0
 CONFIG_RD_GZIP=y
diff --git a/drivers/net/e1000.c b/drivers/net/e1000.c
index 8fb4f4b57..c460f96f6 100644
--- a/drivers/net/e1000.c
+++ b/drivers/net/e1000.c
@@ -1,4 +1,4 @@
-/* 
+/*
  * Copyright 2012 Stefan Lankes, Chair for Operating Systems,
  *                               RWTH Aachen University
  *
@@ -58,7 +58,7 @@ typedef struct {
 	uint32_t device;
 } board_t;
 
-static board_t board_tbl[] = 
+static board_t board_tbl[] =
 {
 	{"Intel", "Intel E1000 (82542)", 0x8086, 0x1000},
 	{"Intel", "Intel E1000 (82543GC FIBER)", 0x8086, 0x1001},
@@ -67,7 +67,7 @@ static board_t board_tbl[] =
 	{"Intel", "Intel E1000 (82544EI FIBER)", 0x8086, 0x1009},
 	{"Intel", "Intel E1000 (82544GC COPPER)", 0x8086, 0x100C},
 	{"Intel", "Intel E1000 (82544GC LOM)", 0x8086, 0x100D},
-	{"Intel", "Intel E1000 (82540EM)", 0x8086, 0x100E},	
+	{"Intel", "Intel E1000 (82540EM)", 0x8086, 0x100E},
 	{"Intel", "Intel E1000 (82540EM LOM)", 0x8086, 0x1015},
 	{"Intel", "Intel E1000 (82540EP LOM)", 0x8086, 0x1016},
 	{"Intel", "Intel E1000 (82540EP)", 0x8086, 0x1017},
@@ -132,7 +132,7 @@ static uint16_t eeprom_read(volatile uint8_t* base, uint8_t addr)
 
 	e1000_write(base, E1000_EERD, 1 | ((uint32_t)(addr) << 8));
 
-	while(!((tmp = e1000_read(base, E1000_EERD)) & (1 << 4))) 
+	while(!((tmp = e1000_read(base, E1000_EERD)) & (1 << 4)))
 		udelay(1);
 
 	data = (uint16_t)((tmp >> 16) & 0xFFFF);
@@ -148,7 +148,7 @@ static uint16_t eeprom_read(uint8_t* base, uint8_t addr)
 
 	e1000_write(base, E1000_EERD, 1 | ((uint32_t)(addr) << 2));
 
-	while(!((tmp = e1000_read(base, E1000_EERD)) & (1 << 1))) 
+	while(!((tmp = e1000_read(base, E1000_EERD)) & (1 << 1)))
 		udelay(1);
 
 	data = (uint16_t)((tmp >> 16) & 0xFFFF);
@@ -198,7 +198,7 @@ static err_t e1000if_output(struct netif* netif, struct pbuf* p)
 
 	// update the tail so the hardware knows it's ready
 	e1000if->tx_tail = (e1000if->tx_tail + 1) % NUM_TX_DESCRIPTORS;
-	e1000_write(e1000if->bar0, E1000_TDT, e1000if->tx_tail);	
+	e1000_write(e1000if->bar0, E1000_TDT, e1000if->tx_tail);
 
 #if ETH_PAD_SIZE
 	pbuf_header(p, ETH_PAD_SIZE); /* reclaim the padding word */
@@ -256,7 +256,7 @@ static void e1000_rx_inthandler(struct netif* netif)
 			LINK_STATS_INC(link.drop);
 		}
 
-no_eop:		
+no_eop:
 		e1000if->rx_desc[e1000if->rx_tail].status = 0;
 
 		// update tail and write the value to the device
@@ -333,12 +333,12 @@ err_t e1000if_init(struct netif* netif)
 	uint16_t tmp16, speed, cold = 0x40;
 	uint8_t tmp8, is64bit, mem_type, prefetch;
 	static uint8_t num = 0;
-	
+
 	LWIP_ASSERT("netif != NULL", (netif != NULL));
 
 	tmp8 = 0;
 	while (board_tbl[tmp8].vendor_str) {
-		if (pci_get_device_info(board_tbl[tmp8].vendor, board_tbl[tmp8].device, &pci_info, 1) == 0)
+		if (pci_get_device_info(board_tbl[tmp8].vendor, board_tbl[tmp8].device, PCI_IGNORE_SUBID, &pci_info, 1) == 0)
 			break;
 		tmp8++;
 	}
@@ -370,11 +370,11 @@ err_t e1000if_init(struct netif* netif)
 	netif->state = e1000if;
 	mynetif = netif;
 
-	e1000if->bar0 = (uint8_t*) vma_alloc(PAGE_FLOOR(pci_info.size[0]), VMA_READ|VMA_WRITE);
+	e1000if->bar0 = (uint8_t*) vma_alloc(PAGE_CEIL(pci_info.size[0]), VMA_READ|VMA_WRITE);
 	if (BUILTIN_EXPECT(!e1000if->bar0, 0))
 		goto oom;
 
-	int ret = page_map((size_t)e1000if->bar0, PAGE_CEIL(pci_info.base[0]), PAGE_FLOOR(pci_info.size[0]) >> PAGE_BITS, PG_GLOBAL|PG_RW|PG_PCD);
+	int ret = page_map((size_t)e1000if->bar0, PAGE_FLOOR(pci_info.base[0]), PAGE_CEIL(pci_info.size[0]) >> PAGE_BITS, PG_GLOBAL|PG_RW|PG_PCD);
 	if (BUILTIN_EXPECT(ret, 0))
 		goto oom;
 
@@ -394,7 +394,7 @@ err_t e1000if_init(struct netif* netif)
 		goto oom;
 	memset((void*) e1000if->tx_desc, 0x00, NUM_TX_DESCRIPTORS*sizeof(tx_desc_t));
 
-	LWIP_DEBUGF(NETIF_DEBUG, ("e1000if_init: Found %s at mmio 0x%x (size 0x%x), irq %u\n", board_tbl[tmp8].device_str, 
+	LWIP_DEBUGF(NETIF_DEBUG, ("e1000if_init: Found %s at mmio 0x%x (size 0x%x), irq %u\n", board_tbl[tmp8].device_str,
 		pci_info.base[0] & ~0xF, pci_info.size[0], e1000if->irq));
 	//LWIP_DEBUGF(NETIF_DEBUG, ("e1000if_init: Map iobase to %p\n", e1000if->bar0));
 	LWIP_DEBUGF(NETIF_DEBUG, ("e1000if_init: is64bit %u, prefetch %u\n", is64bit, prefetch));
@@ -439,7 +439,7 @@ err_t e1000if_init(struct netif* netif)
 
 	// transmit buffer length; NUM_TX_DESCRIPTORS 16-byte descriptors
 	e1000_write(e1000if->bar0, E1000_TDLEN , (uint32_t)(NUM_TX_DESCRIPTORS * sizeof(tx_desc_t)));
-	
+
 	// setup head and tail pointers
 	e1000_write(e1000if->bar0, E1000_TDH, 0);
 	e1000_write(e1000if->bar0, E1000_TDT, 0);
@@ -472,7 +472,7 @@ err_t e1000if_init(struct netif* netif)
 	tmp32 = 0;
 	for(tmp8=0; tmp8<2; tmp8++)
 		((uint8_t*) &tmp32)[tmp8] = netif->hwaddr[tmp8+4];
-	e1000_write(e1000if->bar0, E1000_RA+4, tmp32 | (1 << 31)); // set also AV bit to check incoming packets 
+	e1000_write(e1000if->bar0, E1000_RA+4, tmp32 | (1 << 31)); // set also AV bit to check incoming packets
 
 	/* Zero out the other receive addresses. */
 	for (tmp8=1; tmp8<16; tmp8++) {
diff --git a/drivers/net/mmnif.c b/drivers/net/mmnif.c
index 3ff08aa44..eab0284c6 100644
--- a/drivers/net/mmnif.c
+++ b/drivers/net/mmnif.c
@@ -594,7 +594,7 @@ err_t mmnif_init(struct netif *netif)
 		goto out;
 	}
 
-	err = vma_add((size_t)header_start_address, PAGE_FLOOR((size_t)header_start_address + ((nodes * header_size) >> PAGE_BITS)), VMA_READ|VMA_WRITE|VMA_CACHEABLE);
+	err = vma_add((size_t)header_start_address, PAGE_CEIL((size_t)header_start_address + ((nodes * header_size) >> PAGE_BITS)), VMA_READ|VMA_WRITE|VMA_CACHEABLE);
 	if (BUILTIN_EXPECT(err, 0)) {
 		LOG_ERROR("mmnif init(): vma_add failed for header_start_address %p\n", header_start_address);
 		goto out;
@@ -620,7 +620,7 @@ err_t mmnif_init(struct netif *netif)
 		goto out;
 	}
 
-	err = vma_add((size_t)heap_start_address, PAGE_FLOOR((size_t)heap_start_address + ((nodes * heap_size) >> PAGE_BITS)), VMA_READ|VMA_WRITE|VMA_CACHEABLE);
+	err = vma_add((size_t)heap_start_address, PAGE_CEIL((size_t)heap_start_address + ((nodes * heap_size) >> PAGE_BITS)), VMA_READ|VMA_WRITE|VMA_CACHEABLE);
 	if (BUILTIN_EXPECT(!heap_start_address, 0))
 	{
 		LOG_ERROR("mmnif init(): vma_add failed for heap_start_address %p\n", heap_start_address);
@@ -686,8 +686,6 @@ err_t mmnif_init(struct netif *netif)
 	/* maximum transfer unit */
 	netif->mtu = 1500;
 
-	/* broadcast capability, keep all default flags */
-	//netif->flags |= NETIF_FLAG_BROADCAST;
 	/* set link up */
 	netif->flags |= NETIF_FLAG_LINK_UP;
 
diff --git a/drivers/net/rtl8139.c b/drivers/net/rtl8139.c
index e3e5bd0e0..3358cfbcf 100644
--- a/drivers/net/rtl8139.c
+++ b/drivers/net/rtl8139.c
@@ -1,4 +1,4 @@
-/* 
+/*
  * Copyright 2010 Stefan Lankes, Chair for Operating Systems,
  *                               RWTH Aachen University
  *
@@ -68,7 +68,7 @@ typedef struct {
 	uint32_t device;
 } board_t;
 
-static board_t board_tbl[] = 
+static board_t board_tbl[] =
 {
 	{"RealTek", "RealTek RTL8139", 0x10ec, 0x8139},
 	{"RealTek", "RealTek RTL8129 Fast Ethernet", 0x10ec, 0x8129},
@@ -307,7 +307,7 @@ err_t rtl8139if_init(struct netif* netif)
 
 	tmp8 = 0;
 	while (board_tbl[tmp8].vendor_str) {
-		if (pci_get_device_info(board_tbl[tmp8].vendor, board_tbl[tmp8].device, &pci_info, 1) == 0)
+		if (pci_get_device_info(board_tbl[tmp8].vendor, board_tbl[tmp8].device, PCI_IGNORE_SUBID, &pci_info, 1) == 0)
 			break;
 		tmp8++;
 	}
@@ -388,8 +388,8 @@ err_t rtl8139if_init(struct netif* netif)
 	outportb(rtl8139if->iobase + CR, CR_RST);
 
 	/*
-	 * The RST bit must be checked to make sure that the chip has finished the reset. 
-	 * If the RST bit is high (1), then the reset is still in operation. 
+	 * The RST bit must be checked to make sure that the chip has finished the reset.
+	 * If the RST bit is high (1), then the reset is still in operation.
 	 */
 	udelay(10000);
 	tmp16 = 10000;
@@ -419,7 +419,7 @@ err_t rtl8139if_init(struct netif* netif)
 	outportb(rtl8139if->iobase + CONFIG1, 0);
 
 	// disable driver loaded and lanwake bits, turn driver loaded bit back on
-	outportb(rtl8139if->iobase + CONFIG1, 
+	outportb(rtl8139if->iobase + CONFIG1,
 		(inportb(rtl8139if->iobase + CONFIG1) & ~(CONFIG1_DVRLOAD | CONFIG1_LWACT)) | CONFIG1_DVRLOAD);
 
 	// unlock config register
@@ -430,7 +430,7 @@ err_t rtl8139if_init(struct netif* netif)
 	 * AB - Accept Broadcast: Accept broadcast packets sent to mac ff:ff:ff:ff:ff:ff
 	 * AM - Accept Multicast: Accept multicast packets.
 	 * APM - Accept Physical Match: Accept packets send to NIC's MAC address.
-	 * AAP - Accept All Packets. Accept all packets (run in promiscuous mode). 
+	 * AAP - Accept All Packets. Accept all packets (run in promiscuous mode).
 	 */
 	outportl(rtl8139if->iobase + RCR, RCR_MXDMA2|RCR_MXDMA1|RCR_MXDMA0|RCR_AB|RCR_AM|RCR_APM|RCR_AAP); // The WRAP bit isn't set!
 
@@ -456,7 +456,7 @@ err_t rtl8139if_init(struct netif* netif)
 	if (tmp16 & BMCR_SPD1000)
 		speed = 1000;
 	else if (tmp16 & BMCR_SPD100)
-		speed = 100; 
+		speed = 100;
 	else
 		speed = 10;
 	// Enable Receive and Transmitter
diff --git a/drivers/net/vioif.c b/drivers/net/vioif.c
new file mode 100644
index 000000000..68c6a345f
--- /dev/null
+++ b/drivers/net/vioif.c
@@ -0,0 +1,458 @@
+/*
+ * Copyright (c) 2017, Stefan Lankes, RWTH Aachen University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of the University nor the names of its contributors
+ *      may be used to endorse or promote products derived from this
+ *      software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <hermit/stddef.h>
+#include <hermit/stdio.h>
+#include <hermit/string.h>
+#include <hermit/processor.h>
+#include <hermit/mailbox.h>
+#include <hermit/logging.h>
+#include <hermit/virtio_net.h>
+#include <hermit/virtio_ring.h>
+#include <hermit/virtio_pci.h>
+#include <hermit/virtio_net.h>
+#include <asm/page.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/pci.h>
+#include <lwip/sys.h>
+#include <lwip/stats.h>
+#include <lwip/netif.h>
+#include <lwip/tcpip.h>
+#include <lwip/snmp.h>
+#include <lwip/ethip6.h>
+#include <netif/etharp.h>
+#include <net/vioif.h>
+
+#define VENDOR_ID 0x1AF4
+#define VIOIF_BUFFER_SIZE 0x2048	/* bytes reserved per descriptor buffer; NOTE(review): 0x2048 is 8264, was 2048 (0x800) intended? -- confirm */
+#define MIN(a, b)	((a) < (b) ? (a) : (b))	/* smaller of a and b; each argument may be evaluated twice */
+#define QUEUE_LIMIT 256
+
+/* NOTE: RX queue is 0, TX queue is 1 - Virtio Std. §5.1.2  */
+#define TX_NUM	1
+#define RX_NUM	0
+
+static struct netif* mynetif = NULL;
+
+static inline void vioif_enable_interrupts(virt_queue_t* vq)
+{
+	vq->vring.used->flags = 0;	/* NOTE(review): per the Virtio spec the driver-side interrupt suppression flag lives in avail->flags, while used->flags is written by the device -- confirm this has the intended effect */
+}
+
+static inline void vioif_disable_interrupts(virt_queue_t* vq)
+{
+	vq->vring.used->flags = 1;	/* NOTE(review): per the Virtio spec the driver-side interrupt suppression flag lives in avail->flags, while used->flags is written by the device -- confirm this has the intended effect */
+}
+
+/*
+ * Copies the pbuf chain p into a free TX buffer and notifies the device.
+ * @return error code
+ * - ERR_OK: packet handed over to the TX queue
+ * - ERR_IF: packet longer than 1792 bytes or no free TX descriptor
+ */
+static err_t vioif_output(struct netif* netif, struct pbuf* p)
+{
+	vioif_t* vioif = netif->state;
+	virt_queue_t* vq = &vioif->queues[TX_NUM];
+	struct pbuf *q;
+	uint32_t i;
+	uint16_t buffer_index;
+
+	if (BUILTIN_EXPECT(p->tot_len > 1792, 0)) {
+		LOG_ERROR("vioif_output: packet is longer than 1792 bytes\n");
+		return ERR_IF;
+	}
+
+	for(buffer_index=0; buffer_index<vq->vring.num; buffer_index++) {
+		if (!vq->vring.desc[buffer_index].len) {	/* a descriptor with len == 0 is treated as free */
+			LOG_DEBUG("vioif_output: buffer %u is free\n", buffer_index);
+			break;
+		}
+	}
+	LOG_DEBUG("vioif: found free buffer %d\n", buffer_index);
+
+	if (BUILTIN_EXPECT(buffer_index >= vq->vring.num, 0)) {	/* the search above found no free descriptor */
+		LOG_ERROR("vioif_output: too many packets at once\n");
+		return ERR_IF;
+	}
+
+#if ETH_PAD_SIZE
+	pbuf_header(p, -ETH_PAD_SIZE); /* drop the padding word */
+#endif
+
+	const size_t hdr_sz = sizeof(struct virtio_net_hdr);
+	// NOTE: packet is fully checksummed => all flags are set to zero
+	memset((void*) (vq->virt_buffer + buffer_index * VIOIF_BUFFER_SIZE), 0x00, hdr_sz);
+
+	vq->vring.desc[buffer_index].addr = vq->phys_buffer + buffer_index * VIOIF_BUFFER_SIZE;
+	vq->vring.desc[buffer_index].len = p->tot_len + hdr_sz;
+	vq->vring.desc[buffer_index].flags = 0;
+	// we send only one buffer because it is large enough for our packet
+	vq->vring.desc[buffer_index].next = 0; //(buffer_index+1) % vq->vring.num;
+
+
+	/*
+	 * q traverses the linked list of pbufs; the payloads are copied
+	 * back to back behind the virtio_net_hdr of the chosen buffer
+	 */
+	for (q = p, i = 0; q != 0; q = q->next) {
+		memcpy((void*) (vq->virt_buffer + hdr_sz + buffer_index * VIOIF_BUFFER_SIZE + i), q->payload, q->len);
+		i += q->len;
+	}
+
+	// Add it to the available ring
+	uint16_t index = vq->vring.avail->idx % vq->vring.num;
+	vq->vring.avail->ring[index] = buffer_index;
+
+	// be sure that everything is written
+	mb();
+
+	vq->vring.avail->idx++;
+
+	// be sure that everything is written
+	mb();
+
+	/*
+	 * Notify the changes
+	 * NOTE: RX queue is 0, TX queue is 1 - Virtio Std. §5.1.2
+	 */
+    outportw(vioif->iobase+VIRTIO_PCI_QUEUE_NOTIFY, TX_NUM);
+
+#if ETH_PAD_SIZE
+	pbuf_header(p, ETH_PAD_SIZE); /* reclaim the padding word */
+#endif
+
+	LINK_STATS_INC(link.xmit);
+
+	return ERR_OK;
+}
+
+static void vioif_rx_inthandler(struct netif* netif)
+{
+	vioif_t* vioif = mynetif->state;
+	virt_queue_t* vq = &vioif->queues[RX_NUM];
+	/* drain the RX used ring: copy each received buffer into a pbuf, pass it to netif->input and return the buffer to the available ring */
+	while(vq->last_seen_used != vq->vring.used->idx)
+	{
+		const size_t hdr_sz = sizeof(struct virtio_net_hdr);
+		struct vring_used_elem* used = &vq->vring.used->ring[vq->last_seen_used % vq->vring.num];
+		struct virtio_net_hdr* hdr = (struct virtio_net_hdr*) (vq->virt_buffer + used->id * VIOIF_BUFFER_SIZE);
+
+		LOG_DEBUG("vq->vring.used->idx %d, vq->vring.used->flags %d, vq->last_seen_used %d\n", vq->vring.used->idx, vq->vring.used->flags, vq->last_seen_used);
+		LOG_DEBUG("used id %d, len %d\n", used->id, used->len);
+		LOG_DEBUG("hdr len %d, flags %d\n", hdr->hdr_len, hdr->flags);
+
+		struct pbuf* p = pbuf_alloc(PBUF_RAW, used->len, PBUF_POOL);	/* NOTE(review): used->len presumably counts the virtio_net_hdr too, yet the copy below starts hdr_sz bytes into the buffer -- confirm the pbuf length */
+		if (p) {
+			uint16_t pos;
+			struct pbuf* q;
+
+#if ETH_PAD_SIZE
+			pbuf_header(p, -ETH_PAD_SIZE); /* drop the padding word */
+#endif
+			for(q=p, pos=0; q!=NULL; q=q->next) {
+				memcpy((uint8_t*) q->payload,
+					(uint8_t*) (vq->virt_buffer + hdr_sz + used->id * VIOIF_BUFFER_SIZE + pos),
+					q->len);
+				pos += q->len;
+			}
+#if ETH_PAD_SIZE
+			pbuf_header(p, ETH_PAD_SIZE); /* reclaim the padding word */
+#endif
+			LINK_STATS_INC(link.recv);
+
+			// forward packet to LwIP
+			netif->input(p, netif);
+		} else {
+			LOG_ERROR("vioif_rx_inthandler: not enough memory!\n");
+			LINK_STATS_INC(link.memerr);
+			LINK_STATS_INC(link.drop);
+			goto oom;	/* last_seen_used is not advanced, so this entry is seen again on the next call */
+		}
+
+		vq->vring.avail->ring[vq->vring.avail->idx % vq->vring.num] = used->id;	/* put the buffer back into the available ring */
+		vq->vring.avail->idx++;
+		vq->last_seen_used++;
+	}
+
+oom:
+	vioif->polling = 0;	/* allows vioif_handler to trigger the next poll */
+	vioif_enable_interrupts(vq);
+	mb();
+}
+
+
+/* Processes the RX queue of mynetif; called in the context of the tcpip thread or, with NO_SYS, directly from the irq handler. ctx is unused. */
+static void vioif_poll(void* ctx)
+{
+	vioif_rx_inthandler(mynetif);
+}
+
+static void vioif_handler(struct state* s)
+{
+	vioif_t* vioif = mynetif->state;
+	/* IRQ handler: acknowledge the interrupt, release consumed TX buffers and trigger RX processing; s is unused */
+	LOG_DEBUG("vioif: receive interrupt\n");
+
+	// reset interrupt by reading the isr port
+	uint8_t isr = inportb(vioif->iobase+VIRTIO_PCI_ISR);
+
+	// did we receive an interrupt for this device?
+	if (!(isr & 0x01))
+		return;
+
+	// free TX queue: release every descriptor listed in the used ring
+	virt_queue_t* vq = &vioif->queues[TX_NUM];
+	vioif_disable_interrupts(vq);
+	while(vq->last_seen_used != vq->vring.used->idx)
+	{
+		struct vring_used_elem* used = &vq->vring.used->ring[vq->last_seen_used % vq->vring.num];
+		LOG_DEBUG("consumed TX elements: index %u, len %u\n", used->id, used->len);
+		// mark as free (vioif_output treats len == 0 as an unused descriptor)
+		vq->vring.desc[used->id].len = 0;
+		vq->last_seen_used++;
+	}
+	vioif_enable_interrupts(vq);
+	mb();
+
+	// check RX queue
+	vq = &vioif->queues[RX_NUM];
+	vioif_disable_interrupts(vq);
+	if (!vioif->polling && (vq->last_seen_used != vq->vring.used->idx))
+	{
+#if NO_SYS
+		vioif_poll(NULL);
+#else
+		if (tcpip_callback_with_block(vioif_poll, NULL, 0) == ERR_OK) {
+			vioif->polling = 1;
+		} else {
+			LOG_ERROR("vioif_handler: unable to send a poll request to the tcpip thread\n");
+			vioif_enable_interrupts(vq);	// no poll is pending => undo the disable above
+		}
+#endif
+	} else vioif_enable_interrupts(vq);
+	mb();
+}
+
+static int vioif_queue_setup(vioif_t* dev)
+{
+	virt_queue_t* vq;
+	uint32_t total_size;
+	unsigned int num;
+	/* for each queue: read its size, allocate ring and packet buffers, register the ring with the device; returns 0 or -1 */
+	for (uint32_t index=0; index<VIOIF_NUM_QUEUES; index++) {
+		vq = &dev->queues[index];
+		memset(vq, 0x00, sizeof(virt_queue_t));
+
+		// determine queue size
+		outportw(dev->iobase+VIRTIO_PCI_QUEUE_SEL, index);
+		num = inportw(dev->iobase+VIRTIO_PCI_QUEUE_NUM);
+		if (!num) return -1;
+
+		LOG_INFO("vioif: queue_size %u (index %u)\n", num, index);
+
+		total_size = vring_size(num, PAGE_SIZE);
+
+		// allocate and init memory for the virtual queue
+		void* vring_base = page_alloc(total_size, VMA_READ|VMA_WRITE|VMA_CACHEABLE);
+		if (BUILTIN_EXPECT(!vring_base, 0)) {
+			LOG_ERROR("Not enough memory to create queue %u\n", index);
+			return -1;
+		}
+		memset((void*)vring_base, 0x00, total_size);
+		vring_init(&vq->vring, num, vring_base, PAGE_SIZE);
+
+		if (num > QUEUE_LIMIT) {
+			vq->vring.num = num = QUEUE_LIMIT;
+			LOG_INFO("vioif: set queue limit to %u (index %u)\n", vq->vring.num, index);
+		}
+
+		vq->virt_buffer = (uint64_t) page_alloc(num*VIOIF_BUFFER_SIZE, VMA_READ|VMA_WRITE|VMA_CACHEABLE);
+		if (BUILTIN_EXPECT(!vq->virt_buffer, 0)) {
+			LOG_ERROR("Not enough memory to create buffer %u\n", index);
+			page_free(vring_base, total_size);	// do not leak the ring allocated above
+			return -1;
+		}
+		vq->phys_buffer = virt_to_phys(vq->virt_buffer);
+
+		for(unsigned int i=0; i<num; i++) {
+			vq->vring.desc[i].addr = vq->phys_buffer + i * VIOIF_BUFFER_SIZE;
+			if (index == RX_NUM) {
+				/* NOTE: RX queue is 0, TX queue is 1 - Virtio Std. §5.1.2  */
+				vq->vring.desc[i].len = VIOIF_BUFFER_SIZE;
+				vq->vring.desc[i].flags = VRING_DESC_F_WRITE;
+				vq->vring.avail->ring[vq->vring.avail->idx % num] = i;
+				vq->vring.avail->idx++;
+			}
+		}
+
+		// register buffer
+		outportw(dev->iobase+VIRTIO_PCI_QUEUE_SEL, index);
+		outportl(dev->iobase+VIRTIO_PCI_QUEUE_PFN, virt_to_phys((size_t) vring_base) >> PAGE_BITS);
+	}
+
+	return 0;
+}
+
+err_t vioif_init(struct netif* netif)
+{
+	static uint8_t num = 0;
+	vioif_t* vioif;
+	pci_info_t pci_info;
+	int i;
+	/* probe for a virtio network device, negotiate features, set up the queues and fill in netif; returns ERR_OK, ERR_ARG or ERR_MEM */
+	LWIP_ASSERT("netif != NULL", (netif != NULL));
+
+	// the Virtio spec assigns the PCI device IDs 0x1000 - 0x103F to transitional devices
+	for(i=0x1000; i<=0x103F; i++) {
+		if ((pci_get_device_info(VENDOR_ID, i, 1, &pci_info, 1) == 0)) {
+			LOG_INFO("Found vioif (Vendor ID 0x%x, Device Id 0x%x)\n", VENDOR_ID, i);
+			break;
+		}
+	}
+	if (i > 0x103F)
+		return ERR_ARG;
+
+	vioif = kmalloc(sizeof(vioif_t));
+	if (!vioif) {
+		LOG_ERROR("vioif_init: out of memory\n");
+		return ERR_MEM;
+	}
+	memset(vioif, 0x00, sizeof(vioif_t));
+
+	vioif->iomem = pci_info.base[1];
+	vioif->iobase = pci_info.base[0];
+	vioif->irq = pci_info.irq;
+	LOG_INFO("vioif uses IRQ %d and IO port 0x%x, IO men 0x%x\n", (int32_t) vioif->irq, vioif->iobase, vioif->iomem);
+
+	// reset interface
+	outportb(vioif->iobase + VIRTIO_PCI_STATUS, 0);
+	LOG_INFO("vioif status: 0x%x\n", (uint32_t) inportb(vioif->iobase + VIRTIO_PCI_STATUS));
+
+	// tell the device that we have noticed it
+	outportb(vioif->iobase + VIRTIO_PCI_STATUS, VIRTIO_CONFIG_S_ACKNOWLEDGE);
+	// tell the device that we will support it.
+	outportb(vioif->iobase + VIRTIO_PCI_STATUS, VIRTIO_CONFIG_S_ACKNOWLEDGE|VIRTIO_CONFIG_S_DRIVER);
+
+	LOG_INFO("host features 0x%x\n", inportl(vioif->iobase + VIRTIO_PCI_HOST_FEATURES));
+
+	uint32_t features = inportl(vioif->iobase + VIRTIO_PCI_HOST_FEATURES);
+	uint32_t required = (1UL << VIRTIO_NET_F_MAC) | (1UL << VIRTIO_NET_F_STATUS);
+
+	if ((features & required) != required) {
+		LOG_ERROR("Host isn't able to fulfill HermireCore's requirements\n");
+		outportb(vioif->iobase + VIRTIO_PCI_STATUS, VIRTIO_CONFIG_S_FAILED);
+		kfree(vioif);
+		return ERR_ARG;
+	}
+
+	// accept everything the host offers, except the features masked out below
+	required = features;
+	required &= ~(1UL << VIRTIO_NET_F_CTRL_VQ);
+	required &= ~(1UL << VIRTIO_NET_F_GUEST_TSO4);
+	required &= ~(1UL << VIRTIO_NET_F_GUEST_TSO6);
+	required &= ~(1UL << VIRTIO_NET_F_GUEST_UFO);
+	required &= ~(1UL << VIRTIO_RING_F_EVENT_IDX);
+	required &= ~(1UL << VIRTIO_NET_F_MRG_RXBUF);
+	required &= ~(1UL << VIRTIO_NET_F_MQ);
+	LOG_INFO("wanted guest features 0x%x\n", required);
+	outportl(vioif->iobase + VIRTIO_PCI_GUEST_FEATURES, required);
+	vioif->features = inportl(vioif->iobase + VIRTIO_PCI_GUEST_FEATURES);
+	LOG_INFO("current guest features 0x%x\n", vioif->features);
+
+	// tell the device that the features are OK
+	outportb(vioif->iobase + VIRTIO_PCI_STATUS, VIRTIO_CONFIG_S_ACKNOWLEDGE|VIRTIO_CONFIG_S_DRIVER|VIRTIO_CONFIG_S_FEATURES_OK);
+
+	// check if the host accepts these features
+	uint8_t status = inportb(vioif->iobase + VIRTIO_PCI_STATUS);
+	if (!(status & VIRTIO_CONFIG_S_FEATURES_OK)) {
+		LOG_ERROR("device features are ignored: status 0x%x\n", (uint32_t) status);
+		outportb(vioif->iobase + VIRTIO_PCI_STATUS, VIRTIO_CONFIG_S_FAILED);
+		kfree(vioif);
+		return ERR_ARG;
+	}
+
+	/* hardware address length */
+	netif->hwaddr_len = ETHARP_HWADDR_LEN;
+
+	// determine the mac address of this card
+	LWIP_DEBUGF(NETIF_DEBUG, ("vioif_init: MAC address "));
+	for (uint8_t tmp8=0; tmp8<ETHARP_HWADDR_LEN; tmp8++) {
+		netif->hwaddr[tmp8] = inportb(vioif->iobase + VIRTIO_PCI_CONFIG_OFF(vioif->msix_enabled) + tmp8);
+		LWIP_DEBUGF(NETIF_DEBUG, ("%02x ", netif->hwaddr[tmp8]));
+	}
+	LWIP_DEBUGF(NETIF_DEBUG, ("\n"));
+
+	// Setup virt queues
+	if (BUILTIN_EXPECT(vioif_queue_setup(vioif) < 0, 0)) {
+		outportb(vioif->iobase + VIRTIO_PCI_STATUS, VIRTIO_CONFIG_S_FAILED);
+		kfree(vioif);
+		return ERR_ARG;
+	}
+
+	netif->state = vioif;
+	mynetif = netif;
+
+	irq_install_handler(vioif->irq+32, vioif_handler);
+
+	/*
+	 * Initialize the snmp variables and counters inside the struct netif.
+	 * The last argument should be replaced with your link speed, in units
+	 * of bits per second.
+	 */
+	NETIF_INIT_SNMP(netif, snmp_ifType_ethernet_csmacd, 1000);
+
+	/* administrative details */
+	netif->name[0] = 'e';
+	netif->name[1] = 'n';
+	netif->num = num;
+	num++;
+	/* downward functions */
+	netif->output = etharp_output;
+	netif->linkoutput = vioif_output;
+	/* set maximum transfer unit
+	 * Google Compute Platform supports only a MTU of 1460
+	 */
+	netif->mtu = 1460;
+	/* broadcast capability */
+	netif->flags |= NETIF_FLAG_BROADCAST | NETIF_FLAG_ETHARP | NETIF_FLAG_IGMP | NETIF_FLAG_LINK_UP | NETIF_FLAG_MLD6;
+#if LWIP_IPV6
+	netif->output_ip6 = ethip6_output;
+	netif_create_ip6_linklocal_address(netif, 1);
+	netif->ip6_autoconfig_enabled = 1;
+#endif
+
+	// tell the device that the driver is initialized
+	outportb(vioif->iobase + VIRTIO_PCI_STATUS, VIRTIO_CONFIG_S_ACKNOWLEDGE|VIRTIO_CONFIG_S_DRIVER|VIRTIO_CONFIG_S_DRIVER_OK|VIRTIO_CONFIG_S_FEATURES_OK);
+
+	LOG_INFO("vioif status: 0x%x\n", (uint32_t) inportb(vioif->iobase + VIRTIO_PCI_STATUS));
+	LOG_INFO("vioif link is %s\n",
+		inportl(vioif->iobase + VIRTIO_PCI_CONFIG_OFF(vioif->msix_enabled) + ETHARP_HWADDR_LEN) & VIRTIO_NET_S_LINK_UP ? "up" : "down");
+
+	return ERR_OK;
+}
diff --git a/drivers/net/vioif.h b/drivers/net/vioif.h
new file mode 100644
index 000000000..20dc9e82d
--- /dev/null
+++ b/drivers/net/vioif.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2017, Stefan Lankes, RWTH Aachen University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of the University nor the names of its contributors
+ *      may be used to endorse or promote products derived from this
+ *      software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __NET_VIOIF_H__
+#define __NET_VIOIF_H__
+
+#include <hermit/stddef.h>
+#include <hermit/virtio_ring.h>
+
+#define VIOIF_NUM_QUEUES	2
+
+typedef struct	/* per-virtqueue driver state; vioif_t holds VIOIF_NUM_QUEUES of these */
+{
+	struct vring vring;	/* ring bookkeeping; type presumably comes from <hermit/virtio_ring.h> */
+	uint64_t virt_buffer;	/* presumably the virtual address of this queue's buffer area -- TODO confirm in vioif.c */
+	uint64_t phys_buffer;	/* presumably the physical address of the same buffer area -- TODO confirm */
+	uint16_t last_seen_used;	/* presumably the last used-ring index the driver has processed -- TODO confirm */
+} virt_queue_t;
+
+/*
+ * Private per-interface state of the virtio network driver; vioif_init() stores a pointer to it in netif->state.
+ */
+typedef struct vioif {
+	struct eth_addr *ethaddr;	/* presumably the interface's MAC address -- TODO confirm against vioif.c */
+	/* Add whatever per-interface state that is needed here. */
+	uint32_t		iomem;	/* NOTE(review): usage not visible here; presumably a memory-mapped device address */
+	uint32_t		iobase;	/* I/O port base; registers are accessed as iobase + VIRTIO_PCI_* via inportb/outportb */
+	uint32_t		features;	/* presumably the feature bits negotiated with the device -- TODO confirm */
+	uint8_t			msix_enabled;	/* passed to VIRTIO_PCI_CONFIG_OFF() to locate the device config space */
+	uint8_t			irq;	/* device IRQ; the handler is installed via irq_install_handler(irq+32, ...) */
+	uint8_t			polling;	/* NOTE(review): presumably a flag selecting polling instead of interrupts -- confirm */
+	virt_queue_t	queues[VIOIF_NUM_QUEUES];	/* one entry per virtqueue (presumably receive and transmit) */
+} vioif_t;
+
+/*
+ * Initialize the network driver for the virtio network interface `netif`.
+ */
+err_t vioif_init(struct netif* netif);	/* ERR_OK on success; ERR_ARG if virtqueue setup fails (other error codes may exist) */
+
+#endif
diff --git a/include/hermit/malloc.h b/include/hermit/malloc.h
index 8537078f0..fbdf4fa98 100644
--- a/include/hermit/malloc.h
+++ b/include/hermit/malloc.h
@@ -54,7 +54,7 @@ union buddy;
 /** @brief Buddy
  *
  * Every free memory block is stored in a linked list according to its size.
- *  We can use this free memory to store store this buddy_t union which represents
+ *  We can use this free memory to store this buddy_t union which represents
  *  this block (the buddy_t union is alligned to the front).
  *  Therefore the address of the buddy_t union is equal with the address
  *  of the underlying free memory block.
@@ -71,8 +71,6 @@ typedef union buddy {
 		uint8_t exponent;
 		/// Must be equal to BUDDY_MAGIC for a valid memory block
 		uint16_t magic;
-		/// padding to gurantee a sizeof 32Byte
-		uint8_t padding[28];
 	} prefix;
 } buddy_t;
 
@@ -84,4 +82,3 @@ void buddy_dump(void);
 #endif
 
 #endif
-
diff --git a/include/hermit/spinlock.h b/include/hermit/spinlock.h
index fbfba6519..cd44b1116 100644
--- a/include/hermit/spinlock.h
+++ b/include/hermit/spinlock.h
@@ -38,6 +38,7 @@
 #include <hermit/spinlock_types.h>
 #include <hermit/tasks_types.h>
 #include <hermit/errno.h>
+#include <hermit/tasks.h>
 #include <asm/atomic.h>
 #include <asm/processor.h>
 #include <asm/irqflags.h>
@@ -59,8 +60,8 @@ inline static int spinlock_init(spinlock_t* s) {
 	if (BUILTIN_EXPECT(!s, 0))
 		return -EINVAL;
 
-	atomic_int32_set(&s->queue, 0);
-	atomic_int32_set(&s->dequeue, 1);
+	atomic_int64_set(&s->queue, 0);
+	atomic_int64_set(&s->dequeue, 1);
 	s->owner = MAX_TASKS;
 	s->counter = 0;
 
@@ -68,7 +69,7 @@ inline static int spinlock_init(spinlock_t* s) {
 }
 
 /** @brief Destroy spinlock after use
- * @return 
+ * @return
  * - 0 on success
  * - -EINVAL (-22) on failure
  */
@@ -82,13 +83,13 @@ inline static int spinlock_destroy(spinlock_t* s) {
 	return 0;
 }
 
-/** @brief Lock spinlock at entry of critical section 
+/** @brief Lock spinlock at entry of critical section
  * @return
  * - 0 on success
  * - -EINVAL (-22) on failure
  */
 inline static int spinlock_lock(spinlock_t* s) {
-	int32_t ticket;
+	int64_t ticket;
 	task_t* curr_task;
 
 	if (BUILTIN_EXPECT(!s, 0))
@@ -100,22 +101,18 @@ inline static int spinlock_lock(spinlock_t* s) {
 		return 0;
 	}
 
-#if 1
-	ticket = atomic_int32_inc(&s->queue);
-	while(atomic_int32_read(&s->dequeue) != ticket) {
+	ticket = atomic_int64_inc(&s->queue);
+	while(atomic_int64_read(&s->dequeue) != ticket) {
 		PAUSE;
 	}
 	s->owner = curr_task->id;
 	s->counter = 1;
-#else
-	while( atomic_int32_test_and_set(&s->dequeue,0) );
-#endif
 
 	return 0;
 }
 
-/** @brief Unlock spinlock on exit of critical section 
- * @return 
+/** @brief Unlock spinlock on exit of critical section
+ * @return
  * - 0 on success
  * - -EINVAL (-22) on failure
  */
@@ -126,11 +123,7 @@ inline static int spinlock_unlock(spinlock_t* s) {
 	s->counter--;
 	if (!s->counter) {
 		s->owner = MAX_TASKS;
-#if 1
-		atomic_int32_inc(&s->dequeue);
-#else
-		atomic_int32_set(&s->dequeue,1);
-#endif
+		atomic_int64_inc(&s->dequeue);
 	}
 
 	return 0;
@@ -140,7 +133,7 @@ inline static int spinlock_unlock(spinlock_t* s) {
  *
  * Initialize each irqsave spinlock before use!
  *
- * @return 
+ * @return
  * - 0 on success
  * - -EINVAL (-22) on failure
  */
@@ -148,8 +141,8 @@ inline static int spinlock_irqsave_init(spinlock_irqsave_t* s) {
 	if (BUILTIN_EXPECT(!s, 0))
 		return -EINVAL;
 
-	atomic_int32_set(&s->queue, 0);
-	atomic_int32_set(&s->dequeue, 1);
+	atomic_int64_set(&s->queue, 0);
+	atomic_int64_set(&s->dequeue, 1);
 	s->flags = 0;
 	s->coreid = (uint32_t)-1;
 	s->counter = 0;
@@ -158,7 +151,7 @@ inline static int spinlock_irqsave_init(spinlock_irqsave_t* s) {
 }
 
 /** @brief Destroy irqsave spinlock after use
- * @return 
+ * @return
  * - 0 on success
  * - -EINVAL (-22) on failure
  */
@@ -174,13 +167,13 @@ inline static int spinlock_irqsave_destroy(spinlock_irqsave_t* s) {
 }
 
 /** @brief Lock spinlock on entry of critical section and disable interrupts
- * @return 
+ * @return
  * - 0 on success
  * - -EINVAL (-22) on failure
  */
 inline static int spinlock_irqsave_lock(spinlock_irqsave_t* s) {
+	int64_t ticket;
 	uint8_t flags;
-	int32_t ticket;
 
 	if (BUILTIN_EXPECT(!s, 0))
 		return -EINVAL;
@@ -191,8 +184,8 @@ inline static int spinlock_irqsave_lock(spinlock_irqsave_t* s) {
 		return 0;
 	}
 
-	ticket = atomic_int32_inc(&s->queue);
-	while (atomic_int32_read(&s->dequeue) != ticket) {
+	ticket = atomic_int64_inc(&s->queue);
+	while (atomic_int64_read(&s->dequeue) != ticket) {
 		PAUSE;
 	}
 
@@ -204,7 +197,7 @@ inline static int spinlock_irqsave_lock(spinlock_irqsave_t* s) {
 }
 
 /** @brief Unlock spinlock on exit of critical section and re-enable interrupts
- * @return 
+ * @return
  * - 0 on success
  * - -EINVAL (-22) on failure
  */
@@ -220,7 +213,7 @@ inline static int spinlock_irqsave_unlock(spinlock_irqsave_t* s) {
 		s->coreid = (uint32_t) -1;
 		s->flags = 0;
 
-		atomic_int32_inc(&s->dequeue);
+		atomic_int64_inc(&s->dequeue);
 
 		irq_nested_enable(flags);
 	}
diff --git a/include/hermit/spinlock_types.h b/include/hermit/spinlock_types.h
index ba61a6a2b..dd7873fe1 100644
--- a/include/hermit/spinlock_types.h
+++ b/include/hermit/spinlock_types.h
@@ -44,9 +44,9 @@ extern "C" {
 /** @brief Spinlock structure */
 typedef struct spinlock {
 	/// Internal queue
-	atomic_int32_t queue;
-	/// Internal dequeue 
-	atomic_int32_t dequeue;
+	atomic_int64_t queue;
+	/// Internal dequeue
+	atomic_int64_t dequeue;
 	/// Owner of this spinlock structure
 	tid_t owner;
 	/// Internal counter var
@@ -55,9 +55,9 @@ typedef struct spinlock {
 
 typedef struct spinlock_irqsave {
 	/// Internal queue
-	atomic_int32_t queue;
+	atomic_int64_t queue;
 	/// Internal dequeue
-	atomic_int32_t dequeue;
+	atomic_int64_t dequeue;
 	/// Core Id of the lock owner
 	uint32_t coreid;
 	/// Internal counter var
diff --git a/include/hermit/stddef.h b/include/hermit/stddef.h
index d62cab7a1..818ad5fc8 100644
--- a/include/hermit/stddef.h
+++ b/include/hermit/stddef.h
@@ -48,7 +48,7 @@ extern const size_t image_size;
 #define TIMER_FREQ	100 /* in HZ */
 #define CLOCK_TICK_RATE	1193182 /* 8254 chip's internal oscillator frequency */
 #define CACHE_LINE	64
-#define HEAP_START	(PAGE_2M_FLOOR((size_t)&kernel_start + image_size) + 4*PAGE_SIZE)
+#define HEAP_START	(PAGE_2M_CEIL((size_t)&kernel_start + image_size) + 4*PAGE_SIZE)
 #define HEAP_SIZE	(1ULL << 32)
 #define KMSG_SIZE	0x1000
 #define INT_SYSCALL	0x80
@@ -83,9 +83,6 @@ typedef unsigned int tid_t;
 struct task;
 DECLARE_PER_CORE(struct task*, current_task);
 
-/* allows fast access to the kernel stack */
-DECLARE_PER_CORE(char*, kernel_stack);
-
 #if MAX_CORES > 1
 /* allows fast access to the core id */
 DECLARE_PER_CORE(uint32_t, __core_id);
diff --git a/include/hermit/tasks.h b/include/hermit/tasks.h
index c32a120b9..9842bd7aa 100644
--- a/include/hermit/tasks.h
+++ b/include/hermit/tasks.h
@@ -166,6 +166,14 @@ void reschedule(void);
  */
 int wakeup_task(tid_t);
 
+/** @brief Wake up a core
+ *
+ * Wake up the given core to make sure
+ * that it is not in the halt state
+ *
+ * @param core_id ID of the core to wake up
+ */
+void wakeup_core(uint32_t core_id);
 
 /** @brief Block current task
  *
diff --git a/include/hermit/virtio_config.h b/include/hermit/virtio_config.h
new file mode 100644
index 000000000..203ed7878
--- /dev/null
+++ b/include/hermit/virtio_config.h
@@ -0,0 +1,64 @@
+#ifndef __VIRTIO_CONFIG_H
+#define __VIRTIO_CONFIG_H
+/* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so
+ * anyone can use the definitions to implement compatible drivers/servers.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE. */
+
+/* Virtio devices use a standardized configuration space to define their
+ * features and pass configuration information, but each implementation can
+ * store and access that space differently. */
+#include <hermit/stddef.h>
+
+/* Status byte for guest to report progress, and synchronize features. */
+/* We have seen device and processed generic fields (VIRTIO_CONFIG_F_VIRTIO) */
+#define VIRTIO_CONFIG_S_ACKNOWLEDGE	1
+/* We have found a driver for the device. */
+#define VIRTIO_CONFIG_S_DRIVER		2
+/* Driver has used its parts of the config, and is happy */
+#define VIRTIO_CONFIG_S_DRIVER_OK	4
+/* Driver has finished configuring features */
+#define VIRTIO_CONFIG_S_FEATURES_OK	8
+/* We've given up on this device. */
+#define VIRTIO_CONFIG_S_FAILED		0x80
+
+/* Some virtio feature bits (currently bits 28 through 32) are reserved for the
+ * transport being used (eg. virtio_ring), the rest are per-device feature
+ * bits. */
+#define VIRTIO_TRANSPORT_F_START	28
+#define VIRTIO_TRANSPORT_F_END		33
+
+#ifndef VIRTIO_CONFIG_NO_LEGACY
+/* Do we get callbacks when the ring is completely used, even if we've
+ * suppressed them? */
+#define VIRTIO_F_NOTIFY_ON_EMPTY	24
+
+/* Can the device handle any descriptor layout? */
+#define VIRTIO_F_ANY_LAYOUT		27
+#endif /* VIRTIO_CONFIG_NO_LEGACY */
+
+/* v1.0 compliant. */
+#define VIRTIO_F_VERSION_1		32
+
+#endif /* __VIRTIO_CONFIG_H */
diff --git a/include/hermit/virtio_ids.h b/include/hermit/virtio_ids.h
new file mode 100644
index 000000000..4ff9b038c
--- /dev/null
+++ b/include/hermit/virtio_ids.h
@@ -0,0 +1,45 @@
+#ifndef __VIRTIO_IDS_H
+#define __VIRTIO_IDS_H
+/*
+ * Virtio IDs
+ *
+ * This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE. */
+
+#define VIRTIO_ID_NET		1 /* virtio net */
+#define VIRTIO_ID_BLOCK		2 /* virtio block */
+#define VIRTIO_ID_CONSOLE	3 /* virtio console */
+#define VIRTIO_ID_RNG		4 /* virtio rng */
+#define VIRTIO_ID_BALLOON	5 /* virtio balloon */
+#define VIRTIO_ID_RPMSG		7 /* virtio remote processor messaging */
+#define VIRTIO_ID_SCSI		8 /* virtio scsi */
+#define VIRTIO_ID_9P		9 /* 9p virtio console */
+#define VIRTIO_ID_RPROC_SERIAL 11 /* virtio remoteproc serial link */
+#define VIRTIO_ID_CAIF	       12 /* Virtio caif */
+#define VIRTIO_ID_GPU          16 /* virtio GPU */
+#define VIRTIO_ID_INPUT        18 /* virtio input */
+
+#endif /* __VIRTIO_IDS_H */
diff --git a/include/hermit/virtio_net.h b/include/hermit/virtio_net.h
new file mode 100644
index 000000000..21d2cc52c
--- /dev/null
+++ b/include/hermit/virtio_net.h
@@ -0,0 +1,245 @@
+#ifndef __VIRTIO_NET_H
+#define __VIRTIO_NET_H
+/* This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE. */
+#include <hermit/stdlib.h>
+#include <hermit/virtio_ids.h>
+#include <hermit/virtio_config.h>
+#include <hermit/virtio_types.h>
+#include <netif/etharp.h>
+
+/* The feature bitmap for virtio net */
+#define VIRTIO_NET_F_CSUM	0	/* Host handles pkts w/ partial csum */
+#define VIRTIO_NET_F_GUEST_CSUM	1	/* Guest handles pkts w/ partial csum */
+#define VIRTIO_NET_F_CTRL_GUEST_OFFLOADS 2 /* Dynamic offload configuration. */
+#define VIRTIO_NET_F_MAC	5	/* Host has given MAC address. */
+#define VIRTIO_NET_F_GUEST_TSO4	7	/* Guest can handle TSOv4 in. */
+#define VIRTIO_NET_F_GUEST_TSO6	8	/* Guest can handle TSOv6 in. */
+#define VIRTIO_NET_F_GUEST_ECN	9	/* Guest can handle TSO[6] w/ ECN in. */
+#define VIRTIO_NET_F_GUEST_UFO	10	/* Guest can handle UFO in. */
+#define VIRTIO_NET_F_HOST_TSO4	11	/* Host can handle TSOv4 in. */
+#define VIRTIO_NET_F_HOST_TSO6	12	/* Host can handle TSOv6 in. */
+#define VIRTIO_NET_F_HOST_ECN	13	/* Host can handle TSO[6] w/ ECN in. */
+#define VIRTIO_NET_F_HOST_UFO	14	/* Host can handle UFO in. */
+#define VIRTIO_NET_F_MRG_RXBUF	15	/* Host can merge receive buffers. */
+#define VIRTIO_NET_F_STATUS	16	/* virtio_net_config.status available */
+#define VIRTIO_NET_F_CTRL_VQ	17	/* Control channel available */
+#define VIRTIO_NET_F_CTRL_RX	18	/* Control channel RX mode support */
+#define VIRTIO_NET_F_CTRL_VLAN	19	/* Control channel VLAN filtering */
+#define VIRTIO_NET_F_CTRL_RX_EXTRA 20	/* Extra RX mode control support */
+#define VIRTIO_NET_F_GUEST_ANNOUNCE 21	/* Guest can announce device on the
+					 * network */
+#define VIRTIO_NET_F_MQ	22	/* Device supports Receive Flow
+					 * Steering */
+#define VIRTIO_NET_F_CTRL_MAC_ADDR 23	/* Set MAC address */
+
+#ifndef VIRTIO_NET_NO_LEGACY
+#define VIRTIO_NET_F_GSO	6	/* Host handles pkts w/ any GSO type */
+#endif /* VIRTIO_NET_NO_LEGACY */
+
+#define VIRTIO_NET_S_LINK_UP	1	/* Link is up */
+#define VIRTIO_NET_S_ANNOUNCE	2	/* Announcement is needed */
+
+struct virtio_net_config {
+	/* The config defining mac address (if VIRTIO_NET_F_MAC) */
+	__u8 mac[ETHARP_HWADDR_LEN];
+	/* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */
+	__u16 status;
+	/* Maximum number of each of transmit and receive queues;
+	 * see VIRTIO_NET_F_MQ and VIRTIO_NET_CTRL_MQ.
+	 * Legal values are between 1 and 0x8000
+	 */
+	__u16 max_virtqueue_pairs;
+} __attribute__((packed));
+
+/*
+ * This header comes first in the scatter-gather list.  If you don't
+ * specify GSO or CSUM features, you can simply ignore the header.
+ *
+ * This is bitwise-equivalent to the legacy struct virtio_net_hdr_mrg_rxbuf,
+ * only flattened.
+ */
+struct virtio_net_hdr_v1 {
+#define VIRTIO_NET_HDR_F_NEEDS_CSUM	1	/* Use csum_start, csum_offset */
+#define VIRTIO_NET_HDR_F_DATA_VALID	2	/* Csum is valid */
+	__u8 flags;
+#define VIRTIO_NET_HDR_GSO_NONE		0	/* Not a GSO frame */
+#define VIRTIO_NET_HDR_GSO_TCPV4	1	/* GSO frame, IPv4 TCP (TSO) */
+#define VIRTIO_NET_HDR_GSO_UDP		3	/* GSO frame, IPv4 UDP (UFO) */
+#define VIRTIO_NET_HDR_GSO_TCPV6	4	/* GSO frame, IPv6 TCP */
+#define VIRTIO_NET_HDR_GSO_ECN		0x80	/* TCP has ECN set */
+	__u8 gso_type;
+	__virtio16 hdr_len;	/* Ethernet + IP + tcp/udp hdrs */
+	__virtio16 gso_size;	/* Bytes to append to hdr_len per frame */
+	__virtio16 csum_start;	/* Position to start checksumming from */
+	__virtio16 csum_offset;	/* Offset after that to place checksum */
+	__virtio16 num_buffers;	/* Number of merged rx buffers */
+};
+
+#ifndef VIRTIO_NET_NO_LEGACY
+/* This header comes first in the scatter-gather list.
+ * For legacy virtio, if VIRTIO_F_ANY_LAYOUT is not negotiated, it must
+ * be the first element of the scatter-gather list.  If you don't
+ * specify GSO or CSUM features, you can simply ignore the header. */
+struct virtio_net_hdr {
+	/* See VIRTIO_NET_HDR_F_* */
+	__u8 flags;
+	/* See VIRTIO_NET_HDR_GSO_* */
+	__u8 gso_type;
+	__virtio16 hdr_len;		/* Ethernet + IP + tcp/udp hdrs */
+	__virtio16 gso_size;		/* Bytes to append to hdr_len per frame */
+	__virtio16 csum_start;	/* Position to start checksumming from */
+	__virtio16 csum_offset;	/* Offset after that to place checksum */
+};
+
+/* This is the version of the header to use when the MRG_RXBUF
+ * feature has been negotiated. */
+struct virtio_net_hdr_mrg_rxbuf {
+	struct virtio_net_hdr hdr;
+	__virtio16 num_buffers;	/* Number of merged rx buffers */
+};
+#endif /* ...VIRTIO_NET_NO_LEGACY */
+
+/*
+ * Control virtqueue data structures
+ *
+ * The control virtqueue expects a header in the first sg entry
+ * and an ack/status response in the last entry.  Data for the
+ * command goes in between.
+ */
+struct virtio_net_ctrl_hdr {
+	__u8 class;
+	__u8 cmd;
+} __attribute__((packed));
+
+typedef __u8 virtio_net_ctrl_ack;
+
+#define VIRTIO_NET_OK     0
+#define VIRTIO_NET_ERR    1
+
+/*
+ * Control the RX mode, i.e. promiscuous, allmulti, etc...
+ * All commands require an "out" sg entry containing a 1 byte
+ * state value, zero = disable, non-zero = enable.  Commands
+ * 0 and 1 are supported with the VIRTIO_NET_F_CTRL_RX feature.
+ * Commands 2-5 are added with VIRTIO_NET_F_CTRL_RX_EXTRA.
+ */
+#define VIRTIO_NET_CTRL_RX    0
+ #define VIRTIO_NET_CTRL_RX_PROMISC      0
+ #define VIRTIO_NET_CTRL_RX_ALLMULTI     1
+ #define VIRTIO_NET_CTRL_RX_ALLUNI       2
+ #define VIRTIO_NET_CTRL_RX_NOMULTI      3
+ #define VIRTIO_NET_CTRL_RX_NOUNI        4
+ #define VIRTIO_NET_CTRL_RX_NOBCAST      5
+
+/*
+ * Control the MAC
+ *
+ * The MAC filter table is managed by the hypervisor, the guest should
+ * assume the size is infinite.  Filtering should be considered
+ * non-perfect, ie. based on hypervisor resources, the guest may
+ * receive packets from sources not specified in the filter list.
+ *
+ * In addition to the class/cmd header, the TABLE_SET command requires
+ * two out scatterlists.  Each contains a 4 byte count of entries followed
+ * by a concatenated byte stream of the ETH_ALEN MAC addresses.  The
+ * first sg list contains unicast addresses, the second is for multicast.
+ * This functionality is present if the VIRTIO_NET_F_CTRL_RX feature
+ * is available.
+ *
+ * The ADDR_SET command requests one out scatterlist, it contains a
+ * 6 bytes MAC address. This functionality is present if the
+ * VIRTIO_NET_F_CTRL_MAC_ADDR feature is available.
+ */
+struct virtio_net_ctrl_mac {
+	__virtio32 entries;
+	__u8 macs[][ETHARP_HWADDR_LEN];
+} __attribute__((packed));
+
+#define VIRTIO_NET_CTRL_MAC    1
+ #define VIRTIO_NET_CTRL_MAC_TABLE_SET        0
+ #define VIRTIO_NET_CTRL_MAC_ADDR_SET         1
+
+/*
+ * Control VLAN filtering
+ *
+ * The VLAN filter table is controlled via a simple ADD/DEL interface.
+ * VLAN IDs not added may be filtered by the hypervisor.  Del is the
+ * opposite of add.  Both commands expect an out entry containing a 2
+ * byte VLAN ID.  VLAN filtering is available with the
+ * VIRTIO_NET_F_CTRL_VLAN feature bit.
+ */
+#define VIRTIO_NET_CTRL_VLAN       2
+ #define VIRTIO_NET_CTRL_VLAN_ADD             0
+ #define VIRTIO_NET_CTRL_VLAN_DEL             1
+
+/*
+ * Control link announce acknowledgement
+ *
+ * The command VIRTIO_NET_CTRL_ANNOUNCE_ACK is used to indicate that
+ * driver has received the notification; device would clear the
+ * VIRTIO_NET_S_ANNOUNCE bit in the status field after it receives
+ * this command.
+ */
+#define VIRTIO_NET_CTRL_ANNOUNCE       3
+ #define VIRTIO_NET_CTRL_ANNOUNCE_ACK         0
+
+/*
+ * Control Receive Flow Steering
+ *
+ * The command VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET
+ * enables Receive Flow Steering, specifying the number of the transmit and
+ * receive queues that will be used. After the command is consumed and acked by
+ * the device, the device will not steer new packets on receive virtqueues
+ * other than specified nor read from transmit virtqueues other than specified.
+ * Accordingly, driver should not transmit new packets  on virtqueues other than
+ * specified.
+ */
+struct virtio_net_ctrl_mq {
+	__virtio16 virtqueue_pairs;
+};
+
+#define VIRTIO_NET_CTRL_MQ   4
+ #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET        0
+ #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN        1
+ #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX        0x8000
+
+/*
+ * Control network offloads
+ *
+ * Reconfigures the network offloads that Guest can handle.
+ *
+ * Available with the VIRTIO_NET_F_CTRL_GUEST_OFFLOADS feature bit.
+ *
+ * Command data format matches the feature bit mask exactly.
+ *
+ * See VIRTIO_NET_F_GUEST_* for the list of offloads
+ * that can be enabled/disabled.
+ */
+#define VIRTIO_NET_CTRL_GUEST_OFFLOADS   5
+#define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET        0
+
+#endif /* __VIRTIO_NET_H */
diff --git a/include/hermit/virtio_pci.h b/include/hermit/virtio_pci.h
new file mode 100644
index 000000000..a210bb328
--- /dev/null
+++ b/include/hermit/virtio_pci.h
@@ -0,0 +1,200 @@
+/*
+ * Virtio PCI driver
+ *
+ * This module allows virtio devices to be used over a virtual PCI device.
+ * This can be used with QEMU based VMMs like KVM or Xen.
+ *
+ * Copyright IBM Corp. 2007
+ *
+ * Authors:
+ *  Anthony Liguori  <aliguori@us.ibm.com>
+ *
+ * This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef __VIRTIO_PCI_H	/* include guard: the macro defined below must match this name */
+#define __VIRTIO_PCI_H
+
+#include <hermit/stdlib.h>
+#include <hermit/virtio_types.h>
+
+#ifndef VIRTIO_PCI_NO_LEGACY
+
+/* A 32-bit r/o bitmask of the features supported by the host */
+#define VIRTIO_PCI_HOST_FEATURES	0
+
+/* A 32-bit r/w bitmask of features activated by the guest */
+#define VIRTIO_PCI_GUEST_FEATURES	4
+
+/* A 32-bit r/w PFN for the currently selected queue */
+#define VIRTIO_PCI_QUEUE_PFN		8
+
+/* A 16-bit r/o queue size for the currently selected queue */
+#define VIRTIO_PCI_QUEUE_NUM		12
+
+/* A 16-bit r/w queue selector */
+#define VIRTIO_PCI_QUEUE_SEL		14
+
+/* A 16-bit r/w queue notifier */
+#define VIRTIO_PCI_QUEUE_NOTIFY		16
+
+/* An 8-bit device status register.  */
+#define VIRTIO_PCI_STATUS		18
+
+/* An 8-bit r/o interrupt status register.  Reading the value will return the
+ * current contents of the ISR and will also clear it.  This is effectively
+ * a read-and-acknowledge. */
+#define VIRTIO_PCI_ISR			19
+
+/* MSI-X registers: only enabled if MSI-X is enabled. */
+/* A 16-bit vector for configuration changes. */
+#define VIRTIO_MSI_CONFIG_VECTOR        20
+/* A 16-bit vector for selected queue notifications. */
+#define VIRTIO_MSI_QUEUE_VECTOR         22
+
+/* The remaining space is defined by each driver as the per-driver
+ * configuration space */
+#define VIRTIO_PCI_CONFIG_OFF(msix_enabled)	((msix_enabled) ? 24 : 20)
+/* Deprecated: please use VIRTIO_PCI_CONFIG_OFF instead */
+#define VIRTIO_PCI_CONFIG(dev)	VIRTIO_PCI_CONFIG_OFF((dev)->msix_enabled)
+
+/* Virtio ABI version, this must match exactly */
+#define VIRTIO_PCI_ABI_VERSION		0
+
+/* How many bits to shift physical queue address written to QUEUE_PFN.
+ * 12 is historical, and due to x86 page size. */
+#define VIRTIO_PCI_QUEUE_ADDR_SHIFT	12
+
+/* The alignment to use between consumer and producer parts of vring.
+ * x86 pagesize again. */
+#define VIRTIO_PCI_VRING_ALIGN		4096
+
+#endif /* VIRTIO_PCI_NO_LEGACY */
+
+/* The bit of the ISR which indicates a device configuration change. */
+#define VIRTIO_PCI_ISR_CONFIG		0x2
+/* Vector value used to disable MSI for queue */
+#define VIRTIO_MSI_NO_VECTOR            0xffff
+
+#ifndef VIRTIO_PCI_NO_MODERN
+
+/* IDs for different capabilities.  Must all exist. */
+
+/* Common configuration */
+#define VIRTIO_PCI_CAP_COMMON_CFG	1
+/* Notifications */
+#define VIRTIO_PCI_CAP_NOTIFY_CFG	2
+/* ISR access */
+#define VIRTIO_PCI_CAP_ISR_CFG		3
+/* Device specific configuration */
+#define VIRTIO_PCI_CAP_DEVICE_CFG	4
+/* PCI configuration access */
+#define VIRTIO_PCI_CAP_PCI_CFG		5
+
+/* This is the PCI capability header: */
+struct virtio_pci_cap {
+	__u8 cap_vndr;		/* Generic PCI field: PCI_CAP_ID_VNDR */
+	__u8 cap_next;		/* Generic PCI field: next ptr. */
+	__u8 cap_len;		/* Generic PCI field: capability length */
+	__u8 cfg_type;		/* Identifies the structure. */
+	__u8 bar;		/* Where to find it. */
+	__u8 padding[3];	/* Pad to full dword. */
+	__le32 offset;		/* Offset within bar. */
+	__le32 length;		/* Length of the structure, in bytes. */
+};
+
+struct virtio_pci_notify_cap {
+	struct virtio_pci_cap cap;
+	__le32 notify_off_multiplier;	/* Multiplier for queue_notify_off. */
+};
+
+/* Fields in VIRTIO_PCI_CAP_COMMON_CFG: */
+struct virtio_pci_common_cfg {
+	/* About the whole device. */
+	__le32 device_feature_select;	/* read-write */
+	__le32 device_feature;		/* read-only */
+	__le32 guest_feature_select;	/* read-write */
+	__le32 guest_feature;		/* read-write */
+	__le16 msix_config;		/* read-write */
+	__le16 num_queues;		/* read-only */
+	__u8 device_status;		/* read-write */
+	__u8 config_generation;		/* read-only */
+
+	/* About a specific virtqueue. */
+	__le16 queue_select;		/* read-write */
+	__le16 queue_size;		/* read-write, power of 2. */
+	__le16 queue_msix_vector;	/* read-write */
+	__le16 queue_enable;		/* read-write */
+	__le16 queue_notify_off;	/* read-only */
+	__le32 queue_desc_lo;		/* read-write */
+	__le32 queue_desc_hi;		/* read-write */
+	__le32 queue_avail_lo;		/* read-write */
+	__le32 queue_avail_hi;		/* read-write */
+	__le32 queue_used_lo;		/* read-write */
+	__le32 queue_used_hi;		/* read-write */
+};
+
+/* Fields in VIRTIO_PCI_CAP_PCI_CFG: */
+struct virtio_pci_cfg_cap {
+	struct virtio_pci_cap cap;
+	__u8 pci_cfg_data[4]; /* Data for BAR access. */
+};
+
+/* Macro versions of offsets for the Old Timers! */
+#define VIRTIO_PCI_CAP_VNDR		0
+#define VIRTIO_PCI_CAP_NEXT		1
+#define VIRTIO_PCI_CAP_LEN		2
+#define VIRTIO_PCI_CAP_CFG_TYPE		3
+#define VIRTIO_PCI_CAP_BAR		4
+#define VIRTIO_PCI_CAP_OFFSET		8
+#define VIRTIO_PCI_CAP_LENGTH		12
+
+#define VIRTIO_PCI_NOTIFY_CAP_MULT	16
+
+#define VIRTIO_PCI_COMMON_DFSELECT	0
+#define VIRTIO_PCI_COMMON_DF		4
+#define VIRTIO_PCI_COMMON_GFSELECT	8
+#define VIRTIO_PCI_COMMON_GF		12
+#define VIRTIO_PCI_COMMON_MSIX		16
+#define VIRTIO_PCI_COMMON_NUMQ		18
+#define VIRTIO_PCI_COMMON_STATUS	20
+#define VIRTIO_PCI_COMMON_CFGGENERATION	21
+#define VIRTIO_PCI_COMMON_Q_SELECT	22
+#define VIRTIO_PCI_COMMON_Q_SIZE	24
+#define VIRTIO_PCI_COMMON_Q_MSIX	26
+#define VIRTIO_PCI_COMMON_Q_ENABLE	28
+#define VIRTIO_PCI_COMMON_Q_NOFF	30
+#define VIRTIO_PCI_COMMON_Q_DESCLO	32
+#define VIRTIO_PCI_COMMON_Q_DESCHI	36
+#define VIRTIO_PCI_COMMON_Q_AVAILLO	40
+#define VIRTIO_PCI_COMMON_Q_AVAILHI	44
+#define VIRTIO_PCI_COMMON_Q_USEDLO	48
+#define VIRTIO_PCI_COMMON_Q_USEDHI	52
+
+#endif /* VIRTIO_PCI_NO_MODERN */
+
+#endif
diff --git a/include/hermit/virtio_ring.h b/include/hermit/virtio_ring.h
new file mode 100644
index 000000000..7680935df
--- /dev/null
+++ b/include/hermit/virtio_ring.h
@@ -0,0 +1,171 @@
+#ifndef __VIRTIO_RING_H
+#define __VIRTIO_RING_H
+/* An interface for efficient virtio implementation, currently for use by KVM
+ * and lguest, but hopefully others soon.  Do NOT change this since it will
+ * break existing servers and clients.
+ *
+ * This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Copyright Rusty Russell IBM Corporation 2007. */
+#include <hermit/stdlib.h>
+#include <hermit/virtio_types.h>
+
+/* This marks a buffer as continuing via the next field. */
+#define VRING_DESC_F_NEXT	1
+/* This marks a buffer as write-only (otherwise read-only). */
+#define VRING_DESC_F_WRITE	2
+/* This means the buffer contains a list of buffer descriptors. */
+#define VRING_DESC_F_INDIRECT	4
+
+/* The Host uses this in used->flags to advise the Guest: don't kick me when
+ * you add a buffer.  It's unreliable, so it's simply an optimization.  Guest
+ * will still kick if it's out of buffers. */
+#define VRING_USED_F_NO_NOTIFY	1
+/* The Guest uses this in avail->flags to advise the Host: don't interrupt me
+ * when you consume a buffer.  It's unreliable, so it's simply an
+ * optimization.  */
+#define VRING_AVAIL_F_NO_INTERRUPT	1
+
+/* We support indirect buffer descriptors */
+#define VIRTIO_RING_F_INDIRECT_DESC	28
+
+/* The Guest publishes the used index for which it expects an interrupt
+ * at the end of the avail ring. Host should ignore the avail->flags field. */
+/* The Host publishes the avail index for which it expects a kick
+ * at the end of the used ring. Guest should ignore the used->flags field. */
+#define VIRTIO_RING_F_EVENT_IDX		29
+
+/* Virtio ring descriptors: 16 bytes.  These can chain together via "next". */
+struct vring_desc {
+	/* Address (guest-physical). */
+	__virtio64 addr;
+	/* Length. */
+	__virtio32 len;
+	/* VRING_DESC_F_* flags (NEXT, WRITE, INDIRECT) as defined above. */
+	__virtio16 flags;
+	/* We chain unused descriptors via this, too */
+	__virtio16 next;
+};
+
+struct vring_avail {
+	__virtio16 flags;	/* e.g. VRING_AVAIL_F_NO_INTERRUPT */
+	__virtio16 idx;	/* free-running index into ring[] */
+	__virtio16 ring[];	/* available descriptor heads; vring_used_event() reads ring[num] */
+};
+
+/* u32 is used here for ids for padding reasons. */
+struct vring_used_elem {
+	/* Index of start of used descriptor chain. */
+	__virtio32 id;
+	/* Total length of the descriptor chain which was used (written to) */
+	__virtio32 len;
+};
+
+struct vring_used {
+	__virtio16 flags;	/* e.g. VRING_USED_F_NO_NOTIFY */
+	__virtio16 idx;	/* free-running index into ring[] */
+	struct vring_used_elem ring[];	/* used descriptor heads; vring_avail_event() aliases ring[num] */
+};
+
+struct vring {
+	unsigned int num;	/* number of entries in desc[], avail->ring[] and used->ring[] */
+
+	struct vring_desc *desc;	/* descriptor table */
+
+	struct vring_avail *avail;	/* ring of available descriptor heads */
+
+	struct vring_used *used;	/* ring of used descriptor heads */
+};
+
+/* Alignment requirements for vring elements.
+ * When using pre-virtio 1.0 layout, these fall out naturally.
+ */
+#define VRING_AVAIL_ALIGN_SIZE 2
+#define VRING_USED_ALIGN_SIZE 4
+#define VRING_DESC_ALIGN_SIZE 16
+
+/* The standard layout for the ring is a continuous chunk of memory which looks
+ * like this.  We assume num is a power of 2.
+ *
+ * struct vring
+ * {
+ *	// The actual descriptors (16 bytes each)
+ *	struct vring_desc desc[num];
+ *
+ *	// A ring of available descriptor heads with free-running index.
+ *	__virtio16 avail_flags;
+ *	__virtio16 avail_idx;
+ *	__virtio16 available[num];
+ *	__virtio16 used_event_idx;
+ *
+ *	// Padding to the next align boundary.
+ *	char pad[];
+ *
+ *	// A ring of used descriptor heads with free-running index.
+ *	__virtio16 used_flags;
+ *	__virtio16 used_idx;
+ *	struct vring_used_elem used[num];
+ *	__virtio16 avail_event_idx;
+ * };
+ */
+/* We publish the used event index at the end of the available ring, and vice
+ * versa. They are at the end for backwards compatibility. */
+#define vring_used_event(vr) ((vr)->avail->ring[(vr)->num])
+#define vring_avail_event(vr) (*(__virtio16 *)&(vr)->used->ring[(vr)->num])
+
+static __inline__ void vring_init(struct vring *vr, unsigned int num, void *p,
+			      unsigned long align)
+{
+	vr->num = num;
+	vr->desc = p;	/* descriptor table starts at the base address p */
+	/* avail follows desc[num]; step via char* since void* arithmetic is a GNU extension */
+	vr->avail = (struct vring_avail *)((char *)p + num * sizeof(struct vring_desc));
+	vr->used = (void *)(((uintptr_t)&vr->avail->ring[num] + sizeof(__virtio16) + align-1) & ~(align - 1));
+}
+
+static __inline__ unsigned vring_size(unsigned int num, unsigned long align)
+{
+	/* desc table plus avail ring (flags, idx, ring[num], used_event), rounded up to align */
+	size_t head = (sizeof(struct vring_desc) * num + sizeof(__virtio16) * (3 + num) + align - 1) & ~(align - 1);
+	return head + sizeof(__virtio16) * 3 + sizeof(struct vring_used_elem) * num; /* + used ring */
+}
+
+/* The following is used with USED_EVENT_IDX and AVAIL_EVENT_IDX */
+/* Assuming a given event_idx value from the other side, if
+ * we have just incremented index from old to new_idx,
+ * should we trigger an event? */
+static __inline__ int vring_need_event(__u16 event_idx, __u16 new_idx, __u16 old)
+{
+	/* Distances are taken modulo 2^16, so wrapping indexes compare correctly.
+	 * Xen's hold-off logic (include/xen/interface/io/ring.h) is analogous: its
+	 * req_event/req_prod match event_idx + 1/new_idx, but start at 1, not 0. */
+	const __u16 past_event = (__u16)(new_idx - event_idx - 1);
+	const __u16 advanced = (__u16)(new_idx - old);
+	return past_event < advanced;
+}
+
+#endif /* __VIRTIO_RING_H */
diff --git a/include/hermit/virtio_types.h b/include/hermit/virtio_types.h
new file mode 100644
index 000000000..4792dea96
--- /dev/null
+++ b/include/hermit/virtio_types.h
@@ -0,0 +1,57 @@
+#ifndef __VIRTIO_TYPES_H
+#define __VIRTIO_TYPES_H
+/* Type definitions for virtio implementations.
+ *
+ * This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Copyright (C) 2014 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ */
+#include <hermit/stddef.h>
+
+/*
+ * __virtio{16,32,64} have the following meaning:
+ * - __u{16,32,64} for virtio devices in legacy mode, accessed in native endian
+ * - __le{16,32,64} for standard-compliant virtio devices
+ */
+
+typedef uint8_t	__u8;
+typedef uint16_t	__u16;
+typedef uint32_t	__u32;
+typedef uint64_t	__u64;
+typedef size_t	uintptr_t;
+
+#define __bitwise__
+
+typedef __u16 __bitwise__ __virtio16;
+typedef __u32 __bitwise__ __virtio32;
+typedef __u64 __bitwise__ __virtio64;
+typedef __u32 __bitwise__ __le32;
+typedef __u16 __bitwise__ __le16;
+typedef __u8 __bitwise__ __le8;
+
+#endif /* __VIRTIO_TYPES_H */
diff --git a/kernel/main.c b/kernel/main.c
index 1b8d151be..d2fe61327 100644
--- a/kernel/main.c
+++ b/kernel/main.c
@@ -40,6 +40,7 @@
 #include <asm/irq.h>
 #include <asm/page.h>
 #include <asm/uart.h>
+#include <asm/multiboot.h>
 
 #include <lwip/init.h>
 #include <lwip/sys.h>
@@ -58,6 +59,7 @@
 #include <net/mmnif.h>
 #include <net/rtl8139.h>
 #include <net/e1000.h>
+#include <net/vioif.h>
 
 #define HERMIT_PORT	0x494E
 #define HERMIT_MAGIC	0x7E317
@@ -98,20 +100,6 @@ rcce_mpb_t* rcce_mpb = NULL;
 
 extern void signal_init();
 
-#if 0
-static int foo(void* arg)
-{
-	int i;
-
-	for(i=0; i<5; i++) {
-		LOG_INFO("hello from %s\n", (char*) arg);
-		sleep(1);
-	}
-
-	return 0;
-}
-#endif
-
 static int hermit_init(void)
 {
 	uint32_t i;
@@ -172,11 +160,15 @@ static int init_netifs(void)
 	LOG_INFO("TCP/IP initialized.\n");
 	sys_sem_free(&sem);
 
-	if (is_uhyve())
+	if (is_uhyve()) {
+		LOG_INFO("HermitCore is running on uhyve!\n");
 		return -ENODEV;
+	}
 
 	if (!is_single_kernel())
 	{
+		LOG_INFO("HermitCore is running side-by-side to Linux!\n");
+
 		/* Set network address variables */
 		IP_ADDR4(&gw, 192,168,28,1);
 		IP_ADDR4(&ipaddr, 192,168,28,isle+2);
@@ -189,16 +181,11 @@ static int init_netifs(void)
 		 *  - gw : the gateway wicht should be used
 		 *  - mmnif_init : the initialization which has to be done in order to use our interface
 		 *  - ip_input : tells him that he should use ip_input
-		 */
-#if LWIP_TCPIP_CORE_LOCKING_INPUT
-		if ((err = netifapi_netif_add(&default_netif, ip_2_ip4(&ipaddr), ip_2_ip4(&netmask), ip_2_ip4(&gw), NULL, mmnif_init, ip_input)) != ERR_OK)
-#else
-		/*
+		 *
 		 * Note: Our drivers guarantee that the input function will be called in the context of the tcpip thread.
 		 * => Therefore, we are able to use ip_input instead of tcpip_input
 		 */
 		if ((err = netifapi_netif_add(&default_netif, ip_2_ip4(&ipaddr), ip_2_ip4(&netmask), ip_2_ip4(&gw), NULL, mmnif_init, ip_input)) != ERR_OK)
-#endif
 		{
 			LOG_ERROR("Unable to add the intra network interface: err = %d\n", err);
 			return -ENODEV;
@@ -215,6 +202,8 @@ static int init_netifs(void)
 
 		/* Note: Our drivers guarantee that the input function will be called in the context of the tcpip thread.
 		 * => Therefore, we are able to use ethernet_input instead of tcpip_input */
+		if ((err = netifapi_netif_add(&default_netif, ip_2_ip4(&ipaddr), ip_2_ip4(&netmask), ip_2_ip4(&gw), NULL, vioif_init, ethernet_input)) == ERR_OK)
+			goto success;
 		if ((err = netifapi_netif_add(&default_netif, ip_2_ip4(&ipaddr), ip_2_ip4(&netmask), ip_2_ip4(&gw), NULL, rtl8139if_init, ethernet_input)) == ERR_OK)
 			goto success;
 		if ((err = netifapi_netif_add(&default_netif, ip_2_ip4(&ipaddr), ip_2_ip4(&netmask), ip_2_ip4(&gw), NULL, e1000if_init, ethernet_input)) == ERR_OK)
@@ -291,8 +280,6 @@ int smp_main(void)
 	while(atomic_int32_read(&cpu_online) < atomic_int32_read(&possible_cpus))
 		PAUSE;
 
-	//create_kernel_task(NULL, foo, "foo2", NORMAL_PRIO);
-
 	while(1) {
 		check_workqueues();
 		wait_for_task();
@@ -324,43 +311,6 @@ static int init_rcce(void)
 	return 0;
 }
 
-#if 0
-// some stress tests
-static void lock_test(void)
-{
-	uint64_t start, end;
-	int i;
-	static spinlock_t _lock = SPINLOCK_INIT;
-	static sem_t _sem = SEM_INIT(1);
-
-	start = rdtsc();
-
-	for(i=0; i<10000; i++)
-	{
-		spinlock_lock(&_lock);
-		NOP;
-		spinlock_unlock(&_lock);
-	}
-
-	end = rdtsc();
-
-	LOG_INFO("locks %lld (iterations %d)\n", end-start, i);
-
-	start = rdtsc();
-
-	for(i=0; i<10000; i++)
-	{
-		sem_wait(&_sem, 0);
-		NOP;
-		sem_post(&_sem);
-	}
-
-	end = rdtsc();
-
-	LOG_INFO("sem %lld (iterations %d)\n", end-start, i);
-}
-#endif
-
 int libc_start(int argc, char** argv, char** env);
 
 // init task => creates all other tasks an initialize the LwIP
@@ -392,17 +342,14 @@ static int initd(void* arg)
 	}
 
 	curr_task->heap->flags = VMA_HEAP|VMA_USER;
-	curr_task->heap->start = PAGE_FLOOR(heap);
-	curr_task->heap->end = PAGE_FLOOR(heap);
+	curr_task->heap->start = PAGE_CEIL(heap);
+	curr_task->heap->end = PAGE_CEIL(heap);
 
 	// region is already reserved for the heap, we have to change the
 	// property of the first page
 	vma_free(curr_task->heap->start, curr_task->heap->start+PAGE_SIZE);
 	vma_add(curr_task->heap->start, curr_task->heap->start+PAGE_SIZE, VMA_HEAP|VMA_USER);
 
-	//create_kernel_task(NULL, foo, "foo1", NORMAL_PRIO);
-	//create_kernel_task(NULL, foo, "foo2", NORMAL_PRIO);
-
 	// initialize network
 	err = init_netifs();
 
@@ -586,6 +533,8 @@ int hermit_main(void)
 	LOG_INFO("Current available memory: %zd MiB\n", atomic_int64_read(&total_available_pages) * PAGE_SIZE / (1024ULL*1024ULL));
 	LOG_INFO("Core %d is the boot processor\n", boot_processor);
 	LOG_INFO("System is able to use %d processors\n", possible_cpus);
+	if (mb_info)
+		LOG_INFO("Kernel cmdline: %s\n", (char*) (size_t) mb_info->cmdline);
 	if (hbmem_base)
 		LOG_INFO("Found high bandwidth memory at 0x%zx (size 0x%zx)\n", hbmem_base, hbmem_size);
 
diff --git a/kernel/syscall.c b/kernel/syscall.c
index 719a30944..9307704b7 100644
--- a/kernel/syscall.c
+++ b/kernel/syscall.c
@@ -90,7 +90,7 @@ typedef struct {
 void NORETURN sys_exit(int arg)
 {
 	if (is_uhyve()) {
-		uhyve_send(UHYVE_PORT_EXIT, (unsigned) (size_t) &arg);
+		uhyve_send(UHYVE_PORT_EXIT, (unsigned) virt_to_phys((size_t) &arg));
 	} else {
 		sys_exit_t sysargs = {__NR_exit, arg};
 
@@ -290,11 +290,11 @@ ssize_t sys_sbrk(ssize_t incr)
 		heap->end += incr;
 
 		// reserve VMA regions
-		if (PAGE_CEIL(heap->end) > PAGE_CEIL(ret)) {
+		if (PAGE_FLOOR(heap->end) > PAGE_FLOOR(ret)) {
 			// region is already reserved for the heap, we have to change the
 			// property
-			vma_free(PAGE_CEIL(ret), PAGE_FLOOR(heap->end));
-			vma_add(PAGE_CEIL(ret), PAGE_FLOOR(heap->end), VMA_HEAP|VMA_USER);
+			vma_free(PAGE_FLOOR(ret), PAGE_CEIL(heap->end));
+			vma_add(PAGE_FLOOR(ret), PAGE_CEIL(heap->end), VMA_HEAP|VMA_USER);
 		}
 	} else ret = -ENOMEM;
 
@@ -424,6 +424,56 @@ out:
 	return ret;
 }
 
+int sys_spinlock_init(spinlock_t** lock)
+{
+	int err;
+
+	if (BUILTIN_EXPECT(lock == NULL, 0))
+		return -EINVAL;
+
+	/* allocate the lock object; *lock holds NULL if kmalloc() fails */
+	*lock = (spinlock_t*) kmalloc(sizeof(spinlock_t));
+	if (BUILTIN_EXPECT(*lock == NULL, 0))
+		return -ENOMEM;
+
+	err = spinlock_init(*lock);
+	if (err == 0)
+		return 0;
+	kfree(*lock);	/* init failed: release the object and clear the handle */
+	*lock = NULL;
+	return err;
+}
+
+int sys_spinlock_destroy(spinlock_t* lock)
+{
+	int err;
+
+	if (BUILTIN_EXPECT(lock == NULL, 0))
+		return -EINVAL;
+
+	err = spinlock_destroy(lock);
+	if (err != 0)
+		return err;
+	kfree(lock);	/* the object is freed only after a successful destroy */
+	return 0;
+}
+
+int sys_spinlock_lock(spinlock_t* lock)
+{
+	/* a NULL handle yields -EINVAL, anything else is forwarded to spinlock_lock() */
+	const int missing = BUILTIN_EXPECT(lock == NULL, 0);
+
+	return missing ? -EINVAL : spinlock_lock(lock);
+}
+
+int sys_spinlock_unlock(spinlock_t* lock)
+{
+	/* a NULL handle yields -EINVAL, anything else is forwarded to spinlock_unlock() */
+	const int missing = BUILTIN_EXPECT(lock == NULL, 0);
+
+	return missing ? -EINVAL : spinlock_unlock(lock);
+}
+
 void sys_msleep(unsigned int ms)
 {
 	if (ms * TIMER_FREQ / 1000 > 0)
diff --git a/kernel/tasks.c b/kernel/tasks.c
index cd92d1010..f886fc0a2 100644
--- a/kernel/tasks.c
+++ b/kernel/tasks.c
@@ -67,7 +67,6 @@ static readyqueues_t readyqueues[1] = {[0] = {task_table+0, NULL, 0, 0, 0, {[0 .
 #endif
 
 DEFINE_PER_CORE(task_t*, current_task, task_table+0);
-DEFINE_PER_CORE(char*, kernel_stack, NULL);
 
 #if MAX_CORES > 1
 DEFINE_PER_CORE(uint32_t, __core_id, 0);
@@ -176,6 +175,10 @@ static void readyqueues_push_back(uint32_t core_id, task_t* task)
 
 	// increase the number of ready tasks
 	readyqueues[core_id].nr_tasks++;
+
+	// should we wakeup the core?
+	if (readyqueues[core_id].nr_tasks == 1)
+		wakeup_core(core_id);
 }
 
 
@@ -278,9 +281,8 @@ int multitasking_init(void)
 	task_table[0].prio = IDLE_PRIO;
 	task_table[0].stack = (char*) ((size_t)&boot_stack + core_id * KERNEL_STACK_SIZE);
 	task_table[0].ist_addr = (char*)&boot_ist;
-	set_per_core(kernel_stack, task_table[0].stack + KERNEL_STACK_SIZE - 0x10);
 	set_per_core(current_task, task_table+0);
-  arch_init_task(task_table+0);
+	arch_init_task(task_table+0);
 
 	readyqueues[core_id].idle = task_table+0;
 
@@ -303,12 +305,11 @@ int set_idle_task(void)
 			task_table[i].last_stack_pointer = NULL;
 			task_table[i].stack = (char*) ((size_t)&boot_stack + core_id * KERNEL_STACK_SIZE);
 			task_table[i].ist_addr = create_stack(KERNEL_STACK_SIZE);
-			set_per_core(kernel_stack, task_table[i].stack + KERNEL_STACK_SIZE - 0x10);
 			task_table[i].prio = IDLE_PRIO;
 			task_table[i].heap = NULL;
 			readyqueues[core_id].idle = task_table+i;
 			set_per_core(current_task, readyqueues[core_id].idle);
-      arch_init_task(task_table+i);
+			arch_init_task(task_table+i);
 			ret = 0;
 
 			break;
@@ -483,7 +484,7 @@ int clone_task(tid_t* id, entry_point_t ep, void* arg, uint8_t prio)
 			task_table[i].stack = stack;
 			task_table[i].prio = prio;
 			task_table[i].heap = curr_task->heap;
-                        task_table[i].start_tick = get_clock_tick();
+			task_table[i].start_tick = get_clock_tick();
 			task_table[i].last_tsc = 0;
 			task_table[i].parent = curr_task->id;
 			task_table[i].tls_addr = curr_task->tls_addr;
@@ -513,6 +514,9 @@ int clone_task(tid_t* id, entry_point_t ep, void* arg, uint8_t prio)
 				readyqueues[core_id].queue[prio-1].last->next = task_table+i;
 				readyqueues[core_id].queue[prio-1].last = task_table+i;
 			}
+			// should we wakeup the core?
+			if (readyqueues[core_id].nr_tasks == 1)
+				wakeup_core(core_id);
 			spinlock_irqsave_unlock(&readyqueues[core_id].lock);
  			break;
 		}
@@ -530,11 +534,6 @@ out:
 		destroy_stack(ist, KERNEL_STACK_SIZE);
 	}
 
-#if 0
-	if (core_id != CORE_ID)
-		apic_send_ipi(core_id, 121);
-#endif
-
 	return ret;
 }
 
@@ -634,11 +633,6 @@ out:
 		kfree(counter);
 	}
 
-#if 0
-	if (core_id != CORE_ID)
-		apic_send_ipi(core_id, 121);
-#endif
-
 	return ret;
 }
 
@@ -674,6 +668,8 @@ int wakeup_task(tid_t id)
 	core_id = task->last_core;
 
 	if (task->status == TASK_BLOCKED) {
+		LOG_DEBUG("wakeup task %d\n", id);
+
 		task->status = TASK_READY;
 		ret = 0;
 
@@ -711,6 +707,8 @@ int block_task(tid_t id)
 	core_id = task->last_core;
 
 	if (task->status == TASK_RUNNING) {
+		LOG_DEBUG("block task %d\n", id);
+
 		task->status = TASK_BLOCKED;
 
 		spinlock_irqsave_lock(&readyqueues[core_id].lock);
diff --git a/lwip b/lwip
index ab6d60a62..51d48fe0c 160000
--- a/lwip
+++ b/lwip
@@ -1 +1 @@
-Subproject commit ab6d60a6276788949b38c020a62d51564fc69a8e
+Subproject commit 51d48fe0c67131da346c9ef280b2019c77f6e607
diff --git a/mm/malloc.c b/mm/malloc.c
index 7bdafc67d..dac637062 100644
--- a/mm/malloc.c
+++ b/mm/malloc.c
@@ -134,13 +134,13 @@ void buddy_dump(void)
 void* palloc(size_t sz, uint32_t flags)
 {
 	size_t phyaddr, viraddr, bits;
-	uint32_t npages = PAGE_FLOOR(sz) >> PAGE_BITS;
+	uint32_t npages = PAGE_CEIL(sz) >> PAGE_BITS;
 	int err;
 
 	LOG_DEBUG("palloc(%zd) (%u pages)\n", sz, npages);
 
 	// get free virtual address space
-	viraddr = vma_alloc(PAGE_FLOOR(sz), flags);
+	viraddr = vma_alloc(PAGE_CEIL(sz), flags);
 	if (BUILTIN_EXPECT(!viraddr, 0))
 		return NULL;
 
@@ -168,7 +168,7 @@ void* palloc(size_t sz, uint32_t flags)
 void* create_stack(size_t sz)
 {
 	size_t phyaddr, viraddr, bits;
-	uint32_t npages = PAGE_FLOOR(sz) >> PAGE_BITS;
+	uint32_t npages = PAGE_CEIL(sz) >> PAGE_BITS;
 	int err;
 
 	LOG_DEBUG("create_stack(0x%zx) (%u pages)\n", DEFAULT_STACK_SIZE, npages);
@@ -204,7 +204,7 @@ void* create_stack(size_t sz)
 int destroy_stack(void* viraddr, size_t sz)
 {
 	size_t phyaddr;
-	uint32_t npages = PAGE_FLOOR(sz) >> PAGE_BITS;
+	uint32_t npages = PAGE_CEIL(sz) >> PAGE_BITS;
 
 	LOG_DEBUG("destroy_stack(0x%zx) (size 0x%zx)\n", viraddr, DEFAULT_STACK_SIZE);
 
diff --git a/mm/vma.c b/mm/vma.c
index 88f30bdd1..c89b5a3c4 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -54,12 +54,12 @@ int vma_init(void)
 	int ret;
 
 	LOG_INFO("vma_init: reserve vma region 0x%llx - 0x%llx\n",
-		PAGE_2M_CEIL((size_t) &kernel_start),
-		PAGE_2M_FLOOR((size_t) &kernel_start + image_size));
+		PAGE_2M_FLOOR((size_t) &kernel_start),
+		PAGE_2M_CEIL((size_t) &kernel_start + image_size));
 
 	// add Kernel
-	ret  = vma_add(PAGE_2M_CEIL((size_t) &kernel_start),
-		PAGE_2M_FLOOR((size_t) &kernel_start + image_size),
+	ret  = vma_add(PAGE_2M_FLOOR((size_t) &kernel_start),
+		PAGE_2M_CEIL((size_t) &kernel_start + image_size),
 		VMA_READ|VMA_WRITE|VMA_EXECUTE|VMA_CACHEABLE);
 	if (BUILTIN_EXPECT(ret, 0))
 		goto out;
diff --git a/test.sh b/test.sh
deleted file mode 100644
index 45f43446a..000000000
--- a/test.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/bin/bash
-#
-# do not use this script
-# it is written only for internal tests via Travis CI
-
-FILES="usr/tests/hello usr/tests/hellof usr/tests/hello++ usr/tests/thr_hello usr/tests/pi usr/benchmarks/stream usr/benchmarks/basic usr/tests/signals"
-PROXY=/opt/hermit/bin/proxy
-
-for f in $FILES; do echo "check $f..."; timeout --kill-after=5m 5m $PROXY $f || exit 1; done
-
-# test echo server at port 8000
-HERMIT_APP_PORT=8000 $PROXY usr/tests/server &
-sleep 10
-curl http://127.0.0.1:8000/help
-sleep 1
-curl http://127.0.0.1:8000/hello
-sleep 1
-
-# kill server
-kill $!
diff --git a/tests.sh b/tests.sh
new file mode 100755
index 000000000..3cabbadbe
--- /dev/null
+++ b/tests.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+#
+# do not use this script
+# it is written only for internal tests via Travis CI
+
+TDIR=build/local_prefix/opt/hermit/x86_64-hermit/extra
+FILES="$TDIR/tests/hello $TDIR/tests/hellof $TDIR/tests/hello++ $TDIR/tests/thr_hello $TDIR/tests/pi $TDIR/benchmarks/stream $TDIR/benchmarks/basic $TDIR/tests/signals $TDIR/tests/test-malloc $TDIR/tests/test-malloc-mt"
+PROXY=build/local_prefix/opt/hermit/bin/proxy
+
+for f in $FILES; do echo "check $f..."; HERMIT_ISLE=qemu HERMIT_CPUS=1 HERMIT_KVM=0 HERMIT_VERBOSE=1 timeout --kill-after=5m 5m "$PROXY" "$f" || exit 1; done
+
+# test echo server at port 8000; a failed request is remembered and fails the run
+HERMIT_ISLE=qemu HERMIT_CPUS=1 HERMIT_KVM=0 HERMIT_VERBOSE=1 HERMIT_APP_PORT=8000 "$PROXY" $TDIR/tests/server &
+sleep 10
+curl http://127.0.0.1:8000/help || curl_failed=1
+sleep 1
+curl http://127.0.0.1:8000/hello || curl_failed=1
+sleep 1
+
+# kill server (always), then report whether one of the requests failed
+kill $! && exit ${curl_failed:-0}
diff --git a/tools/proxy.c b/tools/proxy.c
index 3ebac52a6..8c5adf472 100644
--- a/tools/proxy.c
+++ b/tools/proxy.c
@@ -104,10 +104,12 @@ static void qemu_fini(void)
 		unlink(pidname);
 
 		if (id >= 0) {
-			int status = 0;
+			int ret;
 
-			kill(id, SIGINT);
-			wait(&status);
+			do {
+				ret = kill(id, SIGINT);	/* send SIGINT to the process with pid id */
+				sched_yield();
+			} while((ret < 0) && (errno == ESRCH));	/* NOTE(review): repeats only while kill() fails with ESRCH (no such process); confirm this cannot spin forever once the process has already exited */
 		}
 	}
 
@@ -268,7 +270,7 @@ static void wait_hermit_available(void)
 		return;
 
 	int fd = inotify_init();
-	if ( fd < 0 ) {
+	if (fd < 0) {
 		perror( "inotify_init" );
 		exit(1);
 	}
diff --git a/tools/uhyve.c b/tools/uhyve.c
index 556f020a1..da84a3276 100644
--- a/tools/uhyve.c
+++ b/tools/uhyve.c
@@ -156,6 +156,7 @@ static bool cap_tsc_deadline = false;
 static bool cap_irqchip = false;
 static bool cap_adjust_clock_stable = false;
 static bool verbose = false;
+static bool full_checkpoint = false;
 static uint32_t ncores = 1;
 static uint8_t* guest_mem = NULL;
 static uint8_t* klog = NULL;
@@ -435,6 +436,7 @@ static int load_checkpoint(uint8_t* mem, char* path)
 	size_t paddr = elf_entry;
 	int ret;
 	struct timeval begin, end;
+	uint32_t i;
 
 	if (verbose)
 		gettimeofday(&begin, NULL);
@@ -457,7 +459,8 @@ static int load_checkpoint(uint8_t* mem, char* path)
 		return ret;
 #endif
 
-	for(uint32_t i=0; i<=no_checkpoint; i++)
+	i = full_checkpoint ? no_checkpoint : 0;
+	for(; i<=no_checkpoint; i++)
 	{
 		snprintf(fname, MAX_FNAME, "checkpoint/chk%u_mem.dat", i);
 
@@ -803,8 +806,10 @@ static int vcpu_loop(void)
 					unsigned data = *((unsigned*)((size_t)run+run->io.data_offset));
 					uhyve_close_t* uhyve_close = (uhyve_close_t*) (guest_mem+data);
 
-					if (uhyve_close->ret > 2)
+					if (uhyve_close->fd > 2)
 						uhyve_close->ret = close(uhyve_close->fd);
+					else
+						uhyve_close->ret = 0;
 					break;
 				}
 
@@ -1064,12 +1069,16 @@ int uhyve_init(char *path)
 
 	FILE* f = fopen("checkpoint/chk_config.txt", "r");
 	if (f != NULL) {
+		int tmp = 0;
 		restart = true;
 
 		fscanf(f, "number of cores: %u\n", &ncores);
 		fscanf(f, "memory size: 0x%zx\n", &guest_size);
 		fscanf(f, "checkpoint number: %u\n", &no_checkpoint);
 		fscanf(f, "entry point: 0x%zx", &elf_entry);
+		fscanf(f, " full checkpoint: %d", &tmp);	/* leading blank skips any line break in front of the key */
+		full_checkpoint = tmp ? true : false;	/* tmp stays 0 when the key is absent */
+
 		if (verbose)
 			fprintf(stderr, "Restart from checkpoint %u (ncores %d, mem size 0x%zx)\n", no_checkpoint, ncores, guest_size);
 		fclose(f);
@@ -1081,6 +1090,10 @@ int uhyve_init(char *path)
 		const char* hermit_cpus = getenv("HERMIT_CPUS");
 		if (hermit_cpus)
 			ncores = (uint32_t) atoi(hermit_cpus);
+
+		const char* full_chk = getenv("HERMIT_FULLCHECKPOINT");
+		if (full_chk && (strcmp(full_chk, "0") != 0))
+			full_checkpoint = true;
 	}
 
 	vcpu_threads = (pthread_t*) calloc(ncores, sizeof(pthread_t));
@@ -1133,7 +1146,7 @@ int uhyve_init(char *path)
 		mprotect(guest_mem + KVM_32BIT_GAP_START, KVM_32BIT_GAP_SIZE, PROT_NONE);
 	}
 
-	char* merge = getenv("HERMIT_MERGEABLE");
+	const char* merge = getenv("HERMIT_MERGEABLE");
 	if (merge && (strcmp(merge, "0") != 0)) {
 		/*
 		 * The KSM feature is intended for applications that generate
@@ -1206,7 +1219,7 @@ int uhyve_init(char *path)
 static void timer_handler(int signum)
 {
 	struct stat st = {0};
-	const size_t flag = no_checkpoint > 0 ? PG_DIRTY : PG_ACCESSED;
+	const size_t flag = (!full_checkpoint && (no_checkpoint > 0)) ? PG_DIRTY : PG_ACCESSED;
 	char fname[MAX_FNAME];
 	struct timeval begin, end;
 
@@ -1320,7 +1333,8 @@ nextslot:
 					for(size_t l=0; l<(1 << PAGE_MAP_BITS); l++) {
 						if ((pgt[l] & (PG_PRESENT|flag)) == (PG_PRESENT|flag)) {
 							//printf("\t\t\t*pgt[%zd] 0x%zx, 4KB\n", l, pgt[l] & ~PG_XD);
-							pgt[l] = pgt[l] & ~(PG_DIRTY|PG_ACCESSED);
+							if (!full_checkpoint)
+								pgt[l] = pgt[l] & ~(PG_DIRTY|PG_ACCESSED);
 							size_t pgt_entry = pgt[l] & ~PG_PSE; // because PAT use the same bit as PSE
 							if (fwrite(&pgt_entry, sizeof(size_t), 1, f) != 1)
 								err(1, "fwrite failed");
@@ -1330,7 +1344,8 @@ nextslot:
 					}
 				} else if ((pgd[k] & flag) == flag) {
 					//printf("\t\t*pgd[%zd] 0x%zx, 2MB\n", k, pgd[k] & ~PG_XD);
-					pgd[k] = pgd[k] & ~(PG_DIRTY|PG_ACCESSED);
+					if (!full_checkpoint)
+						pgd[k] = pgd[k] & ~(PG_DIRTY|PG_ACCESSED);
 					if (fwrite(pgd+k, sizeof(size_t), 1, f) != 1)
 						err(1, "fwrite failed");
 					if (fwrite((size_t*) (guest_mem + (pgd[k] & PAGE_2M_MASK)), (1UL << PAGE_2M_BITS), 1, f) != 1)
@@ -1355,6 +1370,10 @@ nextslot:
 	fprintf(f, "memory size: 0x%zx\n", guest_size);
 	fprintf(f, "checkpoint number: %u\n", no_checkpoint);
 	fprintf(f, "entry point: 0x%zx", elf_entry);
+	/* The entry-point value above is written without a trailing newline. Begin a
+	 * new line here, otherwise the leading 'f' of this key is glued to the hex
+	 * number and parsed as one of its digits when the file is read with "%zx". */
+	fprintf(f, "\nfull checkpoint: %d", full_checkpoint ? 1 : 0);
 
 	fclose(f);
 
diff --git a/usr/tests/CMakeLists.txt b/usr/tests/CMakeLists.txt
index a54926f79..e910a016c 100644
--- a/usr/tests/CMakeLists.txt
+++ b/usr/tests/CMakeLists.txt
@@ -9,6 +9,11 @@ add_executable(hello++ hello++.cpp)
 add_executable(hellof hellof.f90)
 add_executable(pi pi.go)
 
+add_executable(test-malloc test-malloc.c)
+add_executable(test-malloc-mt test-malloc-mt.c)
+target_compile_options(test-malloc-mt PRIVATE -pthread)
+target_link_libraries(test-malloc-mt pthread)
+
 add_executable(server server.go)
 target_link_libraries(server netgo)
 
@@ -16,9 +21,11 @@ add_executable(RCCE_minimum RCCE_minimum.c)
 target_link_libraries(RCCE_minimum ircce)
 
 add_executable(thr_hello thr_hello.c)
+target_compile_options(thr_hello PRIVATE -pthread)
 target_link_libraries(thr_hello pthread)
 
 add_executable(signals signals.c)
+target_compile_options(signals PRIVATE -pthread)
 target_link_libraries(signals pthread)
 
 # deployment
diff --git a/usr/tests/test-malloc-mt.c b/usr/tests/test-malloc-mt.c
new file mode 100644
index 000000000..9e35d2de0
--- /dev/null
+++ b/usr/tests/test-malloc-mt.c
@@ -0,0 +1,67 @@
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <malloc.h>
+#include <pthread.h>
+
+#ifndef NUM_THREADS
+#define NUM_THREADS     3
+#endif
+
+#ifndef NUM_ITER
+#define NUM_ITER    10000
+#endif
+
+#ifndef SIZE
+#define SIZE    16384
+#endif
+
+__thread void* buf;
+
+static void* perform_work( void* argument )
+{
+    /* every thread receives a pointer to its own slot of main()'s thread_args */
+    const int passed_in_value = *( ( const int* )argument );
+    int round = 0;
+
+    printf( "Hello World! It's me, thread %d with argument %d!\n", getpid(), passed_in_value );
+    /* allocate and immediately release blocks of growing size (SIZE*round bytes) */
+    while( round < NUM_ITER )
+    {
+        buf = malloc(SIZE*round);
+        free(buf);
+        ++round;
+    }
+    malloc_stats();
+    return NULL;
+}
+
+int main( int argc, char** argv )
+{
+    pthread_t threads[ NUM_THREADS ];
+    int thread_args[ NUM_THREADS ];
+    int result_code;
+    unsigned index;
+
+    // create all threads one by one; each gets a pointer to its own argument slot
+    for( index = 0; index < NUM_THREADS; ++index )
+    {
+        thread_args[ index ] = index;
+        printf("In main: creating thread %u\n", index);   // index is unsigned, hence %u
+        result_code = pthread_create( threads + index, NULL, perform_work, &thread_args[index] );
+        assert( !result_code );
+    }
+
+    // wait for each thread to complete
+    for( index = 0; index < NUM_THREADS; ++index )
+    {
+        // block until thread 'index' completes
+        result_code = pthread_join( threads[ index ], NULL );
+        assert( !result_code );
+        printf( "In main: thread %u has completed\n", index );
+    }
+
+    printf( "In main: All threads completed successfully\n" );
+    exit( EXIT_SUCCESS );
+}
diff --git a/usr/tests/test-malloc.c b/usr/tests/test-malloc.c
new file mode 100644
index 000000000..1da64da25
--- /dev/null
+++ b/usr/tests/test-malloc.c
@@ -0,0 +1,30 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <malloc.h>
+
+#ifndef NUM_ITER
+#define NUM_ITER    100000
+#endif
+
+#ifndef SIZE
+#define SIZE    (16*1024)	/* parenthesized so the macro expands safely inside any expression */
+#endif
+
+void* buf;
+
+int main(int argc, char** argv)
+{
+    int round = 0;
+
+    /* request SIZE*round bytes per round and release the block right away */
+    while (round < NUM_ITER) {
+        buf = malloc(SIZE*round);
+        free(buf);
+        round++;
+    }
+    malloc_stats();
+    return 0;
+}
+
+