diff --git a/CMakeLists.txt b/CMakeLists.txt
index 605730565..76774f4b0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -13,7 +13,11 @@ add_subdirectory(include/hermit)
 add_kernel_module_sources("kernel" "kernel/*.c")
 add_kernel_module_sources("libkern" "libkern/*.c")
 add_kernel_module_sources("mm" "mm/*.c")
+if("${TARGET_ARCH}" STREQUAL "x86_64-hermit")
 add_kernel_module_sources("drivers" "drivers/net/*.c")
+else()
+add_kernel_module_sources("drivers" "drivers/net/uhyve-net.c")
+endif()
 
 set(LWIP_SRC lwip/src)
 add_kernel_module_sources("lwip" "${LWIP_SRC}/api/*.c")
@@ -53,12 +57,18 @@ foreach(MODULE ${KERNEL_MODULES})
 endforeach()
 
+if("${TARGET_ARCH}" STREQUAL "aarch64-hermit")
+# add arch/aarch64 and its objects
+add_subdirectory(arch/aarch64)
+list(APPEND KERNEL_OBJECTS
+	$<TARGET_OBJECTS:${AARCH64_KERNEL_C_TARGET}>)
+else()
 # add arch/x86 and its objects
-# TODO: make this conditional when new architectures are implemented
 add_subdirectory(arch/x86)
 list(APPEND KERNEL_OBJECTS
 	$<TARGET_OBJECTS:${X86_KERNEL_C_TARGET}>
 	$<TARGET_OBJECTS:${X86_KERNEL_ASM_TARGET}>)
+endif()
 
 # finally build libhermit.a
 add_library(hermit-bootstrap STATIC ${KERNEL_OBJECTS})
@@ -67,7 +77,11 @@ set_target_properties(hermit-bootstrap PROPERTIES
 
 # after compiling ASM sources, we need to post-process them. Adding this
 # dependency makes sure that this is done before hermit is linked
+if("${TARGET_ARCH}" STREQUAL "aarch64-hermit")
+add_dependencies(hermit-bootstrap ${AARCH64_KERNEL_TARGET})
+else()
 add_dependencies(hermit-bootstrap ${X86_KERNEL_TARGET})
+endif()
 
 add_custom_command(
 	TARGET
@@ -117,7 +131,7 @@ install(DIRECTORY include/hermit
 # needed during the compilation of the cross toolchain
 add_custom_target(hermit-bootstrap-install
 	DEPENDS
-		hermit-bootstrap
+		hermit-bootstrap ${ARCH_KERNEL_TARGET}
 	COMMAND
 		${CMAKE_COMMAND}
 			-DCMAKE_INSTALL_COMPONENT=bootstrap
@@ -137,6 +151,9 @@ add_custom_target(hermit
 ## HermitCore's own tools such as Qemu/KVM proxy
 build_external(tools ${HERMIT_ROOT}/tools "")
 
+
+if("${TARGET_ARCH}" STREQUAL "x86_64-hermit")
+
 build_external(arch_x86_loader ${HERMIT_ROOT}/arch/x86/loader "")
 
 ## Intel's OpenMP runtime for x86 (libomp)
@@ -157,10 +174,15 @@ add_dependencies(hermit ircce)
 build_external(xray ${HERMIT_ROOT}/usr/xray "")
 add_dependencies(hermit xray)
 
+## end of x86 specific part
+endif()
+
 ## Tests and benchmarks
+if("${TARGET_ARCH}" STREQUAL "x86_64-hermit")
 build_external(tests ${HERMIT_ROOT}/usr/tests hermit)
 build_external(benchmarks ${HERMIT_ROOT}/usr/benchmarks hermit)
 build_external(openmpbench ${HERMIT_ROOT}/usr/openmpbench hermit)
+endif()
 
 ## relocate the local prefix to our install destination
 install(DIRECTORY ${LOCAL_PREFIX_DIR}/
diff --git a/arch/aarch64/CMakeLists.txt b/arch/aarch64/CMakeLists.txt
new file mode 100644
index 000000000..5aa676c68
--- /dev/null
+++ b/arch/aarch64/CMakeLists.txt
@@ -0,0 +1,32 @@
+cmake_minimum_required(VERSION 3.7)
+include(../../cmake/HermitCore.cmake)
+
+project(arch_aarch64_kernel C ASM)
+
+set_parent(AARCH64_KERNEL_TARGET ${PROJECT_NAME})
+set_parent(ARCH_KERNEL_TARGET ${PROJECT_NAME})
+set_parent(AARCH64_KERNEL_C_TARGET ${AARCH64_KERNEL_TARGET}_c)
+
+set_source_files_properties(kernel/*.S PROPERTIES COMPILE_FLAGS "-x assembler-with-cpp")
+
+add_custom_target(${AARCH64_KERNEL_TARGET})
+
+# compiling kernel code here
+add_definitions(-D__KERNEL__)
+
+### C sources ###
+
+file(GLOB KERNEL_SOURCES "kernel/*.c")
+file(GLOB KERNEL_S_SOURCES "kernel/*.S")
+file(GLOB LIBKERN_S_SOURCES "libkern/*.S")
+file(GLOB MM_SOURCES "mm/*.c")
+
+add_library(${AARCH64_KERNEL_C_TARGET} OBJECT
+	${KERNEL_SOURCES} ${KERNEL_S_SOURCES} ${MM_SOURCES} ${LIBKERN_S_SOURCES})
+
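+# Note (editorial sketch): the kernel sources above are compiled into a CMake
+# object library; the top-level CMakeLists.txt pulls these objects into
+# libhermit.a through $<TARGET_OBJECTS:...> generator expressions.
+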
+target_include_directories(${AARCH64_KERNEL_C_TARGET} BEFORE
+	PUBLIC ${HERMIT_KERNEL_INCLUDES}
+	PRIVATE ${GENERATED_CONFIG_DIR})
+
+target_compile_options(${AARCH64_KERNEL_C_TARGET}
+	PRIVATE ${HERMIT_KERNEL_FLAGS})
diff --git a/arch/aarch64/include/asm/atomic.h b/arch/aarch64/include/asm/atomic.h
new file mode 100644
index 000000000..5f2902c53
--- /dev/null
+++ b/arch/aarch64/include/asm/atomic.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2017, Stefan Lankes, RWTH Aachen University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of the University nor the names of its contributors
+ *      may be used to endorse or promote products derived from this
+ *      software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @author Stefan Lankes
+ * @file arch/aarch64/include/asm/atomic.h
+ * @brief Functions for atomic operations
+ *
+ * This file prepares atomic operations on int32_t & int64_t variables,
+ * which are used in locking mechanisms.
+ */
+
+#ifndef __ARCH_ATOMIC_H__
+#define __ARCH_ATOMIC_H__
+
+#include <hermit/stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static inline void atomic_barrier(void)
+{
+	asm volatile ("" : : : "memory");  /* compiler barrier only */
+}
+
+/** @brief Macro for initialization of atomic vars
+ *
+ * Whenever you use an atomic variable, init it with
+ * this macro first.\n
+ * Example: atomic_int32_t myAtomicVar = ATOMIC_INIT(123);
+ *
+ * @param i The number value you want to init it with.
+ */
+#define ATOMIC_INIT(i) { (i) }
+
+
+#include <asm/atomic32.h>
+#include <asm/atomic64.h>
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/arch/aarch64/include/asm/atomic32.h b/arch/aarch64/include/asm/atomic32.h
new file mode 100644
index 000000000..2f42655c5
--- /dev/null
+++ b/arch/aarch64/include/asm/atomic32.h
@@ -0,0 +1,167 @@
+/*
+ * Copyright (c) 2017, Stefan Lankes, RWTH Aachen University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of the University nor the names of its contributors
+ *      may be used to endorse or promote products derived from this
+ *      software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @author Stefan Lankes
+ * @file arch/aarch64/include/asm/atomic32.h
+ * @brief Functions for atomic operations
+ *
+ * This file defines functions for atomic operations on int32 variables
+ * which will be used in locking-mechanisms.
+ */
+
+#ifndef __ARCH_ATOMIC32_H__
+#define __ARCH_ATOMIC32_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** @brief Standard-datatype for atomic operations
+ *
+ * It just consists of an int32_t variable internally, marked as volatile.
+ */
+typedef struct { volatile int32_t counter; } atomic_int32_t;
+
+/** @brief Atomic test and set operation for int32 vars.
+ *
+ * This function will atomically exchange the value of an atomic variable and
+ * return its old value. It is used in locking operations.\n
+ * The exchange is implemented as a load-exclusive/store-exclusive
+ * (ldxr/stxr) loop that retries until the store succeeds.
+ *
+ * @param d Pointer to the atomic_int32_t with the value you want to exchange
+ * @param v The new value to store
+ *
+ * @return The old value of the atomic_int32_t var before the exchange
+ */
+inline static int32_t atomic_int32_test_and_set(atomic_int32_t* d, int32_t v)
+{
+	asm volatile(
+		"ldr w2, %1\n\t"        /* w2 = new value */
+		"%=:\n\t"
+		"ldxr w0, %0\n\t"       /* w0 = current value (load-exclusive) */
+		"stxr w1, w2, %0\n\t"   /* try to store the new value */
+		"cbnz w1, %=b\n\t"      /* store failed => retry */
+		"str w0, %1"            /* hand the old value back through v */
+		: "+Q"(d->counter), "+m"(v)
+		:
+		: "memory", "w0", "w1", "w2");
+	return v;
+}
+
+/** @brief Atomic addition of values to atomic_int32_t vars
+ *
+ * This function lets you add values in an atomic operation
+ *
+ * @param d Pointer to the atomic_int32_t var you want to add a value to
+ * @param i The value you want to increment by
+ *
+ * @return The mathematical result
+ */
+inline static int32_t atomic_int32_add(atomic_int32_t *d, int32_t i)
+{
+	asm volatile(
+		"ldr w1, %1\n\t"        /* w1 = addend */
+		"%=:\n\t"
+		"ldxr w0, %0\n\t"
+		"add w0, w0, w1\n\t"
+		"stxr w2, w0, %0\n\t"   /* status goes to w2 so the addend survives a retry */
+		"cbnz w2, %=b\n\t"
+		"str w0, %1"
+		: "+Q"(d->counter), "+m"(i)
+		:
+		: "memory", "w0", "w1", "w2");
+	return i;
+}
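+
+/* Illustrative note: with a compiler that supports the GCC __atomic builtins,
+ * the operation above is semantically equivalent to
+ *
+ *     __atomic_add_fetch(&d->counter, i, __ATOMIC_SEQ_CST);
+ *
+ * the hand-written ldxr/stxr loop merely avoids a libatomic dependency. */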
+
+/** @brief Atomic subtraction of values from atomic_int32_t vars
+ *
+ * This function lets you subtract values in an atomic operation.\n
+ * This function is just for convenience. It uses atomic_int32_add(d, -i)
+ *
+ * @param d Pointer to the atomic_int32_t var you want to subtract from
+ * @param i The value you want to subtract by
+ *
+ * @return The mathematical result
+ */
+inline static int32_t atomic_int32_sub(atomic_int32_t *d, int32_t i)
+{
+	return atomic_int32_add(d, -i);
+}
+
+/** @brief Atomic increment by one
+ *
+ * The atomic_int32_t var will be atomically incremented by one.\n
+ *
+ * @param d The atomic_int32_t var you want to increment
+ */
+inline static int32_t atomic_int32_inc(atomic_int32_t* d) {
+	return atomic_int32_add(d, 1);
+}
+
+/** @brief Atomic decrement by one
+ *
+ * The atomic_int32_t var will be atomically decremented by one.\n
+ *
+ * @param d The atomic_int32_t var you want to decrement
+ */
+inline static int32_t atomic_int32_dec(atomic_int32_t* d) {
+	return atomic_int32_add(d, -1);
+}
+
+/** @brief Read out an atomic_int32_t var
+ *
+ * This function is for convenience: It looks into the atomic_int32_t struct
+ * and returns the internal value for you.
+ *
+ * @param d Pointer to the atomic_int32_t var you want to read out
+ * @return Its numeric value
+ */
+inline static int32_t atomic_int32_read(atomic_int32_t *d) {
+	return d->counter;
+}
+
+/** @brief Set the value of an atomic_int32_t var
+ *
+ * This function is for convenience: It sets the internal value of
+ * an atomic_int32_t var for you.
+ *
+ * @param d Pointer to the atomic_int32_t var you want to set
+ * @param v The value to set
+ */
+inline static void atomic_int32_set(atomic_int32_t *d, int32_t v) {
+	atomic_int32_test_and_set(d, v);
+}
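+
+/* Usage sketch (illustrative only): a minimal spinlock built on this API.
+ *
+ *     static atomic_int32_t lock = ATOMIC_INIT(0);
+ *
+ *     void lock_acquire(void) {
+ *         while (atomic_int32_test_and_set(&lock, 1))
+ *             ;  // spin until the old value was 0
+ *     }
+ *
+ *     void lock_release(void) {
+ *         atomic_int32_set(&lock, 0);
+ *     }
+ */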
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/arch/aarch64/include/asm/atomic64.h b/arch/aarch64/include/asm/atomic64.h
new file mode 100644
index 000000000..125891a37
--- /dev/null
+++ b/arch/aarch64/include/asm/atomic64.h
@@ -0,0 +1,167 @@
+/*
+ * Copyright (c) 2017, Stefan Lankes, RWTH Aachen University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of the University nor the names of its contributors
+ *      may be used to endorse or promote products derived from this
+ *      software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @author Stefan Lankes
+ * @file arch/aarch64/include/asm/atomic64.h
+ * @brief Functions for atomic operations
+ *
+ * This file defines functions for atomic operations on int64 variables
+ * which will be used in locking-mechanisms.
+ */
+
+#ifndef __ARCH_ATOMIC64_H__
+#define __ARCH_ATOMIC64_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** @brief Standard-datatype for atomic operations
+ *
+ * It just consists of an int64_t variable internally, marked as volatile.
+ */
+typedef struct { volatile int64_t counter; } atomic_int64_t;
+
+/** @brief Atomic test and set operation for int64 vars.
+ *
+ * This function will atomically exchange the value of an atomic variable and
+ * return its old value. It is used in locking operations.\n
+ * The exchange is implemented as a load-exclusive/store-exclusive
+ * (ldxr/stxr) loop that retries until the store succeeds.
+ *
+ * @param d Pointer to the atomic_int64_t with the value you want to exchange
+ * @param v The new value to store
+ *
+ * @return The old value of the atomic_int64_t var before the exchange
+ */
+inline static int64_t atomic_int64_test_and_set(atomic_int64_t* d, int64_t v)
+{
+	asm volatile(
+		"ldr x2, %1\n\t"        /* x2 = new value */
+		"%=:\n\t"
+		"ldxr x0, %0\n\t"       /* x0 = current value (load-exclusive) */
+		"stxr w1, x2, %0\n\t"   /* try to store the new value */
+		"cbnz w1, %=b\n\t"      /* store failed => retry */
+		"str x0, %1"            /* hand the old value back through v */
+		: "+Q"(d->counter), "+m"(v)
+		:
+		: "memory", "x0", "x1", "x2");
+	return v;
+}
+
+/** @brief Atomic addition of values to atomic_int64_t vars
+ *
+ * This function lets you add values in an atomic operation
+ *
+ * @param d Pointer to the atomic_int64_t var you want to add a value to
+ * @param i The value you want to increment by
+ *
+ * @return The mathematical result
+ */
+inline static int64_t atomic_int64_add(atomic_int64_t *d, int64_t i)
+{
+	asm volatile(
+		"ldr x1, %1\n\t"        /* x1 = addend */
+		"%=:\n\t"
+		"ldxr x0, %0\n\t"
+		"add x0, x0, x1\n\t"
+		"stxr w2, x0, %0\n\t"   /* status goes to w2 so the addend survives a retry */
+		"cbnz w2, %=b\n\t"
+		"str x0, %1"
+		: "+Q"(d->counter), "+m"(i)
+		:
+		: "memory", "x0", "x1", "x2");
+	return i;
+}
+
+/** @brief Atomic subtraction of values from atomic_int64_t vars
+ *
+ * This function lets you subtract values in an atomic operation.\n
+ * This function is just for convenience. It uses atomic_int64_add(d, -i)
+ *
+ * @param d Pointer to the atomic_int64_t var you want to subtract from
+ * @param i The value you want to subtract by
+ *
+ * @return The mathematical result
+ */
+inline static int64_t atomic_int64_sub(atomic_int64_t *d, int64_t i)
+{
+	return atomic_int64_add(d, -i);
+}
+
+/** @brief Atomic increment by one
+ *
+ * The atomic_int64_t var will be atomically incremented by one.\n
+ *
+ * @param d The atomic_int64_t var you want to increment
+ */
+inline static int64_t atomic_int64_inc(atomic_int64_t* d) {
+	return atomic_int64_add(d, 1);
+}
+
+/** @brief Atomic decrement by one
+ *
+ * The atomic_int64_t var will be atomically decremented by one.\n
+ *
+ * @param d The atomic_int64_t var you want to decrement
+ */
+inline static int64_t atomic_int64_dec(atomic_int64_t* d) {
+	return atomic_int64_add(d, -1);
+}
+
+/** @brief Read out an atomic_int64_t var
+ *
+ * This function is for convenience: It looks into the atomic_int64_t struct
+ * and returns the internal value for you.
+ *
+ * @param d Pointer to the atomic_int64_t var you want to read out
+ * @return Its numeric value
+ */
+inline static int64_t atomic_int64_read(atomic_int64_t *d) {
+	return d->counter;
+}
+
+/** @brief Set the value of an atomic_int64_t var
+ *
+ * This function is for convenience: It sets the internal value of
+ * an atomic_int64_t var for you.
+ *
+ * @param d Pointer to the atomic_int64_t var you want to set
+ * @param v The value to set
+ */
+inline static void atomic_int64_set(atomic_int64_t *d, int64_t v) {
+	atomic_int64_test_and_set(d, v);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/arch/aarch64/include/asm/io.h b/arch/aarch64/include/asm/io.h
new file mode 100644
index 000000000..5c0b36205
--- /dev/null
+++ b/arch/aarch64/include/asm/io.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2017, Stefan Lankes, RWTH Aachen University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of the University nor the names of its contributors
+ *      may be used to endorse or promote products derived from this
+ *      software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @author Stefan Lankes
+ * @file arch/aarch64/include/asm/io.h
+ * @brief Functions related to processor IO
+ *
+ * This file contains inline functions for processor IO operations.
+ * On AArch64 there is no separate I/O port space, so the "port" argument
+ * is dereferenced as a memory address.
+ */
+
+#ifndef __ARCH_IO_H__
+#define __ARCH_IO_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** @brief Read a byte from an IO port
+ *
+ * @param _port The port you want to read from
+ * @return The value read from this port
+ */
+inline static unsigned char inportb(unsigned short _port)
+{
+	return *((volatile unsigned char*)(size_t)_port);
+}
+
+/** @brief Read a word (2 bytes) from an IO port
+ *
+ * @param _port The port you want to read from
+ * @return The value read from this port
+ */
+inline static unsigned short inportw(unsigned short _port)
+{
+	return *((volatile unsigned short*)(size_t)_port);
+}
+
+/** @brief Read a double word (4 bytes) from an IO port
+ *
+ * @param _port The port you want to read from
+ * @return The value read from this port
+ */
+inline static unsigned int inportl(unsigned short _port)
+{
+	return *((volatile unsigned int*)(size_t)_port);
+}
+
+/** @brief Write a byte to an IO port
+ *
+ * @param _port The port you want to write to
+ * @param _data the 1 byte value you want to write
+ */
+inline static void outportb(unsigned short _port, unsigned char _data)
+{
+	*((volatile unsigned char*)(size_t)_port) = _data;
+}
+
+/** @brief Write a word (2 bytes) to an IO port
+ *
+ * @param _port The port you want to write to
+ * @param _data the 2 byte value you want to write
+ */
+inline static void outportw(unsigned short _port, unsigned short _data)
+{
+	*((volatile unsigned short*)(size_t)_port) = _data;
+}
+
+/** @brief Write a double word (4 bytes) to an IO port
+ *
+ * @param _port The port you want to write to
+ * @param _data the 4 byte value you want to write
+ */
+inline static void outportl(unsigned short _port, unsigned int _data)
+{
+	*((volatile unsigned int*)(size_t)_port) = _data;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/arch/aarch64/include/asm/irq.h b/arch/aarch64/include/asm/irq.h
new file mode 100644
index 000000000..6aa2fa2be
--- /dev/null
+++ b/arch/aarch64/include/asm/irq.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2017, Stefan Lankes, RWTH Aachen University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of the University nor the names of its contributors
+ *      may be used to endorse or promote products derived from this
+ *      software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @author Stefan Lankes
+ * @file arch/aarch64/include/asm/irq.h
+ * @brief Functions related to IRQs
+ *
+ * This file contains functions and a pointer type related to interrupt requests.
+ */
+
+#ifndef __ARCH_IRQ_H__
+#define __ARCH_IRQ_H__
+
+#include <hermit/stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* GIC related constants */
+#define GICD_BASE (1ULL << 39)
+#define GICD_SIZE 0x010000ULL
+#define GICC_SIZE 0x020000ULL
+#define GICC_BASE (GICD_BASE + GICD_SIZE)
+#define GIC_SIZE (GICD_SIZE + GICC_SIZE)
+
+/* interrupts */
+#define INT_PPI_VMAINT (16+9)
+#define INT_PPI_HYP_TIMER (16+10)
+#define INT_PPI_VIRT_TIMER (16+11)
+#define INT_PPI_SPHYS_TIMER (16+13)
+#define INT_PPI_NSPHYS_TIMER (16+14)
+
+/** @brief Pointer type for IRQ-handling functions
+ *
+ * Whenever you write an IRQ-handling function it has to match this signature.
+ */
+typedef void (*irq_handler_t)(struct state *);
+
+/** @brief Install a custom IRQ handler for a given IRQ
+ *
+ * @param irq The desired irq
+ * @param handler The handler to install
+ */
+int irq_install_handler(unsigned int irq, irq_handler_t handler);
+
+/** @brief Clear the handler for a given IRQ
+ *
+ * @param irq The handler's IRQ
+ */
+int irq_uninstall_handler(unsigned int irq);
+
+/** @brief Procedure to initialize IRQ handling
+ *
+ * On AArch64 this is only a stub; the GIC is brought up later
+ * by irq_post_init().
+ *
+ * @return Just returns 0 in any case
+ */
+static inline int irq_init(void) { return 0; }
+
+/** @brief Reset the counters of the received interrupts
+ */
+void reset_irq_stats(void);
+
+/** @brief Print the number of received interrupts
+ */
+void print_irq_stats(void);
+
+/** @brief Switch from a fixed to a dynamic timer period
+ *
+ * @return 0 on success
+ */
+inline static int enable_dynticks(void) { return 0; }
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/arch/aarch64/include/asm/irqflags.h b/arch/aarch64/include/asm/irqflags.h
new file mode 100644
index 000000000..e2038400a
--- /dev/null
+++ b/arch/aarch64/include/asm/irqflags.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2017, Stefan Lankes, RWTH Aachen University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of the University nor the names of its contributors
+ *      may be used to endorse or promote products derived from this
+ *      software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @author Stefan Lankes
+ * @file arch/aarch64/include/asm/irqflags.h
+ * @brief Functions related to IRQ configuration
+ *
+ * This file contains definitions of inline functions
+ * for enabling and disabling exception handling.
+ */
+
+#ifndef __ARCH_IRQFLAGS_H__
+#define __ARCH_IRQFLAGS_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define IRQ_FLAG_F (1<<6)
+#define IRQ_FLAG_I (1<<7)
+#define IRQ_FLAG_A (1<<8)
+#define IRQ_FLAG_D (1<<9)
+
+inline static size_t get_daif(void)
+{
+	size_t flags;
+	asm volatile("mrs %0, daif" : "=r"(flags) :: "memory");
+	return flags;
+}
+
+/** @brief Determines whether the exception mask bits (DAIF) allow exceptions
+ *
+ * @return
+ * - 1 if the mask bits are cleared and exceptions are allowed
+ * - 0 if exceptions are masked
+ */
+inline static uint8_t is_irq_enabled(void)
+{
+	size_t flags = get_daif();
+	if (flags & (IRQ_FLAG_A|IRQ_FLAG_I|IRQ_FLAG_F))
+		return 0;
+	return 1;
+}
+
+/** @brief Disable IRQs
+ *
+ * This inline function just sets the exception bit mask bits
+ */
+static inline void irq_disable(void) {
+	asm volatile("msr daifset, 0b111" ::: "memory");
+}
+
+/** @brief Enable IRQs
+ *
+ * This inline function just clears the exception bit mask bits
+ */
+static inline void irq_enable(void) {
+	asm volatile("msr daifclr, 0b111" ::: "memory");
+}
+
+/** @brief Disable IRQs (nested)
+ *
+ * Disable IRQs when unsure if IRQs were enabled at all.
+ * This function together with irq_nested_enable can be used
+ * in situations when interrupts shouldn't be activated if they
+ * were not activated before calling this function.
+ *
+ * @return Whether IRQs had been enabled or not before disabling
+ */
+inline static uint8_t irq_nested_disable(void) {
+	uint8_t was_enabled = is_irq_enabled();
+	irq_disable();
+	return was_enabled;
+}
+
+/** @brief Enable IRQs (nested)
+ *
+ * Can be used in conjunction with irq_nested_disable() to only enable
+ * interrupts again if they were enabled before.
+ *
+ * @param was_enabled Whether IRQs should be enabled or not
+ */
+inline static void irq_nested_enable(uint8_t was_enabled) {
+	if (was_enabled)
+		irq_enable();
+}
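+
+/* Usage sketch (illustrative only): protect a critical section when the
+ * caller's interrupt state is unknown.
+ *
+ *     uint8_t was_enabled = irq_nested_disable();
+ *     // ... critical section ...
+ *     irq_nested_enable(was_enabled);
+ */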
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/arch/aarch64/include/asm/limits.h b/arch/aarch64/include/asm/limits.h
new file mode 100644
index 000000000..3e2a1c142
--- /dev/null
+++ b/arch/aarch64/include/asm/limits.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2010, Stefan Lankes, RWTH Aachen University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of the University nor the names of its contributors
+ *      may be used to endorse or promote products derived from this
+ *      software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @author Stefan Lankes
+ * @file arch/aarch64/include/asm/limits.h
+ * @brief Define constants related to numerical value-ranges of variable types
+ *
+ * This file contains define constants for the numerical
+ * ranges of the most typical variable types.
+ */
+
+#ifndef __ARCH_LIMITS_H__
+#define __ARCH_LIMITS_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Number of bits in a char */
+#define CHAR_BIT 8
+
+/** Maximum value for a signed char */
+#define SCHAR_MAX 0x7f
+/** Minimum value for a signed char */
+#define SCHAR_MIN (-0x7f - 1)
+
+/** Maximum value for an unsigned char */
+#define UCHAR_MAX 0xff
+
+/** Maximum value for an unsigned short */
+#define USHRT_MAX 0xffff
+/** Maximum value for a short */
+#define SHRT_MAX 0x7fff
+/** Minimum value for a short */
+#define SHRT_MIN (-0x7fff - 1)
+
+/** Maximum value for an unsigned int */
+#define UINT_MAX 0xffffffffU
+/** Maximum value for an int */
+#define INT_MAX 0x7fffffff
+/** Minimum value for an int */
+#define INT_MIN (-0x7fffffff - 1)
+
+/** Maximum value for an unsigned long (long is 64 bit on AArch64) */
+#define ULONG_MAX 0xffffffffffffffffUL
+/** Maximum value for a long */
+#define LONG_MAX 0x7fffffffffffffffL
+/** Minimum value for a long */
+#define LONG_MIN (-0x7fffffffffffffffL - 1)
+
+/** Maximum value for an unsigned long long */
+#define ULLONG_MAX 0xffffffffffffffffULL
+/** Maximum value for a long long */
+#define LLONG_MAX 0x7fffffffffffffffLL
+/** Minimum value for a long long */
+#define LLONG_MIN (-0x7fffffffffffffffLL - 1)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/arch/aarch64/include/asm/page.h b/arch/aarch64/include/asm/page.h
new file mode 100644
index 000000000..f946a50cc
--- /dev/null
+++ b/arch/aarch64/include/asm/page.h
@@ -0,0 +1,243 @@
+/*
+ * Copyright (c) 2017, Stefan Lankes, RWTH Aachen University
+ *               2014, Steffen Vogel, RWTH Aachen University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of the University nor the names of its contributors
+ *      may be used to endorse or promote products derived from this
+ *      software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @author Stefan Lankes
+ * @file arch/aarch64/include/asm/page.h
+ * @brief Paging related functions
+ *
+ * This file contains the several functions to manage the page tables
+ */
+
+#include <hermit/stddef.h>
+#include <hermit/stdlib.h>
+#include <hermit/string.h>
+
+#ifndef __PAGE_H__
+#define __PAGE_H__
+
+/// Page offset bits
+#define PAGE_BITS 12
+#define PAGE_2M_BITS 21
+/// The size of a single page in bytes
+#define PAGE_SIZE ( 1L << PAGE_BITS)
+#define PAGE_MASK ((~0L) << PAGE_BITS)
+#define PAGE_2M_MASK ((~0L) << PAGE_2M_BITS)
+
+/// Total operand width in bits
+#define BITS 64
+/// Physical address width (maximum value)
+#define PHYS_BITS 48
+/// Linear/virtual address width
+#define VIRT_BITS 48
+/// Page map bits
+#define PAGE_MAP_BITS 9
+/// Number of page map indirections
+#define PAGE_LEVELS 4
+
+/// The number of entries in a page map table
+#define PAGE_MAP_ENTRIES (1L << PAGE_MAP_BITS)
+
+/// Align to next page
+#define PAGE_CEIL(addr) (((addr) + PAGE_SIZE - 1) & PAGE_MASK)
+/// Align to page
+#define PAGE_FLOOR(addr) ( (addr) & PAGE_MASK)
+
+/// Align to next 2M boundary
+#define PAGE_2M_CEIL(addr) (((addr) + (1L << 21) - 1) & ((~0L) << 21))
+/// Align to 2M boundary
+#define PAGE_2M_FLOOR(addr) ( (addr) & ((~0L) << 21))
+// Align the kernel end
+#define KERNEL_END_CEIL(addr) (((addr) + (16L << 10)) & ~0xFFFF)
+
+/// Page is present
+#define PG_PRESENT (1UL << 0)
+/// Page is read- and writable
+#define PG_RW (1UL << 1)
+/// Page is addressable from userspace
+#define PG_USER (1UL << 2)
+/// Page write through is activated
+#define PG_PWT (1UL << 3)
+/// Page cache is disabled
+#define PG_PCD (1UL << 4)
+/// Page was recently accessed (set by CPU)
+#define PG_ACCESSED (1UL << 5)
+/// Page is dirty due to recent write-access (set by CPU)
+#define PG_DIRTY (1UL << 6)
+/// Huge page: 2MB (or 1GB)
+#define PG_PSE (1UL << 7)
+/// Page attribute table
+#define PG_PAT PG_PSE
+
+#define PG_DEVICE (1UL << 8)
+#define PG_NX 0
+#define PG_GLOBAL 0
+
+/// This table is a self-reference and should be skipped by page_map_copy()
+#define PG_SELF (1UL << 63)
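+
+/* Note: unlike the x86-style PG_* flags above (kept for API compatibility),
+ * the PT_* values below are complete AArch64 descriptor templates, combining
+ * the valid/table bits, the MAIR attribute index, shareability and the
+ * Access Flag into a single constant. */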
+
+#define PT_PT 0x713UL
+#define PT_MEM 0x713UL
+#define PT_MEM_CD 0x70FUL
+#define PT_DEVICE 0x707UL
+
+#define PT_SELF (1UL << 55)
+#define PT_AF (1UL << 10)     /* Access Flag */
+#define PT_CONTIG (1UL << 52) /* Contiguous bit */
+#define PT_S (3UL << 8)
+#define PT_PXN (1UL << 53)
+#define PT_UXN (1UL << 54)
+
+/** @brief Converts a virtual address to a physical
+ *
+ * A non-mapped virtual address causes a pagefault!
+ *
+ * @param addr Virtual address to convert
+ * @return physical address
+ */
+size_t virt_to_phys(size_t addr);
+
+/** @brief Initialize paging subsystem
+ *
+ * This function uses the existing bootstrap page tables (boot_{pgd, pgt})
+ * to map required regions (video memory, kernel, etc..).
+ * Before calling page_init(), the bootstrap tables contain a simple identity
+ * mapping, which is then replaced by more specific mappings.
+ */
+int page_init(void);
+
+/** @brief Map a continuous region of pages
+ *
+ * @param viraddr Desired virtual address
+ * @param phyaddr Physical address to map from
+ * @param npages The region's size in number of pages
+ * @param bits Further page flags
+ * @return 0 on success
+ */
+int __page_map(size_t viraddr, size_t phyaddr, size_t npages, size_t bits);
+
+/** @brief Map a continuous region of pages
+ *
+ * @param viraddr Desired virtual address
+ * @param phyaddr Physical address to map from
+ * @param npages The region's size in number of pages
+ * @param bits Further page flags
+ * @return 0 on success
+ */
+static inline int page_map(size_t viraddr, size_t phyaddr, size_t npages, size_t bits)
+{
+	return __page_map(viraddr, phyaddr, npages, bits);
+}
+
+/** @brief Unmap a continuous region of pages
+ *
+ * @param viraddr The virtual start address
+ * @param npages The range's size in pages
+ * @return 0 on success
+ */
+int page_unmap(size_t viraddr, size_t npages);
+
+/** @brief Change the page permission in the page tables of the current task
+ *
+ * Applies the given flags noted in the 'flags' parameter to
+ * the range denoted by the virtual start address and the page count.
+ *
+ * @param viraddr Range's virtual start address
+ * @param npages Number of pages in the range
+ * @param flags flags to apply
+ *
+ * @return
+ * - 0 on success
+ * - -EINVAL (-22) on failure.
+ */
+int page_set_flags(size_t viraddr, uint32_t npages, int flags);
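+
+/* Usage sketch (illustrative only): back one virtual page with a freshly
+ * allocated physical page, then release the mapping again. get_pages() and
+ * put_pages() are assumed to be the kernel's physical page allocator.
+ *
+ *     size_t phyaddr = get_pages(1);
+ *     page_map(viraddr, phyaddr, 1, PG_RW | PG_GLOBAL);
+ *     // ... use the memory at viraddr ...
+ *     page_unmap(viraddr, 1);
+ *     put_pages(phyaddr, 1);
+ */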
+ */ +int page_fault_handler(size_t viraddr); + +/** @brief Flush Translation Lookaside Buffer + */ +static inline void tlb_flush(void) +{ + asm volatile( + "dsb ishst\n\t" // ensure write has completed + "tlbi vmalle1is\n\t" // invalidate all TLB entries + "dsb ish\n\t" // ensure completion of TLB invalidation + "isb" // synchronize context + : + : + : "memory" + ); +} + +/** @brief Flush a specific page entry in TLB + * @param addr The (virtual) address of the page to flush + */ +static inline void tlb_flush_one_page(size_t addr) +{ + addr = addr >> PAGE_BITS; + + asm volatile( + "dsb ishst\n\t" // ensure write has completed + "tlbi vale1is, %0 \n\t" + "dsb ish\n\t" // ensure completion of TLB invalidation + "isb" // synchronize context + : + : "r"(addr) + : "memory" + ); +} + +/** @brief Flush a range of page entries in TLB + * @param addr The (virtual) start address + * @param end The (virtual) end address + */ +static inline void tlb_flush_range(size_t start, size_t end) +{ + if ((end - start) > (1024ULL << PAGE_BITS)) { + tlb_flush(); + return; + } + + start = start >> PAGE_BITS; + end = end >> PAGE_BITS; + + asm volatile ("dsb ishst" ::: "memory"); + for (size_t addr = start; addr < end; addr++) + asm("tlbi vaae1is, %0" :: "r"(addr)); + asm volatile ("dsb ish" ::: "memory"); + asm volatile ("isb" ::: "memory"); +} + +#endif diff --git a/arch/aarch64/include/asm/pci.h b/arch/aarch64/include/asm/pci.h new file mode 100644 index 000000000..2607d9ff4 --- /dev/null +++ b/arch/aarch64/include/asm/pci.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2010-2015, Stefan Lankes, RWTH Aachen University + * All rights reserved. + * + * * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +/** + * @author Stefan Lankes + * @file arch/aarch64/include/asm/pci.h + * @brief functions related to PCI initialization and information + * + * This file contains a procedure to initialize the PCI environment + * and functions to access information about specific PCI devices. 
+ */ + +#ifndef __ARCH_PCI_H__ +#define __ARCH_PCI_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + uint32_t base[6]; + uint32_t size[6]; + uint32_t irq; +} pci_info_t; + +#define PCI_IGNORE_SUBID (0) + +/** @brief Initialize the PCI environment + */ +int pci_init(void); + +/** @brief Determine the IObase address and the interrupt number of a specific device + * + * @param vendor_id The device's vendor ID + * @param device_id The device's ID + * @param subystem_id The subsystem DI + * @param info Pointer to the record pci_info_t where among other the IObase address will be stored + * @param enable_bus_master If true, the bus mastering will be enabled. + * + * @return + * - 0 on success + * - -EINVAL (-22) on failure + */ +int pci_get_device_info(uint32_t vendor_id, uint32_t device_id, uint32_t subsystem_id, pci_info_t* info, int8_t enble_bus_master); + +/** @brief Print information of existing pci adapters + * + * @return 0 in any case + */ +int print_pci_adapters(void); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/arch/aarch64/include/asm/processor.h b/arch/aarch64/include/asm/processor.h new file mode 100644 index 000000000..3a1d2cb16 --- /dev/null +++ b/arch/aarch64/include/asm/processor.h @@ -0,0 +1,450 @@ +/* + * Copyright (c) 2017, Stefan Lankes, RWTH Aachen University + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @author Stefan Lankes + * @file arch/arm64/include/asm/processor.h + * @brief CPU-specific functions + * + * This file contains structures and functions related to CPU-specific assembler commands. 
+ */ + +#ifndef __ARCH_PROCESSOR_H__ +#define __ARCH_PROCESSOR_H__ + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define ARMV8_PMCR_E (1 << 0) /* Enable all counters */ +#define ARMV8_PMCR_P (1 << 1) /* Reset all counters */ +#define ARMV8_PMCR_C (1 << 2) /* Cycle counter reset */ + +#define ARMV8_PMUSERENR_EN (1 << 0) /* EL0 access enable */ +#define ARMV8_PMUSERENR_CR (1 << 2) /* Cycle counter read enable */ +#define ARMV8_PMUSERENR_ER (1 << 3) /* Event counter read enable */ + +#define ARMV8_PMCNTENSET_EL0_EN (1 << 31) /* Performance Monitors Count Enable Set register */ + +/* interrupts */ +#define INT_PPI_VMAINT (16+9) +#define INT_PPI_HYP_TIMER (16+10) +#define INT_PPI_VIRT_TIMER (16+11) +#define INT_PPI_SPHYS_TIMER (16+13) +#define INT_PPI_NSPHYS_TIMER (16+14) + +// determine the cpu features +int cpu_detection(void); + +inline static int has_hbmem(void) { + return 0; +} + +inline static size_t get_hbmem_base(void) { + return 0; +} + +inline static size_t get_hbmem_size(void) { + return 0; +} + +static inline uint32_t get_isr(void) +{ + uint32_t status; + asm volatile("mrs %0, isr_el1" : "=r"(status)); + return status; +} + +static inline uint32_t get_sctlr(void) +{ + uint32_t status; + asm volatile("mrs %0, sctlr_el1" : "=r"(status)); + return status; +} + +/** @brief get the current exception level + * + * Helper function to get the current exception level + * + */ +static inline uint32_t get_current_el(void) +{ + uint32_t curr; + asm volatile("mrs %0, CurrentEL" : "=r"(curr)); + return (curr>>2) & 0x3; +} + +/** @brief Get thread local storage + * + * Helper function to get the TLS of the current task + */ +static inline size_t get_tls(void) { + uint64_t addr = 0; + asm volatile( + "mrs %0, tpidr_el0" + : "+r"(addr) + : + : ); + return addr; +} + +/** @brief Set thread local storage + * + * Helper function to set the TLS of the current task + */ +static inline void set_tls(size_t addr) { + asm volatile( + "msr tpidr_el0, %0" + : "=r"(addr) + : + : ); +} + +/** @brief Read id_aa64mmfr0_el1 register + * @return id_aa64mmfr0_el1's value + */ +static inline size_t read_id_aa64mmfr0(void) { + size_t val; + asm volatile("mrs %0, id_aa64mmfr0_el1" : "=r"(val) :: "memory"); + return val; +} + +/** @brief Read sctlr_el1 register + * @return sctlr_el1's value + */ +static inline size_t read_sctlr(void) { + size_t val; + asm volatile("mrs %0, sctlr_el1" : "=r"(val) :: "memory"); + return val; +} + +/** @brief Write a value into sctlr_el1 register + * @param val The value you want to write into sctlr_el1 + */ +static inline void write_sctlr(size_t val) { + asm volatile("msr sctlr_el1, %0" :: "r"(val) : "memory"); +} + +/** @brief Read tcr_el1 register + * @return tcr_el1's value + */ +static inline size_t read_tcr(void) { + size_t val; + asm volatile("mrs %0, tcr_el1" : "=r"(val) :: "memory"); + return val; +} + +/** @brief Write a value into tcr_el1 register + * @param val The value you want to write into tcr_el1 + */ +static inline void write_tcr(size_t val) { + asm volatile("msr tcr_el1, %0" :: "r"(val) : "memory"); +} + +/** @brief Read mair_el1 register + * @return mair_el1's value + */ +static inline size_t read_mair(void) { + size_t val; + asm volatile("mrs %0, mair_el1" : "=r"(val) :: "memory"); + return val; +} + +/** @brief Write a value into mair_el1 register + * @param val The value you want to write into mair_el1 + */ +static inline void write_mair(size_t val) { + asm volatile("msr mair_el1, %0" :: "r"(val) : "memory"); +} + +/** @brief Read 
+static inline size_t read_ttbr0(void) {
+	size_t val;
+	asm volatile("mrs %0, ttbr0_el1" : "=r"(val) :: "memory");
+	return val;
+}
+
+/** @brief Write a value into ttbr0_el1 register
+ * @param val The value you want to write into ttbr0_el1
+ */
+static inline void write_ttbr0(size_t val) {
+	asm volatile("msr ttbr0_el1, %0" :: "r"(val) : "memory");
+}
+
+/** @brief Read ttbr1_el1 register
+ * @return ttbr1_el1's value
+ */
+static inline size_t read_ttbr1(void) {
+	size_t val;
+	asm volatile("mrs %0, ttbr1_el1" : "=r"(val) :: "memory");
+	return val;
+}
+
+/** @brief Write a value into ttbr1_el1 register
+ * @param val The value you want to write into ttbr1_el1
+ */
+static inline void write_ttbr1(size_t val) {
+	asm volatile("msr ttbr1_el1, %0" :: "r"(val) : "memory");
+}
+
+/** @brief Read far_el1 register
+ * @return faulting virtual address
+ */
+static inline size_t read_far(void) {
+	size_t val = 0;
+	asm volatile("mrs %0, far_el1" : "=r"(val) :: "memory");
+	return val;
+}
+
+/** @brief Read esr_el1 register
+ * @return esr_el1's (Exception Syndrome Register) value
+ */
+static inline size_t read_esr(void) {
+	size_t val;
+	asm volatile("mrs %0, esr_el1" : "=r"(val) :: "memory");
+	return val;
+}
+
+static inline uint64_t get_cntpct(void)
+{
+	uint64_t value;
+	asm volatile("mrs %0, cntpct_el0" : "=r" (value) :: "memory");
+	return value;
+}
+
+inline static uint64_t get_rdtsc(void) { return get_cntpct(); }
+
+/// A one-instruction-do-nothing
+#define NOP asm volatile ("nop")
+/// The PAUSE instruction provides a hint to the processor that the code sequence is a spin-wait loop.
+#define PAUSE asm volatile ("yield")
+/// The HALT instruction stops the processor until the next interrupt arrives
+#define HALT asm volatile ("wfi")
+
+/// Force strict CPU ordering, serializes load and store operations.
+static inline void mb(void) { asm volatile ("dmb ish" : : : "memory"); }
+/// Force strict CPU ordering, serializes load operations.
+static inline void rmb(void) { asm volatile ("dmb ishld" : : : "memory"); }
+/// Force strict CPU ordering, serializes store operations.
+static inline void wmb(void) { asm volatile ("dmb ishst" : : : "memory"); }
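+
+/* Usage sketch (illustrative only): publish data to another observer. The
+ * store barrier orders the payload write before the flag that signals it.
+ *
+ *     shared->data = value;
+ *     wmb();
+ *     shared->ready = 1;
+ */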
+
+/** @brief Search the most significant set bit
+ *
+ * @param i source operand
+ * @return
+ * - index of the most significant bit set in the source operand
+ * - the operand width in bits (64), if no bit is set
+ */
+static inline size_t msb(size_t i) {
+	size_t ret, tmp = 63;
+
+	if (!i)
+		return (sizeof(size_t)*8);
+	asm volatile (
+		"clz %0, %1\n\t"
+		"sub %0, %2, %0"
+		: "=r"(ret)
+		: "r"(i), "r"(tmp)
+		: "cc");
+
+	return ret;
+}
+
+static inline uint32_t get_cntfrq(void)
+{
+	uint32_t val;
+	asm volatile("mrs %0, cntfrq_el0" : "=r" (val) :: "memory");
+	return val;
+}
+
+static inline void set_cntfrq(uint32_t value)
+{
+	asm volatile("msr cntfrq_el0, %0" :: "r"(value) : "memory");
+}
+
+static inline uint32_t get_cntkctl(void)
+{
+	uint32_t value;
+	asm volatile("mrs %0, cntkctl_el1" : "=r" (value) :: "memory");
+	return value;
+}
+
+static inline void set_cntkctl(uint32_t value)
+{
+	asm volatile("msr cntkctl_el1, %0" :: "r" (value) : "memory");
+}
+
+static inline void set_cntp_cval(uint64_t value)
+{
+	asm volatile("msr cntp_cval_el0, %0" :: "r"(value) : "memory");
+}
+
+static inline uint64_t get_cntp_cval(void)
+{
+	uint64_t value;
+	asm volatile("mrs %0, cntp_cval_el0" : "=r" (value) :: "memory");
+	return value;
+}
+
+static inline void set_cntp_tval(uint64_t value)
+{
+	asm volatile("msr cntp_tval_el0, %0" :: "r"(value) : "memory");
+}
+
+static inline uint64_t get_cntp_tval(void)
+{
+	uint64_t value;
+	asm volatile("mrs %0, cntp_tval_el0" : "=r" (value) :: "memory");
+	return value;
+}
+
+static inline void set_cntp_ctl(uint32_t value)
+{
+	asm volatile("msr cntp_ctl_el0, %0" :: "r"(value) : "memory");
+}
+
+static inline uint32_t get_cntp_ctl(void)
+{
+	uint32_t value;
+	asm volatile("mrs %0, cntp_ctl_el0" : "=r" (value) :: "memory");
+	return value;
+}
+
+static inline uint64_t get_cntvct(void)
+{
+	uint64_t value;
+	asm volatile("mrs %0, cntvct_el0" : "=r" (value) :: "memory");
+	return value;
+}
+
+static inline void set_cntv_cval(uint64_t value)
+{
+	asm volatile("msr cntv_cval_el0, %0" :: "r"(value) : "memory");
+}
+
+static inline uint64_t get_cntv_cval(void)
+{
+	uint64_t value;
+	asm volatile("mrs %0, cntv_cval_el0" : "=r" (value) :: "memory");
+	return value;
+}
+
+static inline void set_cntv_tval(uint64_t value)
+{
+	asm volatile("msr cntv_tval_el0, %0" :: "r"(value) : "memory");
+}
+
+static inline uint64_t get_cntv_tval(void)
+{
+	uint64_t value;
+	asm volatile("mrs %0, cntv_tval_el0" : "=r" (value) :: "memory");
+	return value;
+}
+
+static inline void set_cntv_ctl(uint32_t value)
+{
+	asm volatile("msr cntv_ctl_el0, %0" :: "r"(value) : "memory");
+}
+
+static inline uint32_t get_cntv_ctl(void)
+{
+	uint32_t value;
+	asm volatile("mrs %0, cntv_ctl_el0" : "=r" (value) :: "memory");
+	return value;
+}
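+
+/* Usage sketch (illustrative only): arm the EL1 virtual timer to fire in
+ * roughly one millisecond; cntfrq_el0 holds the counter frequency in Hz.
+ *
+ *     set_cntv_tval(get_cntfrq() / 1000);  // downcount for ~1 ms
+ *     set_cntv_ctl(1);                     // ENABLE = 1, IMASK = 0
+ */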
+
+/** @brief Init several subsystems
+ *
+ * On AArch64 this currently only runs the CPU detection.
+ *
+ * @return 0 in any case
+ */
+inline static int system_init(void)
+{
+	cpu_detection();
+
+	return 0;
+}
+
+/** @brief Detect and read out CPU frequency
+ *
+ * @return The CPU frequency in MHz
+ */
+uint32_t detect_cpu_frequency(void);
+
+/** @brief Read out CPU frequency if detected before
+ *
+ * If you did not issue the detect_cpu_frequency() function before,
+ * this function will call it implicitly.
+ *
+ * @return The CPU frequency in MHz
+ */
+uint32_t get_cpu_frequency(void);
+
+/** @brief Busy-wait for an interval given in microseconds
+ * @param usecs The time to wait in microseconds
+ */
+void udelay(uint32_t usecs);
+
+/// Finalize the GIC initialization
+int irq_post_init(void);
+
+/// Set up the system clock
+int timer_calibration(void);
+
+extern atomic_int32_t cpu_online;
+
+/** @brief System calibration
+ *
+ * This procedure will detect the CPU frequency and calibrate the system timer.
+ *
+ * @return 0 in any case.
+ */
+static inline int system_calibration(void)
+{
+	irq_post_init();
+	timer_calibration();
+	atomic_int32_inc(&cpu_online);
+	irq_enable();
+
+	return 0;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/arch/aarch64/include/asm/stddef.h b/arch/aarch64/include/asm/stddef.h
new file mode 100644
index 000000000..01d5a36e2
--- /dev/null
+++ b/arch/aarch64/include/asm/stddef.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2017, Stefan Lankes, RWTH Aachen University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of the University nor the names of its contributors
+ *      may be used to endorse or promote products derived from this
+ *      software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @author Stefan Lankes
+ * @file arch/aarch64/include/asm/stddef.h
+ * @brief Standard datatypes
+ *
+ * This file contains typedefs for standard datatypes for numerical and character values.
diff --git a/arch/aarch64/include/asm/stddef.h b/arch/aarch64/include/asm/stddef.h
new file mode 100644
index 000000000..01d5a36e2
--- /dev/null
+++ b/arch/aarch64/include/asm/stddef.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2017, Stefan Lankes, RWTH Aachen University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @author Stefan Lankes
+ * @file arch/aarch64/include/asm/stddef.h
+ * @brief Standard datatypes
+ *
+ * This file contains typedefs for standard datatypes for numerical and character values.
+ */
+
+#ifndef __ARCH_STDDEF_H__
+#define __ARCH_STDDEF_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define per_core(var) (var)
+
+#define set_per_core(var, value) (var = value)
+
+#define KERNEL_SPACE (1ULL << 30)
+
+/// An unsigned integer type wide enough to hold an address
+typedef unsigned long long size_t;
+/// Pointer differences
+typedef long long ptrdiff_t;
+typedef unsigned long long uintptr_t;
+#ifdef __KERNEL__
+typedef long long ssize_t;
+typedef long long off_t;
+#endif
+
+/// Unsigned 64 bit integer
+typedef unsigned long uint64_t;
+/// Signed 64 bit integer
+typedef long int64_t;
+/// Unsigned 32 bit integer
+typedef unsigned int uint32_t;
+/// Signed 32 bit integer
+typedef int int32_t;
+/// Unsigned 16 bit integer
+typedef unsigned short uint16_t;
+/// Signed 16 bit integer
+typedef short int16_t;
+/// Unsigned 8 bit integer (/char)
+typedef unsigned char uint8_t;
+/// Signed 8 bit integer (/char); note that plain char is unsigned on AArch64
+typedef signed char int8_t;
+/// 16 bit wide char type
+typedef unsigned short wchar_t;
+
+#ifndef _WINT_T
+#define _WINT_T
+typedef wchar_t wint_t;
+#endif
+
+/// This defines what the stack looks like after the task context is saved
+struct state {
+	uint64_t elr_el1;
+	uint64_t spsr_el1;
+	uint64_t res;	/* padding to keep the frame 16-byte aligned */
+	uint64_t x0;
+	uint64_t x1;
+	uint64_t x2;
+	uint64_t x3;
+	uint64_t x4;
+	uint64_t x5;
+	uint64_t x6;
+	uint64_t x7;
+	uint64_t x8;
+	uint64_t x9;
+	uint64_t x10;
+	uint64_t x11;
+	uint64_t x12;
+	uint64_t x13;
+	uint64_t x14;
+	uint64_t x15;
+	uint64_t x16;
+	uint64_t x17;
+	uint64_t x18;
+	uint64_t x19;
+	uint64_t x20;
+	uint64_t x21;
+	uint64_t x22;
+	uint64_t x23;
+	uint64_t x24;
+	uint64_t x25;
+	uint64_t x26;
+	uint64_t x27;
+	uint64_t x28;
+	uint64_t x29;
+	uint64_t x30;
+} __attribute__((aligned(16)));
+
+typedef struct {
+	void *ss_sp;	/* Stack base or pointer. */
+} stack_t;
+
+const int32_t is_uhyve(void);
+static inline const int32_t is_single_kernel(void) { return 1; }
+static inline const char* get_cmdline(void) { return 0; }
+static inline int init_rcce(void) { return 0; }
+static inline void print_cpu_status(int isle) {}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
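The trap frame built by entry.S (see below) must match struct state exactly: ELR, SPSR, one padding word and x0-x30 give 34 eight-byte slots. An editorial compile-time check along these lines, placed after including the header, would catch drift:

_Static_assert(sizeof(struct state) == 34 * sizeof(uint64_t),
               "trap frame layout must stay in sync with entry.S");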
diff --git a/arch/aarch64/include/asm/string.h b/arch/aarch64/include/asm/string.h
new file mode 100644
index 000000000..746be836d
--- /dev/null
+++ b/arch/aarch64/include/asm/string.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2017, Stefan Lankes, RWTH Aachen University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __ARCH_STRING_H__
+#define __ARCH_STRING_H__
+
+/**
+ * @author Stefan Lankes
+ * @file arch/aarch64/include/asm/string.h
+ * @brief Definition of basic string and memory operations
+ */
+
+#include
+#include
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if HAVE_ARCH_MEMCPY
+void *aarch64_memcpy(void *dest, const void *src, size_t count);
+
+#define memcpy(dest, src, count)	aarch64_memcpy((dest), (src), (count))
+#endif
+
+#if HAVE_ARCH_MEMSET
+void *aarch64_memset(void *dest, int val, size_t count);
+
+#define memset(dest, val, count)	aarch64_memset((dest), (val), (count))
+#endif
+
+#if HAVE_ARCH_MEMCMP
+int aarch64_memcmp(const void *s1, const void *s2, size_t n);
+
+#define memcmp(s1, s2, n)	aarch64_memcmp((s1), (s2), (n))
+#endif
+
+#if HAVE_ARCH_STRLEN
+size_t aarch64_strlen(const char *str);
+
+#define strlen(str)	aarch64_strlen((str))
+#endif
+
+#if HAVE_ARCH_STRNCPY
+char *aarch64_strncpy(char *dest, const char *src, size_t n);
+
+#define strncpy(dest, src, n)	aarch64_strncpy((dest), (src), (n))
+#endif
+
+#if HAVE_ARCH_STRCPY
+char *aarch64_strcpy(char *dest, const char *src);
+
+#define strcpy(dest, src)	aarch64_strcpy((dest), (src))
+#endif
+
+#if HAVE_ARCH_STRCMP
+int arm64_strcmp(const char *s1, const char *s2);
+
+#define strcmp(s1, s2)	arm64_strcmp((s1), (s2))
+#endif
+
+#if HAVE_ARCH_STRNCMP
+int arm64_strncmp(const char *s1, const char *s2, size_t n);
+
+#define strncmp(s1, s2, n)	arm64_strncmp((s1), (s2), (n))
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
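Each HAVE_ARCH_* block swaps the generic routine for an optimized one purely in the preprocessor. A hedged usage sketch, assuming the build defines HAVE_ARCH_MEMCPY and links an aarch64_memcpy implementation:

#define HAVE_ARCH_MEMCPY 1
#include <asm/string.h>

void copy64(void *dst, const void *src)
{
	memcpy(dst, src, 64);   /* expands to aarch64_memcpy((dst), (src), (64)) */
}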
diff --git a/arch/aarch64/include/asm/tasks.h b/arch/aarch64/include/asm/tasks.h
new file mode 100644
index 000000000..8c0e17bcc
--- /dev/null
+++ b/arch/aarch64/include/asm/tasks.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2010-2017, Stefan Lankes, RWTH Aachen University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @author Stefan Lankes
+ * @file arch/aarch64/include/asm/tasks.h
+ * @brief Task related structure definitions
+ *
+ * This file contains the task_t structure definition
+ * and task state constants
+ */
+
+#ifndef __ASM_TASKS_H__
+#define __ASM_TASKS_H__
+
+#include
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** @brief Setup a default frame for a new task
+ *
+ * @param task Pointer to the task structure
+ * @param ep The entry point for code execution
+ * @param arg Arguments list pointer for the task's stack
+ * @param core_id ID of the core on which the task will run first
+ * @return
+ * - 0 on success
+ * - -EINVAL (-22) on failure
+ */
+int create_default_frame(task_t* task, entry_point_t ep, void* arg, uint32_t core_id);
+
+/** @brief Jump to user code
+ *
+ * This function sets up the stack so that returning from it
+ * enters the user code, as if returning from a procedure call.
+ *
+ * @return 0 in any case
+ */
+/* Disabled x86 version; an AArch64 implementation would use eret instead of iretq. */
+/*static inline int jump_to_user_code(size_t ep, size_t stack)
+{
+	// Create a pseudo interrupt on the stack and return to user function
+	asm volatile ("push %0; push %1; push $0x41202; push %2; push %3; iretq" :: "r"(0x33ULL), "r"(stack), "r"(0x2bULL), "r"(ep) : "memory");
+
+	return 0;
+}*/
+
+/** @brief Architecture dependent initialize routine
+ */
+static inline void arch_init_task(task_t* task) {}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
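A sketch of how a caller would use create_default_frame(); the entry_point_t signature void (*)(void*) and the pre-allocated task->stack are assumptions based on how tasks.c uses them further below:

static void worker(void* arg) { /* task body */ }

int spawn_sketch(task_t* task)
{
	/* task->stack must already be allocated by the scheduler */
	return create_default_frame(task, worker, NULL, 0 /* core id */);
}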
diff --git a/arch/aarch64/include/asm/tasks_types.h b/arch/aarch64/include/asm/tasks_types.h
new file mode 100644
index 000000000..d1b0f7264
--- /dev/null
+++ b/arch/aarch64/include/asm/tasks_types.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2010-2017, Stefan Lankes, RWTH Aachen University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @author Stefan Lankes
+ * @file arch/aarch64/include/asm/tasks_types.h
+ * @brief Task related structure definitions
+ *
+ * This file contains the fpu_state union and the FPU handling stubs.
+ */
+
+#ifndef __ASM_TASKS_TYPES_H__
+#define __ASM_TASKS_TYPES_H__
+
+#include
+//#include
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+union fpu_state {
+	int dummy_value;
+};
+
+#define FPU_STATE_INIT
+
+/*typedef void (*handle_fpu_state)(union fpu_state* state);
+
+extern handle_fpu_state save_fpu_state;
+extern handle_fpu_state restore_fpu_state;
+extern handle_fpu_state fpu_init;*/
+
+static inline void save_fpu_state(union fpu_state* state){}
+static inline void restore_fpu_state(union fpu_state* state){}
+static inline void fpu_init(union fpu_state* state){}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/arch/aarch64/include/asm/time.h b/arch/aarch64/include/asm/time.h
new file mode 100644
index 000000000..1fadd56f5
--- /dev/null
+++ b/arch/aarch64/include/asm/time.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2017, Stefan Lankes, RWTH Aachen University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @author Stefan Lankes
+ * @file arch/aarch64/include/asm/time.h
+ * @brief Time related functions
+ */
+
+#ifndef __ARCH_TIME_H__
+#define __ARCH_TIME_H__
+
+#include
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int timer_deadline(uint32_t t);
+
+void timer_disable(void);
+
+int timer_is_running(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/arch/aarch64/include/asm/uart.h b/arch/aarch64/include/asm/uart.h
new file mode 100644
index 000000000..8e29316d7
--- /dev/null
+++ b/arch/aarch64/include/asm/uart.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2014-2017, Stefan Lankes, Daniel Krebs, RWTH Aachen University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __ARCH_UART_H__
+#define __ARCH_UART_H__
+
+#include
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** @brief Initialize UART output
+ *
+ * @return Returns 0 on success
+ */
+int uart_init(void);
+
+/** @brief Initialize UART output without a device check
+ *
+ * @return Returns 0 on success
+ */
+int uart_early_init(char*);
+
+/** @brief Simple string output on a serial device.
+ *
+ * If you want a new line, you have to include "\\n" yourself.
+ *
+ * @return Length of output in bytes
+ */
+int uart_puts(const char *text);
+
+/** @brief Simple character output on a serial device.
+ *
+ * @return The original input character cast to int
+ */
+int uart_putchar(unsigned char c);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
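A small sketch combining the two interfaces above, e.g. for early bring-up output followed by arming the timer; the tick value is arbitrary:

void hello_and_arm(void)
{
	uart_puts("hello from EL1\n");
	if (!timer_is_running())
		timer_deadline(1);   /* fire after one tick */
}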
diff --git a/arch/aarch64/include/asm/uhyve.h b/arch/aarch64/include/asm/uhyve.h
new file mode 100644
index 000000000..b0da32ed5
--- /dev/null
+++ b/arch/aarch64/include/asm/uhyve.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2017, Stefan Lankes, RWTH Aachen University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @author Stefan Lankes
+ * @file arch/aarch64/include/asm/uhyve.h
+ * @brief Interface to our machine monitor
+ */
+
+#ifndef __ARCH_UHYVE_H__
+#define __ARCH_UHYVE_H__
+
+#include
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+inline static void uhyve_send(unsigned short _port, unsigned int _data)
+{
+	/* The port number doubles as a guest-physical address: the store
+	 * below traps into the monitor, which decodes port and payload. */
+	*((unsigned int*)(size_t)_port) = _data;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
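Unlike x86, where uhyve hypercalls are port I/O, this AArch64 variant reuses the port number as an MMIO address. A hedged sketch of a hypercall; the port constant and argument struct are purely hypothetical, and the pointer-to-int cast assumes identity-mapped low memory:

#define UHYVE_PORT_EXAMPLE 0x510   /* hypothetical port/address */

typedef struct { int value; } uhyve_example_t;

static void hypercall_sketch(uhyve_example_t* arg)
{
	/* the monitor sees the faulting address (port) and the payload (data) */
	uhyve_send(UHYVE_PORT_EXAMPLE, (unsigned int)(size_t)arg);
}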
diff --git a/arch/aarch64/kernel/Makefile b/arch/aarch64/kernel/Makefile
new file mode 100644
index 000000000..572168512
--- /dev/null
+++ b/arch/aarch64/kernel/Makefile
@@ -0,0 +1,5 @@
+C_source := uart.c timer.c tasks.c signal.c #irq.c idt.c isrs.c gdt.c processor.c timer.c tasks.c apic.c pci.c vga.c uart.c syscall.c
+ASM_source := entry.S
+MODULE := arch_arm64_kernel
+
+include $(TOPDIR)/Makefile.inc
diff --git a/arch/aarch64/kernel/entry.S b/arch/aarch64/kernel/entry.S
new file mode 100644
index 000000000..520516587
--- /dev/null
+++ b/arch/aarch64/kernel/entry.S
@@ -0,0 +1,555 @@
+/*
+ * Copyright (c) 2017, Stefan Lankes, RWTH Aachen University, Germany
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * This is the kernel's entry point, which is derived from Xen's Mini-OS.
+ */
+
+#include
+
+#define MAIR(attr, mt)	((attr) << ((mt) * 8))
+
+#define PAGE_SHIFT	12
+#define PAGE_SIZE	(1 << PAGE_SHIFT)
+#define PAGE_MASK	(~(PAGE_SIZE-1))
+
+/*
+ * Memory types available.
+ */
+#define MT_DEVICE_nGnRnE	0
+#define MT_DEVICE_nGnRE		1
+#define MT_DEVICE_GRE		2
+#define MT_NORMAL_NC		3
+#define MT_NORMAL		4
+
+/*
+ * TCR flags
+ */
+#define TCR_TxSZ(x)	((((64) - (x)) << 16) | (((64) - (x)) << 0))
+#define TCR_IRGN_WBWA	(((1) << 8) | ((1) << 24))
+#define TCR_ORGN_WBWA	(((1) << 10) | ((1) << 26))
+#define TCR_SHARED	(((3) << 12) | ((3) << 28))
+#define TCR_TBI0	((1) << 37)
+#define TCR_TBI1	((1) << 38)
+#define TCR_ASID16	((1) << 36)
+#define TCR_TG1_16K	((1) << 30)
+#define TCR_TG1_4K	((0) << 30)
+#define TCR_FLAGS	(TCR_IRGN_WBWA | TCR_ORGN_WBWA | TCR_SHARED)
+
+/* Number of virtual address bits for 4KB page */
+#define VA_BITS		48
+
+#define PT_DEVICE	0x707
+#define PT_PT		0x713
+#define PT_MEM		0x713
+#define PT_MEM_CD	0x70F
+#define PT_SELF	((1) << 55)
+
+#define ALIGN	.align 4
+
+#define END(name) \
+	.size name, .-name
+
+#define ENDPROC(name) \
+	.type name, @function; \
+	END(name)
+
+#define ENTRY(name) \
+	.globl name; \
+	ALIGN; \
+	name:
+
+.section .mboot
+
+.global _start
+_start:
+b start64
+
+.align 8
+.global base
+base: .quad 0
+.global limit
+limit: .quad 0
+.global cpu_freq
+cpu_freq: .dword 0
+.global boot_processor
+boot_processor: .dword 0
+.global cpu_online
+cpu_online: .dword 0
+.global possible_cpus
+possible_cpus: .dword 0
+.global current_boot_id
+current_boot_id: .dword 0
+isle: .dword -1
+.global possible_isles
+possible_isles: .dword 1
+.global uhyve
+uhyve: .dword 0
+.global single_kernel
+single_kernel: .dword 1
+.global image_size
+image_size: .quad 0
+
+.global hcip
+hcip: .byte 10,0,5,2
+.global hcgateway
+hcgateway: .byte 10,0,5,1
+.global hcmask
+hcmask: .byte 255,255,255,0
+.global host_logical_addr
+host_logical_addr: .quad 0
+
+start64:
+	//mrs x0, s3_1_c15_c3_0	// Read EL1 Configuration Base Address Register
+
+	/* disable interrupts */
+	msr daifset, #0b111
+
+	/* store x5=dtb */
+	/*adrp x1, dtb
+	str x5, [x1, #:lo12:dtb]*/
+
+	/*
+	 * Disable the MMU. We may have entered the kernel with it on and
+	 * will need to update the tables later. If this has been set up
+	 * with anything other than a VA == PA map then this will fail,
+	 * but in this case the code to find where we are running from
+	 * would have also failed.
+	 */
+	dsb sy
+	mrs x2, sctlr_el1
+	bic x2, x2, #0x1
+	msr sctlr_el1, x2
+	isb
+
+	/* Calculate where we are */
+	//bl _calc_offset
+
+	/* Set up the CPU for turning the MMU on. */
+	bl _setup_cpu
+
+	/* Set up the initial page table. */
+	bl _setup_pgtable
+
+	/* Load TTBRx */
+	mov x0, xzr
+	msr ttbr1_el1, x0
+	ldr x0, =l0_pgtable
+	msr ttbr0_el1, x0
+	isb
+
+	/* Set exception table */
+	ldr x0, =vector_table
+	msr vbar_el1, x0
+
+	/* Turning on MMU */
+	dsb sy
+
+	/*
+	 * Prepare system control register (SCTLR)
+	 *
+	 * UCI	[26]	Enables EL0 access in AArch64 for DC CVAU, DC CIVAC,
+	 *		DC CVAC and IC IVAU instructions
+	 * EE	[25]	Explicit data accesses at EL1 and Stage 1 translation
+	 *		table walks at EL1 & EL0 are little-endian
+	 * EOE	[24]	Explicit data accesses at EL0 are little-endian
+	 * WXN	[19]	Regions with write permission are not forced to XN
+	 * nTWE	[18]	WFE instructions are executed as normal
+	 * nTWI	[16]	WFI instructions are executed as normal
+	 * UCT	[15]	Enables EL0 access in AArch64 to the CTR_EL0 register
+	 * DZE	[14]	Execution of the DC ZVA instruction is allowed at EL0
+	 * I	[12]	Instruction caches enabled at EL0 and EL1
+	 * UMA	[9]	Disable access to the interrupt masks from EL0
+	 * SED	[8]	The SETEND instruction is available
+	 * ITD	[7]	The IT instruction functionality is available
+	 * THEE	[6]	ThumbEE is disabled
+	 * CP15BEN [5]	CP15 barrier operations disabled
+	 * SA0	[4]	Stack Alignment check for EL0 enabled
+	 * SA	[3]	Stack Alignment check enabled
+	 * C	[2]	Data and unified caches enabled
+	 * A	[1]	Alignment fault checking disabled
+	 * M	[0]	MMU enable
+	 */
+	ldr x0, =0x4D5D91D
+	msr sctlr_el1, x0
+
+	ldr x0, =mmu_on
+	br x0
+
+mmu_on:
+	/* Pointer to stack base */
+	ldr x1, =(boot_stack+KERNEL_STACK_SIZE-0x10)
+	mov sp, x1
+
+	/* Test core ID */
+	mrs x0, mpidr_el1
+
+	bl hermit_main
+
+	/* halt */
+halt:
+	wfe
+	b halt
+
+.section .text
+
+_setup_cpu:
+	ic iallu
+	tlbi vmalle1is
+	dsb ish
+
+	/*
+	 * Set up the memory attribute type tables
+	 *
+	 * Memory region attributes for LPAE:
+	 *
+	 *   n = AttrIndx[2:0]
+	 *                      n       MAIR
+	 *   DEVICE_nGnRnE      000     00000000 (0x00)
+	 *   DEVICE_nGnRE       001     00000100 (0x04)
+	 *   DEVICE_GRE         010     00001100 (0x0c)
+	 *   NORMAL_NC          011     01000100 (0x44)
+	 *   NORMAL             100     11111111 (0xff)
+	 */
+	ldr x0, =(MAIR(0x00, MT_DEVICE_nGnRnE) | \
+		MAIR(0x04, MT_DEVICE_nGnRE) | \
+		MAIR(0x0c, MT_DEVICE_GRE) | \
+		MAIR(0x44, MT_NORMAL_NC) | \
+		MAIR(0xff, MT_NORMAL))
+	msr mair_el1, x0
+
+	/*
+	 * Set up the translation control register (TCR)
+	 */
+
+	// determine physical address size
+	mrs x0, id_aa64mmfr0_el1
+	and x0, x0, 0xF
+	lsl x0, x0, 32
+
+	ldr x1, =(TCR_TxSZ(VA_BITS) | TCR_TG1_4K | TCR_FLAGS )
+	orr x0, x0, x1
+
+	mrs x1, id_aa64mmfr0_el1
+	bfi x0, x1, #32, #3
+	msr tcr_el1, x0
+
+	/*
+	 * Enable FP/ASIMD in the Architectural Feature Access Control Register
+	 */
+	mov x0, #3 << 20
+	msr cpacr_el1, x0
+
+	/*
+	 * Reset the debug control register
+	 */
+	msr mdscr_el1, xzr
+
+	ret
+
+_setup_pgtable:
+	ldr x0, =kernel_end
+	/* align to a 64 KByte boundary */
+	add x0, x0, 0x10000
+	mov x1, ~0xFFFF
+	and x0, x0, x1
+	mov x3, x0		// x3 := address of the first allocated page table
+
+	// create page table entries for the 1st GB
+	ldr x1, =l2_pgtable
+	add x1, x1, 8
+	add x0, x0, PT_PT
+	mov x2, xzr
+1:
+	str x0, [x1], 8
+	add x0, x0, PAGE_SIZE
+	add x2, x2, 1
+	cmp x2, 511
+	b.lo 1b
+
+	// create identity mapping
+	ldr x5, =kernel_start
+	mov x6, x5
+	// set the contiguous bit
+	mov x7, 1
+	lsl x7, x7, 52
+	add x7, x7, PT_MEM
+	orr x5, x5, x7
+	mov x0, x3		// map until the first page table
+	mov x7, xzr
+2:
+	str x5, [x3], 8
+	add x5, x5, PAGE_SIZE
+	add x6, x6, PAGE_SIZE
+	add x7, x7, 1
+	cmp x6, x0
+	b.lo 2b
+
+	/* Clear the rest of the boot page tables */
+3:
+	stp xzr, xzr, [x3], 16
+	stp xzr, xzr, [x3], 16
+	stp xzr, xzr, [x3], 16
+	stp xzr, xzr, [x3], 16
+	add x7, x7, 8
+	cmp x7, 511*PAGE_SIZE
+	b.lo 3b
+
+	ret
+
+//_calc_offset:
+//	ldr x22, =_start	// x0 := vaddr(_start)
+//	adr x21, _start		// x21 := paddr(_start)
+//	sub x22, x22, x21	// x22 := phys-offset (vaddr - paddr)
+//	ret
+
+/*
+ * There are no PUSH/POP instructions in ARMv8.
+ * Use STR and LDR for stack accesses.
+ */
+.macro push, xreg
+str \xreg, [sp, #-8]!
+.endm
+
+.macro pop, xreg
+ldr \xreg, [sp], #8
+.endm
+
+.macro trap_entry, el
+	stp x29, x30, [sp, #-16]!
+	stp x27, x28, [sp, #-16]!
+	stp x25, x26, [sp, #-16]!
+	stp x23, x24, [sp, #-16]!
+	stp x21, x22, [sp, #-16]!
+	stp x19, x20, [sp, #-16]!
+	stp x17, x18, [sp, #-16]!
+	stp x15, x16, [sp, #-16]!
+	stp x13, x14, [sp, #-16]!
+	stp x11, x12, [sp, #-16]!
+	stp x9, x10, [sp, #-16]!
+	stp x7, x8, [sp, #-16]!
+	stp x5, x6, [sp, #-16]!
+	stp x3, x4, [sp, #-16]!
+	stp x1, x2, [sp, #-16]!
+	str x0, [sp, #-16]!
+
+	mrs x22, elr_el1
+	mrs x23, spsr_el1
+	stp x22, x23, [sp, #-16]!
+.endm
+
+.macro trap_exit, el
+	ldp x22, x23, [sp], #16
+	msr elr_el1, x22
+	msr spsr_el1, x23
+
+	ldr x0, [sp], #16
+	ldp x1, x2, [sp], #16
+	ldp x3, x4, [sp], #16
+	ldp x5, x6, [sp], #16
+	ldp x7, x8, [sp], #16
+	ldp x9, x10, [sp], #16
+	ldp x11, x12, [sp], #16
+	ldp x13, x14, [sp], #16
+	ldp x15, x16, [sp], #16
+	ldp x17, x18, [sp], #16
+	ldp x19, x20, [sp], #16
+	ldp x21, x22, [sp], #16
+	ldp x23, x24, [sp], #16
+	ldp x25, x26, [sp], #16
+	ldp x27, x28, [sp], #16
+	ldp x29, x30, [sp], #16
+.endm
+
+
+/*
+ * SYNC & IRQ exception handlers.
+ */
+.align 6
+el1_sync:
+	trap_entry 1
+	mov x0, sp
+	bl do_sync
+	trap_exit 1
+	eret
+ENDPROC(el1_sync)
+
+.align 6
+el1_irq:
+	trap_entry 1
+	mov x0, sp
+	bl do_irq
+	cmp x0, 0
+	b.eq 1f
+
+	mov x1, sp
+	str x1, [x0]		/* store old sp */
+	bl get_current_stack	/* get new sp */
+	mov sp, x0
+
+	/* call cleanup code */
+	bl finish_task_switch
+
+1:	trap_exit 1
+	eret
+ENDPROC(el1_irq)
+
+.align 6
+el1_fiq:
+	trap_entry 1
+	mov x0, sp
+	bl do_fiq
+	cmp x0, 0
+	b.eq 1f
+
+	mov x1, sp
+	str x1, [x0]		/* store old sp */
+	bl get_current_stack	/* get new sp */
+	mov sp, x0
+
+	/* call cleanup code */
+	bl finish_task_switch
+
+1:	trap_exit 1
+	eret
+ENDPROC(el1_fiq)
+
+.align 6
+el1_error:
+	trap_entry 1
+	mov x0, sp
+	bl do_error
+	trap_exit 1
+	eret
+ENDPROC(el1_error)
+
+/*
+ * Bad Abort numbers
+ */
+#define BAD_SYNC	0
+#define BAD_IRQ		1
+#define BAD_FIQ		2
+#define BAD_ERROR	3
+
+/*
+ * Exception vector entry
+ */
+.macro ventry label
+.align 7
+b \label
+.endm
+
+.macro invalid, reason
+mov x0, sp
+mov x1, #\reason
+b do_bad_mode
+.endm
+
+el0_sync_invalid:
+	invalid BAD_SYNC
+ENDPROC(el0_sync_invalid)
+
+el0_irq_invalid:
+	invalid BAD_IRQ
+ENDPROC(el0_irq_invalid)
+
+el0_fiq_invalid:
+	invalid BAD_FIQ
+ENDPROC(el0_fiq_invalid)
+
+el0_error_invalid:
+	invalid BAD_ERROR
+ENDPROC(el0_error_invalid)
+
+el1_sync_invalid:
+	invalid BAD_SYNC
+ENDPROC(el1_sync_invalid)
+
+el1_irq_invalid:
+	invalid BAD_IRQ
+ENDPROC(el1_irq_invalid)
+
+el1_fiq_invalid:
+	invalid BAD_FIQ
+ENDPROC(el1_fiq_invalid)
+
+el1_error_invalid:
+	invalid BAD_ERROR
+ENDPROC(el1_error_invalid)
+
+.align 11
+ENTRY(vector_table)
+/* Current EL with SP0 */
+ventry el1_sync_invalid		// Synchronous EL1t
+ventry el1_irq_invalid		// IRQ EL1t
+ventry el1_fiq_invalid		// FIQ EL1t
+ventry el1_error_invalid	// Error EL1t
+
+/* Current EL with SPx */
+ventry el1_sync			// Synchronous EL1h
+ventry el1_irq			// IRQ EL1h
+ventry el1_fiq			// FIQ EL1h
+ventry el1_error		// Error EL1h
+
+/* Lower EL using AArch64 */
+ventry el0_sync_invalid		// Synchronous 64-bit EL0
+ventry el0_irq_invalid		// IRQ 64-bit EL0
+ventry el0_fiq_invalid		// FIQ 64-bit EL0
+ventry el0_error_invalid	// Error 64-bit EL0
+
+/* Lower EL using AArch32 */
+ventry el0_sync_invalid		// Synchronous 32-bit EL0
+ventry el0_irq_invalid		// IRQ 32-bit EL0
+ventry el0_fiq_invalid		// FIQ 32-bit EL0
+ventry el0_error_invalid	// Error 32-bit EL0
+END(vector_table)
+
+.section .data
+.global boot_stack
+.balign 0x10
+boot_stack: .skip KERNEL_STACK_SIZE
+.global boot_ist
+boot_ist: .skip KERNEL_STACK_SIZE
+
+
+.global l0_pgtable
+.align 12
+l0_pgtable:
+	.quad l1_pgtable + PT_PT
+	.space 510*8, 0
+	.quad l0_pgtable + PT_PT + PT_SELF
+l1_pgtable:
+	.quad l2_pgtable + PT_PT
+	.space 511*8, 0
+l2_pgtable:
+	.quad l3_pgtable + PT_PT
+	.space 511*8, 0
+l3_pgtable:
+	.quad 0x00000000 + PT_MEM_CD	// map I/O ports
+	.quad 0x09000000 + PT_MEM_CD	// map QEMU's uart port
+	.space 510*8, 0
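In C terms, each slot built above is a 64-bit descriptor: the physical address of the next-level table (or page) OR'ed with attribute bits such as PT_PT and PT_MEM_CD. A sketch restating the two l3 device entries, assuming the same constants; the "low I/O region" label is an interpretation:

#define PT_MEM_CD 0x70FULL   /* cache-disabled device attributes, as in entry.S */

static const unsigned long long l3_sketch[2] = {
	0x00000000ULL | PT_MEM_CD,   /* low I/O region */
	0x09000000ULL | PT_MEM_CD,   /* QEMU virt PL011 UART */
};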
diff --git a/arch/aarch64/kernel/irq.c b/arch/aarch64/kernel/irq.c
new file mode 100644
index 000000000..554aeb243
--- /dev/null
+++ b/arch/aarch64/kernel/irq.c
@@ -0,0 +1,379 @@
+/*
+ * Copyright (c) 2014-2018, Stefan Lankes, RWTH Aachen University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/* GIC related constants */
+#define GICR_BASE			0
+
+/* GIC Distributor interface register offsets that are common to GICv3 & GICv2 */
+#define GICD_CTLR			0x0
+#define GICD_TYPER			0x4
+#define GICD_IIDR			0x8
+#define GICD_IGROUPR			0x80
+#define GICD_ISENABLER			0x100
+#define GICD_ICENABLER			0x180
+#define GICD_ISPENDR			0x200
+#define GICD_ICPENDR			0x280
+#define GICD_ISACTIVER			0x300
+#define GICD_ICACTIVER			0x380
+#define GICD_IPRIORITYR			0x400
+#define GICD_ITARGETSR			0x800
+#define GICD_ICFGR			0xc00
+#define GICD_NSACR			0xe00
+#define GICD_SGIR			0xF00
+
+#define GICD_CTLR_ENABLEGRP0		(1 << 0)
+#define GICD_CTLR_ENABLEGRP1		(1 << 1)
+
+/* Physical CPU Interface registers */
+#define GICC_CTLR			0x0
+#define GICC_PMR			0x4
+#define GICC_BPR			0x8
+#define GICC_IAR			0xC
+#define GICC_EOIR			0x10
+#define GICC_RPR			0x14
+#define GICC_HPPIR			0x18
+#define GICC_AHPPIR			0x28
+#define GICC_IIDR			0xFC
+#define GICC_DIR			0x1000
+#define GICC_PRIODROP			GICC_EOIR
+
+#define GICC_CTLR_ENABLEGRP0		(1 << 0)
+#define GICC_CTLR_ENABLEGRP1		(1 << 1)
+#define GICC_CTLR_FIQEN			(1 << 3)
+#define GICC_CTLR_ACKCTL		(1 << 2)
+
+#define MAX_HANDLERS	256
+#define RESCHED_INT	1
+
+/** @brief IRQ handler pointers
+ *
+ * This array is actually an array of function pointers. We use
+ * this to handle custom IRQ handlers for a given IRQ
+ */
+static irq_handler_t irq_routines[MAX_HANDLERS] = {[0 ... MAX_HANDLERS-1] = NULL};
+
+static spinlock_irqsave_t mask_lock = SPINLOCK_IRQSAVE_INIT;
+
+static size_t gicd_base = GICD_BASE;
+static size_t gicc_base = GICC_BASE;
+static uint32_t nr_irqs = 0;
+
+static inline uint32_t gicd_read(size_t off)
+{
+	uint32_t value;
+	asm volatile("ldar %w0, [%1]" : "=r"(value) : "r"(gicd_base + off) : "memory");
+	return value;
+}
+
+static inline void gicd_write(size_t off, uint32_t value)
+{
+	asm volatile("str %w0, [%1]" : : "rZ" (value), "r" (gicd_base + off) : "memory");
+}
+
+static inline uint32_t gicc_read(size_t off)
+{
+	uint32_t value;
+	asm volatile("ldar %w0, [%1]" : "=r"(value) : "r"(gicc_base + off) : "memory");
+	return value;
+}
+
+static inline void gicc_write(size_t off, uint32_t value)
+{
+	asm volatile("str %w0, [%1]" : : "rZ" (value), "r" (gicc_base + off) : "memory");
+}
+
+static void gicc_enable(void)
+{
+	// Globally enable signalling of interrupts from the CPU interface
+	gicc_write(GICC_CTLR, GICC_CTLR_ENABLEGRP0 | GICC_CTLR_ENABLEGRP1 | GICC_CTLR_FIQEN | GICC_CTLR_ACKCTL);
+}
+
+static void gicc_disable(void)
+{
+	// Globally disable signalling of interrupts from the CPU interface
+	gicc_write(GICC_CTLR, 0);
+}
+
+static void gicd_enable(void)
+{
+	// Globally enable forwarding of interrupts from the distributor to the CPU interface
+	gicd_write(GICD_CTLR, GICD_CTLR_ENABLEGRP0 | GICD_CTLR_ENABLEGRP1);
+}
+
+static void gicd_disable(void)
+{
+	// Globally disable forwarding of interrupts from the distributor to the CPU interface
+	gicd_write(GICD_CTLR, 0);
+}
+
+static void gicc_set_priority(uint32_t priority)
+{
+	gicc_write(GICC_PMR, priority & 0xFF);
+}
+
+static void gic_set_enable(uint32_t vector, uint8_t enable)
+{
+	if (enable) {
+		uint32_t regoff = GICD_ISENABLER + 4 * (vector / 32);
+		gicd_write(regoff, gicd_read(regoff) | (1 << (vector % 32)));
+	} else {
+		uint32_t regoff = GICD_ICENABLER + 4 * (vector / 32);
+		gicd_write(regoff, gicd_read(regoff) | (1 << (vector % 32)));
+	}
+}
+
+static int unmask_interrupt(uint32_t vector)
+{
+	if (vector >= nr_irqs)
+		return -EINVAL;
+
+	spinlock_irqsave_lock(&mask_lock);
+	gic_set_enable(vector, 1);
+	spinlock_irqsave_unlock(&mask_lock);
+
+	return 0;
+}
+
+static int mask_interrupt(uint32_t vector)
+{
+	if (vector >= nr_irqs)
+		return -EINVAL;
+
+	spinlock_irqsave_lock(&mask_lock);
+	gic_set_enable(vector, 0);
+	spinlock_irqsave_unlock(&mask_lock);
+
+	return 0;
+}
+
+/* This installs a custom IRQ handler for the given IRQ */
+int irq_install_handler(unsigned int irq, irq_handler_t handler)
+{
+	if (irq >= MAX_HANDLERS)
+		return -EINVAL;
+
+	irq_routines[irq] = handler;
+
+	unmask_interrupt(irq);
+
+	return 0;
+}
+
+/* This clears the handler for a given IRQ */
+int irq_uninstall_handler(unsigned int irq)
+{
+	if (irq >= MAX_HANDLERS)
+		return -EINVAL;
+
+	irq_routines[irq] = NULL;
+
+	mask_interrupt(irq);
+
+	return 0;
+}
+
+int irq_post_init(void)
+{
+	int ret;
+
+	LOG_INFO("Enable interrupt handling\n");
+
+	ret = vma_add(GICD_BASE, GICD_BASE+GIC_SIZE, VMA_READ|VMA_WRITE);
+	if (BUILTIN_EXPECT(ret, 0))
+		goto oom;
+
+	ret = page_map(gicd_base, GICD_BASE, GIC_SIZE >> PAGE_BITS, PG_GLOBAL|PG_RW|PG_DEVICE);
+	if (BUILTIN_EXPECT(ret, 0))
+		goto oom;
+
+	LOG_INFO("Map gicd 0x%zx at 0x%zx\n", GICD_BASE, gicd_base);
+	LOG_INFO("Map gicc 0x%zx at 0x%zx\n", GICC_BASE, gicc_base);
+
+	gicc_disable();
+	gicd_disable();
+
+	nr_irqs = ((gicd_read(GICD_TYPER) & 0x1f) + 1) * 32;
+	LOG_INFO("Number of supported interrupts %u\n", nr_irqs);
+
+	gicd_write(GICD_ICENABLER, 0xffff0000);
+	gicd_write(GICD_ISENABLER, 0x0000ffff);
+	gicd_write(GICD_ICPENDR, 0xffffffff);
+	gicd_write(GICD_IGROUPR, 0);
+
+	for (uint32_t i = 0; i < 32 / 4; i++) {
+		gicd_write(GICD_IPRIORITYR + i * 4, 0x80808080);
+	}
+
+	for (uint32_t i = 32/16; i < nr_irqs / 16; i++) {
+		gicd_write(GICD_NSACR + i * 4, 0xffffffff);
+	}
+
+	for (uint32_t i = 32/32; i < nr_irqs / 32; i++) {
+		gicd_write(GICD_ICENABLER + i * 4, 0xffffffff);
+		gicd_write(GICD_ICPENDR + i * 4, 0xffffffff);
+		gicd_write(GICD_IGROUPR + i * 4, 0);
+	}
+
+	for (uint32_t i = 32/4; i < nr_irqs / 4; i++) {
+		gicd_write(GICD_ITARGETSR + i * 4, 0);
+		gicd_write(GICD_IPRIORITYR + i * 4, 0x80808080);
+	}
+
+	gicd_enable();
+
+	gicc_set_priority(0xF0);
+	gicc_enable();
+
+	unmask_interrupt(RESCHED_INT);
+
+	return 0;
+
+oom:
+	LOG_ERROR("Failed to initialize interrupt controller\n");
+
+	return ret;
+}
+
+void do_sync(void *regs)
+{
+	uint32_t iar = gicc_read(GICC_IAR);
+	uint32_t esr = read_esr();
+	uint32_t ec = esr >> 26;
+	uint32_t iss = esr & 0xFFFFFF;
+
+	/* data abort from lower or current level */
+	if ((ec == 0b100100) || (ec == 0b100101)) {
+		/* check if the value in far_el1 is valid (FnV bit not set) */
+		if (!(iss & (1 << 10))) {
+			/* read the far_el1 register, which holds the faulting virtual address */
+			uint64_t far = read_far();
+
+			if (page_fault_handler(far) == 0)
+				return;
+
+			LOG_ERROR("Unable to handle page fault at 0x%llx\n", far);
+
+			// send EOI
+			gicc_write(GICC_EOIR, iar);
+			//do_abort();
+			sys_exit(-EFAULT);
+		} else {
+			LOG_ERROR("Unknown exception\n");
+		}
+	} else {
+		LOG_ERROR("Unsupported exception class\n");
+	}
+
+	while (1) {
+		HALT;
+	}
+}
+
+size_t** do_fiq(void *regs)
+{
+	size_t** ret = NULL;
+	uint32_t iar = gicc_read(GICC_IAR);
+	uint32_t vector = iar & 0x3ff;
+
+	//LOG_INFO("Receive fiq %d\n", vector);
+
+	if (vector < MAX_HANDLERS && irq_routines[vector]) {
+		(irq_routines[vector])(regs);
+	} else if (vector != RESCHED_INT) {
+		LOG_INFO("Unable to handle fiq %d\n", vector);
+	}
+
+	// Check if timers have expired that would unblock tasks
+	check_workqueues_in_irqhandler(vector);
+
+	if ((vector == INT_PPI_NSPHYS_TIMER) || (vector == RESCHED_INT)) {
+		// a timer interrupt may have caused unblocking of tasks
+		ret = scheduler();
+	} else if (get_highest_priority() > per_core(current_task)->prio) {
+		// there's a ready task with higher priority
+		ret = scheduler();
+	}
+
+	gicc_write(GICC_EOIR, iar);
+
+	return ret;
+}
+
+size_t** do_irq(void *regs)
+{
+	size_t** ret = NULL;
+	uint32_t iar = gicc_read(GICC_IAR);
+	uint32_t vector = iar & 0x3ff;
+
+	LOG_INFO("Receive interrupt %d\n", vector);
+
+	// Check if timers have expired that would unblock tasks
+	check_workqueues_in_irqhandler(vector);
+
+	if (get_highest_priority() > per_core(current_task)->prio) {
+		// there's a ready task with higher priority
+		ret = scheduler();
+	}
+
+	gicc_write(GICC_EOIR, iar);
+
+	return ret;
+}
+
+void do_error(void *regs)
+{
+	LOG_ERROR("Receive error interrupt\n");
+
+	while (1) {
+		HALT;
+	}
+}
+
+void do_bad_mode(void *regs, int reason)
+{
+	LOG_ERROR("Receive unhandled exception: %d\n", reason);
+
+	while (1) {
+		HALT;
+	}
+}
+
+void reschedule(void)
+{
+	// (2 << 24) = Forward the interrupt only to the CPU interface of the PE that requested the interrupt
+	gicd_write(GICD_SGIR, (2 << 24) | RESCHED_INT);
+}
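Drivers hook into the table above via irq_install_handler(), which also unmasks the interrupt at the distributor. A hedged sketch; the vector number is hypothetical and the handler signature follows how timer.c installs its handler below:

static void sample_handler(struct state* s)
{
	/* device-specific acknowledgement goes here;
	 * the GICC_EOIR write is done centrally in do_irq()/do_fiq() */
}

void wire_sample_irq(void)
{
	irq_install_handler(42 /* hypothetical SPI number */, sample_handler);
}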
diff --git a/arch/aarch64/kernel/processor.c b/arch/aarch64/kernel/processor.c
new file mode 100644
index 000000000..5a08adf90
--- /dev/null
+++ b/arch/aarch64/kernel/processor.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2014-2017, Stefan Lankes, Daniel Krebs, RWTH Aachen University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include
+#include
+#include
+#include
+
+extern uint32_t cpu_freq;
+
+uint32_t get_cpu_frequency(void)
+{
+	return cpu_freq;
+}
+
+int cpu_detection(void)
+{
+	LOG_INFO("HermitCore runs in exception level %d\n", get_current_el());
+	LOG_INFO("System control register: 0x%x\n", get_sctlr());
+
+#if 0
+	uint32_t value = 0;
+
+	LOG_INFO("Enable performance counter\n");
+
+	/* Enable Performance Counter */
+	asm volatile("mrs %0, pmcr_el0" : "=r" (value));
+	value |= ARMV8_PMCR_E;	/* Enable */
+	value |= ARMV8_PMCR_C;	/* Cycle counter reset */
+	value |= ARMV8_PMCR_P;	/* Reset all counters */
+	asm volatile("msr pmcr_el0, %0" : : "r" (value));
+
+	/* Enable cycle counter register */
+	asm volatile("mrs %0, pmcntenset_el0" : "=r" (value));
+	value |= ARMV8_PMCNTENSET_EL0_EN;
+	asm volatile("msr pmcntenset_el0, %0" : : "r" (value));
+#endif
+
+	return 0;
+}
diff --git a/arch/aarch64/kernel/signal.c b/arch/aarch64/kernel/signal.c
new file mode 100644
index 000000000..3a9ea4727
--- /dev/null
+++ b/arch/aarch64/kernel/signal.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2017, Stefan Lankes, RWTH Aachen University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */ +#include + +int hermit_signal(signal_handler_t handler) +{ + //task_t* curr_task = per_core(current_task); + //curr_task->signal_handler = handler; + + return 0; +} + +int hermit_kill(tid_t dest, int signum) +{ + return 0; +} + +void signal_init(void) +{ +} diff --git a/arch/aarch64/kernel/signal.dep b/arch/aarch64/kernel/signal.dep new file mode 100644 index 000000000..001bb703c --- /dev/null +++ b/arch/aarch64/kernel/signal.dep @@ -0,0 +1,11 @@ +arch/arm64/kernel/signal.o: arch/arm64/kernel/signal.c \ + /home/stefan/HermitCore/include/hermit/signal.h \ + /home/stefan/HermitCore/include/hermit/stddef.h \ + /home/stefan/HermitCore/include/hermit/config.h \ + /home/stefan/HermitCore/arch/arm64/include/asm/stddef.h \ + /home/stefan/HermitCore/arch/arm64/include/asm/irqflags.h \ + /home/stefan/HermitCore/include/hermit/semaphore_types.h \ + /home/stefan/HermitCore/include/hermit/spinlock_types.h \ + /home/stefan/HermitCore/arch/arm64/include/asm/atomic.h \ + /home/stefan/HermitCore/arch/arm64/include/asm/atomic32.h \ + /home/stefan/HermitCore/arch/arm64/include/asm/atomic64.h diff --git a/arch/aarch64/kernel/tasks.c b/arch/aarch64/kernel/tasks.c new file mode 100644 index 000000000..ee18675f7 --- /dev/null +++ b/arch/aarch64/kernel/tasks.c @@ -0,0 +1,207 @@ +/* + * Copyright (c) 2017, Stefan Lankes, RWTH Aachen University + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include + +#define TLS_ALIGNBITS 5 +#define TLS_ALIGNSIZE (1L << TLS_ALIGNBITS) +#define TSL_ALIGNMASK ((~0L) << TLS_ALIGNBITS) +#define TLS_FLOOR(addr) ((((size_t)addr) + TLS_ALIGNSIZE - 1) & TSL_ALIGNMASK) + +/* + * Note that linker symbols are not variables, they have no memory allocated for + * maintaining a value, rather their address is their value. 
+ */
+extern const void tls_start;
+extern const void tls_end;
+
+extern atomic_int32_t cpu_online;
+
+static char tls[16][DEFAULT_STACK_SIZE];
+static int id = 0;
+
+static int init_tls(void)
+{
+	task_t* curr_task = per_core(current_task);
+
+	// do we have thread-local storage?
+	if (((size_t) &tls_end - (size_t) &tls_start) > 0) {
+		char* tls_addr = NULL;
+		size_t tpidr_el0;
+
+		curr_task->tls_addr = (size_t) &tls_start;
+		curr_task->tls_size = (size_t) &tls_end - (size_t) &tls_start;
+
+		//tls_addr = kmalloc(curr_task->tls_size + TLS_ALIGNSIZE + sizeof(size_t));
+		tls_addr = tls[id];
+		id++;
+		if (BUILTIN_EXPECT(!tls_addr, 0)) {
+			LOG_ERROR("load_task: heap is missing!\n");
+			return -ENOMEM;
+		}
+
+		memset(tls_addr, 0x00, TLS_ALIGNSIZE);
+		memcpy((void*) TLS_FLOOR(tls_addr), (void*) curr_task->tls_addr, curr_task->tls_size);
+		tpidr_el0 = (size_t) TLS_FLOOR(tls_addr) + curr_task->tls_size;
+		*((size_t*)tpidr_el0) = tpidr_el0;
+
+		// set the tpidr_el0 register to the TLS segment
+		set_tls(tpidr_el0);
+		//LOG_INFO("TLS of task %d on core %d starts at 0x%zx (size 0x%zx)\n", curr_task->id, CORE_ID, TLS_FLOOR(tls_addr), curr_task->tls_size);
+	} else set_tls(0); // no TLS => clear the tpidr_el0 register
+
+	return 0;
+}
+
+static int thread_entry(void* arg, size_t ep)
+{
+	if (init_tls())
+		return -ENOMEM;
+
+	//vma_dump();
+
+	entry_point_t call_ep = (entry_point_t) ep;
+	call_ep(arg);
+	/* After finishing the task, we will return here and call the
+	   cleanup function, which calls the scheduler */
+	leave_kernel_task();
+
+	return 0;
+}
+
+size_t* get_current_stack(void)
+{
+	task_t* curr_task = per_core(current_task);
+	size_t stptr = (size_t) curr_task->stack;
+
+	if (curr_task->status == TASK_IDLE)
+		stptr += KERNEL_STACK_SIZE - 0x10;
+	else
+		stptr = (stptr + DEFAULT_STACK_SIZE - sizeof(size_t)) & ~0x1F;
+
+	//set_tss(stptr, (size_t) curr_task->ist_addr + KERNEL_STACK_SIZE - 0x10);
+
+	return curr_task->last_stack_pointer;
+}
+
+int create_default_frame(task_t* task, entry_point_t ep, void* arg, uint32_t core_id)
+{
+	size_t *stack;
+	struct state *stptr;
+	size_t state_size;
+
+	if (BUILTIN_EXPECT(!task, 0))
+		return -EINVAL;
+
+	if (BUILTIN_EXPECT(!task->stack, 0))
+		return -EINVAL;
+
+	LOG_INFO("Task %d uses memory region [%p - %p] as stack\n", task->id, task->stack, (char*) task->stack + DEFAULT_STACK_SIZE - 1);
+	LOG_INFO("Task %d uses memory region [%p - %p] as IST1\n", task->id, task->ist_addr, (char*) task->ist_addr + KERNEL_STACK_SIZE - 1);
+
+	memset(task->stack, 0xCD, DEFAULT_STACK_SIZE);
+
+	/* The new task's stack is crafted so that it looks like the stack of a
+	 * task which was scheduled away previously: the context-switch code can
+	 * simply pop this state and eret into the entry function.
+	 */
+	stack = (size_t*) (((size_t) task->stack + DEFAULT_STACK_SIZE - sizeof(size_t)) & ~0x1F);	// => stack is 32byte aligned
+
+	/* Only marker for debugging purposes, ... */
+	*stack-- = 0xDEADBEEF;
+	*stack-- = 0xDEADBEEF;
+
+	/* Next bunch on the stack is the initial register state.
+	 * The stack must look like the stack of a task which was
+	 * scheduled away previously. */
+	state_size = sizeof(struct state);
+	stack = (size_t*) ((size_t) stack - state_size);
+
+	stptr = (struct state *) stack;
+	memset(stptr, 0x00, state_size);
+	//stptr->sp = (size_t)stack + state_size;
+	/* the first-function-to-be-called's arguments, ... */
+	stptr->x0 = (size_t) arg;
+
+	/* The exception link register (ELR_EL1) needs to hold the address of the
+	 * first function to be called when returning from switch_context. */
+	stptr->elr_el1 = (size_t)thread_entry;
+	stptr->x1 = (size_t)ep;	// use the second argument to transfer the entry point
+
+	/* SPSR: AArch64 EL1h mode with debug exceptions masked;
+	 * the condition flags are zero. */
+	stptr->spsr_el1 = 0x205;
+
+	/* Set the task's stack pointer entry to the stack we have crafted right now. */
+	task->last_stack_pointer = (size_t*)stack;
+
+	return 0;
+}
+
+int is_proxy(void)
+{
+	return 0;
+}
+
+void wait_for_task(void)
+{
+	HALT;
+}
+
+void wakeup_core(uint32_t core_id)
+{
+	// Currently not required...
+}
+
+void shutdown_system(void)
+{
+	LOG_INFO("Try to shutdown system\n");
+
+	atomic_int32_dec(&cpu_online);
+	while(1) {
+		HALT;
+	}
+}
+
+extern uint32_t uhyve;
+const int32_t is_uhyve(void)
+{
+	return (uhyve != 0);
+}
+
+#if 0
+extern uint32_t single_kernel;
+const int32_t is_single_kernel(void)
+{
+	return (single_kernel != 0);
+}
+#endif
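A numeric illustration of the TLS_FLOOR rounding used above (32-byte alignment, i.e. TLS_ALIGNBITS = 5); these assertions are editorial, not part of the file:

_Static_assert(TLS_FLOOR(0x1001) == 0x1020, "rounds up to the next 32-byte boundary");
_Static_assert(TLS_FLOOR(0x1020) == 0x1020, "already aligned values are unchanged");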
diff --git a/arch/aarch64/kernel/tasks.dep b/arch/aarch64/kernel/tasks.dep
new file mode 100644
index 000000000..05442ef05
--- /dev/null
+++ b/arch/aarch64/kernel/tasks.dep
@@ -0,0 +1,8 @@
+arch/arm64/kernel/tasks.o: arch/arm64/kernel/tasks.c \
+  /home/stefan/HermitCore/include/hermit/stdio.h \
+  /home/stefan/HermitCore/include/hermit/config.h \
+  /home/stefan/HermitCore/include/hermit/stddef.h \
+  /home/stefan/HermitCore/arch/arm64/include/asm/stddef.h \
+  /home/stefan/HermitCore/arch/arm64/include/asm/irqflags.h \
+  /home/stefan/HermitCore/include/hermit/stdarg.h \
+  /home/stefan/HermitCore/include/hermit/stdlib.h
diff --git a/arch/aarch64/kernel/timer.c b/arch/aarch64/kernel/timer.c
new file mode 100644
index 000000000..1c39f510a
--- /dev/null
+++ b/arch/aarch64/kernel/timer.c
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2010-2017, Stefan Lankes, RWTH Aachen University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/*
+ * This will keep track of how many ticks the system
+ * has been running for
+ */
+DEFINE_PER_CORE(uint64_t, timer_ticks, 0);
+static uint32_t freq_hz;	/* frequency in Hz (updates per second) */
+
+#if 0
+extern int32_t boot_processor;
+#endif
+
+#define MHZ 1000000
+
+#ifdef DYNAMIC_TICKS
+DEFINE_PER_CORE(uint64_t, last_tsc, 0);
+static uint64_t boot_tsc = 0;
+
+void check_ticks(void)
+{
+	// do we already know the timer frequency? => if not, ignore this check
+	if (!freq_hz)
+		return;
+
+	const uint64_t curr_tsc = get_cntpct();
+	rmb();
+
+	const uint64_t diff_tsc = curr_tsc - per_core(last_tsc);
+	const uint64_t diff_ticks = (diff_tsc * (uint64_t) TIMER_FREQ) / freq_hz;
+
+	if (diff_ticks > 0) {
+		set_per_core(timer_ticks, per_core(timer_ticks) + diff_ticks);
+		set_per_core(last_tsc, curr_tsc);
+		rmb();
+	}
+}
+#else
+static void restart_periodic_timer(void)
+{
+	set_cntp_tval(freq_hz / TIMER_FREQ);
+	set_cntp_ctl(1);
+}
+#endif
+
+int timer_deadline(uint32_t ticks)
+{
+	set_cntp_tval(ticks * freq_hz / TIMER_FREQ);
+	set_cntp_ctl(1);
+
+	return 0;
+}
+
+void timer_disable(void)
+{
+	/* stop timer */
+	set_cntp_ctl(0);
+}
+
+int timer_is_running(void)
+{
+	uint32_t v = get_cntp_ctl();
+
+	return (v & 0x1);
+}
+
+/*
+ * Handles the timer. In this case, it's very simple: We
+ * increment the 'timer_ticks' variable every time the
+ * timer fires.
+ */
+static void timer_handler(struct state *s)
+{
+#ifndef DYNAMIC_TICKS
+	/* Increment our 'tick counter' */
+	set_per_core(timer_ticks, per_core(timer_ticks)+1);
+	restart_periodic_timer();
+#else
+	timer_disable();
+#endif
+
+#if 0
+	/*
+	 * Every TIMER_FREQ clocks (approximately 1 second), we will
+	 * display a message on the screen
+	 */
+	if (timer_ticks % TIMER_FREQ == 0) {
+		LOG_INFO("One second has passed %d\n", CORE_ID);
+	}
+#endif
+}
+
+void udelay(uint32_t usecs)
+{
+	uint64_t diff, end, start = get_cntpct();
+	uint64_t deadline = ((uint64_t) usecs * freq_hz) / 1000000;
+
+	do {
+		end = get_cntpct();
+		rmb();
+		diff = end > start ? end - start : start - end;
+		if ((diff < deadline) && (deadline - diff > 50000))
+			check_workqueues();
+	} while(diff < deadline);
+}
+
+int timer_wait(unsigned int ticks)
+{
+	uint64_t eticks = per_core(timer_ticks) + ticks;
+
+	task_t* curr_task = per_core(current_task);
+
+	if (curr_task->status == TASK_IDLE)
+	{
+		/*
+		 * This will continuously loop until the given time has
+		 * been reached
+		 */
+		while (per_core(timer_ticks) < eticks) {
+			check_workqueues();
+
+			// recheck break condition
+			if (per_core(timer_ticks) >= eticks)
+				break;
+
+			PAUSE;
+		}
+	} else if (per_core(timer_ticks) < eticks) {
+		check_workqueues();
+
+		if (per_core(timer_ticks) < eticks) {
+			set_timer(eticks);
+			reschedule();
+		}
+	}
+
+	return 0;
+}
+
+int timer_init(void)
+{
+#ifdef DYNAMIC_TICKS
+	boot_tsc = get_cntpct();
+	set_per_core(last_tsc, boot_tsc);
+#endif
+
+	return 0;
+}
+
+/*
+ * Sets up the system clock
+ */
+int timer_calibration(void)
+{
+	freq_hz = get_cntfrq();
+
+	LOG_INFO("aarch64_timer: frequency %d kHz\n", freq_hz / 1000);
+
+	irq_install_handler(INT_PPI_NSPHYS_TIMER, timer_handler);
+
+#ifndef DYNAMIC_TICKS
+	restart_periodic_timer();
+#endif
+
+	return 0;
+}
diff --git a/arch/aarch64/kernel/timer.dep b/arch/aarch64/kernel/timer.dep
new file mode 100644
index 000000000..fa58269f8
--- /dev/null
+++ b/arch/aarch64/kernel/timer.dep
@@ -0,0 +1,31 @@
+arch/arm64/kernel/timer.o: arch/arm64/kernel/timer.c \
+ /home/stefan/HermitCore/include/hermit/stdio.h \
+ /home/stefan/HermitCore/include/hermit/config.h \
+ /home/stefan/HermitCore/include/hermit/stddef.h \
+ /home/stefan/HermitCore/arch/arm64/include/asm/stddef.h \
+ /home/stefan/HermitCore/arch/arm64/include/asm/irqflags.h \
+ /home/stefan/HermitCore/include/hermit/stdarg.h \
+ /home/stefan/HermitCore/include/hermit/string.h \
+ /home/stefan/HermitCore/arch/arm64/include/asm/string.h \
+ /home/stefan/HermitCore/include/hermit/processor.h \
+ /home/stefan/HermitCore/arch/arm64/include/asm/processor.h \
+ /home/stefan/HermitCore/include/hermit/time.h \
+ /home/stefan/HermitCore/arch/arm64/include/asm/time.h \
+ /home/stefan/HermitCore/include/hermit/tasks.h \
+ /home/stefan/HermitCore/include/hermit/tasks_types.h \
+ /home/stefan/HermitCore/include/hermit/spinlock_types.h \
+ /home/stefan/HermitCore/arch/arm64/include/asm/atomic.h \
+ /home/stefan/HermitCore/arch/arm64/include/asm/atomic32.h \
+ /home/stefan/HermitCore/arch/arm64/include/asm/atomic64.h \
+ /home/stefan/HermitCore/include/hermit/vma.h \
+ /home/stefan/HermitCore/arch/arm64/include/asm/page.h \
+ /home/stefan/HermitCore/include/hermit/stdlib.h \
+ /home/stefan/HermitCore/include/hermit/signal.h \
+ /home/stefan/HermitCore/include/hermit/semaphore_types.h \
+ /home/stefan/HermitCore/arch/arm64/include/asm/tasks_types.h \
+ /home/stefan/HermitCore/arch/arm64/include/asm/tasks.h \
+ /home/stefan/HermitCore/include/hermit/errno.h \
+ /home/stefan/HermitCore/include/hermit/spinlock.h \
+ /home/stefan/HermitCore/include/hermit/logging.h \
+ /home/stefan/HermitCore/include/hermit/syscall.h \
+ /home/stefan/HermitCore/include/stdlib.h
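[Editor's note — illustrative example, not part of the patch] check_ticks() above converts elapsed cycles of the ARM generic timer into scheduler ticks with diff_ticks = diff_tsc * TIMER_FREQ / freq_hz. A self-contained sanity check of that arithmetic (the 62.5 MHz counter frequency is an assumption; the real value is read from CNTFRQ_EL0 by timer_calibration()):

    #include <stdint.h>
    #include <stdio.h>

    #define TIMER_FREQ 100   /* scheduler ticks per second, as in HermitCore */

    int main(void)
    {
        uint64_t freq_hz  = 62500000ULL;  /* assumed counter frequency: 62.5 MHz */
        uint64_t diff_tsc = 1250000ULL;   /* counter cycles elapsed since last_tsc */

        /* same formula as check_ticks(): cycles * (ticks/s) / (cycles/s) */
        uint64_t diff_ticks = (diff_tsc * (uint64_t) TIMER_FREQ) / freq_hz;

        /* 1250000 cycles at 62.5 MHz are 20 ms, i.e. exactly 2 ticks at 100 Hz */
        printf("%llu ticks\n", (unsigned long long) diff_ticks);
        return 0;
    }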
diff --git a/arch/aarch64/kernel/uart.c b/arch/aarch64/kernel/uart.c
new file mode 100644
index 000000000..69edd3318
--- /dev/null
+++ b/arch/aarch64/kernel/uart.c
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2014-2017, Stefan Lankes, Daniel Krebs, RWTH Aachen University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of the University nor the names of its contributors
+ *      may be used to endorse or promote products derived from this
+ *      software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <hermit/stdlib.h>
+#include <hermit/stdio.h>
+#include <hermit/errno.h>
+#include <hermit/string.h>
+#include <hermit/ctype.h>
+#include <asm/uart.h>
+
+static volatile unsigned int* mmio = NULL;
+
+/* Puts a single character on a serial device */
+int uart_putchar(unsigned char c)
+{
+	if (mmio)
+		*mmio = (unsigned int) c;
+
+	return (int) c;
+}
+
+/* Uses the routine above to output a string...
*/ +int uart_puts(const char *text) +{ + size_t i, len = strlen(text); + + if (!mmio) + return 0; + + for (i = 0; i < len; i++) + uart_putchar(text[i]); + + return len; +} + +int uart_early_init(char* cmdline) +{ + if (is_uhyve()) + return 0; + + // default value of our QEMU configuration + mmio = (unsigned int*) 0x09000000; + + return 0; +} + +int uart_init(void) +{ + if (is_uhyve()) + return 0; + + mmio = (unsigned int*) 0x09000000; + + return 0; +} diff --git a/arch/aarch64/kernel/uart.dep b/arch/aarch64/kernel/uart.dep new file mode 100644 index 000000000..395587c84 --- /dev/null +++ b/arch/aarch64/kernel/uart.dep @@ -0,0 +1,13 @@ +arch/arm64/kernel/uart.o: arch/arm64/kernel/uart.c \ + /home/stefan/HermitCore/include/hermit/stdlib.h \ + /home/stefan/HermitCore/include/hermit/config.h \ + /home/stefan/HermitCore/include/hermit/stddef.h \ + /home/stefan/HermitCore/arch/arm64/include/asm/stddef.h \ + /home/stefan/HermitCore/arch/arm64/include/asm/irqflags.h \ + /home/stefan/HermitCore/include/hermit/stdio.h \ + /home/stefan/HermitCore/include/hermit/stdarg.h \ + /home/stefan/HermitCore/include/hermit/errno.h \ + /home/stefan/HermitCore/include/hermit/string.h \ + /home/stefan/HermitCore/arch/arm64/include/asm/string.h \ + /home/stefan/HermitCore/include/hermit/ctype.h \ + /home/stefan/HermitCore/arch/arm64/include/asm/uart.h diff --git a/arch/aarch64/libkern/Makefile b/arch/aarch64/libkern/Makefile new file mode 100644 index 000000000..4dd68931d --- /dev/null +++ b/arch/aarch64/libkern/Makefile @@ -0,0 +1,5 @@ +C_source := +ASM_source := memcpy.S strlen.S memset.S +MODULE := arch_arm64_libkern + +include $(TOPDIR)/Makefile.inc diff --git a/arch/aarch64/libkern/memcpy.S b/arch/aarch64/libkern/memcpy.S new file mode 100644 index 000000000..7ce926631 --- /dev/null +++ b/arch/aarch64/libkern/memcpy.S @@ -0,0 +1,223 @@ +/* Copyright (c) 2012, Linaro Limited + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Linaro nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ + +/* + * Copyright (c) 2015 ARM Ltd + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the company may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* Assumptions: + * + * ARMv8-a, AArch64, unaligned accesses. + * + */ + +#define dstin x0 +#define src x1 +#define count x2 +#define dst x3 +#define srcend x4 +#define dstend x5 +#define A_l x6 +#define A_lw w6 +#define A_h x7 +#define A_hw w7 +#define B_l x8 +#define B_lw w8 +#define B_h x9 +#define C_l x10 +#define C_h x11 +#define D_l x12 +#define D_h x13 +#define E_l src +#define E_h count +#define F_l srcend +#define F_h dst +#define tmp1 x9 + +#define L(l) .L ## l + +/* Copies are split into 3 main cases: small copies of up to 16 bytes, + medium copies of 17..96 bytes which are fully unrolled. Large copies + of more than 96 bytes align the destination and use an unrolled loop + processing 64 bytes per iteration. + Small and medium copies read all data before writing, allowing any + kind of overlap, and memmove tailcalls memcpy for these cases as + well as non-overlapping copies. +*/ + +#define ENTRY(sym) \ + .text; .globl sym; .align 2; .type sym,#function; sym: +#define EENTRY(sym) \ + .globl sym; sym: +#define END(sym) .size sym, . - sym +#define EEND(sym) + +ENTRY(aarch64_memcpy) + prfm PLDL1KEEP, [src] + add srcend, src, count + add dstend, dstin, count + cmp count, 16 + b.ls L(copy16) + cmp count, 96 + b.hi L(copy_long) + + /* Medium copies: 17..96 bytes. */ + sub tmp1, count, 1 + ldp A_l, A_h, [src] + tbnz tmp1, 6, L(copy96) + ldp D_l, D_h, [srcend, -16] + tbz tmp1, 5, 1f + ldp B_l, B_h, [src, 16] + ldp C_l, C_h, [srcend, -32] + stp B_l, B_h, [dstin, 16] + stp C_l, C_h, [dstend, -32] +1: + stp A_l, A_h, [dstin] + stp D_l, D_h, [dstend, -16] + ret + + .p2align 4 + /* Small copies: 0..16 bytes. */ +L(copy16): + cmp count, 8 + b.lo 1f + ldr A_l, [src] + ldr A_h, [srcend, -8] + str A_l, [dstin] + str A_h, [dstend, -8] + ret + .p2align 4 +1: + tbz count, 2, 1f + ldr A_lw, [src] + ldr A_hw, [srcend, -4] + str A_lw, [dstin] + str A_hw, [dstend, -4] + ret + + /* Copy 0..3 bytes. Use a branchless sequence that copies the same + byte 3 times if count==1, or the 2nd byte twice if count==2. 
*/ +1: + cbz count, 2f + lsr tmp1, count, 1 + ldrb A_lw, [src] + ldrb A_hw, [srcend, -1] + ldrb B_lw, [src, tmp1] + strb A_lw, [dstin] + strb B_lw, [dstin, tmp1] + strb A_hw, [dstend, -1] +2: ret + + .p2align 4 + /* Copy 64..96 bytes. Copy 64 bytes from the start and + 32 bytes from the end. */ +L(copy96): + ldp B_l, B_h, [src, 16] + ldp C_l, C_h, [src, 32] + ldp D_l, D_h, [src, 48] + ldp E_l, E_h, [srcend, -32] + ldp F_l, F_h, [srcend, -16] + stp A_l, A_h, [dstin] + stp B_l, B_h, [dstin, 16] + stp C_l, C_h, [dstin, 32] + stp D_l, D_h, [dstin, 48] + stp E_l, E_h, [dstend, -32] + stp F_l, F_h, [dstend, -16] + ret + + /* Align DST to 16 byte alignment so that we don't cross cache line + boundaries on both loads and stores. There are at least 96 bytes + to copy, so copy 16 bytes unaligned and then align. The loop + copies 64 bytes per iteration and prefetches one iteration ahead. */ + + .p2align 4 +L(copy_long): + and tmp1, dstin, 15 + bic dst, dstin, 15 + ldp D_l, D_h, [src] + sub src, src, tmp1 + add count, count, tmp1 /* Count is now 16 too large. */ + ldp A_l, A_h, [src, 16] + stp D_l, D_h, [dstin] + ldp B_l, B_h, [src, 32] + ldp C_l, C_h, [src, 48] + ldp D_l, D_h, [src, 64]! + subs count, count, 128 + 16 /* Test and readjust count. */ + b.ls 2f +1: + stp A_l, A_h, [dst, 16] + ldp A_l, A_h, [src, 16] + stp B_l, B_h, [dst, 32] + ldp B_l, B_h, [src, 32] + stp C_l, C_h, [dst, 48] + ldp C_l, C_h, [src, 48] + stp D_l, D_h, [dst, 64]! + ldp D_l, D_h, [src, 64]! + subs count, count, 64 + b.hi 1b + + /* Write the last full set of 64 bytes. The remainder is at most 64 + bytes, so it is safe to always copy 64 bytes from the end even if + there is just 1 byte left. */ +2: + ldp E_l, E_h, [srcend, -64] + stp A_l, A_h, [dst, 16] + ldp A_l, A_h, [srcend, -48] + stp B_l, B_h, [dst, 32] + ldp B_l, B_h, [srcend, -32] + stp C_l, C_h, [dst, 48] + ldp C_l, C_h, [srcend, -16] + stp D_l, D_h, [dst, 64] + stp E_l, E_h, [dstend, -64] + stp A_l, A_h, [dstend, -48] + stp B_l, B_h, [dstend, -32] + stp C_l, C_h, [dstend, -16] + ret +END(aarch64_memcpy) diff --git a/arch/aarch64/libkern/memset.S b/arch/aarch64/libkern/memset.S new file mode 100644 index 000000000..6601171e2 --- /dev/null +++ b/arch/aarch64/libkern/memset.S @@ -0,0 +1,229 @@ +/* Copyright (c) 2012, Linaro Limited + All rights reserved. + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Linaro nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ +/* Assumptions: + * + * ARMv8-a, AArch64 + * Unaligned accesses + * + */ + +/* By default we assume that the DC instruction can be used to zero + data blocks more efficiently. In some circumstances this might be + unsafe, for example in an asymmetric multiprocessor environment with + different DC clear lengths (neither the upper nor lower lengths are + safe to use). The feature can be disabled by defining DONT_USE_DC. + If code may be run in a virtualized environment, then define + MAYBE_VIRT. This will cause the code to cache the system register + values rather than re-reading them each call. */ +#define dstin x0 +#define val w1 +#define count x2 +#define tmp1 x3 +#define tmp1w w3 +#define tmp2 x4 +#define tmp2w w4 +#define zva_len_x x5 +#define zva_len w5 +#define zva_bits_x x6 +#define A_l x7 +#define A_lw w7 +#define dst x8 +#define tmp3w w9 + +#define ENTRY(sym) \ + .text; .globl sym; .align 2; .type sym,#function; sym: +#define EENTRY(sym) \ + .globl sym; sym: +#define END(sym) .size sym, . - sym +#define EEND(sym) + +ENTRY(aarch64_memset) + mov dst, dstin /* Preserve return value. */ + ands A_lw, val, #255 +#ifndef DONT_USE_DC + b.eq .Lzero_mem +#endif + orr A_lw, A_lw, A_lw, lsl #8 + orr A_lw, A_lw, A_lw, lsl #16 + orr A_l, A_l, A_l, lsl #32 +.Ltail_maybe_long: + cmp count, #64 + b.ge .Lnot_short +.Ltail_maybe_tiny: + cmp count, #15 + b.le .Ltail15tiny +.Ltail63: + ands tmp1, count, #0x30 + b.eq .Ltail15 + add dst, dst, tmp1 + cmp tmp1w, #0x20 + b.eq 1f + b.lt 2f + stp A_l, A_l, [dst, #-48] +1: + stp A_l, A_l, [dst, #-32] +2: + stp A_l, A_l, [dst, #-16] +.Ltail15: + and count, count, #15 + add dst, dst, count + stp A_l, A_l, [dst, #-16] /* Repeat some/all of last store. */ + ret +.Ltail15tiny: + /* Set up to 15 bytes. Does not assume earlier memory + being set. */ + tbz count, #3, 1f + str A_l, [dst], #8 +1: + tbz count, #2, 1f + str A_lw, [dst], #4 +1: + tbz count, #1, 1f + strh A_lw, [dst], #2 +1: + tbz count, #0, 1f + strb A_lw, [dst] +1: + ret + /* Critical loop. Start at a new cache line boundary. Assuming + * 64 bytes per line, this ensures the entire loop is in one line. */ + .p2align 6 +.Lnot_short: + neg tmp2, dst + ands tmp2, tmp2, #15 + b.eq 2f + /* Bring DST to 128-bit (16-byte) alignment. We know that there's + * more than that to set, so we simply store 16 bytes and advance by + * the amount required to reach alignment. */ + sub count, count, tmp2 + stp A_l, A_l, [dst] + add dst, dst, tmp2 + /* There may be less than 63 bytes to go now. */ + cmp count, #63 + b.le .Ltail63 +2: + sub dst, dst, #16 /* Pre-bias. */ + sub count, count, #64 +1: + stp A_l, A_l, [dst, #16] + stp A_l, A_l, [dst, #32] + stp A_l, A_l, [dst, #48] + stp A_l, A_l, [dst, #64]! + subs count, count, #64 + b.ge 1b + tst count, #0x3f + add dst, dst, #16 + b.ne .Ltail63 + ret +#ifndef DONT_USE_DC + /* For zeroing memory, check to see if we can use the ZVA feature to + * zero entire 'cache' lines. 
*/ +.Lzero_mem: + mov A_l, #0 + cmp count, #63 + b.le .Ltail_maybe_tiny + neg tmp2, dst + ands tmp2, tmp2, #15 + b.eq 1f + sub count, count, tmp2 + stp A_l, A_l, [dst] + add dst, dst, tmp2 + cmp count, #63 + b.le .Ltail63 +1: + /* For zeroing small amounts of memory, it's not worth setting up + * the line-clear code. */ + cmp count, #128 + b.lt .Lnot_short +#ifdef MAYBE_VIRT + /* For efficiency when virtualized, we cache the ZVA capability. */ + adrp tmp2, .Lcache_clear + ldr zva_len, [tmp2, #:lo12:.Lcache_clear] + tbnz zva_len, #31, .Lnot_short + cbnz zva_len, .Lzero_by_line + mrs tmp1, dczid_el0 + tbz tmp1, #4, 1f + /* ZVA not available. Remember this for next time. */ + mov zva_len, #~0 + str zva_len, [tmp2, #:lo12:.Lcache_clear] + b .Lnot_short +1: + mov tmp3w, #4 + and zva_len, tmp1w, #15 /* Safety: other bits reserved. */ + lsl zva_len, tmp3w, zva_len + str zva_len, [tmp2, #:lo12:.Lcache_clear] +#else + mrs tmp1, dczid_el0 + tbnz tmp1, #4, .Lnot_short + mov tmp3w, #4 + and zva_len, tmp1w, #15 /* Safety: other bits reserved. */ + lsl zva_len, tmp3w, zva_len +#endif +.Lzero_by_line: + /* Compute how far we need to go to become suitably aligned. We're + * already at quad-word alignment. */ + cmp count, zva_len_x + b.lt .Lnot_short /* Not enough to reach alignment. */ + sub zva_bits_x, zva_len_x, #1 + neg tmp2, dst + ands tmp2, tmp2, zva_bits_x + b.eq 1f /* Already aligned. */ + /* Not aligned, check that there's enough to copy after alignment. */ + sub tmp1, count, tmp2 + cmp tmp1, #64 + ccmp tmp1, zva_len_x, #8, ge /* NZCV=0b1000 */ + b.lt .Lnot_short + /* We know that there's at least 64 bytes to zero and that it's safe + * to overrun by 64 bytes. */ + mov count, tmp1 +2: + stp A_l, A_l, [dst] + stp A_l, A_l, [dst, #16] + stp A_l, A_l, [dst, #32] + subs tmp2, tmp2, #64 + stp A_l, A_l, [dst, #48] + add dst, dst, #64 + b.ge 2b + /* We've overrun a bit, so adjust dst downwards. */ + add dst, dst, tmp2 +1: + sub count, count, zva_len_x +3: + dc zva, dst + add dst, dst, zva_len_x + subs count, count, zva_len_x + b.ge 3b + ands count, count, zva_bits_x + b.ne .Ltail_maybe_long + ret +END(aarch64_memset) + +#ifdef MAYBE_VIRT + .bss + .p2align 2 +.Lcache_clear: + .space 4 +#endif +#endif /* DONT_USE_DC */ diff --git a/arch/aarch64/libkern/strlen.S b/arch/aarch64/libkern/strlen.S new file mode 100644 index 000000000..58e3f8c66 --- /dev/null +++ b/arch/aarch64/libkern/strlen.S @@ -0,0 +1,123 @@ +/* Copyright (c) 2014, Linaro Limited + All rights reserved. + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Linaro nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64
+ */
+
+/* Arguments and results.  */
+#define srcin		x0
+#define len		x0
+/* Locals and temporaries.  */
+#define src		x1
+#define data1		x2
+#define data2		x3
+#define data2a		x4
+#define has_nul1	x5
+#define has_nul2	x6
+#define tmp1		x7
+#define tmp2		x8
+#define tmp3		x9
+#define tmp4		x10
+#define zeroones	x11
+#define pos		x12
+#define REP8_01 0x0101010101010101
+#define REP8_7f 0x7f7f7f7f7f7f7f7f
+#define REP8_80 0x8080808080808080
+
+#define ENTRY(sym) \
+	.text; .globl sym; .align 2; .type sym,#function; sym:
+#define EENTRY(sym) \
+	.globl sym; sym:
+#define END(sym) .size sym, . - sym
+#define EEND(sym)
+
+	/* Start of critical section -- keep to one 64-byte cache line.  */
+ENTRY(aarch64_strlen)
+	mov	zeroones, #REP8_01
+	bic	src, srcin, #15
+	ands	tmp1, srcin, #15
+	b.ne	.Lmisaligned
+	/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
+	   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
+	   can be done in parallel across the entire word.  */
+	/* The inner loop deals with two Dwords at a time.  This has a
+	   slightly higher start-up cost, but we should win quite quickly,
+	   especially on cores with a high number of issue slots per
+	   cycle, as we get much better parallelism out of the operations.  */
+.Lloop:
+	ldp	data1, data2, [src], #16
+.Lrealigned:
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, #REP8_7f
+	sub	tmp3, data2, zeroones
+	orr	tmp4, data2, #REP8_7f
+	bic	has_nul1, tmp1, tmp2
+	bics	has_nul2, tmp3, tmp4
+	ccmp	has_nul1, #0, #0, eq	/* NZCV = 0000  */
+	b.eq	.Lloop
+	/* End of critical section -- keep to one 64-byte cache line.  */
+	sub	len, src, srcin
+	cbz	has_nul1, .Lnul_in_data2
+#ifdef __AARCH64EB__
+	mov	data2, data1
+#endif
+	sub	len, len, #8
+	mov	has_nul2, has_nul1
+.Lnul_in_data2:
+#ifdef __AARCH64EB__
+	/* For big-endian, carry propagation (if the final byte in the
+	   string is 0x01) means we cannot use has_nul directly.  The
+	   easiest way to get the correct byte is to byte-swap the data
+	   and calculate the syndrome a second time.  */
+	rev	data2, data2
+	sub	tmp1, data2, zeroones
+	orr	tmp2, data2, #REP8_7f
+	bic	has_nul2, tmp1, tmp2
+#endif
+	sub	len, len, #8
+	rev	has_nul2, has_nul2
+	clz	pos, has_nul2
+	add	len, len, pos, lsr #3	/* Bits to bytes.  */
+	ret
+.Lmisaligned:
+	cmp	tmp1, #8
+	neg	tmp1, tmp1
+	ldp	data1, data2, [src], #16
+	lsl	tmp1, tmp1, #3		/* Bytes beyond alignment -> bits.  */
+	mov	tmp2, #~0
+#ifdef __AARCH64EB__
+	/* Big-endian.  Early bytes are at MSB.  */
+	lsl	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
+#else
+	/* Little-endian.  Early bytes are at LSB.  */
+	lsr	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
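/* [Editor's note] Worked example of the per-byte zero test used in the loop
   above, (X - 1) & ~(X | 0x7f), evaluated on a single byte:

      X = 0x00:  0xff & ~0x7f = 0xff & 0x80 = 0x80   -> non-zero, NUL found
      X = 0x01:  0x00 & ~0x7f = 0x00 & 0x80 = 0x00   -> zero
      X = 0x80:  0x7f & ~0xff = 0x7f & 0x00 = 0x00   -> zero

   The syndrome is non-zero iff the byte is NUL; cross-byte borrows are the
   corner case re-computed in the __AARCH64EB__ path above.  */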
+#endif
+	orr	data1, data1, tmp2
+	orr	data2a, data2, tmp2
+	csinv	data1, data1, xzr, le
+	csel	data2, data2, data2a, le
+	b	.Lrealigned
+END(aarch64_strlen)
diff --git a/arch/aarch64/mm/Makefile b/arch/aarch64/mm/Makefile
new file mode 100644
index 000000000..660366f69
--- /dev/null
+++ b/arch/aarch64/mm/Makefile
@@ -0,0 +1,4 @@
+C_source := page.c memory.c vma.c
+MODULE := arch_arm64_mm
+
+include $(TOPDIR)/Makefile.inc
diff --git a/arch/aarch64/mm/memory.c b/arch/aarch64/mm/memory.c
new file mode 100644
index 000000000..85d78779c
--- /dev/null
+++ b/arch/aarch64/mm/memory.c
@@ -0,0 +1,298 @@
+/*
+ * Copyright (c) 2010, Stefan Lankes, RWTH Aachen University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of the University nor the names of its contributors
+ *      may be used to endorse or promote products derived from this
+ *      software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+extern uint64_t base;
+extern uint64_t limit;
+
+typedef struct free_list {
+	size_t start, end;
+	struct free_list* next;
+	struct free_list* prev;
+} free_list_t;
+
+/*
+ * Note that linker symbols are not variables; they have no memory
+ * allocated for maintaining a value. Rather, their address is their value.
+ */
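/* [Editor's note] In practice this means the symbols below are only ever
   used through the & operator; for instance, memory_init() further down
   derives the kernel image size as

       size_t image_sz = (size_t) &kernel_end - (size_t) &kernel_start;

   Reading kernel_start as if it were a variable would instead fetch the
   first bytes of the kernel image. */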
+extern const void kernel_start;
+extern const void kernel_end;
+
+static spinlock_t list_lock = SPINLOCK_INIT;
+
+static free_list_t init_list = {0, 0, NULL, NULL};
+static free_list_t* free_start = (free_list_t*) &init_list;
+
+atomic_int64_t total_pages = ATOMIC_INIT(0);
+atomic_int64_t total_allocated_pages = ATOMIC_INIT(0);
+atomic_int64_t total_available_pages = ATOMIC_INIT(0);
+
+size_t get_pages(size_t npages)
+{
+	size_t i, ret = 0;
+	free_list_t* curr = free_start;
+
+	if (BUILTIN_EXPECT(!npages, 0))
+		return 0;
+	if (BUILTIN_EXPECT(npages > atomic_int64_read(&total_available_pages), 0))
+		return 0;
+
+	spinlock_lock(&list_lock);
+
+	while(curr) {
+		i = (curr->end - curr->start) / PAGE_SIZE;
+		if (i > npages) {
+			ret = curr->start;
+			curr->start += npages * PAGE_SIZE;
+			goto out;
+		} else if (i == npages) {
+			ret = curr->start;
+			if (curr->prev)
+				curr->prev->next = curr->next;
+			else
+				free_start = curr->next;
+			if (curr->next)
+				curr->next->prev = curr->prev;
+			if (curr != &init_list)
+				kfree(curr);
+			goto out;
+		}
+
+		curr = curr->next;
+	}
+out:
+	if (curr)
+		LOG_DEBUG("get_pages: ret 0x%llx, curr->start 0x%llx, curr->end 0x%llx\n", ret, curr->start, curr->end);
+
+	spinlock_unlock(&list_lock);
+
+	if (ret) {
+		atomic_int64_add(&total_allocated_pages, npages);
+		atomic_int64_sub(&total_available_pages, npages);
+	}
+
+	return ret;
+}
+
+DEFINE_PER_CORE(size_t, ztmp_addr, 0);
+
+size_t get_zeroed_page(void)
+{
+	size_t phyaddr = get_page();
+	size_t viraddr;
+	uint8_t flags;
+
+	if (BUILTIN_EXPECT(!phyaddr, 0))
+		return 0;
+
+	flags = irq_nested_disable();
+
+	viraddr = per_core(ztmp_addr);
+	if (BUILTIN_EXPECT(!viraddr, 0))
+	{
+		viraddr = vma_alloc(PAGE_SIZE, VMA_READ|VMA_WRITE|VMA_CACHEABLE);
+		if (BUILTIN_EXPECT(!viraddr, 0))
+			goto novaddr;
+
+		LOG_DEBUG("Core %d uses 0x%zx as temporary address\n", CORE_ID, viraddr);
+		set_per_core(ztmp_addr, viraddr);
+	}
+
+	__page_map(viraddr, phyaddr, 1, PG_GLOBAL|PG_RW|PG_PRESENT);
+
+	memset((void*) viraddr, 0x00, PAGE_SIZE);
+
+novaddr:
+	irq_nested_enable(flags);
+
+	return phyaddr;
+}
+
+/* TODO: coalescing of adjacent elements is still missing */
+int put_pages(size_t phyaddr, size_t npages)
+{
+	free_list_t* curr = free_start;
+
+	if (BUILTIN_EXPECT(!phyaddr, 0))
+		return -EINVAL;
+	if (BUILTIN_EXPECT(!npages, 0))
+		return -EINVAL;
+
+	spinlock_lock(&list_lock);
+
+	while(curr) {
+		if (phyaddr+npages*PAGE_SIZE == curr->start) {
+			curr->start = phyaddr;
+			goto out;
+		} else if (phyaddr == curr->end) {
+			curr->end += npages*PAGE_SIZE;
+			goto out;
+		} else if (phyaddr > curr->end) {
+			free_list_t* n = kmalloc(sizeof(free_list_t));
+
+			if (BUILTIN_EXPECT(!n, 0))
+				goto out_err;
+
+			/* add new element */
+			n->start = phyaddr;
+			n->end = phyaddr + npages * PAGE_SIZE;
+			n->prev = curr;
+			n->next = curr->next;
+			curr->next = n;
+			goto out;
+		}
+
+		curr = curr->next;
+	}
+out:
+	spinlock_unlock(&list_lock);
+
+	atomic_int64_sub(&total_allocated_pages, npages);
+	atomic_int64_add(&total_available_pages, npages);
+
+	return 0;
+
+out_err:
+	spinlock_unlock(&list_lock);
+
+	return -ENOMEM;
+}
+
+void* page_alloc(size_t sz, uint32_t flags)
+{
+	size_t viraddr = 0;
+	size_t phyaddr;
+	uint32_t npages = PAGE_FLOOR(sz) >> PAGE_BITS;
+	size_t pflags = PG_PRESENT|PG_GLOBAL; //|PG_XD;
+
+	if (BUILTIN_EXPECT(!npages, 0))
+		goto oom;
+
+	viraddr = vma_alloc(PAGE_FLOOR(sz), flags);
+	if (BUILTIN_EXPECT(!viraddr, 0))
+		goto oom;
+
+	phyaddr = get_pages(npages);
+	if (BUILTIN_EXPECT(!phyaddr, 0))
+	{
+		vma_free(viraddr, viraddr+npages*PAGE_SIZE);
+		viraddr = 0;
+		goto oom;
+	}
+
+	if (flags & VMA_WRITE)
+		pflags |= PG_RW;
+	if (!(flags & VMA_CACHEABLE))
+		pflags |= PG_PCD;
+
+	int ret = page_map(viraddr, phyaddr, npages, pflags);
+	if (BUILTIN_EXPECT(ret, 0))
+	{
+		vma_free(viraddr, viraddr+npages*PAGE_SIZE);
+		put_pages(phyaddr, npages);
+		viraddr = 0;
+	}
+
+oom:
+	return (void*) viraddr;
+}
+
+void page_free(void* viraddr, size_t sz)
+{
+	size_t phyaddr;
+
+	if (BUILTIN_EXPECT(!viraddr || !sz, 0))
+		return;
+
+	phyaddr = virt_to_phys((size_t)viraddr);
+
+	vma_free((size_t) viraddr, (size_t) viraddr + PAGE_FLOOR(sz));
+
+	if (phyaddr)
+		put_pages(phyaddr, PAGE_FLOOR(sz) >> PAGE_BITS);
+}
+
+int memory_init(void)
+{
+	size_t image_sz = (size_t) &kernel_end - (size_t) &kernel_start;
+	int ret = 0;
+
+	// enable paging and set up the kernel mapping
+	ret = page_init();
+	if (BUILTIN_EXPECT(ret, 0)) {
+		LOG_ERROR("Failed to initialize paging!\n");
+		return ret;
+	}
+
+	LOG_INFO("memory_init: base 0x%zx, image_size 0x%zx, limit 0x%zx\n", base, image_sz, limit);
+
+	// determine available memory
+	atomic_int64_add(&total_pages, (limit-base) >> PAGE_BITS);
+	atomic_int64_add(&total_available_pages, (limit-base) >> PAGE_BITS);
+
+	// initialize the free list
+	init_list.start = PAGE_FLOOR((size_t) &kernel_end + (16+511)*PAGE_SIZE);
+	if (limit < GICD_BASE)
+		init_list.end = limit;
+	else
+		init_list.end = GICD_BASE;
+
+	// determine allocated memory, we use 2MB pages to map the kernel
+	atomic_int64_add(&total_allocated_pages, PAGE_FLOOR((size_t) &kernel_end + 511*PAGE_SIZE) >> PAGE_BITS);
+	atomic_int64_sub(&total_available_pages, PAGE_FLOOR((size_t) &kernel_end + 511*PAGE_SIZE) >> PAGE_BITS);
+
+	LOG_INFO("free list starts at 0x%zx, limit 0x%zx\n", init_list.start, init_list.end);
+
+	ret = vma_init();
+	if (BUILTIN_EXPECT(ret, 0))
+		LOG_WARNING("Failed to initialize VMA regions: %d\n", ret);
+
+	if (limit > GICD_BASE + GICD_SIZE + GICC_SIZE) {
+		init_list.next = kmalloc(sizeof(free_list_t));
+		if (BUILTIN_EXPECT(!init_list.next, 0)) {
+			LOG_ERROR("Unable to allocate new element for the free list\n");
+			goto oom;
+		}
+
+		LOG_INFO("Add region 0x%zx - 0x%zx\n", GICD_BASE + GICD_SIZE + GICC_SIZE, limit);
+
+		init_list.next->prev = &init_list;
+		init_list.next->next = NULL;
+		init_list.next->start = GICD_BASE + GICD_SIZE + GICC_SIZE;
+		init_list.next->end = limit;
+	}
+
+oom:
+	return ret;
+}
diff --git a/arch/aarch64/mm/memory.dep b/arch/aarch64/mm/memory.dep
new file mode 100644
index 000000000..241b80e7f
--- /dev/null
+++ b/arch/aarch64/mm/memory.dep
@@ -0,0 +1,29 @@
+arch/arm64/mm/memory.o: arch/arm64/mm/memory.c \
+ /home/stefan/HermitCore/include/hermit/stddef.h \
+ /home/stefan/HermitCore/include/hermit/config.h \
+ /home/stefan/HermitCore/arch/arm64/include/asm/stddef.h \
+ /home/stefan/HermitCore/arch/arm64/include/asm/irqflags.h \
+ /home/stefan/HermitCore/include/hermit/stdlib.h \
+ /home/stefan/HermitCore/include/hermit/stdio.h \
+ /home/stefan/HermitCore/include/hermit/stdarg.h \
+ /home/stefan/HermitCore/include/hermit/string.h \
+ /home/stefan/HermitCore/arch/arm64/include/asm/string.h \
+ /home/stefan/HermitCore/include/hermit/spinlock.h \
+ /home/stefan/HermitCore/include/hermit/spinlock_types.h \
+ /home/stefan/HermitCore/arch/arm64/include/asm/atomic.h \
+ /home/stefan/HermitCore/arch/arm64/include/asm/atomic32.h \
+ /home/stefan/HermitCore/arch/arm64/include/asm/atomic64.h \
+ /home/stefan/HermitCore/include/hermit/tasks_types.h \
+ /home/stefan/HermitCore/include/hermit/vma.h \
+ /home/stefan/HermitCore/arch/arm64/include/asm/page.h \
+ /home/stefan/HermitCore/arch/arm64/include/asm/processor.h \
+ /home/stefan/HermitCore/include/hermit/signal.h \
+ /home/stefan/HermitCore/include/hermit/semaphore_types.h \
+ /home/stefan/HermitCore/arch/arm64/include/asm/tasks_types.h \
+ /home/stefan/HermitCore/include/hermit/errno.h \
+ /home/stefan/HermitCore/include/hermit/memory.h \
+ /home/stefan/HermitCore/include/hermit/logging.h \
+ /home/stefan/HermitCore/include/hermit/time.h \
+ /home/stefan/HermitCore/arch/arm64/include/asm/time.h \
+ /home/stefan/HermitCore/include/hermit/syscall.h \
+ /home/stefan/HermitCore/include/stdlib.h
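[Editor's note — usage sketch, not part of the patch] get_pages()/put_pages() above manage physical frames as a doubly linked list of [start, end) byte ranges. A minimal, hypothetical caller looks like this (the mapping step is only hinted at; a physical address must be mapped before it is touched):

    size_t phyaddr = get_pages(2);          /* carve two contiguous frames */
    if (phyaddr) {
        /* ... map phyaddr with vma_alloc() + page_map() and use it ... */
        put_pages(phyaddr, 2);              /* hand both frames back */
    }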
diff --git a/arch/aarch64/mm/page.c b/arch/aarch64/mm/page.c
new file mode 100644
index 000000000..75af10b0a
--- /dev/null
+++ b/arch/aarch64/mm/page.c
@@ -0,0 +1,294 @@
+/*
+ * Copyright (c) 2010-2018, Stefan Lankes, RWTH Aachen University
+ *               2014, Steffen Vogel, RWTH Aachen University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of the University nor the names of its contributors
+ *      may be used to endorse or promote products derived from this
+ *      software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * This is a 64-bit paging implementation for the AArch64 architecture
+ * using self-referenced page tables.
+ * See http://www.noteblok.net/2014/06/14/bachelor/ for a detailed description.
+ *
+ * @author Steffen Vogel
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+/* Note that linker symbols are not variables; they have no memory
+ * allocated for maintaining a value. Rather, their address is their value. */
+extern const void kernel_start;
+extern const void kernel_end;
+
+extern size_t l0_pgtable;
+
+/** Single-address space operating system => one lock for all tasks */
+static spinlock_irqsave_t page_lock = SPINLOCK_IRQSAVE_INIT;
+
+/** A self-reference enables direct access to all page tables */
+static size_t* const self[PAGE_LEVELS] = {
+	(size_t *) 0x0000FF8000000000ULL,
+	(size_t *) 0x0000FFFFC0000000ULL,
+	(size_t *) 0x0000FFFFFFE00000ULL,
+	(size_t *) 0x0000FFFFFFFFF000ULL
+};
+
+static uint8_t expect_zeroed_pages = 0;
+
+size_t virt_to_phys(size_t addr)
+{
+	size_t vpn   = addr >> PAGE_BITS;	// virtual page number
+	size_t entry = self[0][vpn];		// page table entry
+	size_t off   = addr  & ~PAGE_MASK;	// offset within page
+	size_t phy   = entry &  PAGE_MASK;	// physical page frame number
+
+	return (phy | off) & ((1ULL << VIRT_BITS) - 1);
+}
+
+/*
+ * get memory page size
+ */
+int getpagesize(void)
+{
+	return PAGE_SIZE;
+}
+
+//TODO: code is missing
+int page_set_flags(size_t viraddr, uint32_t npages, int flags)
+{
+	return -EINVAL;
+}
+
+int __page_map(size_t viraddr, size_t phyaddr, size_t npages, size_t bits)
+{
+	int lvl, ret = -ENOMEM;
+	long vpn = viraddr >> PAGE_BITS;
+	long first[PAGE_LEVELS], last[PAGE_LEVELS];
+	size_t page_counter = 0;
+	size_t cflags = 0;
+
+	//kprintf("Map %d pages at 0x%zx\n", npages, viraddr);
+
+	/* Calculate index boundaries for page map traversal */
+	for (lvl=0; lvl<PAGE_LEVELS; lvl++) {
+		first[lvl] = vpn >> (lvl * PAGE_MAP_BITS);
+		last[lvl] = (vpn+npages-1) >> (lvl * PAGE_MAP_BITS);
+	}
+
+	spinlock_irqsave_lock(&page_lock);
+
+	/* Start iterating through the entries
+	 * beginning at the root table */
+	for (lvl=PAGE_LEVELS-1; lvl>=0; lvl--) {
+		for (vpn=first[lvl]; vpn<=last[lvl]; vpn++) {
+			if (lvl) {
+				if (!self[lvl][vpn]) {
+					/* There's no table available which covers the region.
+					 * Therefore we need to create a new empty table. */
+					size_t phyaddr = get_pages(1);
+					if (BUILTIN_EXPECT(!phyaddr, 0))
+						goto out;
+
+					/* Reference the new table within its parent */
+					self[lvl][vpn] = phyaddr | PT_PT;
+
+					/* Fill new table with zeros */
+					//LOG_INFO("Clear new page table at %p\n", &self[lvl-1][vpn << PAGE_MAP_BITS]);
+					memset(&self[lvl-1][vpn << PAGE_MAP_BITS], 0, PAGE_SIZE);
+				}
+			} else { /* PGT */
+				// determine if we use contiguous pages
+				if (!cflags && !(viraddr & 0xFFFFULL) && (npages-page_counter >= 16))
+					cflags = PT_CONTIG;
+				else if (cflags && !(viraddr & 0xFFFFULL) && (npages-page_counter < 16))
+					cflags = 0;
+
+				if (bits & PG_DEVICE)
+					self[lvl][vpn] = phyaddr | PT_DEVICE | cflags;
+				else
+					self[lvl][vpn] = phyaddr | PT_MEM | cflags;
+
+				//if (bits & PG_DEVICE)
+				//	kprintf("viradd 0x%zx, reference 0x%zx\n", viraddr, self[lvl][vpn]);
+
+				//if (cflags && !(viraddr & 0xFFFFULL))
+				//	kprintf("use PT_CONTIG for 0x%zx, reference 0x%zx\n", viraddr, self[lvl][vpn]);
+
+				page_counter++;
+				phyaddr += PAGE_SIZE;
+				viraddr += PAGE_SIZE;
+			}
+		}
+	}
+
+	tlb_flush_range(viraddr, viraddr+npages*PAGE_SIZE);
+
+	ret = 0;
+out:
+	spinlock_irqsave_unlock(&page_lock);
+
+	return ret;
+}
+
+int page_unmap(size_t viraddr, size_t npages)
+{
+	if (BUILTIN_EXPECT(!npages, 0))
+		return 0;
+
+	//kprintf("Unmap %d pages at 0x%zx\n", npages, viraddr);
+
+	spinlock_irqsave_lock(&page_lock);
+
+	/* Start iterating through the entries.
+	 * Only the PGT entries are removed. Tables remain allocated. */
+	size_t vpn, start = viraddr >> PAGE_BITS;
+	for (vpn=start; vpn<start+npages; vpn++)
+		self[0][vpn] = 0;
+
+	spinlock_irqsave_unlock(&page_lock);
+
+	return 0;
+}
+
+int page_fault_handler(size_t viraddr, size_t pc)
+{
+	task_t* task = per_core(current_task);
+
+	int check_pagetables(size_t viraddr)
+	{
+		int lvl;
+		long vpn = viraddr >> PAGE_BITS;
+		long index[PAGE_LEVELS];
+
+		/* Calculate index boundaries for page map traversal */
+		for (lvl=0; lvl<PAGE_LEVELS; lvl++)
+			index[lvl] = vpn >> (lvl * PAGE_MAP_BITS);
+
+		/* do we already have a valid entry in the page tables? */
+		for (lvl=PAGE_LEVELS-1; lvl>=0; lvl--) {
+			vpn = index[lvl];
+
+			if (!self[lvl][vpn])
+				return 0;
+		}
+
+		return 1;
+	}
+
+	spinlock_irqsave_lock(&page_lock);
+
+	if ((task->heap) && (viraddr >= task->heap->start) && (viraddr < task->heap->end)) {
+		size_t flags;
+		int ret;
+
+		/*
+		 * do we have a valid page table entry? => flush TLB and return
+		 */
+		if (check_pagetables(viraddr)) {
+			tlb_flush_one_page(viraddr);
+			spinlock_irqsave_unlock(&page_lock);
+			return 0;
+		}
+
+		// on demand userspace heap mapping
+		viraddr &= PAGE_MASK;
+
+		size_t phyaddr = expect_zeroed_pages ? get_zeroed_page() : get_page();
+		if (BUILTIN_EXPECT(!phyaddr, 0)) {
+			LOG_ERROR("out of memory: task = %u\n", task->id);
+			goto default_handler;
+		}
+
+		flags = PG_USER|PG_RW;
+		ret = __page_map(viraddr, phyaddr, 1, flags);
+
+		if (BUILTIN_EXPECT(ret, 0)) {
+			LOG_ERROR("map_region: could not map %#lx to %#lx, task = %u\n", phyaddr, viraddr, task->id);
+			put_page(phyaddr);
+
+			goto default_handler;
+		}
+
+		spinlock_irqsave_unlock(&page_lock);
+
+		return 0;
+	}
+
+default_handler:
+	spinlock_irqsave_unlock(&page_lock);
+
+	return -EINVAL;
+}
+
+// weak symbol is used to detect a Go application
+void __attribute__((weak)) runtime_osinit();
+
+/*static void dump_pgtable(void)
+{
+	size_t* l0 = &l0_pgtable;
+
+	LOG_INFO("Dump page table tree: %p\n", l0);
+
+	for (int i=0; i<512; i++) {
+		if (l0[i] != 0) {
+			LOG_INFO("\tx[%d] = %zx\n", i, l0[i]);
+			size_t* l1 = (size_t*) l0[i];
+			for(int j=0; j<512; j++) {
+				if (l1[j] != 0) {
+					LOG_INFO("\t\ty[%d] = %zx\n", j, l1[j]);
+				}
+			}
+		}
+	}
+}*/
+
+int page_init(void)
+{
+	// do we have a Go application? => the weak symbol isn't zero
+	// => Go expects zeroed pages => set expect_zeroed_pages to true
+	if (runtime_osinit) {
+		expect_zeroed_pages = 1;
+		LOG_INFO("Detected Go runtime! Consequently, HermitCore zeroes the heap.\n");
+	}
+
+	return 0;
+}
diff --git a/arch/aarch64/mm/page.dep b/arch/aarch64/mm/page.dep
new file mode 100644
index 000000000..fe44b3740
--- /dev/null
+++ b/arch/aarch64/mm/page.dep
@@ -0,0 +1,31 @@
+arch/arm64/mm/page.o: arch/arm64/mm/page.c \
+ /home/stefan/HermitCore/include/hermit/stdio.h \
+ /home/stefan/HermitCore/include/hermit/config.h \
+ /home/stefan/HermitCore/include/hermit/stddef.h \
+ /home/stefan/HermitCore/arch/arm64/include/asm/stddef.h \
+ /home/stefan/HermitCore/arch/arm64/include/asm/irqflags.h \
+ /home/stefan/HermitCore/include/hermit/stdarg.h \
+ /home/stefan/HermitCore/include/hermit/memory.h \
+ /home/stefan/HermitCore/include/hermit/errno.h \
+ /home/stefan/HermitCore/include/hermit/string.h \
+ /home/stefan/HermitCore/arch/arm64/include/asm/string.h \
+ /home/stefan/HermitCore/include/hermit/spinlock.h \
+ /home/stefan/HermitCore/include/hermit/spinlock_types.h \
+ /home/stefan/HermitCore/arch/arm64/include/asm/atomic.h \
+ /home/stefan/HermitCore/arch/arm64/include/asm/atomic32.h \
+ /home/stefan/HermitCore/arch/arm64/include/asm/atomic64.h \
+ /home/stefan/HermitCore/include/hermit/tasks_types.h \
+ /home/stefan/HermitCore/include/hermit/vma.h \
+ /home/stefan/HermitCore/arch/arm64/include/asm/page.h \
+ /home/stefan/HermitCore/include/hermit/stdlib.h \
+ /home/stefan/HermitCore/arch/arm64/include/asm/processor.h \
+ /home/stefan/HermitCore/include/hermit/signal.h \
+ /home/stefan/HermitCore/include/hermit/semaphore_types.h \
+ /home/stefan/HermitCore/arch/arm64/include/asm/tasks_types.h \
+ /home/stefan/HermitCore/include/hermit/tasks.h \
+ /home/stefan/HermitCore/arch/arm64/include/asm/tasks.h \
+ /home/stefan/HermitCore/include/hermit/logging.h \
+ /home/stefan/HermitCore/include/hermit/time.h \
+ /home/stefan/HermitCore/arch/arm64/include/asm/time.h \
+ /home/stefan/HermitCore/include/hermit/syscall.h \
+ /home/stefan/HermitCore/include/stdlib.h
diff --git a/arch/aarch64/mm/vma.c b/arch/aarch64/mm/vma.c
new file mode 100644
index 000000000..22846abf0
--- /dev/null
+++ b/arch/aarch64/mm/vma.c
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2014, Steffen Vogel, RWTH Aachen University
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of the University nor the names of its contributors
+ *      may be used to endorse or promote products derived from this
+ *      software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <hermit/vma.h>
+
+int vma_arch_init(void)
+{
+	// reserve the lowest two pages of the virtual address space
+	return vma_add((size_t)0x00, 2*PAGE_SIZE, VMA_READ|VMA_WRITE);
+}
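[Editor's note — illustrative variant, not part of the patch] vma_add() only records the range as occupied in the VMA bookkeeping, so a later vma_alloc() can never return addresses from it. The same one-liner pattern could, for instance, pin down just the NULL page (reserve_null_guard is a hypothetical name):

    #include <hermit/vma.h>

    /* Hypothetical: keep page 0 permanently reserved so that a NULL
     * dereference can never alias a legitimate allocation. */
    static int reserve_null_guard(void)
    {
        return vma_add((size_t) 0x00, PAGE_SIZE, VMA_READ|VMA_WRITE);
    }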
diff --git a/arch/aarch64/mm/vma.dep b/arch/aarch64/mm/vma.dep
new file mode 100644
index 000000000..0ffc476a5
--- /dev/null
+++ b/arch/aarch64/mm/vma.dep
@@ -0,0 +1,9 @@
+arch/arm64/mm/vma.o: arch/arm64/mm/vma.c \
+ /home/stefan/HermitCore/include/hermit/vma.h \
+ /home/stefan/HermitCore/include/hermit/stddef.h \
+ /home/stefan/HermitCore/include/hermit/config.h \
+ /home/stefan/HermitCore/arch/arm64/include/asm/stddef.h \
+ /home/stefan/HermitCore/arch/arm64/include/asm/irqflags.h \
+ /home/stefan/HermitCore/arch/arm64/include/asm/page.h \
+ /home/stefan/HermitCore/include/hermit/stdlib.h \
+ /home/stefan/HermitCore/arch/arm64/include/asm/processor.h
diff --git a/arch/x86/CMakeLists.txt b/arch/x86/CMakeLists.txt
index cb88ba498..f47f2fc95 100644
--- a/arch/x86/CMakeLists.txt
+++ b/arch/x86/CMakeLists.txt
@@ -4,6 +4,7 @@ include(../../cmake/HermitCore.cmake)
 project(arch_x86_kernel C ASM_NASM)
 
 set_parent(X86_KERNEL_TARGET ${PROJECT_NAME})
+set_parent(ARCH_KERNEL_TARGET ${PROJECT_NAME})
 set_parent(X86_KERNEL_ASM_TARGET ${X86_KERNEL_TARGET}_asm)
 set_parent(X86_KERNEL_C_TARGET ${X86_KERNEL_TARGET}_c)
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index e273c6f57..8a8c33bd5 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -108,8 +108,10 @@ static inline size_t sign_extend(ssize_t addr, int bits)
 /// Align to next 2M boundary
 #define PAGE_2M_CEIL(addr)  (((addr) + (1L << 21) - 1) & ((~0L) << 21))
-/// Align to nex 2M boundary
+/// Align to next 2M boundary
 #define PAGE_2M_FLOOR(addr) ( (addr) & ((~0L) << 21))
+/// Align end of the kernel
+#define KERNEL_END_CEIL(addr) (PAGE_2M_CEIL((addr)))
 
 /// Page is present
 #define PG_PRESENT		(1 << 0)
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 2b8df557f..685d00117 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -307,6 +307,21 @@ int reset_fsgs(int32_t core_id);
 // determine the cpu features
 int cpu_detection(void);
 
+extern size_t hbmem_base;
+extern size_t hbmem_size;
+
+inline static int has_hbmem(void) {
+	return (hbmem_base != 0);
+}
+
+inline static size_t get_hbmem_base(void) {
+	return hbmem_base;
+}
+
+inline static size_t get_hbmem_size(void) {
+	return hbmem_size;
+}
+
 inline static uint32_t has_fpu(void) {
 	return (cpu_info.feature1 & CPU_FEATURE_FPU);
 }
diff --git a/arch/x86/include/asm/stddef.h b/arch/x86/include/asm/stddef.h
index 42d68ed0a..4bbb82f6e 100644
--- a/arch/x86/include/asm/stddef.h
+++ b/arch/x86/include/asm/stddef.h
@@ -82,6 +82,7 @@ typedef long off_t;
 typedef unsigned long long size_t;
 /// Pointer differences
 typedef long long ptrdiff_t;
+typedef unsigned long uintptr_t;
 #ifdef __KERNEL__
 typedef long long ssize_t;
 typedef long long off_t;
@@ -208,6 +209,10 @@ typedef struct {
 const int32_t is_uhyve(void);
 const
int32_t is_single_kernel(void); +const char* get_cmdline(void); +int init_rcce(void); +void print_cpu_status(int isle); + #ifdef __cplusplus } #endif diff --git a/arch/x86/kernel/processor.c b/arch/x86/kernel/processor.c index f6b991c11..73d61667f 100644 --- a/arch/x86/kernel/processor.c +++ b/arch/x86/kernel/processor.c @@ -32,6 +32,10 @@ #include #include #include +#include +#include +#include +#include #include /* @@ -46,8 +50,50 @@ extern void* Lpatch1; extern void* Lpatch2; extern atomic_int32_t current_boot_id; +islelock_t* rcce_lock = NULL; +rcce_mpb_t* rcce_mpb = NULL; + extern void isrsyscall(void); +const char* get_cmdline(void) +{ + if (mb_info) + return (char*) (size_t) mb_info->cmdline; + + return NULL; +} + +int init_rcce(void) +{ + size_t addr, flags = PG_GLOBAL|PG_RW; + + addr = vma_alloc(PAGE_SIZE, VMA_READ|VMA_WRITE|VMA_CACHEABLE); + if (BUILTIN_EXPECT(!addr, 0)) + return -ENOMEM; + if (has_nx()) + flags |= PG_XD; + if (page_map(addr, phy_rcce_internals, 1, flags)) { + vma_free(addr, addr + PAGE_SIZE); + return -ENOMEM; + } + + rcce_lock = (islelock_t*) addr; + rcce_mpb = (rcce_mpb_t*) (addr + CACHE_LINE*(RCCE_MAXNP+1)); + + LOG_INFO("Map rcce_lock at %p and rcce_mpb at %p\n", rcce_lock, rcce_mpb); + + return 0; +} + +void print_cpu_status(int isle) +{ + static spinlock_t status_lock = SPINLOCK_INIT; + + spinlock_lock(&status_lock); + LOG_INFO("CPU %d of isle %d is now online (CR0 0x%zx, CR4 0x%zx)\n", CORE_ID, isle, read_cr0(), read_cr4()); + spinlock_unlock(&status_lock); +} + cpu_info_t cpu_info = { 0, 0, 0, 0, 0}; static char cpu_vendor[13] = {[0 ... 12] = 0}; static char cpu_brand[4*3*sizeof(uint32_t)+1] = {[0 ... 4*3*sizeof(uint32_t)] = 0}; diff --git a/arch/x86/kernel/tasks.c b/arch/x86/kernel/tasks.c index 4a3a89045..ea6f93073 100644 --- a/arch/x86/kernel/tasks.c +++ b/arch/x86/kernel/tasks.c @@ -38,6 +38,7 @@ #include #include #include +#include #define TLS_ALIGNBITS 5 #define TLS_ALIGNSIZE (1L << TLS_ALIGNBITS) @@ -228,3 +229,15 @@ void wakeup_core(uint32_t core_id) LOG_DEBUG("wakeup core %d\n", core_id); apic_send_ipi(core_id, 121); } + +void reschedule(void) +{ + size_t** stack; + uint8_t flags; + + flags = irq_nested_disable(); + stack = scheduler(); + if (stack) + switch_context(stack); + irq_nested_enable(flags); +} diff --git a/arch/x86/mm/page.c b/arch/x86/mm/page.c index a91b0359d..41e874f63 100644 --- a/arch/x86/mm/page.c +++ b/arch/x86/mm/page.c @@ -50,28 +50,9 @@ * allocated for maintaining a value, rather their address is their value. 
*/ extern const void kernel_start; -/// This page is reserved for copying -#define PAGE_TMP (PAGE_FLOOR((size_t) &kernel_start) - PAGE_SIZE) - /** Single-address space operating system => one lock for all tasks */ static spinlock_irqsave_t page_lock = SPINLOCK_IRQSAVE_INIT; -/** This PGD table is initialized in entry.asm */ -extern size_t* boot_map; - -#if 0 -/** A self-reference enables direct access to all page tables */ -static size_t * const self[PAGE_LEVELS] = { - (size_t *) 0xFFC00000, - (size_t *) 0xFFFFF000 -}; - -/** An other self-reference for page_map_copy() */ -static size_t * const other[PAGE_LEVELS] = { - (size_t *) 0xFF800000, - (size_t *) 0xFFFFE000 -}; -#else /** A self-reference enables direct access to all page tables */ static size_t* const self[PAGE_LEVELS] = { (size_t *) 0xFFFFFF8000000000, @@ -80,17 +61,6 @@ static size_t* const self[PAGE_LEVELS] = { (size_t *) 0xFFFFFFFFFFFFF000 }; -#if 0 -/** An other self-reference for page_map_copy() */ -static size_t * const other[PAGE_LEVELS] = { - (size_t *) 0xFFFFFF0000000000, - (size_t *) 0xFFFFFFFF80000000, - (size_t *) 0xFFFFFFFFFFC00000, - (size_t *) 0xFFFFFFFFFFFFE000 -}; -#endif -#endif - static uint8_t expect_zeroed_pages = 0; size_t virt_to_phys(size_t addr) diff --git a/cmake/HermitCore-Configuration.cmake b/cmake/HermitCore-Configuration.cmake index 0f125be79..6bdea2e02 100644 --- a/cmake/HermitCore-Configuration.cmake +++ b/cmake/HermitCore-Configuration.cmake @@ -29,8 +29,20 @@ option(DYNAMIC_TICKS option(SAVE_FPU "Save FPU registers on context switch" ON) -option(HAVE_ARCH_MEMSET "Use machine specific version of memset" OFF) -option(HAVE_ARCH_MEMCPY "Use machine specific version of memcpy" OFF) -option(HAVE_ARCH_STRLEN "Use machine specific version of strlen" OFF) -option(HAVE_ARCH_STRCPY "Use machine specific version of strcpy" OFF) -option(HAVE_ARCH_STRNCPY "Use machine specific version of strncpy" OFF) +set(HAVE_ARCH_MEMSET "1" CACHE STRING + "Use machine specific version of memset") +set(HAVE_ARCH_MEMCPY "1" CACHE STRING + "Use machine specific version of memcpy") +set(HAVE_ARCH_STRLEN "1" CACHE STRING + "Use machine specific version of strlen") +if("${HERMIT_ARCH}" STREQUAL "aarch64") +set(HAVE_ARCH_STRCPY "0" CACHE STRING + "Use machine specific version of strcpy") +set(HAVE_ARCH_STRNCPY "0" CACHE STRING + "Use machine specific version of strncpy") +else() +set(HAVE_ARCH_STRCPY "0" CACHE STRING + "Use machine specific version of strcpy") +set(HAVE_ARCH_STRNCPY "0" CACHE STRING + "Use machine specific version of strncpy") +endif() diff --git a/cmake/HermitCore-Toolchain-aarch64-bootstrap.cmake b/cmake/HermitCore-Toolchain-aarch64-bootstrap.cmake new file mode 100644 index 000000000..35d934561 --- /dev/null +++ b/cmake/HermitCore-Toolchain-aarch64-bootstrap.cmake @@ -0,0 +1,9 @@ +include(${CMAKE_CURRENT_LIST_DIR}/HermitCore-Toolchain-aarch64.cmake) +include_guard() + +set(CMAKE_C_COMPILER_WORKS 1 CACHE INTERNAL "") +set(CMAKE_CXX_COMPILER_WORKS 1 CACHE INTERNAL "") + +# bootstrap toolchain cannot compile neither Go nor Fortran +unset(CMAKE_Go_COMPILER) +unset(CMAKE_Fortran_COMPILER) diff --git a/cmake/HermitCore-Toolchain-aarch64.cmake b/cmake/HermitCore-Toolchain-aarch64.cmake new file mode 100644 index 000000000..967569a6e --- /dev/null +++ b/cmake/HermitCore-Toolchain-aarch64.cmake @@ -0,0 +1,33 @@ +include(${CMAKE_CURRENT_LIST_DIR}/HermitCore-Utils.cmake) +include_guard() + +# let user provide a different path to the toolchain +set_default(TOOLCHAIN_BIN_DIR /opt/hermit/bin) + +set(TARGET_ARCH 
aarch64-hermit) +set(HERMIT_KERNEL_FLAGS + -Wall -O2 -mgeneral-regs-only + -fno-var-tracking-assignments -fstrength-reduce + -fomit-frame-pointer -finline-functions -ffreestanding + -nostdinc -fno-stack-protector + -fno-delete-null-pointer-checks + -falign-jumps=1 -falign-loops=1 + -fno-common -Wframe-larger-than=1024 + -fno-strict-aliasing -fno-asynchronous-unwind-tables + -fno-strict-overflow) + +set(HERMIT_APP_FLAGS + -O3 -ftree-vectorize) + +set(CMAKE_SYSTEM_NAME Generic) + +# point CMake to our toolchain +set(CMAKE_C_COMPILER ${TOOLCHAIN_BIN_DIR}/${TARGET_ARCH}-gcc) +set(CMAKE_CXX_COMPILER ${TOOLCHAIN_BIN_DIR}/${TARGET_ARCH}-g++) +set(CMAKE_Fortran_COMPILER ${TOOLCHAIN_BIN_DIR}/${TARGET_ARCH}-gfortran) +set(CMAKE_Go_COMPILER ${TOOLCHAIN_BIN_DIR}/${TARGET_ARCH}-gccgo) + +# hinting the prefix and location is needed in order to correctly detect +# binutils +set(_CMAKE_TOOLCHAIN_PREFIX "${TARGET_ARCH}-") +set(_CMAKE_TOOLCHAIN_LOCATION ${TOOLCHAIN_BIN_DIR}) diff --git a/cmake/HermitCore-Toolchain-x86.cmake b/cmake/HermitCore-Toolchain-x86.cmake index ec875cfc5..25fba21d8 100644 --- a/cmake/HermitCore-Toolchain-x86.cmake +++ b/cmake/HermitCore-Toolchain-x86.cmake @@ -5,7 +5,22 @@ include_guard() set_default(TOOLCHAIN_BIN_DIR /opt/hermit/bin) set(TARGET_ARCH x86_64-hermit) +set(HERMIT_KERNEL_FLAGS + -m64 -Wall -O2 -mno-red-zone + -fno-var-tracking-assignments -fstrength-reduce + -fomit-frame-pointer -finline-functions -ffreestanding + -nostdinc -fno-stack-protector -mno-sse -mno-mmx + -mno-sse2 -mno-3dnow -mno-avx + -fno-delete-null-pointer-checks + -falign-jumps=1 -falign-loops=1 + -mno-80387 -mno-fp-ret-in-387 -mskip-rax-setup + -fno-common -Wframe-larger-than=1024 + -fno-strict-aliasing -fno-asynchronous-unwind-tables + -fno-strict-overflow -maccumulate-outgoing-args) +set(HERMIT_APP_FLAGS + -m64 -mtls-direct-seg-refs -O3 -ftree-vectorize) + set(CMAKE_SYSTEM_NAME Generic) # point CMake to our toolchain @@ -18,9 +33,3 @@ set(CMAKE_Go_COMPILER ${TOOLCHAIN_BIN_DIR}/${TARGET_ARCH}-gccgo) # binutils set(_CMAKE_TOOLCHAIN_PREFIX "${TARGET_ARCH}-") set(_CMAKE_TOOLCHAIN_LOCATION ${TOOLCHAIN_BIN_DIR}) - -option(HAVE_ARCH_MEMSET "Use machine specific version of memset" ON) -option(HAVE_ARCH_MEMCPY "Use machine specific version of memcpy" ON) -option(HAVE_ARCH_STRLEN "Use machine specific version of strlen" ON) -option(HAVE_ARCH_STRCPY "Use machine specific version of strcpy" ON) -option(HAVE_ARCH_STRNCPY "Use machine specific version of strncpy" ON) diff --git a/cmake/HermitCore.cmake b/cmake/HermitCore.cmake index 4d6ac9d00..599a522d2 100644 --- a/cmake/HermitCore.cmake +++ b/cmake/HermitCore.cmake @@ -34,6 +34,8 @@ if(NOT CMAKE_TOOLCHAIN_FILE) set(CMAKE_TOOLCHAIN_FILE ${CMAKE_CURRENT_LIST_DIR}/HermitCore-Toolchain-${HERMIT_ARCH}${_BOOTSTRAP_ARCH_SUFFIX}.cmake) endif() +# NASM is only required on x86_64 +if("${HERMIT_ARCH}" STREQUAL "x86") # NASM detection will change binary format depending on host system, but # we only want to generate elf64 for HermitCore # Note: Has to be set *before* ASM_NASM is enabled @@ -46,22 +48,7 @@ enable_language(ASM_NASM) # Note: Has to be set *after* ASM_NASM is enabled set(CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} -I ${CMAKE_BINARY_DIR}/include/") - -set(HERMIT_KERNEL_FLAGS - -m64 -Wall -O2 -mno-red-zone - -fno-var-tracking-assignments -fstrength-reduce - -fomit-frame-pointer -finline-functions -ffreestanding - -nostdinc -fno-stack-protector -mno-sse -mno-mmx - -mno-sse2 -mno-3dnow -mno-avx - -fno-delete-null-pointer-checks - -falign-jumps=1 -falign-loops=1 - 
-mno-80387 -mno-fp-ret-in-387 -mskip-rax-setup - -fno-common -Wframe-larger-than=1024 - -fno-strict-aliasing -fno-asynchronous-unwind-tables - -fno-strict-overflow -maccumulate-outgoing-args) - -set(HERMIT_APP_FLAGS - -m64 -mtls-direct-seg-refs -O3 -ftree-vectorize) +endif() if(MTUNE) set(HERMIT_KERNEL_FLAGS ${HERMIT_KERNEL_FLAGS} -mtune=${MTUNE}) diff --git a/drivers/net/e1000.c b/drivers/net/e1000.c index 330c16f5c..0a027e7b7 100644 --- a/drivers/net/e1000.c +++ b/drivers/net/e1000.c @@ -105,7 +105,7 @@ static struct netif* mynetif = NULL; static inline uint32_t e1000_read(volatile uint8_t* base, uint32_t off) { -#if 1 +#if __x86_64__ uint32_t ret; asm volatile ("movl (%1), %0" : "=r"(ret) : "r"(base+off)); diff --git a/drivers/net/mmnif.c b/drivers/net/mmnif.c index eab0284c6..cecf5c773 100644 --- a/drivers/net/mmnif.c +++ b/drivers/net/mmnif.c @@ -70,7 +70,9 @@ #include #include #include +#if __x86_64__ #include +#endif #include @@ -230,7 +232,6 @@ inline static int mmnif_trigger_irq(int dest_ip) dest = 0; else dest = 0; // TODO: determine physical apic id of the destination - return apic_send_ipi(dest, MMNIF_IRQ); } @@ -602,8 +603,10 @@ err_t mmnif_init(struct netif *netif) // protect mmnif shared segments by the NX flag flags = PG_RW|PG_GLOBAL; +#if __x86_64__ if (has_nx()) flags |= PG_XD; +#endif // map physical address in the virtual address space err = page_map((size_t) header_start_address, (size_t) header_phy_start_address, (nodes * header_size) >> PAGE_BITS, flags); diff --git a/drivers/net/uhyve-net.h b/drivers/net/uhyve-net.h index 6ec120d3a..c031f0d30 100755 --- a/drivers/net/uhyve-net.h +++ b/drivers/net/uhyve-net.h @@ -39,11 +39,6 @@ #define TX_BUF_LEN 2048 #define TX_BUF_NUM 1 //number of tx buffer -#define UHYVE_PORT_NETINFO 0x505 -#define UHYVE_PORT_NETWRITE 0x506 -#define UHYVE_PORT_NETREAD 0x507 -#define UHYVE_PORT_NETSTAT 0x508 - // UHYVE_PORT_NETINFO typedef struct { /* OUT */ diff --git a/include/hermit/config.h.in b/include/hermit/config.h.in index 2c9e9c9ec..95770dae0 100644 --- a/include/hermit/config.h.in +++ b/include/hermit/config.h.in @@ -11,16 +11,16 @@ #cmakedefine DYNAMIC_TICKS /* Define to use machine specific version of memcpy */ -#cmakedefine HAVE_ARCH_MEMCPY +#cmakedefine HAVE_ARCH_MEMCPY (@HAVE_ARCH_MEMCPY@) /* Define to use machine specific version of memset */ -#cmakedefine HAVE_ARCH_MEMSET +#cmakedefine HAVE_ARCH_MEMSET (@HAVE_ARCH_MEMSET@) /* Define to use machine specific version of strcpy */ -#cmakedefine HAVE_ARCH_STRCPY +#cmakedefine HAVE_ARCH_STRCPY (@HAVE_ARCH_STRCPY@) /* Define to use machine specific version of strlen */ -#cmakedefine HAVE_ARCH_STRLEN +#cmakedefine HAVE_ARCH_STRLEN (@HAVE_ARCH_STRLEN@) /* Define to use machine specific version of strncpy */ -#cmakedefine HAVE_ARCH_STRNCPY +#cmakedefine HAVE_ARCH_STRNCPY (@HAVE_ARCH_STRNCPY@) diff --git a/include/hermit/memory.h b/include/hermit/memory.h index 89ed45f0c..022e22120 100644 --- a/include/hermit/memory.h +++ b/include/hermit/memory.h @@ -78,7 +78,7 @@ int hbmem_put_pages(size_t phyaddr, size_t npages); */ static inline int hbmem_put_page(size_t phyaddr) { return hbmem_put_pages(phyaddr, 1); } -/** @brief check if high memory bandwidth is available */ +/** @brief check if high memory bandwidth is available */ int is_hbmem_available(void); /** @brief Initialize the high bandwidth memory subsystem */ diff --git a/include/hermit/stddef.h b/include/hermit/stddef.h index 818ad5fc8..fc78b8397 100644 --- a/include/hermit/stddef.h +++ b/include/hermit/stddef.h @@ -43,12 +43,12 @@ 
extern "C" { #endif // size of the whole application -extern const size_t image_size; +extern size_t image_size; #define TIMER_FREQ 100 /* in HZ */ #define CLOCK_TICK_RATE 1193182 /* 8254 chip's internal oscillator frequency */ #define CACHE_LINE 64 -#define HEAP_START (PAGE_2M_CEIL((size_t)&kernel_start + image_size) + 4*PAGE_SIZE) +#define HEAP_START (PAGE_2M_CEIL(((size_t)&kernel_start + image_size + (16ULL << 10)))) #define HEAP_SIZE (1ULL << 32) #define KMSG_SIZE 0x1000 #define INT_SYSCALL 0x80 @@ -59,12 +59,24 @@ extern const size_t image_size; #define DYNAMIC_TICKS -#define UHYVE_PORT_WRITE 0x499 -#define UHYVE_PORT_OPEN 0x500 -#define UHYVE_PORT_CLOSE 0x501 -#define UHYVE_PORT_READ 0x502 -#define UHYVE_PORT_EXIT 0x503 -#define UHYVE_PORT_LSEEK 0x504 +#define UHYVE_PORT_WRITE 0x400 +#define UHYVE_PORT_OPEN 0x440 +#define UHYVE_PORT_CLOSE 0x480 +#define UHYVE_PORT_READ 0x500 +#define UHYVE_PORT_EXIT 0x540 +#define UHYVE_PORT_LSEEK 0x580 + +// Networkports +#define UHYVE_PORT_NETINFO 0x600 +#define UHYVE_PORT_NETWRITE 0x640 +#define UHYVE_PORT_NETREAD 0x680 +#define UHYVE_PORT_NETSTAT 0x700 + +/* Ports and data structures for uhyve command line arguments and envp + * forwarding */ +#define UHYVE_PORT_CMDSIZE 0x740 +#define UHYVE_PORT_CMDVAL 0x780 + #define BUILTIN_EXPECT(exp, b) __builtin_expect((exp), (b)) //#define BUILTIN_EXPECT(exp, b) (exp) diff --git a/include/hermit/virtio_types.h b/include/hermit/virtio_types.h index 4792dea96..5a0381996 100644 --- a/include/hermit/virtio_types.h +++ b/include/hermit/virtio_types.h @@ -39,11 +39,10 @@ * - __le{16,32,64} for standard-compliant virtio devices */ -typedef uint8_t __u8; +typedef uint8_t __u8; typedef uint16_t __u16; typedef uint32_t __u32; typedef uint64_t __u64; -typedef size_t uintptr_t; #define __bitwise__ diff --git a/kernel/main.c b/kernel/main.c index 5e873c514..1a53a9a46 100644 --- a/kernel/main.c +++ b/kernel/main.c @@ -34,13 +34,13 @@ #include #include #include -#include -#include #include #include #include #include +#ifdef __x86_64__ #include +#endif #include #include @@ -66,10 +66,6 @@ #define HERMIT_PORT 0x494E #define HERMIT_MAGIC 0x7E317 -/* Ports and data structures for command line args + envp forwarding to uhyve */ -#define UHYVE_PORT_CMDSIZE 0x509 -#define UHYVE_PORT_CMDVAL 0x510 - typedef struct { int argc; int argsz[MAX_ARGC_ENVC]; @@ -97,9 +93,6 @@ extern const void __bss_start; extern const void percore_start; extern const void percore_end0; extern const void percore_end; -extern char __BUILD_DATE; -extern size_t hbmem_base; -extern size_t hbmem_size; /* Page frame counters */ extern atomic_int64_t total_pages; @@ -116,9 +109,6 @@ extern uint8_t hcip[4]; extern uint8_t hcgateway[4]; extern uint8_t hcmask[4]; -islelock_t* rcce_lock = NULL; -rcce_mpb_t* rcce_mpb = NULL; - extern void signal_init(); static int hermit_init(void) @@ -134,6 +124,7 @@ static int hermit_init(void) memcpy((char*) &percore_start + i*sz, (char*) &percore_start, sz); koutput_init(); + system_init(); irq_init(); timer_init(); @@ -144,15 +135,6 @@ static int hermit_init(void) return 0; } -static void print_status(void) -{ - static spinlock_t status_lock = SPINLOCK_INIT; - - spinlock_lock(&status_lock); - LOG_INFO("CPU %d of isle %d is now online (CR0 0x%zx, CR4 0x%zx)\n", CORE_ID, isle, read_cr0(), read_cr4()); - spinlock_unlock(&status_lock); -} - static void tcpip_init_done(void* arg) { sys_sem_t* sem = (sys_sem_t*)arg; @@ -227,6 +209,11 @@ static int init_netifs(void) netifapi_netif_set_default(&default_netif); 
netifapi_netif_set_up(&default_netif); } else { +#ifdef __aarch64__ + LOG_ERROR("Unable to add the network interface\n"); + + return -ENODEV; +#else /* Clear network address because we use DHCP to get an ip address */ IP_ADDR4(&gw, 0,0,0,0); IP_ADDR4(&ipaddr, 0,0,0,0); @@ -258,13 +245,13 @@ success: int ip_counter = 0; /* wait for ip address */ while(!ip_2_ip4(&default_netif.ip_addr)->addr && (ip_counter < 20)) { - uint64_t end_tsc, start_tsc = rdtsc(); + uint64_t end_tsc, start_tsc = get_rdtsc(); do { if (ip_2_ip4(&default_netif.ip_addr)->addr) return 0; check_workqueues(); - end_tsc = rdtsc(); + end_tsc = get_rdtsc(); } while(((end_tsc - start_tsc) / (get_cpu_frequency() * 1000)) < DHCP_FINE_TIMER_MSECS); dhcp_fine_tmr(); @@ -279,6 +266,7 @@ success: if (!ip_2_ip4(&default_netif.ip_addr)->addr) return -ENODEV; +#endif } return 0; @@ -294,7 +282,7 @@ int network_shutdown(void) lwip_close(s); } - mmnif_shutdown(); + //mmnif_shutdown(); //stats_display(); return 0; @@ -308,7 +296,7 @@ int smp_main(void) enable_dynticks(); #endif - print_status(); + print_cpu_status(isle); /* wait for the other cpus */ while(atomic_int32_read(&cpu_online) < atomic_int32_read(&possible_cpus)) { @@ -324,31 +312,11 @@ int smp_main(void) } #endif -static int init_rcce(void) -{ - size_t addr, flags = PG_GLOBAL|PG_RW; - - addr = vma_alloc(PAGE_SIZE, VMA_READ|VMA_WRITE|VMA_CACHEABLE); - if (BUILTIN_EXPECT(!addr, 0)) - return -ENOMEM; - if (has_nx()) - flags |= PG_XD; - if (page_map(addr, phy_rcce_internals, 1, flags)) { - vma_free(addr, addr + PAGE_SIZE); - return -ENOMEM; - } - - rcce_lock = (islelock_t*) addr; - rcce_mpb = (rcce_mpb_t*) (addr + CACHE_LINE*(RCCE_MAXNP+1)); - - LOG_INFO("Map rcce_lock at %p and rcce_mpb at %p\n", rcce_lock, rcce_mpb); - - return 0; -} - int libc_start(int argc, char** argv, char** env); -// init task => creates all other tasks an initialize the LwIP +char* itoa(uint64_t input, char* str); + +// init task => creates all other tasks and initializes the LwIP static int initd(void* arg) { int s = -1, c = -1; @@ -385,8 +353,12 @@ static int initd(void* arg) vma_free(curr_task->heap->start, curr_task->heap->start+PAGE_SIZE); vma_add(curr_task->heap->start, curr_task->heap->start+PAGE_SIZE, VMA_HEAP|VMA_USER); +#ifndef __aarch64__ // initialize network err = init_netifs(); +#else + err = -EINVAL; +#endif if (is_uhyve()) { int i; @@ -423,10 +395,10 @@ static int initd(void* arg) LOG_INFO("Boot time: %d ms\n", (get_clock_tick() * 1000) / TIMER_FREQ); libc_start(uhyve_cmdsize.argc, uhyve_cmdval.argv, uhyve_cmdval.envp); - for(i=0; i 0) + LOG_INFO("Processor frequency: %u MHz\n", get_cpu_frequency()); LOG_INFO("Total memory: %zd MiB\n", atomic_int64_read(&total_pages) * PAGE_SIZE / (1024ULL*1024ULL)); LOG_INFO("Current allocated memory: %zd KiB\n", atomic_int64_read(&total_allocated_pages) * PAGE_SIZE / 1024ULL); LOG_INFO("Current available memory: %zd MiB\n", atomic_int64_read(&total_available_pages) * PAGE_SIZE / (1024ULL*1024ULL)); LOG_INFO("Core %d is the boot processor\n", boot_processor); LOG_INFO("System is able to use %d processors\n", possible_cpus); - if (mb_info) - LOG_INFO("Kernel cmdline: %s\n", (char*) (size_t) mb_info->cmdline); - if (hbmem_base) - LOG_INFO("Found high bandwidth memory at 0x%zx (size 0x%zx)\n", hbmem_base, hbmem_size); + if (get_cmdline()) + LOG_INFO("Kernel cmdline: %s\n", get_cmdline()); + if (has_hbmem()) + LOG_INFO("Found high bandwidth memory at 0x%zx (size 0x%zx)\n", get_hbmem_base(), get_hbmem_size()); #if 0 print_pci_adapters(); @@ -632,7 +605,7 @@ int 
hermit_main(void) while(atomic_int32_read(&cpu_online) < atomic_int32_read(&possible_cpus)) PAUSE; - print_status(); + print_cpu_status(isle); //vma_dump(); create_kernel_task_on_core(NULL, initd, NULL, NORMAL_PRIO, boot_processor); diff --git a/kernel/syscall.c b/kernel/syscall.c index d10691dea..dd4ae63c3 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include diff --git a/kernel/tasks.c b/kernel/tasks.c index 6a50c405a..40e46cb12 100644 --- a/kernel/tasks.c +++ b/kernel/tasks.c @@ -328,7 +328,7 @@ void finish_task_switch(void) if (old->status == TASK_FINISHED) { /* cleanup task */ if (old->stack) { - LOG_INFO("Release stack at 0x%zx\n", old->stack); + //LOG_INFO("Release stack at 0x%zx\n", old->stack); destroy_stack(old->stack, DEFAULT_STACK_SIZE); old->stack = NULL; } @@ -378,11 +378,12 @@ void NORETURN do_exit(int arg) // do we need to release the TLS? tls_addr = (void*)get_tls(); if (tls_addr) { - LOG_INFO("Release TLS at %p\n", (char*)tls_addr - curr_task->tls_size); + //LOG_INFO("Release TLS at %p\n", (char*)tls_addr - curr_task->tls_size); kfree((char*)tls_addr - curr_task->tls_size - TLS_OFFSET); } curr_task->status = TASK_FINISHED; + reschedule(); irq_nested_enable(flags); @@ -842,6 +843,7 @@ size_t** scheduler(void) curr_task = task_list_pop_front(&readyqueues[core_id].queue[prio-1]); if(BUILTIN_EXPECT(curr_task == NULL, 0)) { + kputs("Kernel panic: No task in readyqueue\n"); LOG_ERROR("Kernel panic: No task in readyqueue\n"); while(1); } @@ -895,15 +897,3 @@ int get_task(tid_t id, task_t** task) return 0; } - - -void reschedule(void) -{ - size_t** stack; - uint8_t flags; - - flags = irq_nested_disable(); - if ((stack = scheduler())) - switch_context(stack); - irq_nested_enable(flags); -} diff --git a/libkern/printf.c b/libkern/printf.c index a666af6cc..48cdd48a2 100644 --- a/libkern/printf.c +++ b/libkern/printf.c @@ -35,11 +35,11 @@ */ -/* - * HermitCore's printf implementation is based on a implementation which was - * published at http://www.pagetable.com/?p=298. +/* + * HermitCore's printf implementation is based on a implementation which was + * published at http://www.pagetable.com/?p=298. * The authors built a full-featured standalone version of printf(). The - * base code has been taken from FreeBSD (sys/kern/subr_prf.c) and is + * base code has been taken from FreeBSD (sys/kern/subr_prf.c) and is * consequently BSD-licensed. Unnecessary functions have been removed and * all typedefs required have been added. */ @@ -63,7 +63,6 @@ typedef unsigned long u_long; typedef unsigned short u_short; typedef unsigned long long u_quad_t; typedef long long quad_t; -typedef unsigned long uintptr_t; #define NBBY 8 /* number of bits in a byte */ static char const hex2ascii_data[] = "0123456789abcdefghijklmnopqrstuvwxyz"; #define hex2ascii(hex) (hex2ascii_data[hex]) @@ -486,7 +485,7 @@ int kprintf(const char *fmt, ...) va_list ap; va_start(ap, fmt); - ret = kvprintf(fmt, + ret = kvprintf(fmt, _putchar, /* output function */ NULL, /* additional argument for the output function */ 10, ap); diff --git a/libkern/stdio.c b/libkern/stdio.c index 0b1976589..218c4e8cb 100644 --- a/libkern/stdio.c +++ b/libkern/stdio.c @@ -42,7 +42,7 @@ spinlock_irqsave_t stdio_lock = SPINLOCK_IRQSAVE_INIT; the binary. => no valid kernel messages */ /* static */ unsigned char kmessages[KMSG_SIZE+1] __attribute__ ((section(".kmsg"))) = {[0 ... 
KMSG_SIZE] = 0x00}; -int koutput_init(void) + int koutput_init(void) { if (is_single_kernel()) uart_init(); diff --git a/lwip b/lwip index d9c0ff8d2..8df4dc73f 160000 --- a/lwip +++ b/lwip @@ -1 +1 @@ -Subproject commit d9c0ff8d247d5398bf96f00e61a74e9701fdbd0f +Subproject commit 8df4dc73fdae29ff2af611f3ea9aa8ad70c333ca diff --git a/mm/vma.c b/mm/vma.c index e55f0356f..ac71b8dc7 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -55,11 +55,11 @@ int vma_init(void) LOG_INFO("vma_init: reserve vma region 0x%llx - 0x%llx\n", PAGE_2M_FLOOR((size_t) &kernel_start), - PAGE_2M_CEIL((size_t) &kernel_start + image_size)); + KERNEL_END_CEIL((size_t) &kernel_start + image_size)); // add Kernel ret = vma_add(PAGE_2M_FLOOR((size_t) &kernel_start), - PAGE_2M_CEIL((size_t) &kernel_start + image_size), + KERNEL_END_CEIL((size_t) &kernel_start + image_size), VMA_READ|VMA_WRITE|VMA_EXECUTE|VMA_CACHEABLE); if (BUILTIN_EXPECT(ret, 0)) goto out; diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 6005db92b..8b18b205f 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -5,7 +5,7 @@ include(../cmake/HermitCore-Paths.cmake) add_compile_options(-std=c99) -add_executable(proxy proxy.c utils.c uhyve.c uhyve-net.c) +add_executable(proxy proxy.c utils.c uhyve.c uhyve-net.c uhyve-x86_64.c uhyve-aarch64.c) target_compile_options(proxy PUBLIC -pthread) target_compile_options(proxy PUBLIC -DMAX_ARGC_ENVC=${MAX_ARGC_ENVC}) target_link_libraries(proxy -pthread) diff --git a/tools/proxy.c b/tools/proxy.c index 2f053cfa9..0ff059596 100644 --- a/tools/proxy.c +++ b/tools/proxy.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -71,6 +72,8 @@ typedef enum { UHYVE } monitor_t; +bool verbose = false; + static monitor_t monitor = BAREMETAL; static int sobufsize = 131072; static unsigned int isle_nr = 0; @@ -303,6 +306,11 @@ static int qemu_init(char *path) "-no-acpi", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}; +#ifdef __aarch64__ + fprintf(stderr, "QEMU as hypervisor is currently not supported for aarch64\n"); + exit(1); +#endif + str = getenv("HERMIT_CPUS"); if (str) qemu_argv[5] = str; @@ -409,8 +417,7 @@ static int qemu_init(char *path) qemu_argv[i+1] = "dump"; } - str = getenv("HERMIT_VERBOSE"); - if (str && (strcmp(str, "0") != 0)) + if (verbose) { printf("qemu startup command: "); @@ -454,6 +461,11 @@ static int multi_init(char *path) char isle_path[MAX_PATH]; char* result; +#ifdef __aarch64__ + fprintf(stderr, "The multi-kernel version is currently not supported for aarch64\n"); + exit(1); +#endif + // set path to temporary file snprintf(isle_path, MAX_PATH, "/sys/hermit/isle%d/path", isle_nr); file = fopen(isle_path, "w"); @@ -514,11 +526,10 @@ static int multi_init(char *path) static void dump_log(void) { - char* str = getenv("HERMIT_VERBOSE"); FILE* file; char line[2048]; - if (!(str && (strcmp(str, "0") != 0))) + if (!verbose) return; if (monitor == BAREMETAL) @@ -881,151 +892,157 @@ int socket_loop(int argc, char **argv) struct sockaddr_in serv_name; #if 0 - // check if mmnif interface is available - if (!qemu) { - struct ifreq ethreq; + // check if mmnif interface is available + if (!qemu) { + struct ifreq ethreq; - memset(ðreq, 0, sizeof(ethreq)); - strncpy(ethreq.ifr_name, "mmnif", IFNAMSIZ); + memset(ðreq, 0, sizeof(ethreq)); + strncpy(ethreq.ifr_name, "mmnif", IFNAMSIZ); - while(1) { - /* this socket doesn't really matter, we just need a descriptor - * to perform the ioctl on */ - s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); - ioctl(s, 
SIOCGIFFLAGS, ðreq); - close(s); + while(1) { + /* this socket doesn't really matter, we just need a descriptor + * to perform the ioctl on */ + s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); + ioctl(s, SIOCGIFFLAGS, ðreq); + close(s); - if (ethreq.ifr_flags & (IFF_UP|IFF_RUNNING)) - break; - } - sched_yield(); + if (ethreq.ifr_flags & (IFF_UP|IFF_RUNNING)) + break; } + sched_yield(); + } #endif - /* create a socket */ - s = socket(PF_INET, SOCK_STREAM, 0); - if (s < 0) - { - perror("Proxy: socket creation error"); - exit(1); + /* create a socket */ + s = socket(PF_INET, SOCK_STREAM, 0); + if (s < 0) + { + perror("Proxy: socket creation error"); + exit(1); + } + + setsockopt(s, SOL_SOCKET, SO_RCVBUF, (char *) &sobufsize, sizeof(sobufsize)); + setsockopt(s, SOL_SOCKET, SO_SNDBUF, (char *) &sobufsize, sizeof(sobufsize)); + i = 1; + setsockopt(s, IPPROTO_TCP, TCP_NODELAY, (char *) &i, sizeof(i)); + i = 0; + setsockopt(s, SOL_SOCKET, SO_KEEPALIVE, (char *) &i, sizeof(i)); + + /* server address */ + memset((char *) &serv_name, 0x00, sizeof(serv_name)); + serv_name.sin_family = AF_INET; + if (monitor == QEMU) + serv_name.sin_addr = INADDR(127, 0, 0, 1); + else + serv_name.sin_addr = HERMIT_IP(isle_nr); + serv_name.sin_port = htons(port); + + i = 0; +retry: + ret = connect(s, (struct sockaddr*)&serv_name, sizeof(serv_name)); + if (ret < 0) + { + i++; + if (i <= 10) { + usleep(10000); + goto retry; } - - setsockopt(s, SOL_SOCKET, SO_RCVBUF, (char *) &sobufsize, sizeof(sobufsize)); - setsockopt(s, SOL_SOCKET, SO_SNDBUF, (char *) &sobufsize, sizeof(sobufsize)); - i = 1; - setsockopt(s, IPPROTO_TCP, TCP_NODELAY, (char *) &i, sizeof(i)); - i = 0; - setsockopt(s, SOL_SOCKET, SO_KEEPALIVE, (char *) &i, sizeof(i)); - - /* server address */ - memset((char *) &serv_name, 0x00, sizeof(serv_name)); - serv_name.sin_family = AF_INET; - if (monitor == QEMU) - serv_name.sin_addr = INADDR(127, 0, 0, 1); - else - serv_name.sin_addr = HERMIT_IP(isle_nr); - serv_name.sin_port = htons(port); - - i = 0; - retry: - ret = connect(s, (struct sockaddr*)&serv_name, sizeof(serv_name)); - if (ret < 0) - { - i++; - if (i <= 10) { - usleep(10000); - goto retry; - } - perror("Proxy -- connection error"); - close(s); - exit(1); - } - - ret = write(s, &magic, sizeof(magic)); - if (ret < 0) - goto out; - - // forward program arguments to HermitCore - // argv[0] is path of this proxy so we strip it - - argv++; - argc--; - - ret = write(s, &argc, sizeof(argc)); - if (ret < 0) - goto out; - - for(i=0; i +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "uhyve.h" +#include "proxy.h" + +#define GUEST_OFFSET 0x0 + +#define GIC_SPI_IRQ_BASE 32 +#define GICD_BASE (1ULL << 39) +#define GICC_BASE (GICD_BASE + GICD_SIZE) +#define GIC_SIZE (GICD_SIZE + GICC_SIZE) +#define GICD_SIZE 0x10000ULL +#define GICC_SIZE 0x20000ULL + +#define KVM_GAP_SIZE (GIC_SIZE) +#define KVM_GAP_START GICD_BASE + +#define PAGE_SIZE 0x1000 + +#ifndef offsetof +#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) +#endif +#define ARM64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 |\ + KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x)) + +static bool cap_irqfd = false; +static bool cap_read_only = false; +static int gic_fd = -1; + +extern size_t guest_size; +extern uint64_t elf_entry; +extern uint8_t* klog; +extern bool verbose; +extern uint8_t* 
guest_mem; +extern size_t guest_size; +extern int kvm, vmfd, netfd, efd; +extern uint8_t* mboot; +extern __thread struct kvm_run *run; +extern __thread int vcpufd; +extern __thread uint32_t cpuid; + +void print_registers(void) +{ + struct kvm_one_reg reg; + uint64_t data; + + fprintf(stderr, "\n Dump state of CPU %d\n\n", cpuid); + fprintf(stderr, " Registers\n"); + fprintf(stderr, " =========\n"); + + reg.addr = (uint64_t)&data; + reg.id = ARM64_CORE_REG(regs.pc); + kvm_ioctl(vcpufd, KVM_GET_ONE_REG, &reg); + fprintf(stderr, " PC: 0x%016lx\n", data); + + reg.id = ARM64_CORE_REG(regs.pstate); + kvm_ioctl(vcpufd, KVM_GET_ONE_REG, &reg); + fprintf(stderr, " PSTATE: 0x%016lx\n", data); + + reg.id = ARM64_CORE_REG(sp_el1); + kvm_ioctl(vcpufd, KVM_GET_ONE_REG, &reg); + fprintf(stderr, " SP_EL1: 0x%016lx\n", data); + + reg.id = ARM64_CORE_REG(regs.regs[30]); + kvm_ioctl(vcpufd, KVM_GET_ONE_REG, &reg); + fprintf(stderr, " LR: 0x%016lx\n", data); + + for(int i=0; i<=29; i+=2) + { + reg.id = ARM64_CORE_REG(regs.regs[i]); + kvm_ioctl(vcpufd, KVM_GET_ONE_REG, &reg); + fprintf(stderr, " X%d:\t 0x%016lx\t", i, data); + + reg.id = ARM64_CORE_REG(regs.regs[i+1]); + kvm_ioctl(vcpufd, KVM_GET_ONE_REG, &reg); + fprintf(stderr, " X%d:\t0x%016lx\n", i+1, data); + } +} + +void timer_handler(int signum) +{ + err(1, "Checkpointing is currently not supported!"); +} + +void restore_cpu_state(void) +{ + err(1, "Checkpointing is currently not supported!"); +} + +void save_cpu_state(void) +{ + err(1, "Checkpointing is currently not supported!"); +} + +int load_checkpoint(uint8_t* mem, char* path) +{ + err(1, "Checkpointing is currently not supported!"); +} + +void init_cpu_state(uint64_t elf_entry) +{ + struct kvm_vcpu_init vcpu_init = { + .features = 0, + }; + struct kvm_vcpu_init preferred_init; + + if (!ioctl(vmfd, KVM_ARM_PREFERRED_TARGET, &preferred_init)) { + if ((preferred_init.target == KVM_ARM_TARGET_CORTEX_A57) || + (preferred_init.target == KVM_ARM_TARGET_CORTEX_A53)) { + vcpu_init.target = preferred_init.target; + } else { + vcpu_init.target = KVM_ARM_TARGET_GENERIC_V8; + } + } else { + vcpu_init.target = KVM_ARM_TARGET_GENERIC_V8; + } + + kvm_ioctl(vcpufd, KVM_ARM_VCPU_INIT, &vcpu_init); + + // be sure that the multiprocessor is runnable + struct kvm_mp_state mp_state = { KVM_MP_STATE_RUNNABLE }; + kvm_ioctl(vcpufd, KVM_SET_MP_STATE, &mp_state); + + struct kvm_one_reg reg; + uint64_t data; + + /* pstate = all interrupts masked */ + data = PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | PSR_MODE_EL1h; + reg.id = ARM64_CORE_REG(regs.pstate); + reg.addr = (uint64_t)&data; + kvm_ioctl(vcpufd, KVM_SET_ONE_REG, &reg); + +#if 0 + /* x0...x3 = 0 */ + data = 0; + reg.id = ARM64_CORE_REG(regs.regs[0]); + kvm_ioctl(vcpufd, KVM_SET_ONE_REG, &reg); + + reg.id = ARM64_CORE_REG(regs.regs[1]); + kvm_ioctl(vcpufd, KVM_SET_ONE_REG, &reg); + + reg.id = ARM64_CORE_REG(regs.regs[2]); + kvm_ioctl(vcpufd, KVM_SET_ONE_REG, &reg); + + reg.id = ARM64_CORE_REG(regs.regs[3]); + kvm_ioctl(vcpufd, KVM_SET_ONE_REG, &reg); +#endif + + /* set start address */ + data = elf_entry; + reg.id = ARM64_CORE_REG(regs.pc); + kvm_ioctl(vcpufd, KVM_SET_ONE_REG, &reg); + + if (gic_fd > 0) { + int lines = 1; + uint32_t nr_irqs = lines * 32 + GIC_SPI_IRQ_BASE; + struct kvm_device_attr nr_irqs_attr = { + .group = KVM_DEV_ARM_VGIC_GRP_NR_IRQS, + .addr = (uint64_t)&nr_irqs, + }; + struct kvm_device_attr vgic_init_attr = { + .group = KVM_DEV_ARM_VGIC_GRP_CTRL, + .attr = KVM_DEV_ARM_VGIC_CTRL_INIT, + }; + + kvm_ioctl(gic_fd, KVM_SET_DEVICE_ATTR, &nr_irqs_attr); + kvm_ioctl(gic_fd,
KVM_SET_DEVICE_ATTR, &vgic_init_attr); + } + + // only one core is able to enter startup code + // => they wait for the predecessor core + while (*((volatile uint32_t*) (mboot + 0x120)) < cpuid) + pthread_yield(); + *((volatile uint32_t*) (mboot + 0x130)) = cpuid; +} + +void init_kvm_arch(void) +{ + guest_mem = mmap(NULL, guest_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (guest_mem == MAP_FAILED) + err(1, "mmap failed"); + + const char* merge = getenv("HERMIT_MERGEABLE"); + if (merge && (strcmp(merge, "0") != 0)) { + /* + * The KSM feature is intended for applications that generate + * many instances of the same data (e.g., virtualization systems + * such as KVM). It can consume a lot of processing power! + */ + madvise(guest_mem, guest_size, MADV_MERGEABLE); + if (verbose) + fprintf(stderr, "VM uses KSM feature \"mergeable\" to reduce the memory footprint.\n"); + } + + const char* hugepage = getenv("HERMIT_HUGEPAGE"); + if (hugepage && (strcmp(hugepage, "0") != 0)) { + madvise(guest_mem, guest_size, MADV_HUGEPAGE); + if (verbose) + fprintf(stderr, "VM uses huge pages to improve the performance.\n"); + } + + cap_read_only = kvm_ioctl(vmfd, KVM_CHECK_EXTENSION, KVM_CAP_READONLY_MEM) <= 0 ? false : true; + if (!cap_read_only) + err(1, "the support of KVM_CAP_READONLY_MEM is currently required"); + + struct kvm_userspace_memory_region kvm_region = { + .slot = 0, + .guest_phys_addr = 0, + .memory_size = PAGE_SIZE, + .userspace_addr = (uint64_t) guest_mem, + .flags = KVM_MEM_READONLY, + }; + kvm_ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &kvm_region); + + kvm_region = (struct kvm_userspace_memory_region) { + .slot = 1, + .guest_phys_addr = PAGE_SIZE, + .memory_size = guest_size - PAGE_SIZE, + .userspace_addr = (uint64_t) guest_mem + PAGE_SIZE, + #ifdef USE_DIRTY_LOG + .flags = KVM_MEM_LOG_DIRTY_PAGES, + #else + .flags = 0, + #endif + }; + kvm_ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &kvm_region); + +#if 0 + /* Create interrupt controller GICv2 */ + uint64_t cpu_if_addr = GICC_BASE; + uint64_t dist_addr = GICD_BASE; + struct kvm_device_attr cpu_if_attr = { + .group = KVM_DEV_ARM_VGIC_GRP_ADDR, + .attr = KVM_VGIC_V2_ADDR_TYPE_CPU, + .addr = (uint64_t)&cpu_if_addr, + }; + struct kvm_create_device gic_device = { + .flags = 0, + .type = KVM_DEV_TYPE_ARM_VGIC_V2, + }; + struct kvm_device_attr dist_attr = { + .group = KVM_DEV_ARM_VGIC_GRP_ADDR, + .attr = KVM_VGIC_V2_ADDR_TYPE_DIST, + .addr = (uint64_t)&dist_addr, + }; + kvm_ioctl(vmfd, KVM_CREATE_DEVICE, &gic_device); + + gic_fd = gic_device.fd; + kvm_ioctl(gic_fd, KVM_SET_DEVICE_ATTR, &cpu_if_attr); + kvm_ioctl(gic_fd, KVM_SET_DEVICE_ATTR, &dist_attr); +#else + /* Create interrupt controller GICv2 */ + struct kvm_arm_device_addr gic_addr[] = { + [0] = { + .id = KVM_VGIC_V2_ADDR_TYPE_DIST | + (KVM_ARM_DEVICE_VGIC_V2 << KVM_ARM_DEVICE_ID_SHIFT), + .addr = GICD_BASE, + }, + [1] = { + .id = KVM_VGIC_V2_ADDR_TYPE_CPU | + (KVM_ARM_DEVICE_VGIC_V2 << KVM_ARM_DEVICE_ID_SHIFT), + .addr = GICC_BASE, + } + }; + + kvm_ioctl(vmfd, KVM_CREATE_IRQCHIP, NULL); + kvm_ioctl(vmfd, KVM_ARM_SET_DEVICE_ADDR, &gic_addr[0]); + kvm_ioctl(vmfd, KVM_ARM_SET_DEVICE_ADDR, &gic_addr[1]); +#endif + + //fprintf(stderr, "Create gicd at 0x%llx\n", GICD_BASE); + //fprintf(stderr, "Create gicc at 0x%llx\n", GICC_BASE); + + cap_irqfd = ioctl(vmfd, KVM_CHECK_EXTENSION, KVM_CAP_IRQFD) <= 0 ?
false : true; + if (!cap_irqfd) + err(1, "the support of KVM_CAP_IRQFD is currently required"); +} + +int load_kernel(uint8_t* mem, char* path) +{ + Elf64_Ehdr hdr; + Elf64_Phdr *phdr = NULL; + size_t buflen; + int fd, ret; + int first_load = 1; + + fd = open(path, O_RDONLY); + if (fd == -1) + { + perror("Unable to open file"); + return -1; + } + + ret = pread_in_full(fd, &hdr, sizeof(hdr), 0); + if (ret < 0) + goto out; + + // check if the program is a HermitCore file + if (hdr.e_ident[EI_MAG0] != ELFMAG0 + || hdr.e_ident[EI_MAG1] != ELFMAG1 + || hdr.e_ident[EI_MAG2] != ELFMAG2 + || hdr.e_ident[EI_MAG3] != ELFMAG3 + || hdr.e_ident[EI_CLASS] != ELFCLASS64 + || hdr.e_ident[EI_OSABI] != HERMIT_ELFOSABI + || hdr.e_type != ET_EXEC || hdr.e_machine != EM_AARCH64) { + fprintf(stderr, "Invalid HermitCore file!\n"); + ret = -1; + goto out; + } + + elf_entry = hdr.e_entry; + + buflen = hdr.e_phentsize * hdr.e_phnum; + phdr = malloc(buflen); + if (!phdr) { + fprintf(stderr, "Not enough memory\n"); + ret = -1; + goto out; + } + + ret = pread_in_full(fd, phdr, buflen, hdr.e_phoff); + if (ret < 0) + goto out; + + /* + * Load all segments with type "LOAD" from the file at offset + * p_offset, and copy them into memory. + */ + for (Elf64_Half ph_i = 0; ph_i < hdr.e_phnum; ph_i++) + { + uint64_t paddr = phdr[ph_i].p_paddr; + size_t offset = phdr[ph_i].p_offset; + size_t filesz = phdr[ph_i].p_filesz; + size_t memsz = phdr[ph_i].p_memsz; + + if (phdr[ph_i].p_type != PT_LOAD) + continue; + + //fprintf(stderr, "Kernel location 0x%zx, file size 0x%zx, memory size 0x%zx\n", paddr, filesz, memsz); + + ret = pread_in_full(fd, mem+paddr-GUEST_OFFSET, filesz, offset); + if (ret < 0) + goto out; + if (!klog) + klog = mem+paddr+0x1000-GUEST_OFFSET; + if (!mboot) + mboot = mem+paddr-GUEST_OFFSET; + //fprintf(stderr, "mboot at %p, klog at %p\n", mboot, klog); + + if (first_load) { + first_load = 0; + + // initialize kernel + *((uint64_t*) (mem+paddr-GUEST_OFFSET + 0x100)) = paddr; // physical start address + *((uint64_t*) (mem+paddr-GUEST_OFFSET + 0x108)) = guest_size - PAGE_SIZE; // physical limit + *((uint32_t*) (mem+paddr-GUEST_OFFSET + 0x110)) = get_cpufreq(); + *((uint32_t*) (mem+paddr-GUEST_OFFSET + 0x128)) = 1; // number of used cpus + *((uint32_t*) (mem+paddr-GUEST_OFFSET + 0x130)) = 0; // cpuid + *((uint32_t*) (mem+paddr-GUEST_OFFSET + 0x148)) = 1; // announce uhyve + + + char* str = getenv("HERMIT_IP"); + if (str) { + uint32_t ip[4]; + + sscanf(str, "%u.%u.%u.%u", ip+0, ip+1, ip+2, ip+3); + *((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB0)) = (uint8_t) ip[0]; + *((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB1)) = (uint8_t) ip[1]; + *((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB2)) = (uint8_t) ip[2]; + *((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB3)) = (uint8_t) ip[3]; + } + + str = getenv("HERMIT_GATEWAY"); + if (str) { + uint32_t ip[4]; + + sscanf(str, "%u.%u.%u.%u", ip+0, ip+1, ip+2, ip+3); + *((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB4)) = (uint8_t) ip[0]; + *((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB5)) = (uint8_t) ip[1]; + *((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB6)) = (uint8_t) ip[2]; + *((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB7)) = (uint8_t) ip[3]; + } + str = getenv("HERMIT_MASK"); + if (str) { + uint32_t ip[4]; + + sscanf(str, "%u.%u.%u.%u", ip+0, ip+1, ip+2, ip+3); + *((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB8)) = (uint8_t) ip[0]; + *((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB9)) = (uint8_t) ip[1]; + *((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xBA)) = (uint8_t) ip[2]; + *((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xBB)) =
(uint8_t) ip[3]; + } + + *((uint64_t*) (mem+paddr-GUEST_OFFSET + 0xbc)) = (uint64_t) guest_mem; + } + *((uint64_t*) (mem+paddr-GUEST_OFFSET + 0x158)) += memsz; // total kernel size + } + + ret = 0; + +out: + if (phdr) + free(phdr); + + close(fd); + + return ret; +} +#endif diff --git a/tools/uhyve-net.h b/tools/uhyve-net.h index bd3bb054f..540616cf7 100755 --- a/tools/uhyve-net.h +++ b/tools/uhyve-net.h @@ -21,10 +21,7 @@ #include #include -#include "uhyve-cpu.h" - -static char *netif; -static int netfd; +extern int netfd; // UHYVE_PORT_NETINFO typedef struct { diff --git a/tools/uhyve-syscalls.h b/tools/uhyve-syscalls.h index 7b7998e03..7b83cdc01 100644 --- a/tools/uhyve-syscalls.h +++ b/tools/uhyve-syscalls.h @@ -21,15 +21,6 @@ #include #include -typedef enum { - UHYVE_PORT_WRITE = 0x499, - UHYVE_PORT_OPEN = 0x500, - UHYVE_PORT_CLOSE = 0x501, - UHYVE_PORT_READ = 0x502, - UHYVE_PORT_EXIT = 0x503, - UHYVE_PORT_LSEEK = 0x504 -} uhyve_syscall_t; - typedef struct { int fd; const char* buf; diff --git a/tools/uhyve-x86_64.c b/tools/uhyve-x86_64.c new file mode 100644 index 000000000..9983966a6 --- /dev/null +++ b/tools/uhyve-x86_64.c @@ -0,0 +1,1060 @@ +/* + * Copyright (c) 2018, Stefan Lankes, RWTH Aachen University + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifdef __x86_64__ +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "uhyve.h" +#include "uhyve-x86_64.h" +#include "uhyve-syscalls.h" +#include "uhyve-net.h" +#include "proxy.h" + +// define this macro to create checkpoints with KVM's dirty log +//#define USE_DIRTY_LOG + +#define MAX_FNAME 256 +#define MAX_MSR_ENTRIES 25 + +#define GUEST_OFFSET 0x0 +#define CPUID_FUNC_PERFMON 0x0A +#define GUEST_PAGE_SIZE 0x200000 /* 2 MB pages in guest */ + +#define KVM_32BIT_MAX_MEM_SIZE (1ULL << 32) +#define KVM_32BIT_GAP_SIZE (768 << 20) +#define KVM_32BIT_GAP_START (KVM_32BIT_MAX_MEM_SIZE - KVM_32BIT_GAP_SIZE) + +#define BOOT_GDT 0x1000 +#define BOOT_INFO 0x2000 +#define BOOT_PML4 0x10000 +#define BOOT_PDPTE 0x11000 +#define BOOT_PDE 0x12000 + +#define BOOT_GDT_NULL 0 +#define BOOT_GDT_CODE 1 +#define BOOT_GDT_DATA 2 +#define BOOT_GDT_MAX 3 + +#define KVM_32BIT_MAX_MEM_SIZE (1ULL << 32) +#define KVM_32BIT_GAP_SIZE (768 << 20) +#define KVM_32BIT_GAP_START (KVM_32BIT_MAX_MEM_SIZE - KVM_32BIT_GAP_SIZE) + +/// Page offset bits +#define PAGE_BITS 12 +#define PAGE_2M_BITS 21 +#define PAGE_SIZE (1L << PAGE_BITS) +/// Mask the page address without page map flags and XD flag +#if 0 +#define PAGE_MASK ((~0L) << PAGE_BITS) +#define PAGE_2M_MASK (~0L) << PAGE_2M_BITS) +#else +#define PAGE_MASK (((~0UL) << PAGE_BITS) & ~PG_XD) +#define PAGE_2M_MASK (((~0UL) << PAGE_2M_BITS) & ~PG_XD) +#endif + +// Page is present +#define PG_PRESENT (1 << 0) +// Page is read- and writable +#define PG_RW (1 << 1) +// Page is addressable from userspace +#define PG_USER (1 << 2) +// Page write through is activated +#define PG_PWT (1 << 3) +// Page cache is disabled +#define PG_PCD (1 << 4) +// Page was recently accessed (set by CPU) +#define PG_ACCESSED (1 << 5) +// Page is dirty due to recent write-access (set by CPU) +#define PG_DIRTY (1 << 6) +// Huge page: 4MB (or 2MB, 1GB) +#define PG_PSE (1 << 7) +// Page attribute table +#define PG_PAT PG_PSE +#if 1 +/* @brief Global TLB entry (Pentium Pro and later) + * + * HermitCore is a single-address space operating system + * => CR3 never changed => The flag isn't required for HermitCore + */ +#define PG_GLOBAL 0 +#else +#define PG_GLOBAL (1 << 8) +#endif +// This table is a self-reference and should skipped by page_map_copy() +#define PG_SELF (1 << 9) + +/// Disable execution for this page +#define PG_XD (1L << 63) + +#define BITS 64 +#define PHYS_BITS 52 +#define VIRT_BITS 48 +#define PAGE_MAP_BITS 9 +#define PAGE_LEVELS 4 + +#define IOAPIC_DEFAULT_BASE 0xfec00000 +#define APIC_DEFAULT_BASE 0xfee00000 + +static bool cap_tsc_deadline = false; +static bool cap_irqchip = false; +static bool cap_adjust_clock_stable = false; +static bool cap_irqfd = false; +static bool cap_vapic = false; + +extern size_t guest_size; +extern pthread_barrier_t barrier; +extern pthread_t* vcpu_threads; +extern uint64_t elf_entry; +extern uint8_t* klog; +extern bool verbose; +extern bool full_checkpoint; +extern uint32_t no_checkpoint; +extern uint32_t ncores; +extern uint8_t* guest_mem; +extern size_t guest_size; +extern int kvm, vmfd, netfd, efd; +extern uint8_t* mboot; +extern __thread struct kvm_run *run; +extern __thread int vcpufd; +extern __thread uint32_t cpuid; + +static inline void show_dtable(const char *name, struct 
kvm_dtable *dtable) +{ + fprintf(stderr, " %s %016zx %08hx\n", name, (size_t) dtable->base, (uint16_t) dtable->limit); +} + +static inline void show_segment(const char *name, struct kvm_segment *seg) +{ + fprintf(stderr, " %s %04hx %016zx %08x %02hhx %x %x %x %x %x %x %x\n", + name, (uint16_t) seg->selector, (size_t) seg->base, (uint32_t) seg->limit, + (uint8_t) seg->type, seg->present, seg->dpl, seg->db, seg->s, seg->l, seg->g, seg->avl); +} + +static void show_registers(int id, struct kvm_regs* regs, struct kvm_sregs* sregs) +{ + size_t cr0, cr2, cr3; + size_t cr4, cr8; + size_t rax, rbx, rcx; + size_t rdx, rsi, rdi; + size_t rbp, r8, r9; + size_t r10, r11, r12; + size_t r13, r14, r15; + size_t rip, rsp; + size_t rflags; + int i; + + rflags = regs->rflags; + rip = regs->rip; rsp = regs->rsp; + rax = regs->rax; rbx = regs->rbx; rcx = regs->rcx; + rdx = regs->rdx; rsi = regs->rsi; rdi = regs->rdi; + rbp = regs->rbp; r8 = regs->r8; r9 = regs->r9; + r10 = regs->r10; r11 = regs->r11; r12 = regs->r12; + r13 = regs->r13; r14 = regs->r14; r15 = regs->r15; + + fprintf(stderr, "\n Dump state of CPU %d\n", id); + fprintf(stderr, "\n Registers:\n"); + fprintf(stderr, " ----------\n"); + fprintf(stderr, " rip: %016zx rsp: %016zx flags: %016zx\n", rip, rsp, rflags); + fprintf(stderr, " rax: %016zx rbx: %016zx rcx: %016zx\n", rax, rbx, rcx); + fprintf(stderr, " rdx: %016zx rsi: %016zx rdi: %016zx\n", rdx, rsi, rdi); + fprintf(stderr, " rbp: %016zx r8: %016zx r9: %016zx\n", rbp, r8, r9); + fprintf(stderr, " r10: %016zx r11: %016zx r12: %016zx\n", r10, r11, r12); + fprintf(stderr, " r13: %016zx r14: %016zx r15: %016zx\n", r13, r14, r15); + + cr0 = sregs->cr0; cr2 = sregs->cr2; cr3 = sregs->cr3; + cr4 = sregs->cr4; cr8 = sregs->cr8; + + fprintf(stderr, " cr0: %016zx cr2: %016zx cr3: %016zx\n", cr0, cr2, cr3); + fprintf(stderr, " cr4: %016zx cr8: %016zx\n", cr4, cr8); + fprintf(stderr, "\n Segment registers:\n"); + fprintf(stderr, " ------------------\n"); + fprintf(stderr, " register selector base limit type p dpl db s l g avl\n"); + show_segment("cs ", &sregs->cs); + show_segment("ss ", &sregs->ss); + show_segment("ds ", &sregs->ds); + show_segment("es ", &sregs->es); + show_segment("fs ", &sregs->fs); + show_segment("gs ", &sregs->gs); + show_segment("tr ", &sregs->tr); + show_segment("ldt", &sregs->ldt); + show_dtable("gdt", &sregs->gdt); + show_dtable("idt", &sregs->idt); + + fprintf(stderr, "\n APIC:\n"); + fprintf(stderr, " -----\n"); + fprintf(stderr, " efer: %016zx apic base: %016zx\n", + (size_t) sregs->efer, (size_t) sregs->apic_base); + + fprintf(stderr, "\n Interrupt bitmap:\n"); + fprintf(stderr, " -----------------\n"); + for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) + fprintf(stderr, " %016zx", (size_t) sregs->interrupt_bitmap[i]); + fprintf(stderr, "\n"); +} + +void print_registers(void) +{ + struct kvm_regs regs; + struct kvm_sregs sregs; + + kvm_ioctl(vcpufd, KVM_GET_SREGS, &sregs); + kvm_ioctl(vcpufd, KVM_GET_REGS, &regs); + + show_registers(cpuid, &regs, &sregs); +} + +/// Filter CPUID functions that are not supported by the hypervisor and enable +/// features according to our needs.
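+/// In CPUID leaf 1, ECX bit 31 is the hypervisor-present bit (guests use it to detect that they run virtualized), ECX bit 24 advertises the TSC-deadline timer mode and EDX bit 5 advertises RDMSR/WRMSR support; leaf 0x0A is the architectural performance-monitoring leaf, which is zeroed below to hide the PMU from the guest. A guest-side probe is, as an illustrative sketch: +/// uint32_t a = 1, b, c, d; +/// asm volatile ("cpuid" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "a"(a), "c"(0)); +/// int running_on_hypervisor = (c >> 31) & 1; // ECX bit 31 of leaf 1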
+static void filter_cpuid(struct kvm_cpuid2 *kvm_cpuid) +{ + for (uint32_t i = 0; i < kvm_cpuid->nent; i++) { + struct kvm_cpuid_entry2 *entry = &kvm_cpuid->entries[i]; + + switch (entry->function) { + case 1: + // CPUID to define basic cpu features + entry->ecx |= (1U << 31); // propagate that we are running on a hypervisor + if (cap_tsc_deadline) + entry->ecx |= (1U << 24); // enable TSC deadline feature + entry->edx |= (1U << 5); // enable msr support + break; + + case CPUID_FUNC_PERFMON: + // disable it + entry->eax = 0x00; + break; + + default: + // Keep the CPUID function as-is + break; + }; + } +} + +static void setup_system_64bit(struct kvm_sregs *sregs) +{ + sregs->cr0 |= X86_CR0_PE; + sregs->efer |= EFER_LME; +} + +static void setup_system_page_tables(struct kvm_sregs *sregs, uint8_t *mem) +{ + uint64_t *pml4 = (uint64_t *) (mem + BOOT_PML4); + uint64_t *pdpte = (uint64_t *) (mem + BOOT_PDPTE); + uint64_t *pde = (uint64_t *) (mem + BOOT_PDE); + uint64_t paddr; + + /* + * For simplicity we currently use 2MB pages and only a single + * PML4/PDPTE/PDE. + */ + + memset(pml4, 0x00, 4096); + memset(pdpte, 0x00, 4096); + memset(pde, 0x00, 4096); + + *pml4 = BOOT_PDPTE | (X86_PDPT_P | X86_PDPT_RW); + *pdpte = BOOT_PDE | (X86_PDPT_P | X86_PDPT_RW); + for (paddr = 0; paddr < 0x20000000ULL; paddr += GUEST_PAGE_SIZE, pde++) + *pde = paddr | (X86_PDPT_P | X86_PDPT_RW | X86_PDPT_PS); + + sregs->cr3 = BOOT_PML4; + sregs->cr4 |= X86_CR4_PAE; + sregs->cr0 |= X86_CR0_PG; +} + +static void setup_system_gdt(struct kvm_sregs *sregs, + uint8_t *mem, + uint64_t off) +{ + uint64_t *gdt = (uint64_t *) (mem + off); + struct kvm_segment data_seg, code_seg; + + /* flags, base, limit */ + gdt[BOOT_GDT_NULL] = GDT_ENTRY(0, 0, 0); + gdt[BOOT_GDT_CODE] = GDT_ENTRY(0xA09B, 0, 0xFFFFF); + gdt[BOOT_GDT_DATA] = GDT_ENTRY(0xC093, 0, 0xFFFFF); + + sregs->gdt.base = off; + sregs->gdt.limit = (sizeof(uint64_t) * BOOT_GDT_MAX) - 1; + + GDT_TO_KVM_SEGMENT(code_seg, gdt, BOOT_GDT_CODE); + GDT_TO_KVM_SEGMENT(data_seg, gdt, BOOT_GDT_DATA); + + sregs->cs = code_seg; + sregs->ds = data_seg; + sregs->es = data_seg; + sregs->fs = data_seg; + sregs->gs = data_seg; + sregs->ss = data_seg; +} + +static void setup_system(int vcpufd, uint8_t *mem, uint32_t id) +{ + static struct kvm_sregs sregs; + + // all cores use the same startup code + // => all cores use the same sregs + // => only the boot processor has to initialize sregs + if (id == 0) { + kvm_ioctl(vcpufd, KVM_GET_SREGS, &sregs); + + /* Set all cpu/mem system structures */ + setup_system_gdt(&sregs, mem, BOOT_GDT); + setup_system_page_tables(&sregs, mem); + setup_system_64bit(&sregs); + } + + kvm_ioctl(vcpufd, KVM_SET_SREGS, &sregs); +} + +static void setup_cpuid(int kvm, int vcpufd) +{ + struct kvm_cpuid2 *kvm_cpuid; + unsigned int max_entries = 100; + + // allocate space for cpuid we get from KVM + kvm_cpuid = calloc(1, sizeof(*kvm_cpuid) + (max_entries * sizeof(kvm_cpuid->entries[0]))); + kvm_cpuid->nent = max_entries; + + kvm_ioctl(kvm, KVM_GET_SUPPORTED_CPUID, kvm_cpuid); + + // set features + filter_cpuid(kvm_cpuid); + kvm_ioctl(vcpufd, KVM_SET_CPUID2, kvm_cpuid); + + free(kvm_cpuid); +} + +void init_cpu_state(uint64_t elf_entry) +{ + struct kvm_regs regs = { + .rip = elf_entry, // entry point to HermitCore + .rflags = 0x2, // POR value required by x86 architecture + }; + struct kvm_mp_state mp_state = { KVM_MP_STATE_RUNNABLE }; + struct { + struct kvm_msrs info; + struct kvm_msr_entry entries[MAX_MSR_ENTRIES]; + } msr_data; + struct kvm_msr_entry *msrs = 
msr_data.entries; + + run->apic_base = APIC_DEFAULT_BASE; + setup_cpuid(kvm, vcpufd); + + // be sure that the multiprocessor is runnable + kvm_ioctl(vcpufd, KVM_SET_MP_STATE, &mp_state); + + // enable fast string operations + msrs[0].index = MSR_IA32_MISC_ENABLE; + msrs[0].data = 1; + msr_data.info.nmsrs = 1; + kvm_ioctl(vcpufd, KVM_SET_MSRS, &msr_data); + + /* Setup registers and memory. */ + setup_system(vcpufd, guest_mem, cpuid); + kvm_ioctl(vcpufd, KVM_SET_REGS, &regs); + + // only one core is able to enter startup code + // => they wait for the predecessor core + while (*((volatile uint32_t*) (mboot + 0x20)) < cpuid) + pthread_yield(); + *((volatile uint32_t*) (mboot + 0x30)) = cpuid; +} + +void restore_cpu_state(void) +{ + struct kvm_regs regs; + struct kvm_mp_state mp_state = { KVM_MP_STATE_RUNNABLE }; + char fname[MAX_FNAME]; + struct kvm_sregs sregs; + struct kvm_fpu fpu; + struct { + struct kvm_msrs info; + struct kvm_msr_entry entries[MAX_MSR_ENTRIES]; + } msr_data; + struct kvm_lapic_state lapic; + struct kvm_xsave xsave; + struct kvm_xcrs xcrs; + struct kvm_vcpu_events events; + + run->apic_base = APIC_DEFAULT_BASE; + setup_cpuid(kvm, vcpufd); + + snprintf(fname, MAX_FNAME, "checkpoint/chk%u_core%u.dat", no_checkpoint, cpuid); + + FILE* f = fopen(fname, "r"); + if (f == NULL) + err(1, "fopen: unable to open file"); + + if (fread(&sregs, sizeof(sregs), 1, f) != 1) + err(1, "fread failed\n"); + if (fread(&regs, sizeof(regs), 1, f) != 1) + err(1, "fread failed\n"); + if (fread(&fpu, sizeof(fpu), 1, f) != 1) + err(1, "fread failed\n"); + if (fread(&msr_data, sizeof(msr_data), 1, f) != 1) + err(1, "fread failed\n"); + if (fread(&lapic, sizeof(lapic), 1, f) != 1) + err(1, "fread failed\n"); + if (fread(&xsave, sizeof(xsave), 1, f) != 1) + err(1, "fread failed\n"); + if (fread(&xcrs, sizeof(xcrs), 1, f) != 1) + err(1, "fread failed\n"); + if (fread(&events, sizeof(events), 1, f) != 1) + err(1, "fread failed\n"); + if (fread(&mp_state, sizeof(mp_state), 1, f) != 1) + err(1, "fread failed\n"); + + fclose(f); + + kvm_ioctl(vcpufd, KVM_SET_SREGS, &sregs); + kvm_ioctl(vcpufd, KVM_SET_REGS, &regs); + kvm_ioctl(vcpufd, KVM_SET_MSRS, &msr_data); + kvm_ioctl(vcpufd, KVM_SET_XCRS, &xcrs); + kvm_ioctl(vcpufd, KVM_SET_MP_STATE, &mp_state); + kvm_ioctl(vcpufd, KVM_SET_LAPIC, &lapic); + kvm_ioctl(vcpufd, KVM_SET_FPU, &fpu); + kvm_ioctl(vcpufd, KVM_SET_XSAVE, &xsave); + kvm_ioctl(vcpufd, KVM_SET_VCPU_EVENTS, &events); + +} + +void save_cpu_state(void) +{ + struct { + struct kvm_msrs info; + struct kvm_msr_entry entries[MAX_MSR_ENTRIES]; + } msr_data; + struct kvm_msr_entry *msrs = msr_data.entries; + struct kvm_regs regs; + struct kvm_sregs sregs; + struct kvm_fpu fpu; + struct kvm_lapic_state lapic; + struct kvm_xsave xsave; + struct kvm_xcrs xcrs; + struct kvm_vcpu_events events; + struct kvm_mp_state mp_state; + char fname[MAX_FNAME]; + int n = 0; + + /* define the list of required MSRs */ + msrs[n++].index = MSR_IA32_APICBASE; + msrs[n++].index = MSR_IA32_SYSENTER_CS; + msrs[n++].index = MSR_IA32_SYSENTER_ESP; + msrs[n++].index = MSR_IA32_SYSENTER_EIP; + msrs[n++].index = MSR_IA32_CR_PAT; + msrs[n++].index = MSR_IA32_MISC_ENABLE; + msrs[n++].index = MSR_IA32_TSC; + msrs[n++].index = MSR_CSTAR; + msrs[n++].index = MSR_STAR; + msrs[n++].index = MSR_EFER; + msrs[n++].index = MSR_LSTAR; + msrs[n++].index = MSR_GS_BASE; + msrs[n++].index = MSR_FS_BASE; + msrs[n++].index = MSR_KERNEL_GS_BASE; + //msrs[n++].index = MSR_IA32_FEATURE_CONTROL; + msr_data.info.nmsrs = n; + + kvm_ioctl(vcpufd, KVM_GET_SREGS, &sregs);
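+ // Keep this KVM_GET_* sequence in sync with the fwrite order below and + // with the fread order in restore_cpu_state(): the checkpoint file is + // simply these structs written back to back.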
+ kvm_ioctl(vcpufd, KVM_GET_REGS, &regs); + kvm_ioctl(vcpufd, KVM_GET_MSRS, &msr_data); + kvm_ioctl(vcpufd, KVM_GET_XCRS, &xcrs); + kvm_ioctl(vcpufd, KVM_GET_LAPIC, &lapic); + kvm_ioctl(vcpufd, KVM_GET_FPU, &fpu); + kvm_ioctl(vcpufd, KVM_GET_XSAVE, &xsave); + kvm_ioctl(vcpufd, KVM_GET_VCPU_EVENTS, &events); + kvm_ioctl(vcpufd, KVM_GET_MP_STATE, &mp_state); + + snprintf(fname, MAX_FNAME, "checkpoint/chk%u_core%u.dat", no_checkpoint, cpuid); + + FILE* f = fopen(fname, "w"); + if (f == NULL) { + err(1, "fopen: unable to open file\n"); + } + + if (fwrite(&sregs, sizeof(sregs), 1, f) != 1) + err(1, "fwrite failed\n"); + if (fwrite(&regs, sizeof(regs), 1, f) != 1) + err(1, "fwrite failed\n"); + if (fwrite(&fpu, sizeof(fpu), 1, f) != 1) + err(1, "fwrite failed\n"); + if (fwrite(&msr_data, sizeof(msr_data), 1, f) != 1) + err(1, "fwrite failed\n"); + if (fwrite(&lapic, sizeof(lapic), 1, f) != 1) + err(1, "fwrite failed\n"); + if (fwrite(&xsave, sizeof(xsave), 1, f) != 1) + err(1, "fwrite failed\n"); + if (fwrite(&xcrs, sizeof(xcrs), 1, f) != 1) + err(1, "fwrite failed\n"); + if (fwrite(&events, sizeof(events), 1, f) != 1) + err(1, "fwrite failed\n"); + if (fwrite(&mp_state, sizeof(mp_state), 1, f) != 1) + err(1, "fwrite failed\n"); + + fclose(f); +} + +void timer_handler(int signum) +{ + struct stat st = {0}; + const size_t flag = (!full_checkpoint && (no_checkpoint > 0)) ? PG_DIRTY : PG_ACCESSED; + char fname[MAX_FNAME]; + struct timeval begin, end; + + if (verbose) + gettimeofday(&begin, NULL); + + if (stat("checkpoint", &st) == -1) + mkdir("checkpoint", 0700); + + for(size_t i = 0; i < ncores; i++) + if (vcpu_threads[i] != pthread_self()) + pthread_kill(vcpu_threads[i], SIGRTMIN); + + pthread_barrier_wait(&barrier); + + save_cpu_state(); + + snprintf(fname, MAX_FNAME, "checkpoint/chk%u_mem.dat", no_checkpoint); + + FILE* f = fopen(fname, "w"); + if (f == NULL) { + err(1, "fopen: unable to open file"); + } + + /*struct kvm_irqchip irqchip = {}; + if (cap_irqchip) + kvm_ioctl(vmfd, KVM_GET_IRQCHIP, &irqchip); + else + memset(&irqchip, 0x00, sizeof(irqchip)); + if (fwrite(&irqchip, sizeof(irqchip), 1, f) != 1) + err(1, "fwrite failed");*/ + + struct kvm_clock_data clock = {}; + kvm_ioctl(vmfd, KVM_GET_CLOCK, &clock); + if (fwrite(&clock, sizeof(clock), 1, f) != 1) + err(1, "fwrite failed"); + +#if 0 + if (fwrite(guest_mem, guest_size, 1, f) != 1) + err(1, "fwrite failed"); +#elif defined(USE_DIRTY_LOG) + static struct kvm_dirty_log dlog = { + .slot = 0, + .dirty_bitmap = NULL + }; + size_t dirty_log_size = (guest_size >> PAGE_BITS) / sizeof(size_t); + + // are we creating our first checkpoint? + if (dlog.dirty_bitmap == NULL) + { + // be sure that all padding is zero + memset(&dlog, 0x00, sizeof(dlog)); + + dlog.dirty_bitmap = malloc(dirty_log_size * sizeof(size_t)); + if (dlog.dirty_bitmap == NULL) + err(1, "malloc failed!\n"); + } + memset(dlog.dirty_bitmap, 0x00, dirty_log_size * sizeof(size_t)); + + dlog.slot = 0; +nextslot: + kvm_ioctl(vmfd, KVM_GET_DIRTY_LOG, &dlog); + + for(size_t i=0; i KVM_32BIT_GAP_START - GUEST_OFFSET)) { + dlog.slot = 1; + memset(dlog.dirty_bitmap, 0x00, dirty_log_size * sizeof(size_t)); + goto nextslot; + } +#else + size_t* pml4 = (size_t*) (guest_mem+elf_entry+PAGE_SIZE); + for(size_t i=0; i<(1 << PAGE_MAP_BITS); i++) { + if ((pml4[i] & PG_PRESENT) != PG_PRESENT) + continue; + //printf("pml[%zd] 0x%zx\n", i, pml4[i]); + size_t* pdpt = (size_t*) (guest_mem+(pml4[i] & PAGE_MASK)); + for(size_t j=0; j<(1 << PAGE_MAP_BITS); j++) { + if ((pdpt[j] & PG_PRESENT) != PG_PRESENT) +
continue; + //printf("\tpdpt[%zd] 0x%zx\n", j, pdpt[j]); + size_t* pgd = (size_t*) (guest_mem+(pdpt[j] & PAGE_MASK)); + for(size_t k=0; k<(1 << PAGE_MAP_BITS); k++) { + if ((pgd[k] & PG_PRESENT) != PG_PRESENT) + continue; + //printf("\t\tpgd[%zd] 0x%zx\n", k, pgd[k] & ~PG_XD); + if ((pgd[k] & PG_PSE) != PG_PSE) { + size_t* pgt = (size_t*) (guest_mem+(pgd[k] & PAGE_MASK)); + for(size_t l=0; l<(1 << PAGE_MAP_BITS); l++) { + if ((pgt[l] & (PG_PRESENT|flag)) == (PG_PRESENT|flag)) { + //printf("\t\t\t*pgt[%zd] 0x%zx, 4KB\n", l, pgt[l] & ~PG_XD); + if (!full_checkpoint) + pgt[l] = pgt[l] & ~(PG_DIRTY|PG_ACCESSED); + size_t pgt_entry = pgt[l] & ~PG_PSE; // because PAT uses the same bit as PSE + if (fwrite(&pgt_entry, sizeof(size_t), 1, f) != 1) + err(1, "fwrite failed"); + if (fwrite((size_t*) (guest_mem + (pgt[l] & PAGE_MASK)), (1UL << PAGE_BITS), 1, f) != 1) + err(1, "fwrite failed"); + } + } + } else if ((pgd[k] & flag) == flag) { + //printf("\t\t*pgd[%zd] 0x%zx, 2MB\n", k, pgd[k] & ~PG_XD); + if (!full_checkpoint) + pgd[k] = pgd[k] & ~(PG_DIRTY|PG_ACCESSED); + if (fwrite(pgd+k, sizeof(size_t), 1, f) != 1) + err(1, "fwrite failed"); + if (fwrite((size_t*) (guest_mem + (pgd[k] & PAGE_2M_MASK)), (1UL << PAGE_2M_BITS), 1, f) != 1) + err(1, "fwrite failed"); + } + } + } + } +#endif + + fclose(f); + + pthread_barrier_wait(&barrier); + + // update configuration file + f = fopen("checkpoint/chk_config.txt", "w"); + if (f == NULL) { + err(1, "fopen: unable to open file"); + } + + fprintf(f, "number of cores: %u\n", ncores); + fprintf(f, "memory size: 0x%zx\n", guest_size); + fprintf(f, "checkpoint number: %u\n", no_checkpoint); + fprintf(f, "entry point: 0x%zx\n", elf_entry); + if (full_checkpoint) + fprintf(f, "full checkpoint: 1\n"); + else + fprintf(f, "full checkpoint: 0\n"); + + fclose(f); + + if (verbose) { + gettimeofday(&end, NULL); + size_t msec = (end.tv_sec - begin.tv_sec) * 1000; + msec += (end.tv_usec - begin.tv_usec) / 1000; + fprintf(stderr, "Create checkpoint %u in %zd ms\n", no_checkpoint, msec); + } + + no_checkpoint++; +} + +int load_checkpoint(uint8_t* mem, char* path) +{ + char fname[MAX_FNAME]; + size_t location; + size_t paddr = elf_entry; + int ret; + struct timeval begin, end; + uint32_t i; + + if (verbose) + gettimeofday(&begin, NULL); + + if (!klog) + klog = mem+paddr+0x5000-GUEST_OFFSET; + if (!mboot) + mboot = mem+paddr-GUEST_OFFSET; + + +#ifdef USE_DIRTY_LOG + /* + * if we use KVM's dirty page logging, we have to load + * the elf image because most parts are readonly sections + * and can't be detected by KVM's dirty page logging + * technique. + */ + ret = load_kernel(mem, path); + if (ret) + return ret; +#endif + + i = full_checkpoint ?
no_checkpoint : 0; + for(; i<=no_checkpoint; i++) + { + snprintf(fname, MAX_FNAME, "checkpoint/chk%u_mem.dat", i); + + FILE* f = fopen(fname, "r"); + if (f == NULL) + return -1; + + /*struct kvm_irqchip irqchip; + if (fread(&irqchip, sizeof(irqchip), 1, f) != 1) + err(1, "fread failed"); + if (cap_irqchip && (i == no_checkpoint-1)) + kvm_ioctl(vmfd, KVM_SET_IRQCHIP, &irqchip);*/ + + struct kvm_clock_data clock; + if (fread(&clock, sizeof(clock), 1, f) != 1) + err(1, "fread failed"); + // only the last checkpoint has to set the clock + if (cap_adjust_clock_stable && (i == no_checkpoint)) { + struct kvm_clock_data data = {}; + + data.clock = clock.clock; + kvm_ioctl(vmfd, KVM_SET_CLOCK, &data); + } + +#if 0 + if (fread(guest_mem, guest_size, 1, f) != 1) + err(1, "fread failed"); +#else + + while (fread(&location, sizeof(location), 1, f) == 1) { + //printf("location 0x%zx\n", location); + if (location & PG_PSE) + ret = fread((size_t*) (mem + (location & PAGE_2M_MASK)), (1UL << PAGE_2M_BITS), 1, f); + else + ret = fread((size_t*) (mem + (location & PAGE_MASK)), (1UL << PAGE_BITS), 1, f); + + if (ret != 1) { + fprintf(stderr, "Unable to read checkpoint: ret = %d", ret); + err(1, "fread failed"); + } + } +#endif + + fclose(f); + } + + if (verbose) { + gettimeofday(&end, NULL); + size_t msec = (end.tv_sec - begin.tv_sec) * 1000; + msec += (end.tv_usec - begin.tv_usec) / 1000; + fprintf(stderr, "Load checkpoint %u in %zd ms\n", no_checkpoint, msec); + } + + return 0; +} + +void init_kvm_arch(void) +{ + uint64_t identity_base = 0xfffbc000; + if (ioctl(vmfd, KVM_CHECK_EXTENSION, KVM_CAP_SYNC_MMU) > 0) { + /* Allows up to 16M BIOSes. */ + identity_base = 0xfeffc000; + + kvm_ioctl(vmfd, KVM_SET_IDENTITY_MAP_ADDR, &identity_base); + } + kvm_ioctl(vmfd, KVM_SET_TSS_ADDR, identity_base + 0x1000); + + /* + * Allocate page-aligned guest memory. + * + * TODO: support of huge pages + */ + if (guest_size < KVM_32BIT_GAP_START) { + guest_mem = mmap(NULL, guest_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (guest_mem == MAP_FAILED) + err(1, "mmap failed"); + } else { + guest_size += KVM_32BIT_GAP_SIZE; + guest_mem = mmap(NULL, guest_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (guest_mem == MAP_FAILED) + err(1, "mmap failed"); + + /* + * We mprotect the gap PROT_NONE so that if we accidently write to it, we will know. + */ + mprotect(guest_mem + KVM_32BIT_GAP_START, KVM_32BIT_GAP_SIZE, PROT_NONE); + } + + const char* merge = getenv("HERMIT_MERGEABLE"); + if (merge && (strcmp(merge, "0") != 0)) { + /* + * The KSM feature is intended for applications that generate + * many instances of the same data (e.g., virtualization systems + * such as KVM). It can consume a lot of processing power! 
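+ * Registering the region with madvise(MADV_MERGEABLE) only opts it in; + * the kernel's ksmd thread then scans it in the background and merges + * identical pages copy-on-write.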
+ */ + madvise(guest_mem, guest_size, MADV_MERGEABLE); + if (verbose) + fprintf(stderr, "VM uses KSM feature \"mergeable\" to reduce the memory footprint.\n"); + } + + const char* hugepage = getenv("HERMIT_HUGEPAGE"); + if (hugepage && (strcmp(hugepage, "0") != 0)) { + madvise(guest_mem, guest_size, MADV_HUGEPAGE); + if (verbose) + fprintf(stderr, "VM uses huge pages to improve performance.\n"); + } + + struct kvm_userspace_memory_region kvm_region = { + .slot = 0, + .guest_phys_addr = GUEST_OFFSET, + .memory_size = guest_size, + .userspace_addr = (uint64_t) guest_mem, +#ifdef USE_DIRTY_LOG + .flags = KVM_MEM_LOG_DIRTY_PAGES, +#else + .flags = 0, +#endif + }; + + if (guest_size <= KVM_32BIT_GAP_START - GUEST_OFFSET) { + kvm_ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &kvm_region); + } else { + kvm_region.memory_size = KVM_32BIT_GAP_START - GUEST_OFFSET; + kvm_ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &kvm_region); + + kvm_region.slot = 1; + kvm_region.guest_phys_addr = KVM_32BIT_GAP_START+KVM_32BIT_GAP_SIZE; + kvm_region.memory_size = guest_size - KVM_32BIT_GAP_SIZE - KVM_32BIT_GAP_START + GUEST_OFFSET; + kvm_ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &kvm_region); + } + + kvm_ioctl(vmfd, KVM_CREATE_IRQCHIP, NULL); + +#ifdef KVM_CAP_X2APIC_API + // enable x2APIC support + struct kvm_enable_cap cap = { + .cap = KVM_CAP_X2APIC_API, + .flags = 0, + .args[0] = KVM_X2APIC_API_USE_32BIT_IDS|KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK, + }; + kvm_ioctl(vmfd, KVM_ENABLE_CAP, &cap); +#endif + + // initialize the IOAPIC with HermitCore's default settings + struct kvm_irqchip chip; + chip.chip_id = KVM_IRQCHIP_IOAPIC; + kvm_ioctl(vmfd, KVM_GET_IRQCHIP, &chip); + for(int i=0; i #include @@ -61,135 +61,33 @@ #include #include #include -#include -#include -#include "uhyve-cpu.h" +#include "uhyve.h" #include "uhyve-syscalls.h" #include "uhyve-net.h" #include "proxy.h" -// define this macro to create checkpoints with KVM's dirty log -//#define USE_DIRTY_LOG - -#define MAX_FNAME 256 -#define MAX_MSR_ENTRIES 25 - -#define GUEST_OFFSET 0x0 -#define CPUID_FUNC_PERFMON 0x0A -#define GUEST_PAGE_SIZE 0x200000 /* 2 MB pages in guest */ - -#define BOOT_GDT 0x1000 -#define BOOT_INFO 0x2000 -#define BOOT_PML4 0x10000 -#define BOOT_PDPTE 0x11000 -#define BOOT_PDE 0x12000 - -#define BOOT_GDT_NULL 0 -#define BOOT_GDT_CODE 1 -#define BOOT_GDT_DATA 2 -#define BOOT_GDT_MAX 3 - -#define KVM_32BIT_MAX_MEM_SIZE (1ULL << 32) -#define KVM_32BIT_GAP_SIZE (768 << 20) -#define KVM_32BIT_GAP_START (KVM_32BIT_MAX_MEM_SIZE - KVM_32BIT_GAP_SIZE) - -/// Page offset bits -#define PAGE_BITS 12 -#define PAGE_2M_BITS 21 -#define PAGE_SIZE (1L << PAGE_BITS) -/// Mask the page address without page map flags and XD flag -#if 0 -#define PAGE_MASK ((~0L) << PAGE_BITS) -#define PAGE_2M_MASK (~0L) << PAGE_2M_BITS) -#else -#define PAGE_MASK (((~0UL) << PAGE_BITS) & ~PG_XD) -#define PAGE_2M_MASK (((~0UL) << PAGE_2M_BITS) & ~PG_XD) -#endif - -// Page is present -#define PG_PRESENT (1 << 0) -// Page is read- and writable -#define PG_RW (1 << 1) -// Page is addressable from userspace -#define PG_USER (1 << 2) -// Page write through is activated -#define PG_PWT (1 << 3) -// Page cache is disabled -#define PG_PCD (1 << 4) -// Page was recently accessed (set by CPU) -#define PG_ACCESSED (1 << 5) -// Page is dirty due to recent write-access (set by CPU) -#define PG_DIRTY (1 << 6) -// Huge page: 4MB (or 2MB, 1GB) -#define PG_PSE (1 << 7) -// Page attribute table -#define PG_PAT PG_PSE -#if 1 -/* @brief Global TLB entry (Pentium Pro and later) - * - * HermitCore is a
single-address space operating system - * => CR3 never changed => The flag isn't required for HermitCore - */ -#define PG_GLOBAL 0 -#else -#define PG_GLOBAL (1 << 8) -#endif -// This table is a self-reference and should skipped by page_map_copy() -#define PG_SELF (1 << 9) - -/// Disable execution for this page -#define PG_XD (1L << 63) - -#define BITS 64 -#define PHYS_BITS 52 -#define VIRT_BITS 48 -#define PAGE_MAP_BITS 9 -#define PAGE_LEVELS 4 - -#define kvm_ioctl(fd, cmd, arg) ({ \ - const int ret = ioctl(fd, cmd, arg); \ - if(ret == -1) \ - err(1, "KVM: ioctl " #cmd " failed"); \ - ret; \ - }) - -// Networkports -#define UHYVE_PORT_NETINFO 0x505 -#define UHYVE_PORT_NETWRITE 0x506 -#define UHYVE_PORT_NETREAD 0x507 -#define UHYVE_PORT_NETSTAT 0x508 - -#define UHYVE_IRQ 11 - -#define IOAPIC_DEFAULT_BASE 0xfec00000 -#define APIC_DEFAULT_BASE 0xfee00000 - - static bool restart = false; -static bool cap_tsc_deadline = false; -static bool cap_irqchip = false; -static bool cap_adjust_clock_stable = false; -static bool cap_irqfd = false; -static bool cap_vapic = false; -static bool verbose = false; -static bool full_checkpoint = false; -static uint32_t ncores = 1; -static uint8_t* guest_mem = NULL; -static uint8_t* klog = NULL; -static uint8_t* mboot = NULL; -static size_t guest_size = 0x20000000ULL; -static uint64_t elf_entry; -static pthread_t* vcpu_threads = NULL; static pthread_t net_thread; static int* vcpu_fds = NULL; -static int kvm = -1, vmfd = -1, netfd = -1, efd = -1; -static uint32_t no_checkpoint = 0; static pthread_mutex_t kvm_lock = PTHREAD_MUTEX_INITIALIZER; -static pthread_barrier_t barrier; -static __thread struct kvm_run *run = NULL; -static __thread int vcpufd = -1; -static __thread uint32_t cpuid = 0; + +extern bool verbose; + +size_t guest_size = 0x20000000ULL; +bool full_checkpoint = false; +pthread_barrier_t barrier; +pthread_t* vcpu_threads = NULL; +uint8_t* klog = NULL; +uint8_t* guest_mem = NULL; +uint32_t no_checkpoint = 0; +uint32_t ncores = 1; +uint64_t elf_entry; +int kvm = -1, vmfd = -1, netfd = -1, efd = -1; +uint8_t* mboot = NULL; +__thread struct kvm_run *run = NULL; +__thread int vcpufd = -1; +__thread uint32_t cpuid = 0; static sem_t net_sem; int uhyve_argc = -1; @@ -198,11 +96,6 @@ char **uhyve_argv = NULL; extern char **environ; char **uhyve_envp = NULL; -/* Ports and data structures for uhyve command line arguments and envp - * forwarding */ -#define UHYVE_PORT_CMDSIZE 0x509 -#define UHYVE_PORT_CMDVAL 0x510 - typedef struct { int argc; int argsz[MAX_ARGC_ENVC]; @@ -252,7 +145,7 @@ static uint64_t memparse(const char *ptr) } // Just close file descriptor if not already done -static inline void close_fd(int* fd) +static void close_fd(int* fd) { if (*fd != -1) { close(*fd); @@ -262,6 +155,8 @@ static inline void close_fd(int* fd) static void uhyve_exit(void* arg) { + //print_registers(); + if (pthread_mutex_trylock(&kvm_lock)) { close_fd(&vcpufd); @@ -318,435 +213,12 @@ static void uhyve_atexit(void) close_fd(&kvm); } -static int load_kernel(uint8_t* mem, char* path) -{ - Elf64_Ehdr hdr; - Elf64_Phdr *phdr = NULL; - size_t buflen; - int fd, ret; - int first_load = 1; - - fd = open(path, O_RDONLY); - if (fd == -1) - { - perror("Unable to open file"); - return -1; - } - - ret = pread_in_full(fd, &hdr, sizeof(hdr), 0); - if (ret < 0) - goto out; - - // check if the program is a HermitCore file - if (hdr.e_ident[EI_MAG0] != ELFMAG0 - || hdr.e_ident[EI_MAG1] != ELFMAG1 - || hdr.e_ident[EI_MAG2] != ELFMAG2 - || hdr.e_ident[EI_MAG3] != ELFMAG3 - || 
hdr.e_ident[EI_CLASS] != ELFCLASS64 - || hdr.e_ident[EI_OSABI] != HERMIT_ELFOSABI - || hdr.e_type != ET_EXEC || hdr.e_machine != EM_X86_64) { - fprintf(stderr, "Inavlide HermitCore file!\n"); - goto out; - } - - elf_entry = hdr.e_entry; - - buflen = hdr.e_phentsize * hdr.e_phnum; - phdr = malloc(buflen); - if (!phdr) { - fprintf(stderr, "Not enough memory\n"); - goto out; - } - - ret = pread_in_full(fd, phdr, buflen, hdr.e_phoff); - if (ret < 0) - goto out; - - /* - * Load all segments with type "LOAD" from the file at offset - * p_offset, and copy that into in memory. - */ - for (Elf64_Half ph_i = 0; ph_i < hdr.e_phnum; ph_i++) - { - uint64_t paddr = phdr[ph_i].p_paddr; - size_t offset = phdr[ph_i].p_offset; - size_t filesz = phdr[ph_i].p_filesz; - size_t memsz = phdr[ph_i].p_memsz; - - if (phdr[ph_i].p_type != PT_LOAD) - continue; - - //printf("Kernel location 0x%zx, file size 0x%zx, memory size 0x%zx\n", paddr, filesz, memsz); - - ret = pread_in_full(fd, mem+paddr-GUEST_OFFSET, filesz, offset); - if (ret < 0) - goto out; - if (!klog) - klog = mem+paddr+0x5000-GUEST_OFFSET; - if (!mboot) - mboot = mem+paddr-GUEST_OFFSET; - - if (first_load) { - first_load = 0; - - // initialize kernel - *((uint64_t*) (mem+paddr-GUEST_OFFSET + 0x08)) = paddr; // physical start address - *((uint64_t*) (mem+paddr-GUEST_OFFSET + 0x10)) = guest_size; // physical limit - *((uint32_t*) (mem+paddr-GUEST_OFFSET + 0x18)) = get_cpufreq(); - *((uint32_t*) (mem+paddr-GUEST_OFFSET + 0x24)) = 1; // number of used cpus - *((uint32_t*) (mem+paddr-GUEST_OFFSET + 0x30)) = 0; // apicid - *((uint32_t*) (mem+paddr-GUEST_OFFSET + 0x60)) = 1; // numa nodes - *((uint32_t*) (mem+paddr-GUEST_OFFSET + 0x94)) = 1; // announce uhyve - - - char* str = getenv("HERMIT_IP"); - if (str) { - uint32_t ip[4]; - - sscanf(str, "%u.%u.%u.%u", ip+0, ip+1, ip+2, ip+3); - *((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB0)) = (uint8_t) ip[0]; - *((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB1)) = (uint8_t) ip[1]; - *((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB2)) = (uint8_t) ip[2]; - *((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB3)) = (uint8_t) ip[3]; - } - - str = getenv("HERMIT_GATEWAY"); - if (str) { - uint32_t ip[4]; - - sscanf(str, "%u.%u.%u.%u", ip+0, ip+1, ip+2, ip+3); - *((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB4)) = (uint8_t) ip[0]; - *((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB5)) = (uint8_t) ip[1]; - *((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB6)) = (uint8_t) ip[2]; - *((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB7)) = (uint8_t) ip[3]; - } - str = getenv("HERMIT_MASK"); - if (str) { - uint32_t ip[4]; - - sscanf(str, "%u.%u.%u.%u", ip+0, ip+1, ip+2, ip+3); - *((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB8)) = (uint8_t) ip[0]; - *((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB9)) = (uint8_t) ip[1]; - *((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xBA)) = (uint8_t) ip[2]; - *((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xBB)) = (uint8_t) ip[3]; - } - - *((uint64_t*) (mem+paddr-GUEST_OFFSET + 0xbc)) = guest_mem; - } - *((uint64_t*) (mem+paddr-GUEST_OFFSET + 0x38)) += memsz; // total kernel size - } - -out: - if (phdr) - free(phdr); - - close(fd); - - return 0; -} - -static int load_checkpoint(uint8_t* mem, char* path) -{ - char fname[MAX_FNAME]; - size_t location; - size_t paddr = elf_entry; - int ret; - struct timeval begin, end; - uint32_t i; - - if (verbose) - gettimeofday(&begin, NULL); - - if (!klog) - klog = mem+paddr+0x5000-GUEST_OFFSET; - if (!mboot) - mboot = mem+paddr-GUEST_OFFSET; - - -#ifdef USE_DIRTY_LOG - /* - * if we use KVM's dirty page logging, we have to load - * the elf 
image because most parts are readonly sections - * and aren't able to detect by KVM's dirty page logging - * technique. - */ - ret = load_kernel(mem, path); - if (ret) - return ret; -#endif - - i = full_checkpoint ? no_checkpoint : 0; - for(; i<=no_checkpoint; i++) - { - snprintf(fname, MAX_FNAME, "checkpoint/chk%u_mem.dat", i); - - FILE* f = fopen(fname, "r"); - if (f == NULL) - return -1; - - /*struct kvm_irqchip irqchip; - if (fread(&irqchip, sizeof(irqchip), 1, f) != 1) - err(1, "fread failed"); - if (cap_irqchip && (i == no_checkpoint-1)) - kvm_ioctl(vmfd, KVM_SET_IRQCHIP, &irqchip);*/ - - struct kvm_clock_data clock; - if (fread(&clock, sizeof(clock), 1, f) != 1) - err(1, "fread failed"); - // only the last checkpoint has to set the clock - if (cap_adjust_clock_stable && (i == no_checkpoint)) { - struct kvm_clock_data data = {}; - - data.clock = clock.clock; - kvm_ioctl(vmfd, KVM_SET_CLOCK, &data); - } - -#if 0 - if (fread(guest_mem, guest_size, 1, f) != 1) - err(1, "fread failed"); -#else - - while (fread(&location, sizeof(location), 1, f) == 1) { - //printf("location 0x%zx\n", location); - if (location & PG_PSE) - ret = fread((size_t*) (mem + (location & PAGE_2M_MASK)), (1UL << PAGE_2M_BITS), 1, f); - else - ret = fread((size_t*) (mem + (location & PAGE_MASK)), (1UL << PAGE_BITS), 1, f); - - if (ret != 1) { - fprintf(stderr, "Unable to read checkpoint: ret = %d", ret); - err(1, "fread failed"); - } - } -#endif - - fclose(f); - } - - if (verbose) { - gettimeofday(&end, NULL); - size_t msec = (end.tv_sec - begin.tv_sec) * 1000; - msec += (end.tv_usec - begin.tv_usec) / 1000; - fprintf(stderr, "Load checkpoint %u in %zd ms\n", no_checkpoint, msec); - } - - return 0; -} - -static inline void show_dtable(const char *name, struct kvm_dtable *dtable) -{ - fprintf(stderr, " %s %016zx %08hx\n", name, (size_t) dtable->base, (uint16_t) dtable->limit); -} - -static inline void show_segment(const char *name, struct kvm_segment *seg) -{ - fprintf(stderr, " %s %04hx %016zx %08x %02hhx %x %x %x %x %x %x %x\n", - name, (uint16_t) seg->selector, (size_t) seg->base, (uint32_t) seg->limit, - (uint8_t) seg->type, seg->present, seg->dpl, seg->db, seg->s, seg->l, seg->g, seg->avl); -} - -static void show_registers(int id, struct kvm_regs* regs, struct kvm_sregs* sregs) -{ - size_t cr0, cr2, cr3; - size_t cr4, cr8; - size_t rax, rbx, rcx; - size_t rdx, rsi, rdi; - size_t rbp, r8, r9; - size_t r10, r11, r12; - size_t r13, r14, r15; - size_t rip, rsp; - size_t rflags; - int i; - - rflags = regs->rflags; - rip = regs->rip; rsp = regs->rsp; - rax = regs->rax; rbx = regs->rbx; rcx = regs->rcx; - rdx = regs->rdx; rsi = regs->rsi; rdi = regs->rdi; - rbp = regs->rbp; r8 = regs->r8; r9 = regs->r9; - r10 = regs->r10; r11 = regs->r11; r12 = regs->r12; - r13 = regs->r13; r14 = regs->r14; r15 = regs->r15; - - fprintf(stderr, "\n Dump state of CPU %d\n", id); - fprintf(stderr, "\n Registers:\n"); - fprintf(stderr, " ----------\n"); - fprintf(stderr, " rip: %016zx rsp: %016zx flags: %016zx\n", rip, rsp, rflags); - fprintf(stderr, " rax: %016zx rbx: %016zx rcx: %016zx\n", rax, rbx, rcx); - fprintf(stderr, " rdx: %016zx rsi: %016zx rdi: %016zx\n", rdx, rsi, rdi); - fprintf(stderr, " rbp: %016zx r8: %016zx r9: %016zx\n", rbp, r8, r9); - fprintf(stderr, " r10: %016zx r11: %016zx r12: %016zx\n", r10, r11, r12); - fprintf(stderr, " r13: %016zx r14: %016zx r15: %016zx\n", r13, r14, r15); - - cr0 = sregs->cr0; cr2 = sregs->cr2; cr3 = sregs->cr3; - cr4 = sregs->cr4; cr8 = sregs->cr8; - - fprintf(stderr, " cr0: %016zx cr2: %016zx 
cr3: %016zx\n", cr0, cr2, cr3); - fprintf(stderr, " cr4: %016zx cr8: %016zx\n", cr4, cr8); - fprintf(stderr, "\n Segment registers:\n"); - fprintf(stderr, " ------------------\n"); - fprintf(stderr, " register selector base limit type p dpl db s l g avl\n"); - show_segment("cs ", &sregs->cs); - show_segment("ss ", &sregs->ss); - show_segment("ds ", &sregs->ds); - show_segment("es ", &sregs->es); - show_segment("fs ", &sregs->fs); - show_segment("gs ", &sregs->gs); - show_segment("tr ", &sregs->tr); - show_segment("ldt", &sregs->ldt); - show_dtable("gdt", &sregs->gdt); - show_dtable("idt", &sregs->idt); - - fprintf(stderr, "\n APIC:\n"); - fprintf(stderr, " -----\n"); - fprintf(stderr, " efer: %016zx apic base: %016zx\n", - (size_t) sregs->efer, (size_t) sregs->apic_base); - - fprintf(stderr, "\n Interrupt bitmap:\n"); - fprintf(stderr, " -----------------\n"); - for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) - fprintf(stderr, " %016zx", (size_t) sregs->interrupt_bitmap[i]); - fprintf(stderr, "\n"); -} - -static void print_registers(void) -{ - struct kvm_regs regs; - struct kvm_sregs sregs; - - kvm_ioctl(vcpufd, KVM_GET_SREGS, &sregs); - kvm_ioctl(vcpufd, KVM_GET_REGS, ®s); - - show_registers(cpuid, ®s, &sregs); -} - -/// Filter CPUID functions that are not supported by the hypervisor and enable -/// features according to our needs. -static void filter_cpuid(struct kvm_cpuid2 *kvm_cpuid) -{ - for (uint32_t i = 0; i < kvm_cpuid->nent; i++) { - struct kvm_cpuid_entry2 *entry = &kvm_cpuid->entries[i]; - - switch (entry->function) { - case 1: - // CPUID to define basic cpu features - entry->ecx |= (1U << 31); // propagate that we are running on a hypervisor - if (cap_tsc_deadline) - entry->ecx |= (1U << 24); // enable TSC deadline feature - entry->edx |= (1U << 5); // enable msr support - break; - - case CPUID_FUNC_PERFMON: - // disable it - entry->eax = 0x00; - break; - - default: - // Keep the CPUID function as-is - break; - }; - } -} - -static void setup_system_64bit(struct kvm_sregs *sregs) -{ - sregs->cr0 |= X86_CR0_PE; - sregs->efer |= EFER_LME; -} - -static void setup_system_page_tables(struct kvm_sregs *sregs, uint8_t *mem) -{ - uint64_t *pml4 = (uint64_t *) (mem + BOOT_PML4); - uint64_t *pdpte = (uint64_t *) (mem + BOOT_PDPTE); - uint64_t *pde = (uint64_t *) (mem + BOOT_PDE); - uint64_t paddr; - - /* - * For simplicity we currently use 2MB pages and only a single - * PML4/PDPTE/PDE. 
- */ - - memset(pml4, 0x00, 4096); - memset(pdpte, 0x00, 4096); - memset(pde, 0x00, 4096); - - *pml4 = BOOT_PDPTE | (X86_PDPT_P | X86_PDPT_RW); - *pdpte = BOOT_PDE | (X86_PDPT_P | X86_PDPT_RW); - for (paddr = 0; paddr < 0x20000000ULL; paddr += GUEST_PAGE_SIZE, pde++) - *pde = paddr | (X86_PDPT_P | X86_PDPT_RW | X86_PDPT_PS); - - sregs->cr3 = BOOT_PML4; - sregs->cr4 |= X86_CR4_PAE; - sregs->cr0 |= X86_CR0_PG; -} - -static void setup_system_gdt(struct kvm_sregs *sregs, - uint8_t *mem, - uint64_t off) -{ - uint64_t *gdt = (uint64_t *) (mem + off); - struct kvm_segment data_seg, code_seg; - - /* flags, base, limit */ - gdt[BOOT_GDT_NULL] = GDT_ENTRY(0, 0, 0); - gdt[BOOT_GDT_CODE] = GDT_ENTRY(0xA09B, 0, 0xFFFFF); - gdt[BOOT_GDT_DATA] = GDT_ENTRY(0xC093, 0, 0xFFFFF); - - sregs->gdt.base = off; - sregs->gdt.limit = (sizeof(uint64_t) * BOOT_GDT_MAX) - 1; - - GDT_TO_KVM_SEGMENT(code_seg, gdt, BOOT_GDT_CODE); - GDT_TO_KVM_SEGMENT(data_seg, gdt, BOOT_GDT_DATA); - - sregs->cs = code_seg; - sregs->ds = data_seg; - sregs->es = data_seg; - sregs->fs = data_seg; - sregs->gs = data_seg; - sregs->ss = data_seg; -} - -static void setup_system(int vcpufd, uint8_t *mem, uint32_t id) -{ - static struct kvm_sregs sregs; - - // all cores use the same startup code - // => all cores use the same sregs - // => only the boot processor has to initialize sregs - if (id == 0) { - kvm_ioctl(vcpufd, KVM_GET_SREGS, &sregs); - - /* Set all cpu/mem system structures */ - setup_system_gdt(&sregs, mem, BOOT_GDT); - setup_system_page_tables(&sregs, mem); - setup_system_64bit(&sregs); - } - - kvm_ioctl(vcpufd, KVM_SET_SREGS, &sregs); -} - -static void setup_cpuid(int kvm, int vcpufd) -{ - struct kvm_cpuid2 *kvm_cpuid; - unsigned int max_entries = 100; - - // allocate space for cpuid we get from KVM - kvm_cpuid = calloc(1, sizeof(*kvm_cpuid) + (max_entries * sizeof(kvm_cpuid->entries[0]))); - kvm_cpuid->nent = max_entries; - - kvm_ioctl(kvm, KVM_GET_SUPPORTED_CPUID, kvm_cpuid); - - // set features - filter_cpuid(kvm_cpuid); - kvm_ioctl(vcpufd, KVM_SET_CPUID2, kvm_cpuid); - - free(kvm_cpuid); -} - static void* wait_for_packet(void* arg) { int ret; struct pollfd fds = { .fd = netfd, - .events = POLLIN, - .revents = 0}; + .events = POLLIN, + .revents = 0}; while(1) { @@ -808,7 +280,11 @@ static int vcpu_loop(void) case EFAULT: { struct kvm_regs regs; kvm_ioctl(vcpufd, KVM_GET_REGS, ®s); +#ifdef __x86_64__ err(1, "KVM: host/guest translation fault: rip=0x%llx", regs.rip); +#else + err(1, "KVM: host/guest translation fault: elr_el1=0x%llx", regs.elr_el1); +#endif } default: @@ -817,6 +293,9 @@ static int vcpu_loop(void) } } + uint64_t port = 0; + unsigned raddr = 0; + /* handle requests */ switch (run->exit_reason) { case KVM_EXIT_HLT: @@ -824,49 +303,57 @@ static int vcpu_loop(void) return 0; case KVM_EXIT_MMIO: - err(1, "KVM: unhandled KVM_EXIT_MMIO at 0x%llx\n", run->mmio.phys_addr); - break; + port = run->mmio.phys_addr; + if (run->mmio.is_write) + memcpy(&raddr, run->mmio.data, sizeof(raddr) /*run->mmio.len*/); + //printf("KVM: handled KVM_EXIT_MMIO at 0x%lx (data %u)\n", port, raddr); case KVM_EXIT_IO: + if (!port) { + port = run->io.port; + raddr = *((unsigned*)((size_t)run+run->io.data_offset)); + } + //printf("port 0x%x\n", run->io.port); - switch (run->io.port) { + switch (port) { case UHYVE_PORT_WRITE: { - unsigned data = *((unsigned*)((size_t)run+run->io.data_offset)); - uhyve_write_t* uhyve_write = (uhyve_write_t*) (guest_mem+data); + uhyve_write_t* uhyve_write = (uhyve_write_t*) (guest_mem+raddr); uhyve_write->len = 
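/* raddr is the guest-physical address of the parameter block that the guest
 * passed through the I/O port (or MMIO) exit; adding guest_mem turns it into
 * a host-virtual pointer. The buffer pointer stored inside the block is a
 * guest-physical address as well and is translated the same way below. */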
write(uhyve_write->fd, guest_mem+(size_t)uhyve_write->buf, uhyve_write->len); break; } case UHYVE_PORT_READ: { - unsigned data = *((unsigned*)((size_t)run+run->io.data_offset)); - uhyve_read_t* uhyve_read = (uhyve_read_t*) (guest_mem+data); + uhyve_read_t* uhyve_read = (uhyve_read_t*) (guest_mem+raddr); uhyve_read->ret = read(uhyve_read->fd, guest_mem+(size_t)uhyve_read->buf, uhyve_read->len); break; } case UHYVE_PORT_EXIT: { - unsigned data = *((unsigned*)((size_t)run+run->io.data_offset)); - if (cpuid) - pthread_exit((int*)(guest_mem+data)); + pthread_exit((int*)(guest_mem+raddr)); else - exit(*(int*)(guest_mem+data)); + exit(*(int*)(guest_mem+raddr)); break; } case UHYVE_PORT_OPEN: { - unsigned data = *((unsigned*)((size_t)run+run->io.data_offset)); - uhyve_open_t* uhyve_open = (uhyve_open_t*) (guest_mem+data); + uhyve_open_t* uhyve_open = (uhyve_open_t*) (guest_mem+raddr); + char rpath[PATH_MAX]; - uhyve_open->ret = open((const char*)guest_mem+(size_t)uhyve_open->name, uhyve_open->flags, uhyve_open->mode); + // forbid to open the kvm device + if (realpath((const char*)guest_mem+(size_t)uhyve_open->name, rpath) < 0) + uhyve_open->ret = -1; + else if (strcmp(rpath, "/dev/kvm") == 0) + uhyve_open->ret = -1; + else + uhyve_open->ret = open((const char*)guest_mem+(size_t)uhyve_open->name, uhyve_open->flags, uhyve_open->mode); break; } case UHYVE_PORT_CLOSE: { - unsigned data = *((unsigned*)((size_t)run+run->io.data_offset)); - uhyve_close_t* uhyve_close = (uhyve_close_t*) (guest_mem+data); + uhyve_close_t* uhyve_close = (uhyve_close_t*) (guest_mem+raddr); if (uhyve_close->fd > 2) uhyve_close->ret = close(uhyve_close->fd); @@ -876,8 +363,7 @@ static int vcpu_loop(void) } case UHYVE_PORT_NETINFO: { - unsigned data = *((unsigned*)((size_t)run+run->io.data_offset)); - uhyve_netinfo_t* uhyve_netinfo = (uhyve_netinfo_t*)(guest_mem+data); + uhyve_netinfo_t* uhyve_netinfo = (uhyve_netinfo_t*)(guest_mem+raddr); memcpy(uhyve_netinfo->mac_str, uhyve_get_mac(), 18); // guest configure the ethernet device => start network thread check_network(); @@ -885,8 +371,7 @@ static int vcpu_loop(void) } case UHYVE_PORT_NETWRITE: { - unsigned data = *((unsigned*)((size_t)run+run->io.data_offset)); - uhyve_netwrite_t* uhyve_netwrite = (uhyve_netwrite_t*)(guest_mem + data); + uhyve_netwrite_t* uhyve_netwrite = (uhyve_netwrite_t*)(guest_mem + raddr); uhyve_netwrite->ret = 0; ret = write(netfd, guest_mem + (size_t)uhyve_netwrite->data, uhyve_netwrite->len); if (ret >= 0) { @@ -899,8 +384,7 @@ static int vcpu_loop(void) } case UHYVE_PORT_NETREAD: { - unsigned data = *((unsigned*)((size_t)run+run->io.data_offset)); - uhyve_netread_t* uhyve_netread = (uhyve_netread_t*)(guest_mem + data); + uhyve_netread_t* uhyve_netread = (uhyve_netread_t*)(guest_mem + raddr); ret = read(netfd, guest_mem + (size_t)uhyve_netread->data, uhyve_netread->len); if (ret > 0) { uhyve_netread->len = ret; @@ -913,8 +397,7 @@ static int vcpu_loop(void) } case UHYVE_PORT_NETSTAT: { - unsigned status = *((unsigned*)((size_t)run+run->io.data_offset)); - uhyve_netstat_t* uhyve_netstat = (uhyve_netstat_t*)(guest_mem + status); + uhyve_netstat_t* uhyve_netstat = (uhyve_netstat_t*)(guest_mem + raddr); char* str = getenv("HERMIT_NETIF"); if (str) uhyve_netstat->status = 1; @@ -924,8 +407,7 @@ static int vcpu_loop(void) } case UHYVE_PORT_LSEEK: { - unsigned data = *((unsigned*)((size_t)run+run->io.data_offset)); - uhyve_lseek_t* uhyve_lseek = (uhyve_lseek_t*) (guest_mem+data); + uhyve_lseek_t* uhyve_lseek = (uhyve_lseek_t*) (guest_mem+raddr); 
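/* The offset field is used in both directions: it carries the guest's
 * requested offset in, and the file offset returned by lseek(2) back out. */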
uhyve_lseek->offset = lseek(uhyve_lseek->fd, uhyve_lseek->offset, uhyve_lseek->whence); break; @@ -933,8 +415,7 @@ static int vcpu_loop(void) case UHYVE_PORT_CMDSIZE: { int i; - unsigned data = *((unsigned*)((size_t)run+run->io.data_offset)); - uhyve_cmdsize_t *val = (uhyve_cmdsize_t *) (guest_mem+data); + uhyve_cmdsize_t *val = (uhyve_cmdsize_t *) (guest_mem+raddr); val->argc = uhyve_argc; for(i=0; iio.data_offset)); - uhyve_cmdval_t *val = (uhyve_cmdval_t *) (guest_mem+data); + uhyve_cmdval_t *val = (uhyve_cmdval_t *) (guest_mem+raddr); /* argv */ argv_ptr = (char **)(guest_mem + (size_t)val->argv); @@ -967,7 +447,7 @@ static int vcpu_loop(void) } default: - err(1, "KVM: unhandled KVM_EXIT_IO at port 0x%x, direction %d\n", run->io.port, run->io.direction); + err(1, "KVM: unhandled KVM_EXIT_IO / KVM_EXIT_MMIO at port 0x%lx\n", port); break; } break; @@ -1003,12 +483,6 @@ static int vcpu_loop(void) static int vcpu_init(void) { - struct kvm_mp_state mp_state = { KVM_MP_STATE_RUNNABLE }; - struct kvm_regs regs = { - .rip = elf_entry, // entry point to HermitCore - .rflags = 0x2, // POR value required by x86 architecture - }; - vcpu_fds[cpuid] = vcpufd = kvm_ioctl(vmfd, KVM_CREATE_VCPU, cpuid); /* Map the shared kvm_run structure and following data. */ @@ -1021,163 +495,15 @@ static int vcpu_init(void) if (run == MAP_FAILED) err(1, "KVM: VCPU mmap failed"); - run->apic_base = APIC_DEFAULT_BASE; - setup_cpuid(kvm, vcpufd); - if (restart) { - char fname[MAX_FNAME]; - struct kvm_sregs sregs; - struct kvm_fpu fpu; - struct { - struct kvm_msrs info; - struct kvm_msr_entry entries[MAX_MSR_ENTRIES]; - } msr_data; - struct kvm_lapic_state lapic; - struct kvm_xsave xsave; - struct kvm_xcrs xcrs; - struct kvm_vcpu_events events; - - snprintf(fname, MAX_FNAME, "checkpoint/chk%u_core%u.dat", no_checkpoint, cpuid); - - FILE* f = fopen(fname, "r"); - if (f == NULL) - err(1, "fopen: unable to open file"); - - if (fread(&sregs, sizeof(sregs), 1, f) != 1) - err(1, "fread failed\n"); - if (fread(®s, sizeof(regs), 1, f) != 1) - err(1, "fread failed\n"); - if (fread(&fpu, sizeof(fpu), 1, f) != 1) - err(1, "fread failed\n"); - if (fread(&msr_data, sizeof(msr_data), 1, f) != 1) - err(1, "fread failed\n"); - if (fread(&lapic, sizeof(lapic), 1, f) != 1) - err(1, "fread failed\n"); - if (fread(&xsave, sizeof(xsave), 1, f) != 1) - err(1, "fread failed\n"); - if (fread(&xcrs, sizeof(xcrs), 1, f) != 1) - err(1, "fread failed\n"); - if (fread(&events, sizeof(events), 1, f) != 1) - err(1, "fread failed\n"); - if (fread(&mp_state, sizeof(mp_state), 1, f) != 1) - err(1, "fread failed\n"); - - fclose(f); - - kvm_ioctl(vcpufd, KVM_SET_SREGS, &sregs); - kvm_ioctl(vcpufd, KVM_SET_REGS, ®s); - kvm_ioctl(vcpufd, KVM_SET_MSRS, &msr_data); - kvm_ioctl(vcpufd, KVM_SET_XCRS, &xcrs); - kvm_ioctl(vcpufd, KVM_SET_MP_STATE, &mp_state); - kvm_ioctl(vcpufd, KVM_SET_LAPIC, &lapic); - kvm_ioctl(vcpufd, KVM_SET_FPU, &fpu); - kvm_ioctl(vcpufd, KVM_SET_XSAVE, &xsave); - kvm_ioctl(vcpufd, KVM_SET_VCPU_EVENTS, &events); + restore_cpu_state(); } else { - struct { - struct kvm_msrs info; - struct kvm_msr_entry entries[MAX_MSR_ENTRIES]; - } msr_data; - struct kvm_msr_entry *msrs = msr_data.entries; - - // be sure that the multiprocessor is runable - kvm_ioctl(vcpufd, KVM_SET_MP_STATE, &mp_state); - - // enable fast string operations - msrs[0].index = MSR_IA32_MISC_ENABLE; - msrs[0].data = 1; - msr_data.info.nmsrs = 1; - kvm_ioctl(vcpufd, KVM_SET_MSRS, &msr_data); - - /* Setup registers and memory. 
*/ - setup_system(vcpufd, guest_mem, cpuid); - kvm_ioctl(vcpufd, KVM_SET_REGS, ®s); - - // only one core is able to enter startup code - // => the wait for the predecessor core - while (*((volatile uint32_t*) (mboot + 0x20)) < cpuid) - pthread_yield(); - *((volatile uint32_t*) (mboot + 0x30)) = cpuid; + init_cpu_state(elf_entry); } return 0; } -static void save_cpu_state(void) -{ - struct { - struct kvm_msrs info; - struct kvm_msr_entry entries[MAX_MSR_ENTRIES]; - } msr_data; - struct kvm_msr_entry *msrs = msr_data.entries; - struct kvm_regs regs; - struct kvm_sregs sregs; - struct kvm_fpu fpu; - struct kvm_lapic_state lapic; - struct kvm_xsave xsave; - struct kvm_xcrs xcrs; - struct kvm_vcpu_events events; - struct kvm_mp_state mp_state; - char fname[MAX_FNAME]; - int n = 0; - - /* define the list of required MSRs */ - msrs[n++].index = MSR_IA32_APICBASE; - msrs[n++].index = MSR_IA32_SYSENTER_CS; - msrs[n++].index = MSR_IA32_SYSENTER_ESP; - msrs[n++].index = MSR_IA32_SYSENTER_EIP; - msrs[n++].index = MSR_IA32_CR_PAT; - msrs[n++].index = MSR_IA32_MISC_ENABLE; - msrs[n++].index = MSR_IA32_TSC; - msrs[n++].index = MSR_CSTAR; - msrs[n++].index = MSR_STAR; - msrs[n++].index = MSR_EFER; - msrs[n++].index = MSR_LSTAR; - msrs[n++].index = MSR_GS_BASE; - msrs[n++].index = MSR_FS_BASE; - msrs[n++].index = MSR_KERNEL_GS_BASE; - //msrs[n++].index = MSR_IA32_FEATURE_CONTROL; - msr_data.info.nmsrs = n; - - kvm_ioctl(vcpufd, KVM_GET_SREGS, &sregs); - kvm_ioctl(vcpufd, KVM_GET_REGS, ®s); - kvm_ioctl(vcpufd, KVM_GET_MSRS, &msr_data); - kvm_ioctl(vcpufd, KVM_GET_XCRS, &xcrs); - kvm_ioctl(vcpufd, KVM_GET_LAPIC, &lapic); - kvm_ioctl(vcpufd, KVM_GET_FPU, &fpu); - kvm_ioctl(vcpufd, KVM_GET_XSAVE, &xsave); - kvm_ioctl(vcpufd, KVM_GET_VCPU_EVENTS, &events); - kvm_ioctl(vcpufd, KVM_GET_MP_STATE, &mp_state); - - snprintf(fname, MAX_FNAME, "checkpoint/chk%u_core%u.dat", no_checkpoint, cpuid); - - FILE* f = fopen(fname, "w"); - if (f == NULL) { - err(1, "fopen: unable to open file\n"); - } - - if (fwrite(&sregs, sizeof(sregs), 1, f) != 1) - err(1, "fwrite failed\n"); - if (fwrite(®s, sizeof(regs), 1, f) != 1) - err(1, "fwrite failed\n"); - if (fwrite(&fpu, sizeof(fpu), 1, f) != 1) - err(1, "fwrite failed\n"); - if (fwrite(&msr_data, sizeof(msr_data), 1, f) != 1) - err(1, "fwrite failed\n"); - if (fwrite(&lapic, sizeof(lapic), 1, f) != 1) - err(1, "fwrite failed\n"); - if (fwrite(&xsave, sizeof(xsave), 1, f) != 1) - err(1, "fwrite failed\n"); - if (fwrite(&xcrs, sizeof(xcrs), 1, f) != 1) - err(1, "fwrite failed\n"); - if (fwrite(&events, sizeof(events), 1, f) != 1) - err(1, "fwrite failed\n"); - if (fwrite(&mp_state, sizeof(mp_state), 1, f) != 1) - err(1, "fwrite failed\n"); - - fclose(f); -} - static void sigusr_handler(int signum) { pthread_barrier_wait(&barrier); @@ -1219,10 +545,6 @@ void sigterm_handler(int signum) int uhyve_init(char *path) { - char* v = getenv("HERMIT_VERBOSE"); - if (v && (strcmp(v, "0") != 0)) - verbose = true; - signal(SIGTERM, sigterm_handler); // register routine to close the VM @@ -1277,115 +599,7 @@ int uhyve_init(char *path) /* Create the virtual machine */ vmfd = kvm_ioctl(kvm, KVM_CREATE_VM, 0); - uint64_t identity_base = 0xfffbc000; - if (ioctl(vmfd, KVM_CHECK_EXTENSION, KVM_CAP_SYNC_MMU) > 0) { - /* Allows up to 16M BIOSes. */ - identity_base = 0xfeffc000; - - kvm_ioctl(vmfd, KVM_SET_IDENTITY_MAP_ADDR, &identity_base); - } - kvm_ioctl(vmfd, KVM_SET_TSS_ADDR, identity_base + 0x1000); - - /* - * Allocate page-aligned guest memory. 
- * - * TODO: support of huge pages - */ - if (guest_size < KVM_32BIT_GAP_START) { - guest_mem = mmap(NULL, guest_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (guest_mem == MAP_FAILED) - err(1, "mmap failed"); - } else { - guest_size += KVM_32BIT_GAP_SIZE; - guest_mem = mmap(NULL, guest_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (guest_mem == MAP_FAILED) - err(1, "mmap failed"); - - /* - * We mprotect the gap PROT_NONE so that if we accidently write to it, we will know. - */ - mprotect(guest_mem + KVM_32BIT_GAP_START, KVM_32BIT_GAP_SIZE, PROT_NONE); - } - - const char* merge = getenv("HERMIT_MERGEABLE"); - if (merge && (strcmp(merge, "0") != 0)) { - /* - * The KSM feature is intended for applications that generate - * many instances of the same data (e.g., virtualization systems - * such as KVM). It can consume a lot of processing power! - */ - madvise(guest_mem, guest_size, MADV_MERGEABLE); - if (verbose) - fprintf(stderr, "VM uses KSN feature \"mergeable\" to reduce the memory footprint.\n"); - } - - struct kvm_userspace_memory_region kvm_region = { - .slot = 0, - .guest_phys_addr = GUEST_OFFSET, - .memory_size = guest_size, - .userspace_addr = (uint64_t) guest_mem, -#ifdef USE_DIRTY_LOG - .flags = KVM_MEM_LOG_DIRTY_PAGES, -#else - .flags = 0, -#endif - }; - - if (guest_size <= KVM_32BIT_GAP_START - GUEST_OFFSET) { - kvm_ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &kvm_region); - } else { - kvm_region.memory_size = KVM_32BIT_GAP_START - GUEST_OFFSET; - kvm_ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &kvm_region); - - kvm_region.slot = 1; - kvm_region.guest_phys_addr = KVM_32BIT_GAP_START+KVM_32BIT_GAP_SIZE; - kvm_region.memory_size = guest_size - KVM_32BIT_GAP_SIZE - KVM_32BIT_GAP_START + GUEST_OFFSET; - kvm_ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &kvm_region); - } - - kvm_ioctl(vmfd, KVM_CREATE_IRQCHIP, NULL); - -#ifdef KVM_CAP_X2APIC_API - // enable x2APIC support - struct kvm_enable_cap cap = { - .cap = KVM_CAP_X2APIC_API, - .flags = 0, - .args[0] = KVM_X2APIC_API_USE_32BIT_IDS|KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK, - }; - kvm_ioctl(vmfd, KVM_ENABLE_CAP, &cap); -#endif - - // initialited IOAPIC with HermitCore's default settings - struct kvm_irqchip chip; - chip.chip_id = KVM_IRQCHIP_IOAPIC; - kvm_ioctl(vmfd, KVM_GET_IRQCHIP, &chip); - for(int i=0; i 0)) ? 
PG_DIRTY : PG_ACCESSED; - char fname[MAX_FNAME]; - struct timeval begin, end; - - if (verbose) - gettimeofday(&begin, NULL); - - if (stat("checkpoint", &st) == -1) - mkdir("checkpoint", 0700); - - for(size_t i = 0; i < ncores; i++) - if (vcpu_threads[i] != pthread_self()) - pthread_kill(vcpu_threads[i], SIGRTMIN); - - pthread_barrier_wait(&barrier); - - save_cpu_state(); - - snprintf(fname, MAX_FNAME, "checkpoint/chk%u_mem.dat", no_checkpoint); - - FILE* f = fopen(fname, "w"); - if (f == NULL) { - err(1, "fopen: unable to open file"); - } - - /*struct kvm_irqchip irqchip = {}; - if (cap_irqchip) - kvm_ioctl(vmfd, KVM_GET_IRQCHIP, &irqchip); - else - memset(&irqchip, 0x00, sizeof(irqchip)); - if (fwrite(&irqchip, sizeof(irqchip), 1, f) != 1) - err(1, "fwrite failed");*/ - - struct kvm_clock_data clock = {}; - kvm_ioctl(vmfd, KVM_GET_CLOCK, &clock); - if (fwrite(&clock, sizeof(clock), 1, f) != 1) - err(1, "fwrite failed"); - -#if 0 - if (fwrite(guest_mem, guest_size, 1, f) != 1) - err(1, "fwrite failed"); -#elif defined(USE_DIRTY_LOG) - static struct kvm_dirty_log dlog = { - .slot = 0, - .dirty_bitmap = NULL - }; - size_t dirty_log_size = (guest_size >> PAGE_BITS) / sizeof(size_t); - - // do we create our first checkpoint - if (dlog.dirty_bitmap == NULL) - { - // besure that all paddings are zero - memset(&dlog, 0x00, sizeof(dlog)); - - dlog.dirty_bitmap = malloc(dirty_log_size * sizeof(size_t)); - if (dlog.dirty_bitmap == NULL) - err(1, "malloc failed!\n"); - } - memset(dlog.dirty_bitmap, 0x00, dirty_log_size * sizeof(size_t)); - - dlog.slot = 0; -nextslot: - kvm_ioctl(vmfd, KVM_GET_DIRTY_LOG, &dlog); - - for(size_t i=0; i KVM_32BIT_GAP_START - GUEST_OFFSET)) { - dlog.slot = 1; - memset(dlog.dirty_bitmap, 0x00, dirty_log_size * sizeof(size_t)); - goto nextslot; - } -#else - size_t* pml4 = (size_t*) (guest_mem+elf_entry+PAGE_SIZE); - for(size_t i=0; i<(1 << PAGE_MAP_BITS); i++) { - if ((pml4[i] & PG_PRESENT) != PG_PRESENT) - continue; - //printf("pml[%zd] 0x%zx\n", i, pml4[i]); - size_t* pdpt = (size_t*) (guest_mem+(pml4[i] & PAGE_MASK)); - for(size_t j=0; j<(1 << PAGE_MAP_BITS); j++) { - if ((pdpt[j] & PG_PRESENT) != PG_PRESENT) - continue; - //printf("\tpdpt[%zd] 0x%zx\n", j, pdpt[j]); - size_t* pgd = (size_t*) (guest_mem+(pdpt[j] & PAGE_MASK)); - for(size_t k=0; k<(1 << PAGE_MAP_BITS); k++) { - if ((pgd[k] & PG_PRESENT) != PG_PRESENT) - continue; - //printf("\t\tpgd[%zd] 0x%zx\n", k, pgd[k] & ~PG_XD); - if ((pgd[k] & PG_PSE) != PG_PSE) { - size_t* pgt = (size_t*) (guest_mem+(pgd[k] & PAGE_MASK)); - for(size_t l=0; l<(1 << PAGE_MAP_BITS); l++) { - if ((pgt[l] & (PG_PRESENT|flag)) == (PG_PRESENT|flag)) { - //printf("\t\t\t*pgt[%zd] 0x%zx, 4KB\n", l, pgt[l] & ~PG_XD); - if (!full_checkpoint) - pgt[l] = pgt[l] & ~(PG_DIRTY|PG_ACCESSED); - size_t pgt_entry = pgt[l] & ~PG_PSE; // because PAT use the same bit as PSE - if (fwrite(&pgt_entry, sizeof(size_t), 1, f) != 1) - err(1, "fwrite failed"); - if (fwrite((size_t*) (guest_mem + (pgt[l] & PAGE_MASK)), (1UL << PAGE_BITS), 1, f) != 1) - err(1, "fwrite failed"); - } - } - } else if ((pgd[k] & flag) == flag) { - //printf("\t\t*pgd[%zd] 0x%zx, 2MB\n", k, pgd[k] & ~PG_XD); - if (!full_checkpoint) - pgd[k] = pgd[k] & ~(PG_DIRTY|PG_ACCESSED); - if (fwrite(pgd+k, sizeof(size_t), 1, f) != 1) - err(1, "fwrite failed"); - if (fwrite((size_t*) (guest_mem + (pgd[k] & PAGE_2M_MASK)), (1UL << PAGE_2M_BITS), 1, f) != 1) - err(1, "fwrite failed"); - } - } - } - } -#endif - - fclose(f); - - pthread_barrier_wait(&barrier); - - // update configuration file - f = 
fopen("checkpoint/chk_config.txt", "w"); - if (f == NULL) { - err(1, "fopen: unable to open file"); - } - - fprintf(f, "number of cores: %u\n", ncores); - fprintf(f, "memory size: 0x%zx\n", guest_size); - fprintf(f, "checkpoint number: %u\n", no_checkpoint); - fprintf(f, "entry point: 0x%zx", elf_entry); - if (full_checkpoint) - fprintf(f, "full checkpoint: 1"); - else - fprintf(f, "full checkpoint: 0"); - - fclose(f); - - if (verbose) { - gettimeofday(&end, NULL); - size_t msec = (end.tv_sec - begin.tv_sec) * 1000; - msec += (end.tv_usec - begin.tv_usec) / 1000; - fprintf(stderr, "Create checkpoint %u in %zd ms\n", no_checkpoint, msec); - } - - no_checkpoint++; -} - int uhyve_loop(int argc, char **argv) { const char* hermit_check = getenv("HERMIT_CHECKPOINT"); diff --git a/tools/uhyve.h b/tools/uhyve.h new file mode 100644 index 000000000..126a9a427 --- /dev/null +++ b/tools/uhyve.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2018, Stefan Lankes, RWTH Aachen University + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __UHYVE_H__ +#define __UHYVE_H__ + +#include <stdint.h> + +#define UHYVE_PORT_WRITE 0x400 +#define UHYVE_PORT_OPEN 0x440 +#define UHYVE_PORT_CLOSE 0x480 +#define UHYVE_PORT_READ 0x500 +#define UHYVE_PORT_EXIT 0x540 +#define UHYVE_PORT_LSEEK 0x580 + +// Network ports +#define UHYVE_PORT_NETINFO 0x600 +#define UHYVE_PORT_NETWRITE 0x640 +#define UHYVE_PORT_NETREAD 0x680 +#define UHYVE_PORT_NETSTAT 0x700 + +/* Ports and data structures for uhyve command line arguments and envp + * forwarding */ +#define UHYVE_PORT_CMDSIZE 0x740 +#define UHYVE_PORT_CMDVAL 0x780 + +#define UHYVE_IRQ 11 + +#define kvm_ioctl(fd, cmd, arg) ({ \ + const int ret = ioctl(fd, cmd, arg); \ + if(ret == -1) \ + err(1, "KVM: ioctl " #cmd " failed"); \ + ret; \ + }) + +void print_registers(void); +void timer_handler(int signum); +void restore_cpu_state(void); +void save_cpu_state(void); +void init_cpu_state(uint64_t elf_entry); +int load_kernel(uint8_t* mem, char* path); +int load_checkpoint(uint8_t* mem, char* path); +void init_kvm_arch(void); + +#endif diff --git a/tools/utils.c b/tools/utils.c index 043ff2384..6ddd7b19d 100644 --- a/tools/utils.c +++ b/tools/utils.c @@ -36,6 +36,7 @@ #include "proxy.h" +#ifdef __x86_64__ inline static void __cpuid(uint32_t code, uint32_t* a, uint32_t* b, uint32_t* c, uint32_t* d) { __asm volatile ("cpuid" : "=a"(*a), "=b"(*b), "=c"(*c), "=d"(*d) : "0"(code), "2"(*c)); @@ -94,6 +95,7 @@ static uint32_t get_frequency_from_brand(void) return 0; } +#endif uint32_t get_cpufreq(void) { @@ -101,9 +103,11 @@ uint32_t freq = 0; char* match; +#ifdef __x86_64__ freq = get_frequency_from_brand(); if (freq > 0) - return freq; + return freq; +#endif // TODO: fallback solution; on some systems cpuinfo_max_freq holds the turbo frequency // => wrong value
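The guest side of the hypercall protocol is not part of this patch, but the port handlers in vcpu_loop() imply its shape: the guest fills a parameter block in its own memory, rewrites the block's address and any buffer pointers inside it as guest-physical addresses, and writes the 32-bit guest-physical address of the block to the matching uhyve port. A minimal sketch, assuming a hypothetical helper guest_to_phys() standing in for HermitCore's virtual-to-physical translation; the struct layout has to match uhyve-syscalls.h:

#include <stddef.h>
#include <stdint.h>

#define UHYVE_PORT_WRITE 0x400

typedef struct {
	int fd;
	const char* buf;	/* guest-physical address of the payload */
	size_t len;	/* in: bytes to write; out: bytes written */
} __attribute__((packed)) uhyve_write_t;

/* hypothetical helper: guest-virtual -> guest-physical */
extern uint64_t guest_to_phys(const void* addr);

static inline void outl(uint16_t port, uint32_t value)
{
	asm volatile("outl %0, %1" : : "a"(value), "Nd"(port));
}

static size_t uhyve_write(int fd, const char* buf, size_t len)
{
	uhyve_write_t args = { fd, (const char*) guest_to_phys(buf), len };

	/* vcpu_loop() receives the block address as `raddr`, adds guest_mem
	 * and performs the write(2) on the host; the result is stored back
	 * into args.len before the vCPU resumes. */
	outl(UHYVE_PORT_WRITE, (uint32_t) guest_to_phys(&args));

	return args.len;
}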