diff --git a/.bintray_descriptor.json b/.bintray_descriptor.json index 7afd423d9..5d244a4f7 100644 --- a/.bintray_descriptor.json +++ b/.bintray_descriptor.json @@ -5,7 +5,7 @@ "subject": "rwth-os", "website_url": "http://www.hermitcore.org", "issue_tracker_url": "https://github.com/RWTH-OS/HermitCore/issues", - "vcs_url": "https://github.com/RWTH-OS/pthread-embedded.git", + "vcs_url": "https://github.com/RWTH-OS/HermitCore.git", "github_release_notes_file": "RELEASE", "licenses": ["Revised BSD"], "public_download_numbers": false, @@ -13,7 +13,7 @@ }, "version": { - "name": "0.1", + "name": "0.2.1", "desc": "HermitCore's kernel as libOS", "gpgSign": false }, @@ -28,7 +28,7 @@ "deb_architecture": "amd64", "override": 1} }, - {"includePattern": "build/(libhermit[^/]*rpm$)", "uploadPattern": "$1", "override": 1} + {"includePattern": "build/(libhermit[^/]*rpm$)", "uploadPattern": "$1", "override": 1}, {"includePattern": "build/(libhermit[^/]*tar.bz2$)", "uploadPattern": "$1", "override": 1} ], "publish": true diff --git a/.travis.yml b/.travis.yml index ffcf780fd..116a34b6e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,6 +17,11 @@ script: - cd build - cmake .. 
- make -j1 package + - cd $TRAVIS_BUILD_DIR + - ./tests.sh + +notifications: + slack: hermitcore:UtcfeEXkbpx3WyIDK2Wm2beS deploy: on: master diff --git a/CMakeLists.txt b/CMakeLists.txt index 970a1c753..65cdc03cd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -198,10 +198,10 @@ set(CPACK_PACKAGE_NAME libhermit) set(CPACK_SYSTEM_NAME all) set(CPACK_PACKAGE_VERSION_MAJOR 0) -set(CPACK_PACKAGE_VERSION_MINOR 1) -set(CPACK_PACKAGE_VERSION_PATCH 0) +set(CPACK_PACKAGE_VERSION_MINOR 2) +set(CPACK_PACKAGE_VERSION_PATCH 1) -set(CPACK_PACKAGE_CONTACT "Daniel Krebs ") +set(CPACK_PACKAGE_CONTACT "Stefan Lankes ") # build .deb, .rpm and .tar.bz2 packages set(CPACK_GENERATOR DEB;RPM;TBZ2) diff --git a/README.md b/README.md index 90f0b6532..12b358dce 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # HermitCore - A lightweight unikernel for a scalable and predictable runtime behavior -[![Join the chat at https://gitter.im/RWTH-OS/HermitCore](https://badges.gitter.im/RWTH-OS/HermitCore.svg)](https://gitter.im/RWTH-OS/HermitCore?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) [![Build Status](https://travis-ci.org/RWTH-OS/HermitCore.svg?branch=devel)](https://travis-ci.org/RWTH-OS/HermitCore) +[![Slack Status](https://radiant-ridge-95061.herokuapp.com/badge.svg)](https://radiant-ridge-95061.herokuapp.com) The project [HermitCore]( http://www.hermitcore.org ) is a new [unikernel](http://unikernel.org) targeting a scalable and predictable runtime @@ -27,6 +27,10 @@ cloud computing applications. It is the result of a research project at RWTH Aachen University and is currently an experimental approach, i.e., not production ready. Please use it with caution. +## Contributing + +HermitCore is being developed on [GitHub](https://github.com/RWTH-OS/HermitCore). +Create your own fork, send us a pull request, and chat with us on [Slack](https://radiant-ridge-95061.herokuapp.com). 
## Requirements @@ -49,6 +53,20 @@ $ sudo apt-get -qq update $ sudo apt-get install binutils-hermit newlib-hermit pthread-embedded-hermit gcc-hermit libhermit ``` +For non-Debian based systems, a docker image with the complete toolchain is provided and can be installed as follows: + +```bash +$ docker pull rwthos/hermitcore +``` + +The following command starts within the new docker container a shell and mounts from the host system the directory `~/src` to `/src`: + +```bash +$ docker run -i -t -v ~/src:/src rwthos/hermitcore:latest +``` + +Within the shell the cross toolchain can be used to build HermitCore applications. + If you want to build the toolchain yourself, have a look at the repository [hermit-toolchain](https://github.com/RWTH-OS/hermit-toolchain), which contains scripts to build the whole toolchain. Depending on how you want to use HermitCore, you might need additional packages @@ -56,8 +74,18 @@ such as: * QEMU (`apt-get install qemu-system-x86`) +## Building HermitCore -## CMake requirements +### Preliminary work + +To build HermitCore from source (without compiler), the repository with its submodules has to be cloned. + +```bash +$ git clone git@github.com:RWTH-OS/HermitCore.git +$ cd HermitCore +$ git submodule init +$ git submodule update +``` We require a fairly recent version of CMake (`3.7`) which is not yet present in most Linux distributions. We therefore provide a helper script that fetches the @@ -87,14 +115,16 @@ cmake-3.7.2-Linux-x86_64.tar.gz 100%[===================>] 29,26M 3,74 So before you build HermitCore you have to source the `local-cmake.sh` script everytime you open a new terminal. - -## Building HermitCore +### Building the library operating system and its examples + +To build HermitCore go to the directory with the source code, create a `build` directory and call `cmake` followed by `make`. ```bash $ mkdir build $ cd build $ cmake .. 
$ make +$ sudo make install ``` If your toolchain is not located in `/opt/hermit/bin` then you have to supply diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index c613c11b6..b679a06e0 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -48,6 +48,8 @@ extern "C" { #define APIC_SVR 0x00F0 /// Error Status Register #define APIC_ESR 0x0280 +/// Corrected Machine-Check Error Interrupt Register +#define APIC_CMCI 0x02F0 /// Interrupt Command Register [bits 0-31] #define APIC_ICR1 0x0300 /// Interrupt Command Register [bits 32-63] diff --git a/arch/x86/include/asm/atomic32.h b/arch/x86/include/asm/atomic32.h index 25bb80e48..aa9a9cf28 100644 --- a/arch/x86/include/asm/atomic32.h +++ b/arch/x86/include/asm/atomic32.h @@ -52,8 +52,8 @@ typedef struct { volatile int32_t counter; } atomic_int32_t; * This function will atomically exchange the value of an atomic variable and * return its old value. Is used in locking-operations.\n * \n - * Intel manuals: If a memory operand is referenced, the processor's locking - * protocol is automatically implemented for the duration of the exchange + * Intel manuals: If a memory operand is referenced, the processor's locking + * protocol is automatically implemented for the duration of the exchange * operation, regardless of the presence or absence of the LOCK prefix. 
* * @param d Pointer to the atomic_int_32_t with the value you want to exchange @@ -79,7 +79,7 @@ inline static int32_t atomic_int32_test_and_set(atomic_int32_t* d, int32_t ret) inline static int32_t atomic_int32_add(atomic_int32_t *d, int32_t i) { int32_t res = i; - asm volatile(LOCK "xaddl %0, %1" : "=r"(i) : "m"(d->counter), "0"(i) : "memory", "cc"); + asm volatile(LOCK "xaddl %0, %1" : "+r"(i), "+m"(d->counter) : : "memory", "cc"); return res+i; } @@ -95,7 +95,7 @@ inline static int32_t atomic_int32_add(atomic_int32_t *d, int32_t i) */ inline static int32_t atomic_int32_sub(atomic_int32_t *d, int32_t i) { - return atomic_int32_add(d, -i); + return atomic_int32_add(d, -i); } /** @brief Atomic increment by one @@ -105,7 +105,9 @@ inline static int32_t atomic_int32_sub(atomic_int32_t *d, int32_t i) * @param d The atomic_int32_t var you want to increment */ inline static int32_t atomic_int32_inc(atomic_int32_t* d) { - return atomic_int32_add(d, 1); + int32_t res = 1; + asm volatile(LOCK "xaddl %0, %1" : "+r"(res), "+m"(d->counter) : : "memory", "cc"); + return ++res; } /** @brief Atomic decrement by one @@ -115,7 +117,9 @@ inline static int32_t atomic_int32_inc(atomic_int32_t* d) { * @param d The atomic_int32_t var you want to decrement */ inline static int32_t atomic_int32_dec(atomic_int32_t* d) { - return atomic_int32_add(d, -1); + int32_t res = -1; + asm volatile(LOCK "xaddl %0, %1" : "+r"(res), "+m"(d->counter) : : "memory", "cc"); + return --res; } /** @brief Read out an atomic_int32_t var @@ -132,7 +136,7 @@ inline static int32_t atomic_int32_read(atomic_int32_t *d) { /** @brief Set the value of an atomic_int32_t var * - * This function is for convenience: It sets the internal value of + * This function is for convenience: It sets the internal value of * an atomic_int32_t var for you. 
* * @param d Pointer to the atomic_int32_t var you want to set diff --git a/arch/x86/include/asm/atomic64.h b/arch/x86/include/asm/atomic64.h index 02bf98437..b741f0026 100644 --- a/arch/x86/include/asm/atomic64.h +++ b/arch/x86/include/asm/atomic64.h @@ -52,8 +52,8 @@ typedef struct { volatile int64_t counter; } atomic_int64_t; * This function will atomically exchange the value of an atomic variable and * return its old value. Is used in locking-operations.\n * \n - * Intel manuals: If a memory operand is referenced, the processor's locking - * protocol is automatically implemented for the duration of the exchange + * Intel manuals: If a memory operand is referenced, the processor's locking + * protocol is automatically implemented for the duration of the exchange * operation, regardless of the presence or absence of the LOCK prefix. * * @param d Pointer to the atomic_int_64_t with the value you want to exchange @@ -79,7 +79,7 @@ inline static int64_t atomic_int64_test_and_set(atomic_int64_t* d, int64_t ret) inline static int64_t atomic_int64_add(atomic_int64_t *d, int64_t i) { int64_t res = i; - asm volatile(LOCK "xaddq %0, %1" : "=r"(i) : "m"(d->counter), "0"(i) : "memory", "cc"); + asm volatile(LOCK "xaddq %0, %1" : "+r"(i), "+m"(d->counter) : : "memory", "cc"); return res+i; } @@ -95,7 +95,7 @@ inline static int64_t atomic_int64_add(atomic_int64_t *d, int64_t i) */ inline static int64_t atomic_int64_sub(atomic_int64_t *d, int64_t i) { - return atomic_int64_add(d, -i); + return atomic_int64_add(d, -i); } /** @brief Atomic increment by one @@ -105,7 +105,9 @@ inline static int64_t atomic_int64_sub(atomic_int64_t *d, int64_t i) * @param d The atomic_int64_t var you want to increment */ inline static int64_t atomic_int64_inc(atomic_int64_t* d) { - return atomic_int64_add(d, 1); + int64_t res = 1; + asm volatile(LOCK "xaddq %0, %1" : "+r"(res), "+m"(d->counter) : : "memory", "cc"); + return ++res; } /** @brief Atomic decrement by one @@ -115,7 +117,9 @@ inline 
static int64_t atomic_int64_inc(atomic_int64_t* d) { * @param d The atomic_int64_t var you want to decrement */ inline static int64_t atomic_int64_dec(atomic_int64_t* d) { - return atomic_int64_add(d, -1); + int64_t res = -1; + asm volatile(LOCK "xaddq %0, %1" : "+r"(res), "+m"(d->counter) : : "memory", "cc"); + return --res; } /** @brief Read out an atomic_int64_t var @@ -132,7 +136,7 @@ inline static int64_t atomic_int64_read(atomic_int64_t *d) { /** @brief Set the value of an atomic_int64_t var * - * This function is for convenience: It sets the internal value of + * This function is for convenience: It sets the internal value of * an atomic_int64_t var for you. * * @param d Pointer to the atomic_int64_t var you want to set diff --git a/arch/x86/include/asm/multiboot.h b/arch/x86/include/asm/multiboot.h index 5b8826a9a..1745f8db5 100644 --- a/arch/x86/include/asm/multiboot.h +++ b/arch/x86/include/asm/multiboot.h @@ -143,6 +143,8 @@ typedef struct multiboot_mod_list multiboot_module_t; /// Pointer to multiboot structure /// This pointer is declared at set by entry.asm -extern multiboot_info_t* mb_info; +extern const multiboot_info_t* const mb_info; +extern char* cmdline; +extern size_t cmdsize; #endif diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index cac2e633b..1c41dc58e 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -102,14 +102,14 @@ static inline size_t sign_extend(ssize_t addr, int bits) #define PAGE_MAP_ENTRIES (1L << PAGE_MAP_BITS) /// Align to next page -#define PAGE_FLOOR(addr) (((addr) + PAGE_SIZE - 1) & PAGE_MASK) +#define PAGE_CEIL(addr) (((addr) + PAGE_SIZE - 1) & PAGE_MASK) /// Align to page -#define PAGE_CEIL(addr) ( (addr) & PAGE_MASK) +#define PAGE_FLOOR(addr) ( (addr) & PAGE_MASK) /// Align to next 2M boundary -#define PAGE_2M_FLOOR(addr) (((addr) + (1L << 21) - 1) & ((~0L) << 21)) +#define PAGE_2M_CEIL(addr) (((addr) + (1L << 21) - 1) & ((~0L) << 21)) /// Align to nex 2M boundary 
-#define PAGE_2M_CEIL(addr) ( (addr) & ((~0L) << 21)) +#define PAGE_2M_FLOOR(addr) ( (addr) & ((~0L) << 21)) /// Page is present #define PG_PRESENT (1 << 0) diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index 26d4b0860..211aa03bd 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -26,7 +26,7 @@ */ -/** +/** * @author Stefan Lankes * @file arch/x86/include/asm/pci.h * @brief functions related to PCI initialization and information @@ -48,6 +48,8 @@ typedef struct { uint32_t irq; } pci_info_t; +#define PCI_IGNORE_SUBID (0) + /** @brief Initialize the PCI environment */ int pci_init(void); @@ -55,15 +57,16 @@ int pci_init(void); /** @brief Determine the IObase address and the interrupt number of a specific device * * @param vendor_id The device's vendor ID - * @param device_id the device's ID + * @param device_id The device's ID + * @param subsystem_id The subsystem ID * @param info Pointer to the record pci_info_t where among other the IObase address will be stored * @param enable_bus_master If true, the bus mastering will be enabled. 
* - * @return + * @return * - 0 on success * - -EINVAL (-22) on failure */ -int pci_get_device_info(uint32_t vendor_id, uint32_t device_id, pci_info_t* info, int8_t enble_bus_master); +int pci_get_device_info(uint32_t vendor_id, uint32_t device_id, uint32_t subsystem_id, pci_info_t* info, int8_t enble_bus_master); /** @brief Print information of existing pci adapters * diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 24ae11b78..15aa50a70 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -52,6 +52,7 @@ extern "C" { #define CPU_FEATURE_PSE (1 << 3) #define CPU_FEATURE_MSR (1 << 5) #define CPU_FEATURE_PAE (1 << 6) +#define CPU_FEATURE_MCE (1 << 7) #define CPU_FEATURE_APIC (1 << 9) #define CPU_FEATURE_SEP (1 << 11) #define CPU_FEATURE_PGE (1 << 13) @@ -308,6 +309,10 @@ inline static uint32_t has_msr(void) { return (cpu_info.feature1 & CPU_FEATURE_MSR); } +inline static uint32_t has_mce(void) { + return (cpu_info.feature1 & CPU_FEATURE_MCE); +} + inline static uint32_t has_apic(void) { return (cpu_info.feature1 & CPU_FEATURE_APIC); } diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index aa64a6eae..1192490b8 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -176,7 +176,7 @@ static inline void lapic_timer_set_counter(uint32_t counter) static inline void lapic_timer_disable(void) { - lapic_write(APIC_LVT_TSR, 0x10000); + lapic_write(APIC_LVT_T, 0x10000); } static inline void lapic_timer_oneshot(void) @@ -364,7 +364,7 @@ int apic_enable_timer(void) } static apic_mp_t* search_mptable(size_t base, size_t limit) { - size_t ptr=PAGE_CEIL(base), vptr=0; + size_t ptr=PAGE_FLOOR(base), vptr=0; size_t flags = PG_GLOBAL | PG_RW | PG_PCD; apic_mp_t* tmp; uint32_t i; @@ -410,7 +410,7 @@ static apic_mp_t* search_mptable(size_t base, size_t limit) { #if 0 static size_t search_ebda(void) { - size_t ptr=PAGE_CEIL(0x400), vptr=0xF0000; + size_t ptr=PAGE_FLOOR(0x400), vptr=0xF0000; 
size_t flags = PG_GLOBAL | PG_RW | PG_PCD; // protec apic by the NX flags @@ -456,8 +456,8 @@ static int lapic_reset(void) lapic_write(APIC_LVT_TSR, 0x10000); // disable thermal sensor interrupt if (max_lvt >= 5) lapic_write(APIC_LVT_PMC, 0x10000); // disable performance counter interrupt - lapic_write(APIC_LINT0, 0x7C); // connect LINT0 to idt entry 124 - lapic_write(APIC_LINT1, 0x7D); // connect LINT1 to idt entry 125 + lapic_write(APIC_LINT0, 0x00010000); // disable LINT0 + lapic_write(APIC_LINT1, 0x00010000); // disable LINT1 lapic_write(APIC_LVT_ER, 0x7E); // connect error to idt entry 126 return 0; @@ -580,8 +580,8 @@ int smp_init(void) * Wakeup the other cores via IPI. They start at this address * in real mode, switch to protected and finally they jump to smp_main. */ - page_map(SMP_SETUP_ADDR, SMP_SETUP_ADDR, PAGE_FLOOR(sizeof(boot_code)) >> PAGE_BITS, PG_RW|PG_GLOBAL); - vma_add(SMP_SETUP_ADDR, SMP_SETUP_ADDR + PAGE_FLOOR(sizeof(boot_code)), VMA_READ|VMA_WRITE|VMA_CACHEABLE); + page_map(SMP_SETUP_ADDR, SMP_SETUP_ADDR, PAGE_CEIL(sizeof(boot_code)) >> PAGE_BITS, PG_RW|PG_GLOBAL); + vma_add(SMP_SETUP_ADDR, SMP_SETUP_ADDR + PAGE_CEIL(sizeof(boot_code)), VMA_READ|VMA_WRITE|VMA_CACHEABLE); memcpy((void*)SMP_SETUP_ADDR, boot_code, sizeof(boot_code)); for(i=0; i Therefore, we disable the PIC outportb(0xA1, 0xFF); outportb(0x21, 0xFF); @@ -683,6 +684,7 @@ int apic_calibration(void) } // now, we don't longer need the IOAPIC timer and turn it off + LOG_INFO("Disable IOAPIC timer\n"); ioapic_intoff(2, apic_processors[boot_processor]->id); } @@ -721,7 +723,7 @@ static int apic_probe(void) found_mp: if (!apic_mp) { - LOG_ERROR("Didn't find MP config table\n"); + LOG_INFO("Didn't find MP config table\n"); goto no_mp; } @@ -916,12 +918,6 @@ int smp_start(void) // install IDT idt_install(); - /* - * we turned on paging - * => now, we are able to register our task - */ - register_task(); - // enable additional cpu features cpu_detection(); @@ -936,6 +932,12 @@ int 
smp_start(void) set_idle_task(); + /* + * TSS is set, pagining is enabled + * => now, we are able to register our task + */ + register_task(); + irq_enable(); atomic_int32_inc(&cpu_online); @@ -1039,6 +1041,7 @@ static void apic_err_handler(struct state *s) void shutdown_system(void) { int if_bootprocessor = (boot_processor == apic_cpu_id()); + uint32_t max_lvt; irq_disable(); @@ -1061,8 +1064,11 @@ void shutdown_system(void) if (if_bootprocessor) LOG_INFO("Disable APIC\n"); - lapic_write(APIC_LVT_TSR, 0x10000); // disable thermal sensor interrupt - lapic_write(APIC_LVT_PMC, 0x10000); // disable performance counter interrupt + max_lvt = apic_lvt_entries(); + if (max_lvt >= 4) + lapic_write(APIC_LVT_TSR, 0x10000); // disable thermal sensor interrupt + if (max_lvt >= 5) + lapic_write(APIC_LVT_PMC, 0x10000); // disable performance counter interrupt lapic_write(APIC_SVR, 0x00); // disable the apic // disable x2APIC @@ -1082,17 +1088,16 @@ void shutdown_system(void) } } -static void apic_shutdown(struct state * s) +static void apic_shutdown(struct state* s) { go_down = 1; LOG_DEBUG("Receive shutdown interrupt\n"); } -static void apic_lint0(struct state * s) +static void apic_wakeup(struct state* s) { - // Currently nothing to do - LOG_INFO("Receive LINT0 interrupt\n"); + LOG_DEBUG("Receive wakeup interrupt\n"); } int apic_init(void) @@ -1104,12 +1109,12 @@ int apic_init(void) return ret; // set APIC error handler + irq_install_handler(121, apic_wakeup); irq_install_handler(126, apic_err_handler); #if MAX_CORES > 1 irq_install_handler(80+32, apic_tlb_handler); #endif irq_install_handler(81+32, apic_shutdown); - irq_install_handler(124, apic_lint0); if (apic_processors[boot_processor]) LOG_INFO("Boot processor %u (ID %u)\n", boot_processor, apic_processors[boot_processor]->id); else diff --git a/arch/x86/kernel/entry.asm b/arch/x86/kernel/entry.asm index 0182d5294..6375533da 100644 --- a/arch/x86/kernel/entry.asm +++ b/arch/x86/kernel/entry.asm @@ -69,6 +69,9 @@ align 4 
global hbmem_size global uhyve global image_size + global uartport + global cmdline + global cmdsize base dq 0 limit dq 0 cpu_freq dd 0 @@ -93,6 +96,9 @@ align 4 hbmem_base dq 0 hbmem_size dq 0 uhyve dd 0 + uartport dq 0 + cmdline dq 0 + cmdsize dq 0 ; Bootstrap page tables are used during the initialization. align 4096 @@ -114,9 +120,6 @@ boot_pgt: SECTION .ktext align 4 start64: - ; store pointer to the multiboot information - mov [mb_info], QWORD rdx - ; reset registers to kill any stale realmode selectors xor eax, eax mov ds, eax @@ -134,6 +137,9 @@ start64: cmp eax, 0 jne Lno_pml4_init + ; store pointer to the multiboot information + mov [mb_info], QWORD rdx + ; relocate page tables mov rdi, boot_pml4 mov rax, QWORD [rdi] @@ -188,16 +194,17 @@ Lno_mbinfo: xor rcx, rcx mov rsi, 510*0x200000 sub rsi, kernel_start + mov r11, QWORD [image_size] Lremap: mov QWORD [rdi], rax add rax, 0x200000 add rcx, 0x200000 add rdi, 8 - ; note: the whole code segement muust fit in the first pgd + ; note: the whole code segement has to fit in the first pgd cmp rcx, rsi - jnb Lno_pml4_init - cmp rcx, QWORD [image_size] - jb Lremap + jnl Lno_pml4_init + cmp rcx, r11 + jl Lremap Lno_pml4_init: ; Set CR3 @@ -272,9 +279,9 @@ gdt_flush: global isr%1 align 64 isr%1: - push byte 0 ; pseudo error code - push byte %1 - jmp common_stub + push byte 0 ; pseudo error code + push byte %1 + jmp common_stub %endmacro ; Similar to isrstub_pseudo_error, but without pushing @@ -284,8 +291,8 @@ gdt_flush: global isr%1 align 64 isr%1: - push byte %1 - jmp common_stub + push byte %1 + jmp common_stub %endmacro ; Create isr entries, where the number after the @@ -337,9 +344,9 @@ isrstub_pseudo_error 9 global irq%1 align 64 irq%1: - push byte 0 ; pseudo error code - push byte 32+%1 - jmp common_stub + push byte 0 ; pseudo error code + push byte 32+%1 + jmp common_stub %endmacro ; Create entries for the interrupts 0 to 23 @@ -360,15 +367,15 @@ global wakeup align 64 wakeup: push byte 0 ; pseudo error code 
- push byte 121 - jmp common_stub + push byte 121 + jmp common_stub global mmnif_irq align 64 mmnif_irq: push byte 0 ; pseudo error code - push byte 122 - jmp common_stub + push byte 122 + jmp common_stub global apic_timer align 64 @@ -409,7 +416,6 @@ extern irq_handler extern get_current_stack extern finish_task_switch extern syscall_handler -extern kernel_stack global getcontext align 64 diff --git a/arch/x86/kernel/gdt.c b/arch/x86/kernel/gdt.c index b56f48381..2eca34564 100644 --- a/arch/x86/kernel/gdt.c +++ b/arch/x86/kernel/gdt.c @@ -41,7 +41,7 @@ gdt_ptr_t gp; // currently, our kernel has full access to the ioports static gdt_entry_t gdt[GDT_ENTRIES] = {[0 ... GDT_ENTRIES-1] = {0, 0, 0, 0, 0, 0}}; static tss_t task_state_segments[MAX_CORES] __attribute__ ((aligned (PAGE_SIZE))); -static uint8_t stack_table[MAX_CORES*KERNEL_STACK_SIZE*MAX_IST] __attribute__ ((aligned (PAGE_SIZE))); +static uint8_t stack_table[MAX_CORES][KERNEL_STACK_SIZE*MAX_IST] __attribute__ ((aligned (PAGE_SIZE))); extern const void boot_stack; @@ -149,9 +149,9 @@ void gdt_install(void) for(i=0; iint_no >= MAX_HANDLERS, 0)) { + if (BUILTIN_EXPECT(s->int_no >= MAX_HANDLERS, 0)) { LOG_ERROR("Invalid IRQ number %d\n", s->int_no); return NULL; } diff --git a/arch/x86/kernel/isrs.c b/arch/x86/kernel/isrs.c index dd2583f27..f79cdbd0b 100644 --- a/arch/x86/kernel/isrs.c +++ b/arch/x86/kernel/isrs.c @@ -225,7 +225,7 @@ static void arch_fault_handler(struct state *s) else LOG_WARNING("Unknown exception %d", s->int_no); - LOG_ERROR(" Exception (%d) on core %d at %#x:%#lx, fs = %#lx, gs = %#lx, error code = 0x%#lx, task id = %u, rflags = %#x\n", + LOG_ERROR(" Exception (%d) on core %d at %#x:%#lx, fs = %#lx, gs = %#lx, error code = %#lx, task id = %u, rflags = %#x\n", s->int_no, CORE_ID, s->cs, s->rip, s->fs, s->gs, s->error, per_core(current_task)->id, s->rflags); LOG_ERROR("rax %#lx, rbx %#lx, rcx %#lx, rdx %#lx, rbp, %#lx, rsp %#lx rdi %#lx, rsi %#lx, r8 %#lx, r9 %#lx, r10 %#lx, r11 %#lx, r12 
%#lx, r13 %#lx, r14 %#lx, r15 %#lx\n", s->rax, s->rbx, s->rcx, s->rdx, s->rbp, s->rsp, s->rdi, s->rsi, s->r8, s->r9, s->r10, s->r11, s->r12, s->r13, s->r14, s->r15); diff --git a/arch/x86/kernel/pci.c b/arch/x86/kernel/pci.c index 114ecf80e..d35e919fc 100644 --- a/arch/x86/kernel/pci.c +++ b/arch/x86/kernel/pci.c @@ -45,6 +45,7 @@ #define PCI_CFRV 0x08 /* Configuration Revision */ #define PCI_CFLT 0x0c /* Configuration Latency Timer */ #define PCI_CBIO 0x10 /* Configuration Base IO Address */ +#define PCI_CSID 0x2C /* Configuration Subsystem Id & Subsystem Vendor Id */ #define PCI_CFIT 0x3c /* Configuration Interrupt */ #define PCI_CFDA 0x40 /* Configuration Driver Area */ @@ -101,6 +102,11 @@ static uint32_t pci_conf_read(uint32_t bus, uint32_t slot, uint32_t off) return data; } +static inline uint32_t pci_subid(uint32_t bus, uint32_t slot) +{ + return pci_conf_read(bus, slot, PCI_CSID); +} + static inline uint32_t pci_what_irq(uint32_t bus, uint32_t slot) { return pci_conf_read(bus, slot, PCI_CFIT) & 0xFF; @@ -139,15 +145,15 @@ static inline uint32_t pci_what_size(uint32_t bus, uint32_t slot, uint32_t nr) int pci_init(void) { uint32_t slot, bus; - + for (bus = 0; bus < MAX_BUS; bus++) for (slot = 0; slot < MAX_SLOTS; slot++) adapters[bus][slot] = pci_conf_read(bus, slot, PCI_CFID); - + return 0; } -int pci_get_device_info(uint32_t vendor_id, uint32_t device_id, pci_info_t* info, int8_t bus_master) +int pci_get_device_info(uint32_t vendor_id, uint32_t device_id, uint32_t subsystem_id, pci_info_t* info, int8_t bus_master) { uint32_t slot, bus, i; @@ -160,8 +166,9 @@ int pci_get_device_info(uint32_t vendor_id, uint32_t device_id, pci_info_t* info for (bus = 0; bus < MAX_BUS; bus++) { for (slot = 0; slot < MAX_SLOTS; slot++) { if (adapters[bus][slot] != -1) { - if (((adapters[bus][slot] & 0xffff) == vendor_id) && - (((adapters[bus][slot] & 0xffff0000) >> 16) == device_id)) { + if (((adapters[bus][slot] & 0xffff) == vendor_id) && + (((adapters[bus][slot] & 0xffff0000) 
>> 16) == device_id) && + (((pci_subid(bus, slot) >> 16) & subsystem_id) == subsystem_id)) { for(i=0; i<6; i++) { info->base[i] = pci_what_iobase(bus, slot, i); info->size[i] = (info->base[i]) ? pci_what_size(bus, slot, i) : 0; @@ -195,7 +202,7 @@ int print_pci_adapters(void) if (adapters[bus][slot] != -1) { counter++; LOG_INFO("%d) Vendor ID: 0x%x Device Id: 0x%x\n", - counter, adapters[bus][slot] & 0xffff, + counter, adapters[bus][slot] & 0xffff, (adapters[bus][slot] & 0xffff0000) >> 16); #ifdef WITH_PCI_IDS diff --git a/arch/x86/kernel/processor.c b/arch/x86/kernel/processor.c index 7b1cf0073..ca225dd47 100644 --- a/arch/x86/kernel/processor.c +++ b/arch/x86/kernel/processor.c @@ -172,10 +172,10 @@ static void fpu_init_xsave(union fpu_state* fpu) static uint32_t get_frequency_from_mbinfo(void) { - if (mb_info && (mb_info->flags & MULTIBOOT_INFO_CMDLINE)) + if (mb_info && (mb_info->flags & MULTIBOOT_INFO_CMDLINE) && (cmdline)) { // search in the command line for cpu frequency - char* found = strstr((char*) mb_info->cmdline, "-freq"); + char* found = strstr((char*) (size_t)cmdline, "-freq"); if (!found) return 0; @@ -472,6 +472,8 @@ int cpu_detection(void) { cr4 |= CR4_PGE; if (has_fsgsbase()) cr4 |= CR4_FSGSBASE; + if (has_mce()) + cr4 |= CR4_MCE; // enable machine check exceptions //if (has_vmx()) // cr4 |= CR4_VMXE; cr4 &= ~CR4_TSD; // => every privilege level is able to use rdtsc @@ -552,7 +554,7 @@ int cpu_detection(void) { a = b = c = d = 0; cpuid(1, &a, &b, &cpu_info.feature2, &cpu_info.feature1); - LOG_INFO("CPU features: %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", + LOG_INFO("CPU features: %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", has_sse() ? "SSE " : "", has_sse2() ? "SSE2 " : "", has_sse3() ? "SSE3 " : "", @@ -564,6 +566,7 @@ int cpu_detection(void) { has_fma() ? "FMA " : "", has_movbe() ? "MOVBE " : "", has_x2apic() ? "X2APIC " : "", + has_mce() ? "MCE " : "", has_fpu() ? 
"FPU " : "", has_fxsr() ? "FXSR " : "", has_xsave() ? "XSAVE " : "", diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 30528794d..b25240a0b 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -212,7 +212,7 @@ int hermit_kill(tid_t dest, int signum) return 0; } -void signal_init() +void signal_init(void) { // initialize per-core signal queue for(int i = 0; i < MAX_CORES; i++) { diff --git a/arch/x86/kernel/tasks.c b/arch/x86/kernel/tasks.c index 13bebbee5..4a3a89045 100644 --- a/arch/x86/kernel/tasks.c +++ b/arch/x86/kernel/tasks.c @@ -39,7 +39,10 @@ #include #include -#define TLS_OFFSET 0 +#define TLS_ALIGNBITS 5 +#define TLS_ALIGNSIZE (1L << TLS_ALIGNBITS) +#define TSL_ALIGNMASK ((~0L) << TLS_ALIGNBITS) +#define TLS_FLOOR(addr) ((((size_t)addr) + TLS_ALIGNSIZE - 1) & TSL_ALIGNMASK) /* * Note that linker symbols are not variables, they have no memory allocated for @@ -64,20 +67,20 @@ static int init_tls(void) curr_task->tls_addr = (size_t) &tls_start; curr_task->tls_size = (size_t) &tls_end - (size_t) &tls_start; - tls_addr = kmalloc(curr_task->tls_size + TLS_OFFSET + sizeof(size_t)); + tls_addr = kmalloc(curr_task->tls_size + TLS_ALIGNSIZE + sizeof(size_t)); if (BUILTIN_EXPECT(!tls_addr, 0)) { LOG_ERROR("load_task: heap is missing!\n"); return -ENOMEM; } - memset(tls_addr, 0x00, TLS_OFFSET); - memcpy((void*) (tls_addr+TLS_OFFSET), (void*) curr_task->tls_addr, curr_task->tls_size); - fs = (size_t) tls_addr + curr_task->tls_size + TLS_OFFSET; + memset(tls_addr, 0x00, TLS_ALIGNSIZE); + memcpy((void*) TLS_FLOOR(tls_addr), (void*) curr_task->tls_addr, curr_task->tls_size); + fs = (size_t) TLS_FLOOR(tls_addr) + curr_task->tls_size; *((size_t*)fs) = fs; // set fs register to the TLS segment set_tls(fs); - LOG_INFO("TLS of task %d on core %d starts at 0x%zx (size 0x%zx)\n", curr_task->id, CORE_ID, tls_addr + TLS_OFFSET, curr_task->tls_size); + LOG_INFO("TLS of task %d on core %d starts at 0x%zx (size 0x%zx)\n", curr_task->id, 
CORE_ID, TLS_FLOOR(tls_addr), curr_task->tls_size); } else set_tls(0); // no TLS => clear fs register return 0; @@ -103,10 +106,10 @@ int is_proxy(void) return 0; if (!is_single_kernel()) return 1; - if (mb_info && (mb_info->flags & MULTIBOOT_INFO_CMDLINE)) + if (mb_info && (mb_info->flags & MULTIBOOT_INFO_CMDLINE) && (cmdline)) { // search in the command line for the "proxy" hint - char* found = strstr((char*) (size_t) mb_info->cmdline, "-proxy"); + char* found = strstr((char*) (size_t) cmdline, "-proxy"); if (found) return 1; } @@ -123,7 +126,6 @@ size_t* get_current_stack(void) else stptr = (stptr + DEFAULT_STACK_SIZE - sizeof(size_t)) & ~0x1F; - set_per_core(kernel_stack, stptr); set_tss(stptr, (size_t) curr_task->ist_addr + KERNEL_STACK_SIZE - 0x10); return curr_task->last_stack_pointer; @@ -190,10 +192,15 @@ int create_default_frame(task_t* task, entry_point_t ep, void* arg, uint32_t cor return 0; } +#define USE_MWAIT + void wait_for_task(void) { +#ifndef USE_MWAIT + HALT; +#else if (!has_mwait()) { - PAUSE; + HALT; } else { void* queue = get_readyqueue(); @@ -203,4 +210,21 @@ void wait_for_task(void) monitor(queue, 0, 0); mwait(0x2 /* 0x2 = c3, 0xF = c0 */, 1 /* break on interrupt flag */); } +#endif +} + +void wakeup_core(uint32_t core_id) +{ +#ifdef USE_MWAIT + // if mwait is available, an IPI isn't required to wakeup the core + if (has_mwait()) + return; +#endif + + // no self IPI required + if (core_id == CORE_ID) + return; + + LOG_DEBUG("wakeup core %d\n", core_id); + apic_send_ipi(core_id, 121); } diff --git a/arch/x86/kernel/timer.c b/arch/x86/kernel/timer.c index f44b49f2e..819adcc26 100644 --- a/arch/x86/kernel/timer.c +++ b/arch/x86/kernel/timer.c @@ -70,10 +70,6 @@ void check_ticks(void) } #endif -static void wakeup_handler(struct state *s) -{ -} - /* * Handles the timer. 
In this case, it's very simple: We * increment the 'timer_ticks' variable every time the @@ -187,7 +183,6 @@ int timer_init(void) */ irq_install_handler(32, timer_handler); irq_install_handler(123, timer_handler); - irq_install_handler(121, wakeup_handler); #ifdef DYNAMIC_TICKS boot_tsc = has_rdtscp() ? rdtscp(NULL) : rdtsc(); diff --git a/arch/x86/kernel/uart.c b/arch/x86/kernel/uart.c index e96f21920..38ce21427 100644 --- a/arch/x86/kernel/uart.c +++ b/arch/x86/kernel/uart.c @@ -99,28 +99,38 @@ #define DEFAULT_UART_PORT 0xc110 -static size_t iobase = 0; +extern size_t uartport; static inline unsigned char read_from_uart(uint32_t off) { uint8_t c = 0; - if (iobase) - c = inportb(iobase + off); + if (uartport) + c = inportb(uartport + off); return c; } +static inline int is_transmit_empty(void) +{ + if (uartport) + return inportb(uartport + UART_LSR) & 0x20; + + return 1; +} + static inline void write_to_uart(uint32_t off, unsigned char c) { - if (iobase) - outportb(iobase + off, c); + while (is_transmit_empty() == 0) { PAUSE; } + + if (uartport) + outportb(uartport + off, c); } /* Puts a single character on a serial device */ int uart_putchar(unsigned char c) { - if (!iobase) + if (!uartport) return 0; write_to_uart(UART_TX, c); @@ -133,7 +143,7 @@ int uart_puts(const char *text) { size_t i, len = strlen(text); - if (!iobase) + if (!uartport) return 0; for (i = 0; i < len; i++) @@ -144,19 +154,12 @@ int uart_puts(const char *text) static int uart_config(void) { - /* - * enable FIFOs - * clear RX and TX FIFO - * set irq trigger to 8 bytes - */ - write_to_uart(UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT | UART_FCR_TRIGGER_1); + if (!uartport) + return 0; /* disable interrupts */ write_to_uart(UART_IER, 0); - /* DTR + RTS */ - write_to_uart(UART_MCR, UART_MCR_DTR|UART_MCR_RTS); - /* * 8bit word length * 1 stop bit @@ -169,51 +172,59 @@ static int uart_config(void) write_to_uart(UART_LCR, lcr); /* - * set baudrate to 9600 + * set 
baudrate to 38400 */ - uint32_t divisor = 1843200 / 9600; //115200; - write_to_uart(UART_DLL, divisor & 0xff); - write_to_uart(UART_DLM, (divisor >> 8) & 0xff); + write_to_uart(UART_DLL, 0x03); + write_to_uart(UART_DLM, 0x00); /* set DLAB=0 */ write_to_uart(UART_LCR, lcr & (~UART_LCR_DLAB)); + /* + * enable FIFOs + * clear RX and TX FIFO + * set irq trigger to 8 bytes + */ + write_to_uart(UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT | UART_FCR_TRIGGER_1); + return 0; } -extern const void kernel_start; - int uart_init(void) { if (is_uhyve()) return 0; + if (uartport) + return uart_config(); + pci_info_t pci_info; uint32_t bar = 0; // Searching for Intel's UART device - if (pci_get_device_info(0x8086, 0x0936, &pci_info, 1) == 0) + if (pci_get_device_info(0x8086, 0x0936, PCI_IGNORE_SUBID, &pci_info, 1) == 0) goto Lsuccess; // Searching for Qemu's UART device - if (pci_get_device_info(0x1b36, 0x0002, &pci_info, 1) == 0) + if (pci_get_device_info(0x1b36, 0x0002, PCI_IGNORE_SUBID, &pci_info, 1) == 0) goto Lsuccess; // Searching for Qemu's 2x UART device (pci-serial-2x) - if (pci_get_device_info(0x1b36, 0x0003, &pci_info, 1) == 0) + if (pci_get_device_info(0x1b36, 0x0003, PCI_IGNORE_SUBID, &pci_info, 1) == 0) goto Lsuccess; // Searching for Qemu's 4x UART device (pci-serial-4x) - if (pci_get_device_info(0x1b36, 0x0004, &pci_info, 1) == 0) + if (pci_get_device_info(0x1b36, 0x0004, PCI_IGNORE_SUBID, &pci_info, 1) == 0) goto Lsuccess; // default value of our QEMU configuration - iobase = DEFAULT_UART_PORT; + uartport = DEFAULT_UART_PORT; // configure uart - return uart_config();; + return uart_config(); Lsuccess: - iobase = pci_info.base[bar]; + uartport = pci_info.base[bar]; + //irq_install_handler(32+pci_info.irq, uart_handler); - kprintf("UART uses io address 0x%x\n", iobase); + kprintf("UART uses io address 0x%x\n", uartport); // configure uart return uart_config(); diff --git a/arch/x86/loader/CMakeLists.txt b/arch/x86/loader/CMakeLists.txt 
index 0d142c166..b9ea11d7e 100644 --- a/arch/x86/loader/CMakeLists.txt +++ b/arch/x86/loader/CMakeLists.txt @@ -24,7 +24,7 @@ target_link_libraries(arch_x86_loader "-T ${CMAKE_CURRENT_LIST_DIR}/link.ld" "-z max-page-size=4096" -Wl,--build-id=none # required because CMake links with gcc, not ld - -nostdlib) + -nostdlib -static) # tools/proxy looks for `ldhermit.elf` set_target_properties(arch_x86_loader PROPERTIES diff --git a/arch/x86/loader/entry.asm b/arch/x86/loader/entry.asm index 2d99a6f36..3ae3d7c1a 100644 --- a/arch/x86/loader/entry.asm +++ b/arch/x86/loader/entry.asm @@ -99,32 +99,24 @@ stublet: ; Interpret multiboot information mov DWORD [mb_info], ebx - ; Initialize CPU features - call cpu_init - - pop ebx ; restore pointer to multiboot structure - lgdt [GDT64.Pointer] ; Load the 64-bit global descriptor table. - jmp GDT64.Code:start64 ; Set the code segment and enter 64-bit long mode. - ; This will set up the x86 control registers: ; Caching and the floating point unit are enabled ; Bootstrap page tables are loaded and page size ; extensions (huge pages) enabled. 
-global cpu_init cpu_init: ; initialize page tables ; map vga 1:1 - push edi - mov eax, VIDEO_MEM_ADDR ; map vga - and eax, 0xFFFFF000 ; page align lower half - mov edi, eax - shr edi, 9 ; (edi >> 12) * 8 (index for boot_pgt) - add edi, boot_pgt - or eax, 0x113 ; set present, global, writable and cache disable bits - mov DWORD [edi], eax - pop edi + ; push edi + ; mov eax, VIDEO_MEM_ADDR ; map vga + ; and eax, 0xFFFFF000 ; page align lower half + ; mov edi, eax + ; shr edi, 9 ; (edi >> 12) * 8 (index for boot_pgt) + ; add edi, boot_pgt + ; or eax, 0x13 ; set present, writable and cache disable bits + ; mov DWORD [edi], eax + ; pop edi ; map multiboot info 1:1 push edi @@ -133,7 +125,7 @@ cpu_init: mov edi, eax shr edi, 9 ; (edi >> 12) * 8 (index for boot_pgt) add edi, boot_pgt - or eax, 0x101 ; set present and global bits + or eax, 0x3 ; set present and writable bits mov DWORD [edi], eax pop edi @@ -151,7 +143,7 @@ L0: cmp ecx, ebx mov edi, eax shr edi, 9 ; (edi >> 12) * 8 (index for boot_pgt) add edi, boot_pgt - or eax, 0x103 ; set present, global and writable bits + or eax, 0x3 ; set present and writable bits mov DWORD [edi], eax add ecx, 0x1000 jmp L0 @@ -188,23 +180,22 @@ L1: test edx, 1 << 29 ; Test if the LM-bit, which is bit 29, is set in the D-register. jz Linvalid ; They aren't, there is no long mode. 
+ ; Set CR3 + mov eax, boot_pml4 + ;or eax, (1 << 0) ; set present bit + mov cr3, eax - ; we need to enable PAE modus + ; we need to enable PAE modus mov eax, cr4 or eax, 1 << 5 mov cr4, eax - ; switch to the compatibility mode (which is part of long mode) + ; switch to the compatibility mode (which is part of long mode) mov ecx, 0xC0000080 rdmsr or eax, 1 << 8 wrmsr - ; Set CR3 - mov eax, boot_pml4 - or eax, (1 << 0) ; set present bit - mov cr3, eax - ; Set CR4 mov eax, cr4 and eax, 0xfffbf9ff ; disable SSE @@ -221,7 +212,9 @@ L1: or eax, (1 << 31) ; enable paging mov cr0, eax - ret + ;pop ebx ; restore pointer to multiboot structure + lgdt [GDT64.Pointer] ; Load the 64-bit global descriptor table. + jmp GDT64.Code:start64 ; Set the code segment and enter 64-bit long mode. ; there is no long mode Linvalid: @@ -244,7 +237,7 @@ start64: ; jump to the boot processors's C code extern main - call main + jmp main jmp $ SECTION .data @@ -262,17 +255,17 @@ boot_stack: ; Bootstrap page tables are used during the initialization. 
ALIGN 4096 boot_pml4: - DQ boot_pdpt + 0x107 ; PG_PRESENT | PG_GLOBAL | PG_RW | PG_USER - times 510 DQ 0 ; PAGE_MAP_ENTRIES - 2 - DQ boot_pml4 + 0x303 ; PG_PRESENT | PG_GLOBAL | PG_RW | PG_SELF (self-reference) + DQ boot_pdpt + 0x7 ; PG_PRESENT | PG_GLOBAL | PG_RW | PG_USER + times 510 DQ 0 ; PAGE_MAP_ENTRIES - 2 + DQ boot_pml4 + 0x3 ; PG_PRESENT | PG_GLOBAL | PG_RW boot_pdpt: - DQ boot_pgd + 0x107 ; PG_PRESENT | PG_GLOBAL | PG_RW | PG_USER - times 510 DQ 0 ; PAGE_MAP_ENTRIES - 2 - DQ boot_pml4 + 0x303 ; PG_PRESENT | PG_GLOBAL | PG_RW | PG_SELF (self-reference) + DQ boot_pgd + 0x7 ; PG_PRESENT | PG_GLOBAL | PG_RW | PG_USER + times 510 DQ 0 ; PAGE_MAP_ENTRIES - 2 + DQ boot_pml4 + 0x3 ; PG_PRESENT | PG_GLOBAL | PG_RW boot_pgd: - DQ boot_pgt + 0x107 ; PG_PRESENT | PG_GLOBAL | PG_RW | PG_USER - times 510 DQ 0 ; PAGE_MAP_ENTRIES - 2 - DQ boot_pml4 + 0x303 ; PG_PRESENT | PG_GLOBAL | PG_RW | PG_SELF (self-reference) + DQ boot_pgt + 0x7 ; PG_PRESENT | PG_GLOBAL | PG_RW | PG_USER + times 510 DQ 0 ; PAGE_MAP_ENTRIES - 2 + DQ boot_pml4 + 0x3 ; PG_PRESENT | PG_GLOBAL | PG_RW boot_pgt: times 512 DQ 0 diff --git a/arch/x86/loader/include/ctype.h b/arch/x86/loader/include/ctype.h new file mode 100644 index 000000000..c5aa3915b --- /dev/null +++ b/arch/x86/loader/include/ctype.h @@ -0,0 +1,129 @@ +/**************************************************************************************** + * + * Author: Stefan Lankes + * Chair for Operating Systems, RWTH Aachen University + * Date: 24/03/2011 + * + **************************************************************************************** + * + * Written by the Chair for Operating Systems, RWTH Aachen University + * + * NO Copyright (C) 2010, Stefan Lankes, + * consider these trivial functions to be public domain. + * + * These functions are distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ */ + +/** + * @author Stefan Lankes + * @file include/ctype.h + * @brief Functions related to alphanumerical character values + * + * This file contains functions helping to determine + * the type of alphanumerical character values. + */ + +#ifndef __CTYPE_H_ +#define __CYTPE_H_ + +/** Returns true if the value of 'c' is an ASCII-charater */ +static inline int isascii(int c) +{ + return (((unsigned char)(c))<=0x7f); +} + +/** Applies an and-operation to + * push the value of 'c' into the ASCII-range */ +static inline int toascii(int c) +{ + return (((unsigned char)(c))&0x7f); +} + +/** Returns true if the value of 'c' is the + * space character or a control character */ +static inline int isspace(int c) +{ + if (!isascii(c)) + return 0; + + if (' ' == (unsigned char) c) + return 1; + if ('\n' == (unsigned char) c) + return 1; + if ('\r' == (unsigned char) c) + return 1; + if ('\t' == (unsigned char) c) + return 1; + if ('\v' == (unsigned char) c) + return 1; + if ('\f' == (unsigned char) c) + return 1; + + return 0; +} + +/** Returns true if the value of 'c' is a number */ +static inline int isdigit(int c) +{ + if (!isascii(c)) + return 0; + + if (((unsigned char) c >= '0') && ((unsigned char) c <= '9')) + return 1; + + return 0; +} + +/** Returns true if the value of 'c' is a lower case letter */ +static inline int islower(int c) +{ + if (!isascii(c)) + return 0; + + if (((unsigned char) c >= 'a') && ((unsigned char) c <= 'z')) + return 1; + + return 0; +} + +/** Returns true if the value of 'c' is an upper case letter */ +static inline int isupper(int c) +{ + if (!isascii(c)) + return 0; + + if (((unsigned char) c >= 'A') && ((unsigned char) c <= 'Z')) + return 1; + + return 0; +} + +/** Returns true if the value of 'c' is an alphabetic character */ +static inline int isalpha(int c) +{ + if (isupper(c) || islower(c)) + return 1; + + return 0; +} + +/** Makes the input character lower case.\n Will do nothing if it + * was something different than an upper case 
letter before. */ +static inline unsigned char tolower(unsigned char c) +{ + if (isupper(c)) + c -= 'A'-'a'; + return c; +} + +/** Makes the input character upper case.\n Will do nothing if it + * was something different than a lower case letter before. */ +static inline unsigned char toupper(unsigned char c) +{ + if (islower(c)) + c -= 'a'-'A'; + return c; +} + +#endif diff --git a/arch/x86/loader/include/limits.h b/arch/x86/loader/include/limits.h new file mode 100644 index 000000000..4f311b8ad --- /dev/null +++ b/arch/x86/loader/include/limits.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2010, Stefan Lankes, RWTH Aachen University + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * author Stefan Lankes + * @file include/limits.h + * @brief Define constants related to numerical value-ranges of variable types + * + * This file contains define constants for the numerical + * ranges of the most typical variable types. + */ + +#ifndef __ARCH_LIMITS_H__ +#define __ARCH_LIMITS_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** Number of bits in a char */ +#define CHAR_BIT 8 + +/** Maximum value for a signed char */ +#define SCHAR_MAX 0x7f +/** Minimum value for a signed char */ +#define SCHAR_MIN (-0x7f - 1) + +/** Maximum value for an unsigned char */ +#define UCHAR_MAX 0xff + +/** Maximum value for an unsigned short */ +#define USHRT_MAX 0xffff +/** Maximum value for a short */ +#define SHRT_MAX 0x7fff +/** Minimum value for a short */ +#define SHRT_MIN (-0x7fff - 1) + +/** Maximum value for an unsigned int */ +#define UINT_MAX 0xffffffffU +/** Maximum value for an int */ +#define INT_MAX 0x7fffffff +/** Minimum value for an int */ +#define INT_MIN (-0x7fffffff - 1) + +/** Maximum value for an unsigned long */ +#define ULONG_MAX 0xffffffffUL +/** Maximum value for a long */ +#define LONG_MAX 0x7fffffffL +/** Minimum value for a long */ +#define LONG_MIN (-0x7fffffffL - 1) + +/** Maximum value for an unsigned long long */ +#define ULLONG_MAX 0xffffffffffffffffULL +/** Maximum value for a long long */ +#define LLONG_MAX 0x7fffffffffffffffLL +/** Minimum value for a long long */ +#define LLONG_MIN 
(-0x7fffffffffffffffLL - 1) + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/arch/x86/loader/include/page.h b/arch/x86/loader/include/page.h index ddb158505..adf46fc72 100644 --- a/arch/x86/loader/include/page.h +++ b/arch/x86/loader/include/page.h @@ -86,12 +86,12 @@ static inline size_t sign_extend(ssize_t addr, int bits) #endif /// The number of entries in a page map table -#define PAGE_MAP_ENTRIES (1L << PAGE_MAP_BITS) +#define PAGE_MAP_ENTRIES (1L << PAGE_MAP_BITS) /// Align to next page -#define PAGE_FLOOR(addr) (((addr) + PAGE_SIZE - 1) & PAGE_MASK) +#define PAGE_CEIL(addr) (((addr) + PAGE_SIZE - 1) & PAGE_MASK) /// Align to page -#define PAGE_CEIL(addr) ( (addr) & PAGE_MASK) +#define PAGE_FLOOR(addr) ( (addr) & PAGE_MASK) /// Page is present #define PG_PRESENT (1 << 0) diff --git a/arch/x86/loader/include/stddef.h b/arch/x86/loader/include/stddef.h index 36f95f25f..76c2add7b 100644 --- a/arch/x86/loader/include/stddef.h +++ b/arch/x86/loader/include/stddef.h @@ -85,6 +85,14 @@ typedef char int8_t; /// 16 bit wide char type typedef unsigned short wchar_t; +/** @brief String to long + * + * @return Long value of the parsed numerical string + */ +long _strtol(const char* str, char** endptr, int base); + +#define strtol(str, endptr, base) _strtol((str), (endptr), (base)) + #ifdef __cplusplus } #endif diff --git a/arch/x86/loader/include/string.h b/arch/x86/loader/include/string.h index 84ef64a67..3d51a9492 100644 --- a/arch/x86/loader/include/string.h +++ b/arch/x86/loader/include/string.h @@ -41,7 +41,9 @@ char *strncpy(char *dest, const char *src, size_t n); char *strcpy(char *dest, const char *src); int strcmp(const char *s1, const char *s2); int strncmp(const char *s1, const char *s2, size_t n); -char *strstr(const char *s, const char *find); +char *_strstr(const char *s, const char *find); + +#define strstr(s, find) _strstr((s), (find)) #ifdef __cplusplus } diff --git a/arch/x86/loader/include/uart.h b/arch/x86/loader/include/uart.h index 
0567aafa5..52d041b9c 100644 --- a/arch/x86/loader/include/uart.h +++ b/arch/x86/loader/include/uart.h @@ -34,17 +34,11 @@ extern "C" { #endif -/** @brief Initialize UART output - * - * @return Returns 0 on success - */ -int uart_init(void); - /** @brief Initialize UART output without a device check * * @return Returns 0 on success */ -int uart_early_init(char*); +int uart_init(const char*); /** @brief Simple string output on a serial device. * @@ -56,7 +50,7 @@ int uart_puts(const char *text); /** @brief Simple character output on a serial device. * - * @return The original input character casted to int + * @return The original input character casted to int */ int uart_putchar(unsigned char c); diff --git a/arch/x86/loader/main.c b/arch/x86/loader/main.c index 12cba0777..b0439de85 100644 --- a/arch/x86/loader/main.c +++ b/arch/x86/loader/main.c @@ -42,8 +42,9 @@ extern const void kernel_start; extern const void kernel_end; extern const void bss_start; extern const void bss_end; +extern size_t uartport; -static int load_code(size_t viraddr, size_t phyaddr, size_t limit, uint32_t file_size, size_t mem_size) +static int load_code(size_t viraddr, size_t phyaddr, size_t limit, uint32_t file_size, size_t mem_size, size_t cmdline, size_t cmdsize) { const size_t displacement = 0x200000ULL - (phyaddr & 0x1FFFFFULL); @@ -65,6 +66,9 @@ static int load_code(size_t viraddr, size_t phyaddr, size_t limit, uint32_t file *((uint32_t*) (viraddr + 0x30)) = 0; // apicid *((uint64_t*) (viraddr + 0x38)) = mem_size; *((uint32_t*) (viraddr + 0x60)) = 1; // numa nodes + *((uint64_t*) (viraddr + 0x98)) = uartport; + *((uint64_t*) (viraddr + 0xA0)) = cmdline; + *((uint64_t*) (viraddr + 0xA8)) = cmdsize; // move file to a 2 MB boundary for(size_t va = viraddr+(npages << PAGE_BITS)+displacement-sizeof(uint8_t); va >= viraddr+displacement; va-=sizeof(uint8_t)) @@ -86,6 +90,8 @@ void main(void) elf_header_t* header = NULL; uint32_t file_size = 0; size_t mem_size = 0; + size_t cmdline_size = 0; + 
size_t cmdline = 0; // initialize .bss section memset((void*)&bss_start, 0x00, ((size_t) &bss_end - (size_t) &bss_start)); @@ -95,6 +101,12 @@ void main(void) kprintf("Loader starts at %p and ends at %p\n", &kernel_start, &kernel_end); kprintf("Found mb_info at %p\n", mb_info); + if (mb_info && mb_info->cmdline) { + cmdline = (size_t) mb_info->cmdline; + cmdline_size = strlen((char*)cmdline); + } + + // enable paging page_init(); if (mb_info) { @@ -171,7 +183,7 @@ void main(void) viraddr = prog_header->virt_addr; if (!phyaddr) phyaddr = prog_header->offset + (size_t)header; - file_size = prog_header->virt_addr + PAGE_FLOOR(prog_header->file_size) - viraddr; + file_size = prog_header->virt_addr + PAGE_CEIL(prog_header->file_size) - viraddr; mem_size += prog_header->mem_size; } break; @@ -184,7 +196,7 @@ void main(void) } } - if (BUILTIN_EXPECT(load_code(viraddr, phyaddr, limit, file_size, mem_size), 0)) + if (BUILTIN_EXPECT(load_code(viraddr, phyaddr, limit, file_size, mem_size, cmdline, cmdline_size), 0)) goto failed; kprintf("Entry point: 0x%zx\n", header->entry); diff --git a/arch/x86/loader/page.c b/arch/x86/loader/page.c index 3b88adbf8..0cab29437 100644 --- a/arch/x86/loader/page.c +++ b/arch/x86/loader/page.c @@ -45,7 +45,7 @@ extern const void kernel_start; extern const void kernel_end; /// This page is reserved for copying -#define PAGE_TMP (PAGE_FLOOR((size_t) &kernel_start) - PAGE_SIZE) +#define PAGE_TMP (PAGE_CEIL((size_t) &kernel_start) - PAGE_SIZE) /** This PGD table is initialized in entry.asm */ extern size_t* boot_map; @@ -188,12 +188,12 @@ int page_init(void) // already mapped => entry.asm //addr = (size_t) mb_info & PAGE_MASK; - //npages = PAGE_FLOOR(sizeof(*mb_info)) >> PAGE_BITS; + //npages = PAGE_CEIL(sizeof(*mb_info)) >> PAGE_BITS; //page_map(addr, addr, npages, PG_GLOBAL); if (mb_info->flags & MULTIBOOT_INFO_MODS) { addr = mb_info->mods_addr; - npages = PAGE_FLOOR(mb_info->mods_count*sizeof(multiboot_module_t)) >> PAGE_BITS; + npages = 
PAGE_CEIL(mb_info->mods_count*sizeof(multiboot_module_t)) >> PAGE_BITS; ret = page_map(addr, addr, npages, PG_GLOBAL); kprintf("Map module info at 0x%lx (ret %d)\n", addr, ret); @@ -202,14 +202,14 @@ int page_init(void) // at first we determine the first free page for(int i=0; imods_count; i++) { if (first_page < mmodule[i].mod_end) - first_page = PAGE_FLOOR(mmodule[i].mod_end); + first_page = PAGE_CEIL(mmodule[i].mod_end); } // we map only the first page of each module (= ELF file) because // we need only the program header of the ELF file for(int i=0; imods_count; i++) { addr = mmodule[i].mod_start; - npages = PAGE_FLOOR(mmodule[i].mod_end - mmodule[i].mod_start) >> PAGE_BITS; + npages = PAGE_CEIL(mmodule[i].mod_end - mmodule[i].mod_start) >> PAGE_BITS; ret = page_map(addr, addr, 1 /*npages*/, PG_GLOBAL); kprintf("Map first page of module %d at 0x%lx (ret %d)\n", i, addr, ret); kprintf("Module %d consists %zd\n", i, npages); diff --git a/arch/x86/loader/stdio.c b/arch/x86/loader/stdio.c index c4ee533fb..9c3e5f422 100644 --- a/arch/x86/loader/stdio.c +++ b/arch/x86/loader/stdio.c @@ -31,7 +31,7 @@ int koutput_init(void) { - uart_early_init((char*) mb_info->cmdline); + uart_init((const char*) (size_t)mb_info->cmdline); return 0; } diff --git a/arch/x86/loader/strstr.c b/arch/x86/loader/strstr.c new file mode 100644 index 000000000..d2b687e86 --- /dev/null +++ b/arch/x86/loader/strstr.c @@ -0,0 +1,73 @@ +/* $NetBSD: strstr.c,v 1.1 2005/12/20 19:28:52 christos Exp $ */ + +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Chris Torek. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * The code has been taken from NetBSD (sys/libkern/strstr.c) and is consequently + * BSD-licensed. Unnecessary functions have been removed and all typedefs required + * have been added. + */ + +/* HermiCore prelude */ +#include +#include +#include +#include + +/* + * Find the first occurrence of find in s. 
+ */ +char * +_strstr(s, find) + const char *s, *find; +{ + char c, sc; + size_t len; + + if (BUILTIN_EXPECT(!s, 0)) + return NULL; + if (BUILTIN_EXPECT(!find, 0)) + return NULL; + + if ((c = *find++) != 0) { + len = strlen(find); + do { + do { + if ((sc = *s++) == 0) + return (NULL); + } while (sc != c); + } while (strncmp(s, find, len) != 0); + s--; + } + return ((char *) s); +} diff --git a/arch/x86/loader/strtol.c b/arch/x86/loader/strtol.c new file mode 100644 index 000000000..639e11e9f --- /dev/null +++ b/arch/x86/loader/strtol.c @@ -0,0 +1,132 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Chris Torek. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * From: @(#)strtol.c 8.1 (Berkeley) 6/4/93 + */ + +/* + * The code has been taken from FreeBSD (sys/libkern/strtol.c) and is consequently + * BSD-licensed. Unnecessary functions have been removed and all typedefs required + * have been added. + */ + +/* HermitCore prelude */ +#include +#include +#include + +/* + * Convert a string to a long integer. + * + * Ignores `locale' stuff. Assumes that the upper and lower case + * alphabets and digits are each contiguous. + */ +long +_strtol(nptr, endptr, base) + const char *nptr; + char **endptr; + int base; +{ + const char *s = nptr; + unsigned long acc; + unsigned char c; + unsigned long cutoff; + int neg = 0, any, cutlim; + + /* + * Skip white space and pick up leading +/- sign if any. + * If base is 0, allow 0x for hex and 0 for octal, else + * assume decimal; if base is already 16, allow 0x. + */ + do { + c = *s++; + } while (isspace(c)); + if (c == '-') { + neg = 1; + c = *s++; + } else if (c == '+') + c = *s++; + if ((base == 0 || base == 16) && + c == '0' && (*s == 'x' || *s == 'X')) { + c = s[1]; + s += 2; + base = 16; + } + if (base == 0) + base = c == '0' ? 8 : 10; + + /* + * Compute the cutoff value between legal numbers and illegal + * numbers. That is the largest legal value, divided by the + * base. An input number that is greater than this value, if + * followed by a legal input character, is too big. 
One that + * is equal to this value may be valid or not; the limit + * between valid and invalid numbers is then based on the last + * digit. For instance, if the range for longs is + * [-2147483648..2147483647] and the input base is 10, + * cutoff will be set to 214748364 and cutlim to either + * 7 (neg==0) or 8 (neg==1), meaning that if we have accumulated + * a value > 214748364, or equal but the next digit is > 7 (or 8), + * the number is too big, and we will return a range error. + * + * Set any if any `digits' consumed; make it negative to indicate + * overflow. + */ + cutoff = neg ? -(unsigned long)LONG_MIN : LONG_MAX; + cutlim = cutoff % (unsigned long)base; + cutoff /= (unsigned long)base; + for (acc = 0, any = 0;; c = *s++) { + if (!isascii(c)) + break; + if (isdigit(c)) + c -= '0'; + else if (isalpha(c)) + c -= isupper(c) ? 'A' - 10 : 'a' - 10; + else + break; + if (c >= base) + break; + if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim)) + any = -1; + else { + any = 1; + acc *= base; + acc += c; + } + } + if (any < 0) { + acc = neg ? LONG_MIN : LONG_MAX; + } else if (neg) + acc = -acc; + if (endptr != 0) + *((const char **)endptr) = any ? 
s - 1 : nptr; + return (acc); +} diff --git a/arch/x86/loader/uart.c b/arch/x86/loader/uart.c index d6013dd61..5d0599843 100644 --- a/arch/x86/loader/uart.c +++ b/arch/x86/loader/uart.c @@ -92,28 +92,38 @@ #define DEFAULT_UART_PORT 0 //0xc110 -static size_t iobase = 0; +size_t uartport = 0; static inline unsigned char read_from_uart(uint32_t off) { uint8_t c; - if (iobase) - c = inportb(iobase + off); + if (uartport) + c = inportb(uartport + off); return c; } +static inline int is_transmit_empty(void) +{ + if (uartport) + return inportb(uartport + UART_LSR) & 0x20; + + return 1; +} + static void write_to_uart(uint32_t off, unsigned char c) { - if (iobase) - outportb(iobase + off, c); + while (is_transmit_empty() == 0) ; + + if (uartport) + outportb(uartport + off, c); } /* Puts a single character on a serial device */ int uart_putchar(unsigned char c) { - if (!iobase) + if (!uartport) return 0; write_to_uart(UART_TX, c); @@ -126,7 +136,7 @@ int uart_puts(const char *text) { size_t i, len = strlen(text); - if (!iobase) + if (!uartport) return 0; for (i = 0; i < len; i++) @@ -137,22 +147,12 @@ int uart_puts(const char *text) static int uart_config(void) { - if (!iobase) + if (!uartport) return 0; - /* - * enable FIFOs - * clear RX and TX FIFO - * set irq trigger to 8 bytes - */ - write_to_uart(UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT | UART_FCR_TRIGGER_1); - /* disable interrupts */ write_to_uart(UART_IER, 0); - /* DTR + RTS */ - write_to_uart(UART_MCR, UART_MCR_DTR|UART_MCR_RTS); - /* * 8bit word length * 1 stop bit @@ -165,82 +165,39 @@ static int uart_config(void) write_to_uart(UART_LCR, lcr); /* - * set baudrate to 9600 + * set baudrate to 38400 */ - uint32_t divisor = 1843200 / 9600; // 115200; - write_to_uart(UART_DLL, divisor & 0xff); - write_to_uart(UART_DLM, (divisor >> 8) & 0xff); + write_to_uart(UART_DLL, 0x03); + write_to_uart(UART_DLM, 0x00); /* set DLAB=0 */ write_to_uart(UART_LCR, lcr & (~UART_LCR_DLAB)); + /* + * 
enable FIFOs + * clear RX and TX FIFO + * set irq trigger to 8 bytes + */ + write_to_uart(UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT | UART_FCR_TRIGGER_1); + return 0; } -extern const void kernel_start; - -int uart_early_init(char* cmdline) +int uart_init(const char* cmdline) { -#if 1 - // default value of our QEMU configuration - iobase = DEFAULT_UART_PORT; -#else - if (BUILTIN_EXPECT(!cmdline, 0)) - return -EINVAL; + char* str; - char* str = strstr(cmdline, "uart="); - if (!str) - return -EINVAL; + if (!uartport && cmdline && ((str = strstr(cmdline, "uart=io:")) != NULL)) + uartport = strtol(str+8, (char **)NULL, 16); - if (strncmp(str, "uart=io:", 8) == 0) { - iobase = strtol(str+8, (char **)NULL, 16); - if (!iobase) - iobase = DEFAULT_UART_PORT; - return -EINVAL; - } -#endif + if (!uartport) + uartport = DEFAULT_UART_PORT; + + if (!uartport) + return 0; // configure uart return uart_config(); } -int uart_init(void) -{ -#ifdef CONFIG_PCI - pci_info_t pci_info; - uint32_t bar = 0; - - // Searching for Intel's UART device - if (pci_get_device_info(0x8086, 0x0936, &pci_info) == 0) - goto Lsuccess; - // Searching for Qemu's UART device - if (pci_get_device_info(0x1b36, 0x0002, &pci_info) == 0) - goto Lsuccess; - // Searching for Qemu's 2x UART device (pci-serial-2x) - if (pci_get_device_info(0x1b36, 0x0003, &pci_info) == 0) - goto Lsuccess; - // Searching for Qemu's 4x UART device (pci-serial-4x) - if (pci_get_device_info(0x1b36, 0x0004, &pci_info) == 0) - goto Lsuccess; - - iobase = DEFAULT_UART_PORT; - - return uart_config(); - -Lsuccess: - iobase = pci_info.base[bar]; - //irq_install_handler(32+pci_info.irq, uart_handler); - kprintf("UART uses io address 0x%x\n", iobase); - - // configure uart - return uart_config(); -#else - // default value of our QEMU configuration - iobase = DEFAULT_UART_PORT; - - // configure uart - return uart_config(); -#endif -} - #endif diff --git a/arch/x86/mm/memory.c b/arch/x86/mm/memory.c index 
8313ddb01..697c2b6ce 100644 --- a/arch/x86/mm/memory.c +++ b/arch/x86/mm/memory.c @@ -194,13 +194,13 @@ void* page_alloc(size_t sz, uint32_t flags) { size_t viraddr = 0; size_t phyaddr; - uint32_t npages = PAGE_FLOOR(sz) >> PAGE_BITS; + uint32_t npages = PAGE_CEIL(sz) >> PAGE_BITS; size_t pflags = PG_PRESENT|PG_GLOBAL|PG_XD; if (BUILTIN_EXPECT(!npages, 0)) goto oom; - viraddr = vma_alloc(PAGE_FLOOR(sz), flags); + viraddr = vma_alloc(PAGE_CEIL(sz), flags); if (BUILTIN_EXPECT(!viraddr, 0)) goto oom; @@ -238,10 +238,10 @@ void page_free(void* viraddr, size_t sz) phyaddr = virt_to_phys((size_t)viraddr); - vma_free((size_t) viraddr, (size_t) viraddr + PAGE_FLOOR(sz)); + vma_free((size_t) viraddr, (size_t) viraddr + PAGE_CEIL(sz)); if (phyaddr) - put_pages(phyaddr, PAGE_FLOOR(sz) >> PAGE_BITS); + put_pages(phyaddr, PAGE_CEIL(sz) >> PAGE_BITS); } int memory_init(void) @@ -267,13 +267,13 @@ int memory_init(void) // mark first available memory slot as free for(; mmap < mmap_end; mmap = (multiboot_memory_map_t*) ((size_t) mmap + sizeof(uint32_t) + mmap->size)) { if (mmap->type == MULTIBOOT_MEMORY_AVAILABLE) { - start_addr = PAGE_FLOOR(mmap->addr); - end_addr = PAGE_CEIL(mmap->addr + mmap->len); + start_addr = PAGE_CEIL(mmap->addr); + end_addr = PAGE_FLOOR(mmap->addr + mmap->len); LOG_INFO("Free region 0x%zx - 0x%zx\n", start_addr, end_addr); if ((start_addr <= base) && (end_addr >= PAGE_2M_FLOOR((size_t) &kernel_start + image_size))) { - init_list.start = PAGE_2M_FLOOR((size_t) &kernel_start + image_size); + init_list.start = PAGE_2M_CEIL((size_t) &kernel_start + image_size); init_list.end = end_addr; LOG_INFO("Add region 0x%zx - 0x%zx\n", init_list.start, init_list.end); @@ -295,13 +295,13 @@ int memory_init(void) atomic_int64_add(&total_pages, (limit-base) >> PAGE_BITS); atomic_int64_add(&total_available_pages, (limit-base) >> PAGE_BITS); - init_list.start = PAGE_2M_FLOOR(base + image_size); + init_list.start = PAGE_2M_CEIL(base + image_size); init_list.end = limit; } // 
determine allocated memory, we use 2MB pages to map the kernel - atomic_int64_add(&total_allocated_pages, PAGE_2M_FLOOR(image_size) >> PAGE_BITS); - atomic_int64_sub(&total_available_pages, PAGE_2M_FLOOR(image_size) >> PAGE_BITS); + atomic_int64_add(&total_allocated_pages, PAGE_2M_CEIL(image_size) >> PAGE_BITS); + atomic_int64_sub(&total_available_pages, PAGE_2M_CEIL(image_size) >> PAGE_BITS); LOG_INFO("free list starts at 0x%zx, limit 0x%zx\n", init_list.start, init_list.end); @@ -324,10 +324,10 @@ int memory_init(void) for(; mmap < mmap_end; mmap = (multiboot_memory_map_t*) ((size_t) mmap + sizeof(uint32_t) + mmap->size)) { if (mmap->type == MULTIBOOT_MEMORY_AVAILABLE) { - start_addr = PAGE_FLOOR(mmap->addr); - end_addr = PAGE_CEIL(mmap->addr + mmap->len); + start_addr = PAGE_CEIL(mmap->addr); + end_addr = PAGE_FLOOR(mmap->addr + mmap->len); - if ((start_addr <= base) && (end_addr >= PAGE_2M_FLOOR(base+image_size))) + if ((start_addr <= base) && (end_addr >= PAGE_2M_CEIL(base+image_size))) end_addr = base; // ignore everything below 1M => reserve for I/O devices @@ -335,11 +335,11 @@ int memory_init(void) start_addr = GAP_BELOW; if (start_addr < (size_t)mb_info) - start_addr = PAGE_FLOOR((size_t)mb_info); + start_addr = PAGE_CEIL((size_t)mb_info); - if (mb_info->flags & MULTIBOOT_INFO_CMDLINE) { - if (start_addr < (size_t) mb_info->cmdline+2*PAGE_SIZE) - start_addr = PAGE_FLOOR((size_t) mb_info->cmdline+2*PAGE_SIZE); + if ((mb_info->flags & MULTIBOOT_INFO_CMDLINE) && cmdline) { + if (start_addr < (size_t) cmdline+cmdsize) + start_addr = PAGE_CEIL((size_t) cmdline+cmdsize); } if (start_addr >= end_addr) diff --git a/arch/x86/mm/page.c b/arch/x86/mm/page.c index af4b7c791..f381daf46 100644 --- a/arch/x86/mm/page.c +++ b/arch/x86/mm/page.c @@ -325,11 +325,16 @@ int page_init(void) LOG_INFO("Detect Go runtime! 
Consequently, HermitCore zeroed heap.\n"); } - if (mb_info && ((mb_info->cmdline & PAGE_MASK) != ((size_t) mb_info & PAGE_MASK))) { - LOG_INFO("Map multiboot cmdline 0x%x into the virtual address space\n", mb_info->cmdline); - // reserve 2 pages for long cmdline strings - page_map(((size_t) mb_info->cmdline) & PAGE_MASK, ((size_t) mb_info->cmdline) & PAGE_MASK, 2, PG_GLOBAL|PG_RW|PG_PRESENT); - } + if (mb_info && (mb_info->flags & MULTIBOOT_INFO_CMDLINE) && (cmdline)) + { + size_t i = 0; + + while(((size_t) cmdline + i) <= ((size_t) cmdline + cmdsize)) + { + page_map(((size_t) cmdline + i) & PAGE_MASK, ((size_t) cmdline + i) & PAGE_MASK, 1, PG_GLOBAL|PG_RW|PG_PRESENT); + i += PAGE_SIZE; + } + } else cmdline = 0; /* Replace default pagefault handler */ irq_uninstall_handler(14); diff --git a/arch/x86/mm/vma.c b/arch/x86/mm/vma.c index cb56aacea..ad58d1b79 100644 --- a/arch/x86/mm/vma.c +++ b/arch/x86/mm/vma.c @@ -25,7 +25,9 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ +#include #include +#include #include int vma_arch_init(void) @@ -37,11 +39,20 @@ int vma_arch_init(void) if (BUILTIN_EXPECT(ret, 0)) goto out; - if ((mb_info->cmdline & PAGE_MASK) != ((size_t) mb_info & PAGE_MASK)) { - // reserve 2 pages for long cmdline strings - ret = vma_add((size_t)mb_info->cmdline & PAGE_MASK, ((size_t)mb_info->cmdline & PAGE_MASK) + 2*PAGE_SIZE, VMA_READ|VMA_WRITE); - if (BUILTIN_EXPECT(ret, 0)) - goto out; + if ((mb_info->flags & MULTIBOOT_INFO_CMDLINE) && cmdline) { + LOG_INFO("vma_arch_init: map cmdline %p (size 0x%zd)", cmdline, cmdsize); + + size_t i = 0; + while(((size_t) cmdline + i) < ((size_t) cmdline + cmdsize)) + { + if ((((size_t)cmdline + i) & PAGE_MASK) != ((size_t) mb_info & PAGE_MASK)) { + ret = vma_add(((size_t)cmdline + i) & PAGE_MASK, (((size_t)cmdline + i) & PAGE_MASK) + PAGE_SIZE, VMA_READ|VMA_WRITE); + if (BUILTIN_EXPECT(ret, 0)) + goto out; + } + + i += PAGE_SIZE; + } } } diff --git a/cmake/HermitCore-Configuration.cmake b/cmake/HermitCore-Configuration.cmake index daebd24cb..bd5753eef 100644 --- a/cmake/HermitCore-Configuration.cmake +++ b/cmake/HermitCore-Configuration.cmake @@ -1,4 +1,4 @@ -set(PACKAGE_VERSION "0.1" CACHE STRING +set(PACKAGE_VERSION "0.2.1" CACHE STRING "HermitCore current version") set(MAX_CORES "512" CACHE STRING diff --git a/config/bzImage b/config/bzImage index 04f570ccc..14c60fd39 100644 Binary files a/config/bzImage and b/config/bzImage differ diff --git a/config/linux_config b/config/linux_config index 5613e1f79..f52a07bd5 100644 --- a/config/linux_config +++ b/config/linux_config @@ -139,7 +139,7 @@ CONFIG_ARCH_SUPPORTS_INT128=y # CONFIG_SYSFS_DEPRECATED is not set # CONFIG_RELAY is not set CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="../config/initrd.cpio" +CONFIG_INITRAMFS_SOURCE="/work/lankes/HermitCore/config/initrd.cpio" CONFIG_INITRAMFS_ROOT_UID=0 CONFIG_INITRAMFS_ROOT_GID=0 CONFIG_RD_GZIP=y diff --git a/drivers/net/e1000.c b/drivers/net/e1000.c index 8fb4f4b57..c460f96f6 
100644 --- a/drivers/net/e1000.c +++ b/drivers/net/e1000.c @@ -1,4 +1,4 @@ -/* +/* * Copyright 2012 Stefan Lankes, Chair for Operating Systems, * RWTH Aachen University * @@ -58,7 +58,7 @@ typedef struct { uint32_t device; } board_t; -static board_t board_tbl[] = +static board_t board_tbl[] = { {"Intel", "Intel E1000 (82542)", 0x8086, 0x1000}, {"Intel", "Intel E1000 (82543GC FIBER)", 0x8086, 0x1001}, @@ -67,7 +67,7 @@ static board_t board_tbl[] = {"Intel", "Intel E1000 (82544EI FIBER)", 0x8086, 0x1009}, {"Intel", "Intel E1000 (82544GC COPPER)", 0x8086, 0x100C}, {"Intel", "Intel E1000 (82544GC LOM)", 0x8086, 0x100D}, - {"Intel", "Intel E1000 (82540EM)", 0x8086, 0x100E}, + {"Intel", "Intel E1000 (82540EM)", 0x8086, 0x100E}, {"Intel", "Intel E1000 (82540EM LOM)", 0x8086, 0x1015}, {"Intel", "Intel E1000 (82540EP LOM)", 0x8086, 0x1016}, {"Intel", "Intel E1000 (82540EP)", 0x8086, 0x1017}, @@ -132,7 +132,7 @@ static uint16_t eeprom_read(volatile uint8_t* base, uint8_t addr) e1000_write(base, E1000_EERD, 1 | ((uint32_t)(addr) << 8)); - while(!((tmp = e1000_read(base, E1000_EERD)) & (1 << 4))) + while(!((tmp = e1000_read(base, E1000_EERD)) & (1 << 4))) udelay(1); data = (uint16_t)((tmp >> 16) & 0xFFFF); @@ -148,7 +148,7 @@ static uint16_t eeprom_read(uint8_t* base, uint8_t addr) e1000_write(base, E1000_EERD, 1 | ((uint32_t)(addr) << 2)); - while(!((tmp = e1000_read(base, E1000_EERD)) & (1 << 1))) + while(!((tmp = e1000_read(base, E1000_EERD)) & (1 << 1))) udelay(1); data = (uint16_t)((tmp >> 16) & 0xFFFF); @@ -198,7 +198,7 @@ static err_t e1000if_output(struct netif* netif, struct pbuf* p) // update the tail so the hardware knows it's ready e1000if->tx_tail = (e1000if->tx_tail + 1) % NUM_TX_DESCRIPTORS; - e1000_write(e1000if->bar0, E1000_TDT, e1000if->tx_tail); + e1000_write(e1000if->bar0, E1000_TDT, e1000if->tx_tail); #if ETH_PAD_SIZE pbuf_header(p, ETH_PAD_SIZE); /* reclaim the padding word */ @@ -256,7 +256,7 @@ static void e1000_rx_inthandler(struct netif* netif) 
LINK_STATS_INC(link.drop); } -no_eop: +no_eop: e1000if->rx_desc[e1000if->rx_tail].status = 0; // update tail and write the value to the device @@ -333,12 +333,12 @@ err_t e1000if_init(struct netif* netif) uint16_t tmp16, speed, cold = 0x40; uint8_t tmp8, is64bit, mem_type, prefetch; static uint8_t num = 0; - + LWIP_ASSERT("netif != NULL", (netif != NULL)); tmp8 = 0; while (board_tbl[tmp8].vendor_str) { - if (pci_get_device_info(board_tbl[tmp8].vendor, board_tbl[tmp8].device, &pci_info, 1) == 0) + if (pci_get_device_info(board_tbl[tmp8].vendor, board_tbl[tmp8].device, PCI_IGNORE_SUBID, &pci_info, 1) == 0) break; tmp8++; } @@ -370,11 +370,11 @@ err_t e1000if_init(struct netif* netif) netif->state = e1000if; mynetif = netif; - e1000if->bar0 = (uint8_t*) vma_alloc(PAGE_FLOOR(pci_info.size[0]), VMA_READ|VMA_WRITE); + e1000if->bar0 = (uint8_t*) vma_alloc(PAGE_CEIL(pci_info.size[0]), VMA_READ|VMA_WRITE); if (BUILTIN_EXPECT(!e1000if->bar0, 0)) goto oom; - int ret = page_map((size_t)e1000if->bar0, PAGE_CEIL(pci_info.base[0]), PAGE_FLOOR(pci_info.size[0]) >> PAGE_BITS, PG_GLOBAL|PG_RW|PG_PCD); + int ret = page_map((size_t)e1000if->bar0, PAGE_FLOOR(pci_info.base[0]), PAGE_CEIL(pci_info.size[0]) >> PAGE_BITS, PG_GLOBAL|PG_RW|PG_PCD); if (BUILTIN_EXPECT(ret, 0)) goto oom; @@ -394,7 +394,7 @@ err_t e1000if_init(struct netif* netif) goto oom; memset((void*) e1000if->tx_desc, 0x00, NUM_TX_DESCRIPTORS*sizeof(tx_desc_t)); - LWIP_DEBUGF(NETIF_DEBUG, ("e1000if_init: Found %s at mmio 0x%x (size 0x%x), irq %u\n", board_tbl[tmp8].device_str, + LWIP_DEBUGF(NETIF_DEBUG, ("e1000if_init: Found %s at mmio 0x%x (size 0x%x), irq %u\n", board_tbl[tmp8].device_str, pci_info.base[0] & ~0xF, pci_info.size[0], e1000if->irq)); //LWIP_DEBUGF(NETIF_DEBUG, ("e1000if_init: Map iobase to %p\n", e1000if->bar0)); LWIP_DEBUGF(NETIF_DEBUG, ("e1000if_init: is64bit %u, prefetch %u\n", is64bit, prefetch)); @@ -439,7 +439,7 @@ err_t e1000if_init(struct netif* netif) // transmit buffer length; NUM_TX_DESCRIPTORS 
16-byte descriptors e1000_write(e1000if->bar0, E1000_TDLEN , (uint32_t)(NUM_TX_DESCRIPTORS * sizeof(tx_desc_t))); - + // setup head and tail pointers e1000_write(e1000if->bar0, E1000_TDH, 0); e1000_write(e1000if->bar0, E1000_TDT, 0); @@ -472,7 +472,7 @@ err_t e1000if_init(struct netif* netif) tmp32 = 0; for(tmp8=0; tmp8<2; tmp8++) ((uint8_t*) &tmp32)[tmp8] = netif->hwaddr[tmp8+4]; - e1000_write(e1000if->bar0, E1000_RA+4, tmp32 | (1 << 31)); // set also AV bit to check incoming packets + e1000_write(e1000if->bar0, E1000_RA+4, tmp32 | (1 << 31)); // set also AV bit to check incoming packets /* Zero out the other receive addresses. */ for (tmp8=1; tmp8<16; tmp8++) { diff --git a/drivers/net/mmnif.c b/drivers/net/mmnif.c index 3ff08aa44..eab0284c6 100644 --- a/drivers/net/mmnif.c +++ b/drivers/net/mmnif.c @@ -594,7 +594,7 @@ err_t mmnif_init(struct netif *netif) goto out; } - err = vma_add((size_t)header_start_address, PAGE_FLOOR((size_t)header_start_address + ((nodes * header_size) >> PAGE_BITS)), VMA_READ|VMA_WRITE|VMA_CACHEABLE); + err = vma_add((size_t)header_start_address, PAGE_CEIL((size_t)header_start_address + ((nodes * header_size) >> PAGE_BITS)), VMA_READ|VMA_WRITE|VMA_CACHEABLE); if (BUILTIN_EXPECT(err, 0)) { LOG_ERROR("mmnif init(): vma_add failed for header_start_address %p\n", header_start_address); goto out; @@ -620,7 +620,7 @@ err_t mmnif_init(struct netif *netif) goto out; } - err = vma_add((size_t)heap_start_address, PAGE_FLOOR((size_t)heap_start_address + ((nodes * heap_size) >> PAGE_BITS)), VMA_READ|VMA_WRITE|VMA_CACHEABLE); + err = vma_add((size_t)heap_start_address, PAGE_CEIL((size_t)heap_start_address + ((nodes * heap_size) >> PAGE_BITS)), VMA_READ|VMA_WRITE|VMA_CACHEABLE); if (BUILTIN_EXPECT(!heap_start_address, 0)) { LOG_ERROR("mmnif init(): vma_add failed for heap_start_address %p\n", heap_start_address); @@ -686,8 +686,6 @@ err_t mmnif_init(struct netif *netif) /* maximum transfer unit */ netif->mtu = 1500; - /* broadcast capability, keep all 
default flags */ - //netif->flags |= NETIF_FLAG_BROADCAST; /* set link up */ netif->flags |= NETIF_FLAG_LINK_UP; diff --git a/drivers/net/rtl8139.c b/drivers/net/rtl8139.c index e3e5bd0e0..3358cfbcf 100644 --- a/drivers/net/rtl8139.c +++ b/drivers/net/rtl8139.c @@ -1,4 +1,4 @@ -/* +/* * Copyright 2010 Stefan Lankes, Chair for Operating Systems, * RWTH Aachen University * @@ -68,7 +68,7 @@ typedef struct { uint32_t device; } board_t; -static board_t board_tbl[] = +static board_t board_tbl[] = { {"RealTek", "RealTek RTL8139", 0x10ec, 0x8139}, {"RealTek", "RealTek RTL8129 Fast Ethernet", 0x10ec, 0x8129}, @@ -307,7 +307,7 @@ err_t rtl8139if_init(struct netif* netif) tmp8 = 0; while (board_tbl[tmp8].vendor_str) { - if (pci_get_device_info(board_tbl[tmp8].vendor, board_tbl[tmp8].device, &pci_info, 1) == 0) + if (pci_get_device_info(board_tbl[tmp8].vendor, board_tbl[tmp8].device, PCI_IGNORE_SUBID, &pci_info, 1) == 0) break; tmp8++; } @@ -388,8 +388,8 @@ err_t rtl8139if_init(struct netif* netif) outportb(rtl8139if->iobase + CR, CR_RST); /* - * The RST bit must be checked to make sure that the chip has finished the reset. - * If the RST bit is high (1), then the reset is still in operation. + * The RST bit must be checked to make sure that the chip has finished the reset. + * If the RST bit is high (1), then the reset is still in operation. */ udelay(10000); tmp16 = 10000; @@ -419,7 +419,7 @@ err_t rtl8139if_init(struct netif* netif) outportb(rtl8139if->iobase + CONFIG1, 0); // disable driver loaded and lanwake bits, turn driver loaded bit back on - outportb(rtl8139if->iobase + CONFIG1, + outportb(rtl8139if->iobase + CONFIG1, (inportb(rtl8139if->iobase + CONFIG1) & ~(CONFIG1_DVRLOAD | CONFIG1_LWACT)) | CONFIG1_DVRLOAD); // unlock config register @@ -430,7 +430,7 @@ err_t rtl8139if_init(struct netif* netif) * AB - Accept Broadcast: Accept broadcast packets sent to mac ff:ff:ff:ff:ff:ff * AM - Accept Multicast: Accept multicast packets. 
* APM - Accept Physical Match: Accept packets send to NIC's MAC address. - * AAP - Accept All Packets. Accept all packets (run in promiscuous mode). + * AAP - Accept All Packets. Accept all packets (run in promiscuous mode). */ outportl(rtl8139if->iobase + RCR, RCR_MXDMA2|RCR_MXDMA1|RCR_MXDMA0|RCR_AB|RCR_AM|RCR_APM|RCR_AAP); // The WRAP bit isn't set! @@ -456,7 +456,7 @@ err_t rtl8139if_init(struct netif* netif) if (tmp16 & BMCR_SPD1000) speed = 1000; else if (tmp16 & BMCR_SPD100) - speed = 100; + speed = 100; else speed = 10; // Enable Receive and Transmitter diff --git a/drivers/net/vioif.c b/drivers/net/vioif.c new file mode 100644 index 000000000..68c6a345f --- /dev/null +++ b/drivers/net/vioif.c @@ -0,0 +1,458 @@ +/* + * Copyright (c) 2017, Stefan Lankes, RWTH Aachen University + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define VENDOR_ID 0x1AF4 +#define VIOIF_BUFFER_SIZE 0x2048 +#define MIN(a, b) (a) < (b) ? (a) : (b) +#define QUEUE_LIMIT 256 + +/* NOTE: RX queue is 0, TX queue is 1 - Virtio Std. §5.1.2 */ +#define TX_NUM 1 +#define RX_NUM 0 + +static struct netif* mynetif = NULL; + +static inline void vioif_enable_interrupts(virt_queue_t* vq) +{ + vq->vring.used->flags = 0; +} + +static inline void vioif_disable_interrupts(virt_queue_t* vq) +{ + vq->vring.used->flags = 1; +} + +/* + * @return error code + * - ERR_OK: packet transferred to hardware + * - ERR_CONN: no link or link failure + * - ERR_IF: could not transfer to link (hardware buffer full?) 
+ */ +static err_t vioif_output(struct netif* netif, struct pbuf* p) +{ + vioif_t* vioif = netif->state; + virt_queue_t* vq = &vioif->queues[TX_NUM]; + struct pbuf *q; + uint32_t i; + uint16_t buffer_index; + + if (BUILTIN_EXPECT(p->tot_len > 1792, 0)) { + LOG_ERROR("vioif_output: packet is longer than 1792 bytes\n"); + return ERR_IF; + } + + for(buffer_index=0; buffer_index<vq->vring.num; buffer_index++) { + if (!vq->vring.desc[buffer_index].len) { + LOG_DEBUG("vioif_output: buffer %u is free\n", buffer_index); + break; + } + } + LOG_DEBUG("vioif: found free buffer %d\n", buffer_index); + + if (BUILTIN_EXPECT(buffer_index >= vq->vring.num, 0)) { + LOG_ERROR("vioif_output: too many packets at once\n"); + return ERR_IF; + } + +#if ETH_PAD_SIZE + pbuf_header(p, -ETH_PAD_SIZE); /* drop the padding word */ +#endif + + const size_t hdr_sz = sizeof(struct virtio_net_hdr); + // NOTE: packet is fully checksummed => all flags are set to zero + memset((void*) (vq->virt_buffer + buffer_index * VIOIF_BUFFER_SIZE), 0x00, hdr_sz); + + vq->vring.desc[buffer_index].addr = vq->phys_buffer + buffer_index * VIOIF_BUFFER_SIZE; + vq->vring.desc[buffer_index].len = p->tot_len + hdr_sz; + vq->vring.desc[buffer_index].flags = 0; + // we send only one buffer because it is large enough for our packet + vq->vring.desc[buffer_index].next = 0; //(buffer_index+1) % vq->vring.num; + + + /* + * q traverses through linked list of pbuf's + * This list MUST consist of a single packet ONLY + */ + for (q = p, i = 0; q != 0; q = q->next) { + memcpy((void*) (vq->virt_buffer + hdr_sz + buffer_index * VIOIF_BUFFER_SIZE + i), q->payload, q->len); + i += q->len; + } + + // Add it in the available ring + uint16_t index = vq->vring.avail->idx % vq->vring.num; + vq->vring.avail->ring[index] = buffer_index; + + // be sure that everything is written + mb(); + + vq->vring.avail->idx++; + + // be sure that everything is written + mb(); + + /* + * Notify the changes + * NOTE: RX queue is 0, TX queue is 1 - Virtio Std.
§5.1.2 + */ + outportw(vioif->iobase+VIRTIO_PCI_QUEUE_NOTIFY, TX_NUM); + +#if ETH_PAD_SIZE + pbuf_header(p, ETH_PAD_SIZE); /* reclaim the padding word */ +#endif + + LINK_STATS_INC(link.xmit); + + return ERR_OK; +} + +static void vioif_rx_inthandler(struct netif* netif) +{ + vioif_t* vioif = mynetif->state; + virt_queue_t* vq = &vioif->queues[RX_NUM]; + + while(vq->last_seen_used != vq->vring.used->idx) + { + const size_t hdr_sz = sizeof(struct virtio_net_hdr); + struct vring_used_elem* used = &vq->vring.used->ring[vq->last_seen_used % vq->vring.num]; + struct virtio_net_hdr* hdr = (struct virtio_net_hdr*) (vq->virt_buffer + used->id * VIOIF_BUFFER_SIZE); + + LOG_DEBUG("vq->vring.used->idx %d, vq->vring.used->flags %d, vq->last_seen_used %d\n", vq->vring.used->idx, vq->vring.used->flags, vq->last_seen_used); + LOG_DEBUG("used id %d, len %d\n", used->id, used->len); + LOG_DEBUG("hdr len %d, flags %d\n", hdr->hdr_len, hdr->flags); + + struct pbuf* p = pbuf_alloc(PBUF_RAW, used->len, PBUF_POOL); + if (p) { + uint16_t pos; + struct pbuf* q; + +#if ETH_PAD_SIZE + pbuf_header(p, -ETH_PAD_SIZE); /* drop the padding word */ +#endif + for(q=p, pos=0; q!=NULL; q=q->next) { + memcpy((uint8_t*) q->payload, + (uint8_t*) (vq->virt_buffer + hdr_sz + used->id * VIOIF_BUFFER_SIZE + pos), + q->len); + pos += q->len; + } +#if ETH_PAD_SIZE + pbuf_header(p, ETH_PAD_SIZE); /* reclaim the padding word */ +#endif + LINK_STATS_INC(link.recv); + + // forward packet to LwIP + netif->input(p, netif); + } else { + LOG_ERROR("vioif_rx_inthandler: not enough memory!\n"); + LINK_STATS_INC(link.memerr); + LINK_STATS_INC(link.drop); + goto oom; + } + + vq->vring.avail->ring[vq->vring.avail->idx % vq->vring.num] = used->id; + vq->vring.avail->idx++; + vq->last_seen_used++; + } + +oom: + vioif->polling = 0; + vioif_enable_interrupts(vq); + mb(); +} + + +/* this function is called in the context of the tcpip thread or the irq handler (by using NO_SYS) */ +static void vioif_poll(void* ctx) +{ + 
vioif_rx_inthandler(mynetif); +} + +static void vioif_handler(struct state* s) +{ + vioif_t* vioif = mynetif->state; + + LOG_DEBUG("vioif: receive interrupt\n"); + + // reset interrupt by reading the isr port + uint8_t isr = inportb(vioif->iobase+VIRTIO_PCI_ISR); + + // did we receive an interrupt for this device? + if (!(isr & 0x01)) + return; + + // free TX queue + virt_queue_t* vq = &vioif->queues[1]; + + vioif_disable_interrupts(vq); + while(vq->last_seen_used != vq->vring.used->idx) + { + struct vring_used_elem* used = &vq->vring.used->ring[vq->last_seen_used % vq->vring.num]; + LOG_DEBUG("consumed TX elements: index %u, len %u\n", used->id, used->len); + // mark as free + vq->vring.desc[used->id].len = 0; + vq->last_seen_used++; + } + vioif_enable_interrupts(vq); + mb(); + + // check RX queue + vq = &vioif->queues[0]; + vioif_disable_interrupts(vq); + if (!vioif->polling && (vq->last_seen_used != vq->vring.used->idx)) + { +#if NO_SYS + vioif_poll(NULL); +#else + if (tcpip_callback_with_block(vioif_poll, NULL, 0) == ERR_OK) { + vioif->polling = 1; + } else { + LOG_ERROR("rtl8139if_handler: unable to send a poll request to the tcpip thread\n"); + } +#endif + } else vioif_enable_interrupts(vq); + mb(); +} + +static int vioif_queue_setup(vioif_t* dev) +{ + virt_queue_t* vq; + uint32_t total_size; + unsigned int num; + + for (uint32_t index=0; index<VIOIF_NUM_QUEUES; index++) { + vq = &dev->queues[index]; + + memset(vq, 0x00, sizeof(virt_queue_t)); + + // determine queue size + outportw(dev->iobase+VIRTIO_PCI_QUEUE_SEL, index); + num = inportw(dev->iobase+VIRTIO_PCI_QUEUE_NUM); + if (!num) return -1; + + LOG_INFO("vioif: queue_size %u (index %u)\n", num, index); + + total_size = vring_size(num, PAGE_SIZE); + + // allocate and init memory for the virtual queue + void* vring_base = page_alloc(total_size, VMA_READ|VMA_WRITE|VMA_CACHEABLE); + if (BUILTIN_EXPECT(!vring_base, 0)) { + LOG_INFO("Not enough memory to create queue %u\n", index); + return -1; + } + memset((void*)vring_base, 0x00, total_size); +
vring_init(&vq->vring, num, vring_base, PAGE_SIZE); + + if (num > QUEUE_LIMIT) { + vq->vring.num = num = QUEUE_LIMIT; + LOG_INFO("vioif: set queue limit to %u (index %u)\n", vq->vring.num, index); + } + + vq->virt_buffer = (uint64_t) page_alloc(num*VIOIF_BUFFER_SIZE, VMA_READ|VMA_WRITE|VMA_CACHEABLE); + if (BUILTIN_EXPECT(!vq->virt_buffer, 0)) { + LOG_INFO("Not enough memory to create buffer %u\n", index); + return -1; + } + vq->phys_buffer = virt_to_phys(vq->virt_buffer); + + for(int i=0; i<num; i++) { + vq->vring.desc[i].addr = vq->phys_buffer + i * VIOIF_BUFFER_SIZE; + if (index == RX_NUM) { + /* NOTE: RX queue is 0, TX queue is 1 - Virtio Std. §5.1.2 */ + vq->vring.desc[i].len = VIOIF_BUFFER_SIZE; + vq->vring.desc[i].flags = VRING_DESC_F_WRITE; + vq->vring.avail->ring[vq->vring.avail->idx % num] = i; + vq->vring.avail->idx++; + } + } + + // register buffer + outportw(dev->iobase+VIRTIO_PCI_QUEUE_SEL, index); + outportl(dev->iobase+VIRTIO_PCI_QUEUE_PFN, virt_to_phys((size_t) vring_base) >> PAGE_BITS); + } + + return 0; +} + +err_t vioif_init(struct netif* netif) +{ + static uint8_t num = 0; + vioif_t* vioif; + pci_info_t pci_info; + int i; + + LWIP_ASSERT("netif != NULL", (netif != NULL)); + + for(i=0x100; i<=0x103F; i++) { + if ((pci_get_device_info(VENDOR_ID, i, 1, &pci_info, 1) == 0)) { + LOG_INFO("Found vioif (Vendor ID 0x%x, Device Id 0x%x)\n", VENDOR_ID, i); + break; + } + } + + if (i > 0x103F) + return ERR_ARG; + + vioif = kmalloc(sizeof(vioif_t)); + if (!vioif) { + LOG_ERROR("virtioif_init: out of memory\n"); + return ERR_MEM; + } + memset(vioif, 0x00, sizeof(vioif_t)); + + vioif->iomem = pci_info.base[1]; + vioif->iobase = pci_info.base[0]; + vioif->irq = pci_info.irq; + LOG_INFO("vioif uses IRQ %d and IO port 0x%x, IO men 0x%x\n", (int32_t) vioif->irq, vioif->iobase, vioif->iomem); + + // reset interface + outportb(vioif->iobase + VIRTIO_PCI_STATUS, 0); + LOG_INFO("vioif status: 0x%x\n", (uint32_t) inportb(vioif->iobase + VIRTIO_PCI_STATUS)); + + // tell the device that
we have noticed it + outportb(vioif->iobase + VIRTIO_PCI_STATUS, VIRTIO_CONFIG_S_ACKNOWLEDGE); + // tell the device that we will support it. + outportb(vioif->iobase + VIRTIO_PCI_STATUS, VIRTIO_CONFIG_S_ACKNOWLEDGE|VIRTIO_CONFIG_S_DRIVER); + + LOG_INFO("host features 0x%x\n", inportl(vioif->iobase + VIRTIO_PCI_HOST_FEATURES)); + + uint32_t features = inportl(vioif->iobase + VIRTIO_PCI_HOST_FEATURES); + uint32_t required = (1UL << VIRTIO_NET_F_MAC) | (1UL << VIRTIO_NET_F_STATUS); + + if ((features & required) != required) { + LOG_ERROR("Host isn't able to fulfill HermireCore's requirements\n"); + outportb(vioif->iobase + VIRTIO_PCI_STATUS, VIRTIO_CONFIG_S_FAILED); + kfree(vioif); + return ERR_ARG; + } + + required = features; + required &= ~(1UL << VIRTIO_NET_F_CTRL_VQ); + required &= ~(1UL << VIRTIO_NET_F_GUEST_TSO4); + required &= ~(1UL << VIRTIO_NET_F_GUEST_TSO6); + required &= ~(1UL << VIRTIO_NET_F_GUEST_UFO); + required &= ~(1UL << VIRTIO_RING_F_EVENT_IDX); + required &= ~(1UL << VIRTIO_NET_F_MRG_RXBUF); + required &= ~(1UL << VIRTIO_NET_F_MQ); + + LOG_INFO("wanted guest features 0x%x\n", required); + outportl(vioif->iobase + VIRTIO_PCI_GUEST_FEATURES, required); + vioif->features = inportl(vioif->iobase + VIRTIO_PCI_GUEST_FEATURES); + LOG_INFO("current guest features 0x%x\n", vioif->features); + + // tell the device that the features are OK + outportb(vioif->iobase + VIRTIO_PCI_STATUS, VIRTIO_CONFIG_S_ACKNOWLEDGE|VIRTIO_CONFIG_S_DRIVER|VIRTIO_CONFIG_S_FEATURES_OK); + + // check if the host accept these features + uint8_t status = inportb(vioif->iobase + VIRTIO_PCI_STATUS); + if (!(status & VIRTIO_CONFIG_S_FEATURES_OK)) { + LOG_ERROR("device features are ignored: status 0x%x\n", (uint32_t) status); + outportb(vioif->iobase + VIRTIO_PCI_STATUS, VIRTIO_CONFIG_S_FAILED); + kfree(vioif); + return ERR_ARG; + } + + /* hardware address length */ + netif->hwaddr_len = ETHARP_HWADDR_LEN; + + // determine the mac address of this card + LWIP_DEBUGF(NETIF_DEBUG, 
("vioif_init: MAC address ")); + for (uint8_t tmp8=0; tmp8<ETHARP_HWADDR_LEN; tmp8++) { + netif->hwaddr[tmp8] = inportb(vioif->iobase + VIRTIO_PCI_CONFIG_OFF(vioif->msix_enabled) + tmp8); + LWIP_DEBUGF(NETIF_DEBUG, ("%02x ", netif->hwaddr[tmp8])); + } + LWIP_DEBUGF(NETIF_DEBUG, ("\n")); + + // Setup virt queues + if (BUILTIN_EXPECT(vioif_queue_setup(vioif) < 0, 0)) { + outportb(vioif->iobase + VIRTIO_PCI_STATUS, VIRTIO_CONFIG_S_FAILED); + kfree(vioif); + return ERR_ARG; + } + + netif->state = vioif; + mynetif = netif; + + irq_install_handler(vioif->irq+32, vioif_handler); + + /* + * Initialize the snmp variables and counters inside the struct netif. + * The last argument should be replaced with your link speed, in units + * of bits per second. + */ + NETIF_INIT_SNMP(netif, snmp_ifType_ethernet_csmacd, 1000); + + /* administrative details */ + netif->name[0] = 'e'; + netif->name[1] = 'n'; + netif->num = num; + num++; + /* downward functions */ + netif->output = etharp_output; + netif->linkoutput = vioif_output; + /* set maximum transfer unit + * Google Compute Platform supports only a MTU of 1460 + */ + netif->mtu = 1460; + /* broadcast capability */ + netif->flags |= NETIF_FLAG_BROADCAST | NETIF_FLAG_ETHARP | NETIF_FLAG_IGMP | NETIF_FLAG_LINK_UP | NETIF_FLAG_MLD6; +#if LWIP_IPV6 + netif->output_ip6 = ethip6_output; + netif_create_ip6_linklocal_address(netif, 1); + netif->ip6_autoconfig_enabled = 1; +#endif + + // tell the device that the driver is initialized + outportb(vioif->iobase + VIRTIO_PCI_STATUS, VIRTIO_CONFIG_S_ACKNOWLEDGE|VIRTIO_CONFIG_S_DRIVER|VIRTIO_CONFIG_S_DRIVER_OK|VIRTIO_CONFIG_S_FEATURES_OK); + + LOG_INFO("vioif status: 0x%x\n", (uint32_t) inportb(vioif->iobase + VIRTIO_PCI_STATUS)); + LOG_INFO("vioif link is %s\n", + inportl(vioif->iobase + VIRTIO_PCI_CONFIG_OFF(vioif->msix_enabled) + ETHARP_HWADDR_LEN) & VIRTIO_NET_S_LINK_UP ?
"up" : "down"); + + return ERR_OK; +} diff --git a/drivers/net/vioif.h b/drivers/net/vioif.h new file mode 100644 index 000000000..20dc9e82d --- /dev/null +++ b/drivers/net/vioif.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2017, Stefan Lankes, RWTH Aachen University + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __NET_VIOIF_H__ +#define __NET_VIOIF_H__ + +#include +#include + +#define VIOIF_NUM_QUEUES 2 + +typedef struct +{ + struct vring vring; + uint64_t virt_buffer; + uint64_t phys_buffer; + uint16_t last_seen_used; +} virt_queue_t; + +/* + * Helper struct to hold private data used to operate your ethernet interface. + */ +typedef struct vioif { + struct eth_addr *ethaddr; + /* Add whatever per-interface state that is needed here. */ + uint32_t iomem; + uint32_t iobase; + uint32_t features; + uint8_t msix_enabled; + uint8_t irq; + uint8_t polling; + virt_queue_t queues[VIOIF_NUM_QUEUES]; +} vioif_t; + +/* + * Initialize the network driver for the virtio network interface + */ +err_t vioif_init(struct netif* netif); + +#endif diff --git a/include/hermit/malloc.h b/include/hermit/malloc.h index 8537078f0..fbdf4fa98 100644 --- a/include/hermit/malloc.h +++ b/include/hermit/malloc.h @@ -54,7 +54,7 @@ union buddy; /** @brief Buddy * * Every free memory block is stored in a linked list according to its size. - * We can use this free memory to store store this buddy_t union which represents + * We can use this free memory to store this buddy_t union which represents * this block (the buddy_t union is alligned to the front). * Therefore the address of the buddy_t union is equal with the address * of the underlying free memory block. 
@@ -71,8 +71,6 @@ typedef union buddy { uint8_t exponent; /// Must be equal to BUDDY_MAGIC for a valid memory block uint16_t magic; - /// padding to gurantee a sizeof 32Byte - uint8_t padding[28]; } prefix; } buddy_t; @@ -84,4 +82,3 @@ void buddy_dump(void); #endif #endif - diff --git a/include/hermit/spinlock.h b/include/hermit/spinlock.h index fbfba6519..cd44b1116 100644 --- a/include/hermit/spinlock.h +++ b/include/hermit/spinlock.h @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -59,8 +60,8 @@ inline static int spinlock_init(spinlock_t* s) { if (BUILTIN_EXPECT(!s, 0)) return -EINVAL; - atomic_int32_set(&s->queue, 0); - atomic_int32_set(&s->dequeue, 1); + atomic_int64_set(&s->queue, 0); + atomic_int64_set(&s->dequeue, 1); s->owner = MAX_TASKS; s->counter = 0; @@ -68,7 +69,7 @@ inline static int spinlock_init(spinlock_t* s) { } /** @brief Destroy spinlock after use - * @return + * @return * - 0 on success * - -EINVAL (-22) on failure */ @@ -82,13 +83,13 @@ inline static int spinlock_destroy(spinlock_t* s) { return 0; } -/** @brief Lock spinlock at entry of critical section +/** @brief Lock spinlock at entry of critical section * @return * - 0 on success * - -EINVAL (-22) on failure */ inline static int spinlock_lock(spinlock_t* s) { - int32_t ticket; + int64_t ticket; task_t* curr_task; if (BUILTIN_EXPECT(!s, 0)) @@ -100,22 +101,18 @@ inline static int spinlock_lock(spinlock_t* s) { return 0; } -#if 1 - ticket = atomic_int32_inc(&s->queue); - while(atomic_int32_read(&s->dequeue) != ticket) { + ticket = atomic_int64_inc(&s->queue); + while(atomic_int64_read(&s->dequeue) != ticket) { PAUSE; } s->owner = curr_task->id; s->counter = 1; -#else - while( atomic_int32_test_and_set(&s->dequeue,0) ); -#endif return 0; } -/** @brief Unlock spinlock on exit of critical section - * @return +/** @brief Unlock spinlock on exit of critical section + * @return * - 0 on success * - -EINVAL (-22) on failure */ @@ -126,11 +123,7 @@ inline static 
int spinlock_unlock(spinlock_t* s) { s->counter--; if (!s->counter) { s->owner = MAX_TASKS; -#if 1 - atomic_int32_inc(&s->dequeue); -#else - atomic_int32_set(&s->dequeue,1); -#endif + atomic_int64_inc(&s->dequeue); } return 0; @@ -140,7 +133,7 @@ inline static int spinlock_unlock(spinlock_t* s) { * * Initialize each irqsave spinlock before use! * - * @return + * @return * - 0 on success * - -EINVAL (-22) on failure */ @@ -148,8 +141,8 @@ inline static int spinlock_irqsave_init(spinlock_irqsave_t* s) { if (BUILTIN_EXPECT(!s, 0)) return -EINVAL; - atomic_int32_set(&s->queue, 0); - atomic_int32_set(&s->dequeue, 1); + atomic_int64_set(&s->queue, 0); + atomic_int64_set(&s->dequeue, 1); s->flags = 0; s->coreid = (uint32_t)-1; s->counter = 0; @@ -158,7 +151,7 @@ inline static int spinlock_irqsave_init(spinlock_irqsave_t* s) { } /** @brief Destroy irqsave spinlock after use - * @return + * @return * - 0 on success * - -EINVAL (-22) on failure */ @@ -174,13 +167,13 @@ inline static int spinlock_irqsave_destroy(spinlock_irqsave_t* s) { } /** @brief Lock spinlock on entry of critical section and disable interrupts - * @return + * @return * - 0 on success * - -EINVAL (-22) on failure */ inline static int spinlock_irqsave_lock(spinlock_irqsave_t* s) { + int64_t ticket; uint8_t flags; - int32_t ticket; if (BUILTIN_EXPECT(!s, 0)) return -EINVAL; @@ -191,8 +184,8 @@ inline static int spinlock_irqsave_lock(spinlock_irqsave_t* s) { return 0; } - ticket = atomic_int32_inc(&s->queue); - while (atomic_int32_read(&s->dequeue) != ticket) { + ticket = atomic_int64_inc(&s->queue); + while (atomic_int64_read(&s->dequeue) != ticket) { PAUSE; } @@ -204,7 +197,7 @@ inline static int spinlock_irqsave_lock(spinlock_irqsave_t* s) { } /** @brief Unlock spinlock on exit of critical section and re-enable interrupts - * @return + * @return * - 0 on success * - -EINVAL (-22) on failure */ @@ -220,7 +213,7 @@ inline static int spinlock_irqsave_unlock(spinlock_irqsave_t* s) { s->coreid = (uint32_t) -1; 
s->flags = 0; - atomic_int32_inc(&s->dequeue); + atomic_int64_inc(&s->dequeue); irq_nested_enable(flags); } diff --git a/include/hermit/spinlock_types.h b/include/hermit/spinlock_types.h index ba61a6a2b..dd7873fe1 100644 --- a/include/hermit/spinlock_types.h +++ b/include/hermit/spinlock_types.h @@ -44,9 +44,9 @@ extern "C" { /** @brief Spinlock structure */ typedef struct spinlock { /// Internal queue - atomic_int32_t queue; - /// Internal dequeue - atomic_int32_t dequeue; + atomic_int64_t queue; + /// Internal dequeue + atomic_int64_t dequeue; /// Owner of this spinlock structure tid_t owner; /// Internal counter var @@ -55,9 +55,9 @@ typedef struct spinlock { typedef struct spinlock_irqsave { /// Internal queue - atomic_int32_t queue; + atomic_int64_t queue; /// Internal dequeue - atomic_int32_t dequeue; + atomic_int64_t dequeue; /// Core Id of the lock owner uint32_t coreid; /// Internal counter var diff --git a/include/hermit/stddef.h b/include/hermit/stddef.h index d62cab7a1..818ad5fc8 100644 --- a/include/hermit/stddef.h +++ b/include/hermit/stddef.h @@ -48,7 +48,7 @@ extern const size_t image_size; #define TIMER_FREQ 100 /* in HZ */ #define CLOCK_TICK_RATE 1193182 /* 8254 chip's internal oscillator frequency */ #define CACHE_LINE 64 -#define HEAP_START (PAGE_2M_FLOOR((size_t)&kernel_start + image_size) + 4*PAGE_SIZE) +#define HEAP_START (PAGE_2M_CEIL((size_t)&kernel_start + image_size) + 4*PAGE_SIZE) #define HEAP_SIZE (1ULL << 32) #define KMSG_SIZE 0x1000 #define INT_SYSCALL 0x80 @@ -83,9 +83,6 @@ typedef unsigned int tid_t; struct task; DECLARE_PER_CORE(struct task*, current_task); -/* allows fast access to the kernel stack */ -DECLARE_PER_CORE(char*, kernel_stack); - #if MAX_CORES > 1 /* allows fast access to the core id */ DECLARE_PER_CORE(uint32_t, __core_id); diff --git a/include/hermit/tasks.h b/include/hermit/tasks.h index c32a120b9..9842bd7aa 100644 --- a/include/hermit/tasks.h +++ b/include/hermit/tasks.h @@ -166,6 +166,14 @@ void reschedule(void); 
*/ int wakeup_task(tid_t); +/** @brief Wake up a core_id + * + * Wakeup core to be sure that + * the core isn't in halt state + * + * @param core_id Specifies the core + */ +void wakeup_core(uint32_t core_id); /** @brief Block current task * diff --git a/include/hermit/virtio_config.h b/include/hermit/virtio_config.h new file mode 100644 index 000000000..203ed7878 --- /dev/null +++ b/include/hermit/virtio_config.h @@ -0,0 +1,64 @@ +#ifndef __VIRTIO_CONFIG_H +#define __VIRTIO_CONFIG_H +/* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so + * anyone can use the definitions to implement compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ + +/* Virtio devices use a standardized configuration space to define their + * features and pass configuration information, but each implementation can + * store and access that space differently. */ +#include + +/* Status byte for guest to report progress, and synchronize features. */ +/* We have seen device and processed generic fields (VIRTIO_CONFIG_F_VIRTIO) */ +#define VIRTIO_CONFIG_S_ACKNOWLEDGE 1 +/* We have found a driver for the device. */ +#define VIRTIO_CONFIG_S_DRIVER 2 +/* Driver has used its parts of the config, and is happy */ +#define VIRTIO_CONFIG_S_DRIVER_OK 4 +/* Driver has finished configuring features */ +#define VIRTIO_CONFIG_S_FEATURES_OK 8 +/* We've given up on this device. */ +#define VIRTIO_CONFIG_S_FAILED 0x80 + +/* Some virtio feature bits (currently bits 28 through 32) are reserved for the + * transport being used (eg. virtio_ring), the rest are per-device feature + * bits. */ +#define VIRTIO_TRANSPORT_F_START 28 +#define VIRTIO_TRANSPORT_F_END 33 + +#ifndef VIRTIO_CONFIG_NO_LEGACY +/* Do we get callbacks when the ring is completely used, even if we've + * suppressed them? */ +#define VIRTIO_F_NOTIFY_ON_EMPTY 24 + +/* Can the device handle any descriptor layout? */ +#define VIRTIO_F_ANY_LAYOUT 27 +#endif /* VIRTIO_CONFIG_NO_LEGACY */ + +/* v1.0 compliant. 
*/ +#define VIRTIO_F_VERSION_1 32 + +#endif /* _LINUX_VIRTIO_CONFIG_H */ diff --git a/include/hermit/virtio_ids.h b/include/hermit/virtio_ids.h new file mode 100644 index 000000000..4ff9b038c --- /dev/null +++ b/include/hermit/virtio_ids.h @@ -0,0 +1,45 @@ +#ifndef __VIRTIO_IDS_H +#define __VIRTIO_IDS_H +/* + * Virtio IDs + * + * This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
*/ + +#define VIRTIO_ID_NET 1 /* virtio net */ +#define VIRTIO_ID_BLOCK 2 /* virtio block */ +#define VIRTIO_ID_CONSOLE 3 /* virtio console */ +#define VIRTIO_ID_RNG 4 /* virtio rng */ +#define VIRTIO_ID_BALLOON 5 /* virtio balloon */ +#define VIRTIO_ID_RPMSG 7 /* virtio remote processor messaging */ +#define VIRTIO_ID_SCSI 8 /* virtio scsi */ +#define VIRTIO_ID_9P 9 /* 9p virtio console */ +#define VIRTIO_ID_RPROC_SERIAL 11 /* virtio remoteproc serial link */ +#define VIRTIO_ID_CAIF 12 /* Virtio caif */ +#define VIRTIO_ID_GPU 16 /* virtio GPU */ +#define VIRTIO_ID_INPUT 18 /* virtio input */ + +#endif /* _LINUX_VIRTIO_IDS_H */ diff --git a/include/hermit/virtio_net.h b/include/hermit/virtio_net.h new file mode 100644 index 000000000..21d2cc52c --- /dev/null +++ b/include/hermit/virtio_net.h @@ -0,0 +1,245 @@ +#ifndef __VIRTIO_NET_H +#define __VIRTIO_NET_H +/* This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ +#include +#include +#include +#include +#include + +/* The feature bitmap for virtio net */ +#define VIRTIO_NET_F_CSUM 0 /* Host handles pkts w/ partial csum */ +#define VIRTIO_NET_F_GUEST_CSUM 1 /* Guest handles pkts w/ partial csum */ +#define VIRTIO_NET_F_CTRL_GUEST_OFFLOADS 2 /* Dynamic offload configuration. */ +#define VIRTIO_NET_F_MAC 5 /* Host has given MAC address. */ +#define VIRTIO_NET_F_GUEST_TSO4 7 /* Guest can handle TSOv4 in. */ +#define VIRTIO_NET_F_GUEST_TSO6 8 /* Guest can handle TSOv6 in. */ +#define VIRTIO_NET_F_GUEST_ECN 9 /* Guest can handle TSO[6] w/ ECN in. */ +#define VIRTIO_NET_F_GUEST_UFO 10 /* Guest can handle UFO in. */ +#define VIRTIO_NET_F_HOST_TSO4 11 /* Host can handle TSOv4 in. */ +#define VIRTIO_NET_F_HOST_TSO6 12 /* Host can handle TSOv6 in. */ +#define VIRTIO_NET_F_HOST_ECN 13 /* Host can handle TSO[6] w/ ECN in. */ +#define VIRTIO_NET_F_HOST_UFO 14 /* Host can handle UFO in. */ +#define VIRTIO_NET_F_MRG_RXBUF 15 /* Host can merge receive buffers. 
*/ +#define VIRTIO_NET_F_STATUS 16 /* virtio_net_config.status available */ +#define VIRTIO_NET_F_CTRL_VQ 17 /* Control channel available */ +#define VIRTIO_NET_F_CTRL_RX 18 /* Control channel RX mode support */ +#define VIRTIO_NET_F_CTRL_VLAN 19 /* Control channel VLAN filtering */ +#define VIRTIO_NET_F_CTRL_RX_EXTRA 20 /* Extra RX mode control support */ +#define VIRTIO_NET_F_GUEST_ANNOUNCE 21 /* Guest can announce device on the + * network */ +#define VIRTIO_NET_F_MQ 22 /* Device supports Receive Flow + * Steering */ +#define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ + +#ifndef VIRTIO_NET_NO_LEGACY +#define VIRTIO_NET_F_GSO 6 /* Host handles pkts w/ any GSO type */ +#endif /* VIRTIO_NET_NO_LEGACY */ + +#define VIRTIO_NET_S_LINK_UP 1 /* Link is up */ +#define VIRTIO_NET_S_ANNOUNCE 2 /* Announcement is needed */ + +struct virtio_net_config { + /* The config defining mac address (if VIRTIO_NET_F_MAC) */ + __u8 mac[ETHARP_HWADDR_LEN]; + /* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */ + __u16 status; + /* Maximum number of each of transmit and receive queues; + * see VIRTIO_NET_F_MQ and VIRTIO_NET_CTRL_MQ. + * Legal values are between 1 and 0x8000 + */ + __u16 max_virtqueue_pairs; +} __attribute__((packed)); + +/* + * This header comes first in the scatter-gather list. If you don't + * specify GSO or CSUM features, you can simply ignore the header. + * + * This is bitwise-equivalent to the legacy struct virtio_net_hdr_mrg_rxbuf, + * only flattened. 
+ */ +struct virtio_net_hdr_v1 { +#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* Use csum_start, csum_offset */ +#define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */ + __u8 flags; +#define VIRTIO_NET_HDR_GSO_NONE 0 /* Not a GSO frame */ +#define VIRTIO_NET_HDR_GSO_TCPV4 1 /* GSO frame, IPv4 TCP (TSO) */ +#define VIRTIO_NET_HDR_GSO_UDP 3 /* GSO frame, IPv4 UDP (UFO) */ +#define VIRTIO_NET_HDR_GSO_TCPV6 4 /* GSO frame, IPv6 TCP */ +#define VIRTIO_NET_HDR_GSO_ECN 0x80 /* TCP has ECN set */ + __u8 gso_type; + __virtio16 hdr_len; /* Ethernet + IP + tcp/udp hdrs */ + __virtio16 gso_size; /* Bytes to append to hdr_len per frame */ + __virtio16 csum_start; /* Position to start checksumming from */ + __virtio16 csum_offset; /* Offset after that to place checksum */ + __virtio16 num_buffers; /* Number of merged rx buffers */ +}; + +#ifndef VIRTIO_NET_NO_LEGACY +/* This header comes first in the scatter-gather list. + * For legacy virtio, if VIRTIO_F_ANY_LAYOUT is not negotiated, it must + * be the first element of the scatter-gather list. If you don't + * specify GSO or CSUM features, you can simply ignore the header. */ +struct virtio_net_hdr { + /* See VIRTIO_NET_HDR_F_* */ + __u8 flags; + /* See VIRTIO_NET_HDR_GSO_* */ + __u8 gso_type; + __virtio16 hdr_len; /* Ethernet + IP + tcp/udp hdrs */ + __virtio16 gso_size; /* Bytes to append to hdr_len per frame */ + __virtio16 csum_start; /* Position to start checksumming from */ + __virtio16 csum_offset; /* Offset after that to place checksum */ +}; + +/* This is the version of the header to use when the MRG_RXBUF + * feature has been negotiated. */ +struct virtio_net_hdr_mrg_rxbuf { + struct virtio_net_hdr hdr; + __virtio16 num_buffers; /* Number of merged rx buffers */ +}; +#endif /* ...VIRTIO_NET_NO_LEGACY */ + +/* + * Control virtqueue data structures + * + * The control virtqueue expects a header in the first sg entry + * and an ack/status response in the last entry. Data for the + * command goes in between. 
+ */ +struct virtio_net_ctrl_hdr { + __u8 class; + __u8 cmd; +} __attribute__((packed)); + +typedef __u8 virtio_net_ctrl_ack; + +#define VIRTIO_NET_OK 0 +#define VIRTIO_NET_ERR 1 + +/* + * Control the RX mode, ie. promiscuous, allmulti, etc... + * All commands require an "out" sg entry containing a 1 byte + * state value, zero = disable, non-zero = enable. Commands + * 0 and 1 are supported with the VIRTIO_NET_F_CTRL_RX feature. + * Commands 2-5 are added with VIRTIO_NET_F_CTRL_RX_EXTRA. + */ +#define VIRTIO_NET_CTRL_RX 0 + #define VIRTIO_NET_CTRL_RX_PROMISC 0 + #define VIRTIO_NET_CTRL_RX_ALLMULTI 1 + #define VIRTIO_NET_CTRL_RX_ALLUNI 2 + #define VIRTIO_NET_CTRL_RX_NOMULTI 3 + #define VIRTIO_NET_CTRL_RX_NOUNI 4 + #define VIRTIO_NET_CTRL_RX_NOBCAST 5 + +/* + * Control the MAC + * + * The MAC filter table is managed by the hypervisor, the guest should + * assume the size is infinite. Filtering should be considered + * non-perfect, ie. based on hypervisor resources, the guest may + * receive packets from sources not specified in the filter list. + * + * In addition to the class/cmd header, the TABLE_SET command requires + * two out scatterlists. Each contains a 4 byte count of entries followed + * by a concatenated byte stream of the ETH_ALEN MAC addresses. The + * first sg list contains unicast addresses, the second is for multicast. + * This functionality is present if the VIRTIO_NET_F_CTRL_RX feature + * is available. + * + * The ADDR_SET command requests one out scatterlist, it contains a + * 6 bytes MAC address. This functionality is present if the + * VIRTIO_NET_F_CTRL_MAC_ADDR feature is available. + */ +struct virtio_net_ctrl_mac { + __virtio32 entries; + __u8 macs[][ETHARP_HWADDR_LEN]; +} __attribute__((packed)); + +#define VIRTIO_NET_CTRL_MAC 1 + #define VIRTIO_NET_CTRL_MAC_TABLE_SET 0 + #define VIRTIO_NET_CTRL_MAC_ADDR_SET 1 + +/* + * Control VLAN filtering + * + * The VLAN filter table is controlled via a simple ADD/DEL interface.
+ * VLAN IDs not added may be filtered by the hypervisor. Del is the + * opposite of add. Both commands expect an out entry containing a 2 + * byte VLAN ID. VLAN filtering is available with the + * VIRTIO_NET_F_CTRL_VLAN feature bit. + */ +#define VIRTIO_NET_CTRL_VLAN 2 + #define VIRTIO_NET_CTRL_VLAN_ADD 0 + #define VIRTIO_NET_CTRL_VLAN_DEL 1 + +/* + * Control link announce acknowledgement + * + * The command VIRTIO_NET_CTRL_ANNOUNCE_ACK is used to indicate that + * driver has received the notification; device would clear the + * VIRTIO_NET_S_ANNOUNCE bit in the status field after it receives + * this command. + */ +#define VIRTIO_NET_CTRL_ANNOUNCE 3 + #define VIRTIO_NET_CTRL_ANNOUNCE_ACK 0 + +/* + * Control Receive Flow Steering + * + * The command VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET + * enables Receive Flow Steering, specifying the number of the transmit and + * receive queues that will be used. After the command is consumed and acked by + * the device, the device will not steer new packets on receive virtqueues + * other than specified nor read from transmit virtqueues other than specified. + * Accordingly, driver should not transmit new packets on virtqueues other than + * specified. + */ +struct virtio_net_ctrl_mq { + __virtio16 virtqueue_pairs; +}; + +#define VIRTIO_NET_CTRL_MQ 4 + #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET 0 + #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN 1 + #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX 0x8000 + +/* + * Control network offloads + * + * Reconfigures the network offloads that Guest can handle. + * + * Available with the VIRTIO_NET_F_CTRL_GUEST_OFFLOADS feature bit. + * + * Command data format matches the feature bit mask exactly. + * + * See VIRTIO_NET_F_GUEST_* for the list of offloads + * that can be enabled/disabled.
+ */ +#define VIRTIO_NET_CTRL_GUEST_OFFLOADS 5 +#define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET 0 + +#endif /* _LINUX_VIRTIO_NET_H */ diff --git a/include/hermit/virtio_pci.h b/include/hermit/virtio_pci.h new file mode 100644 index 000000000..a210bb328 --- /dev/null +++ b/include/hermit/virtio_pci.h @@ -0,0 +1,200 @@ +/* + * Virtio PCI driver + * + * This module allows virtio devices to be used over a virtual PCI device. + * This can be used with QEMU based VMMs like KVM or Xen. + * + * Copyright IBM Corp. 2007 + * + * Authors: + * Anthony Liguori + * + * This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef __VIRTIO_PCI_H +#define __VIRTIO_PCI_H + +#include +#include + +#ifndef VIRTIO_PCI_NO_LEGACY + +/* A 32-bit r/o bitmask of the features supported by the host */ +#define VIRTIO_PCI_HOST_FEATURES 0 + +/* A 32-bit r/w bitmask of features activated by the guest */ +#define VIRTIO_PCI_GUEST_FEATURES 4 + +/* A 32-bit r/w PFN for the currently selected queue */ +#define VIRTIO_PCI_QUEUE_PFN 8 + +/* A 16-bit r/o queue size for the currently selected queue */ +#define VIRTIO_PCI_QUEUE_NUM 12 + +/* A 16-bit r/w queue selector */ +#define VIRTIO_PCI_QUEUE_SEL 14 + +/* A 16-bit r/w queue notifier */ +#define VIRTIO_PCI_QUEUE_NOTIFY 16 + +/* An 8-bit device status register. */ +#define VIRTIO_PCI_STATUS 18 + +/* An 8-bit r/o interrupt status register. Reading the value will return the + * current contents of the ISR and will also clear it. This is effectively + * a read-and-acknowledge. */ +#define VIRTIO_PCI_ISR 19 + +/* MSI-X registers: only enabled if MSI-X is enabled. */ +/* A 16-bit vector for configuration changes. */ +#define VIRTIO_MSI_CONFIG_VECTOR 20 +/* A 16-bit vector for selected queue notifications. */ +#define VIRTIO_MSI_QUEUE_VECTOR 22 + +/* The remaining space is defined by each driver as the per-driver + * configuration space */ +#define VIRTIO_PCI_CONFIG_OFF(msix_enabled) ((msix_enabled) ?
24 : 20) +/* Deprecated: please use VIRTIO_PCI_CONFIG_OFF instead */ +#define VIRTIO_PCI_CONFIG(dev) VIRTIO_PCI_CONFIG_OFF((dev)->msix_enabled) + +/* Virtio ABI version, this must match exactly */ +#define VIRTIO_PCI_ABI_VERSION 0 + +/* How many bits to shift physical queue address written to QUEUE_PFN. + * 12 is historical, and due to x86 page size. */ +#define VIRTIO_PCI_QUEUE_ADDR_SHIFT 12 + +/* The alignment to use between consumer and producer parts of vring. + * x86 pagesize again. */ +#define VIRTIO_PCI_VRING_ALIGN 4096 + +#endif /* VIRTIO_PCI_NO_LEGACY */ + +/* The bit of the ISR which indicates a device configuration change. */ +#define VIRTIO_PCI_ISR_CONFIG 0x2 +/* Vector value used to disable MSI for queue */ +#define VIRTIO_MSI_NO_VECTOR 0xffff + +#ifndef VIRTIO_PCI_NO_MODERN + +/* IDs for different capabilities. Must all exist. */ + +/* Common configuration */ +#define VIRTIO_PCI_CAP_COMMON_CFG 1 +/* Notifications */ +#define VIRTIO_PCI_CAP_NOTIFY_CFG 2 +/* ISR access */ +#define VIRTIO_PCI_CAP_ISR_CFG 3 +/* Device specific configuration */ +#define VIRTIO_PCI_CAP_DEVICE_CFG 4 +/* PCI configuration access */ +#define VIRTIO_PCI_CAP_PCI_CFG 5 + +/* This is the PCI capability header: */ +struct virtio_pci_cap { + __u8 cap_vndr; /* Generic PCI field: PCI_CAP_ID_VNDR */ + __u8 cap_next; /* Generic PCI field: next ptr. */ + __u8 cap_len; /* Generic PCI field: capability length */ + __u8 cfg_type; /* Identifies the structure. */ + __u8 bar; /* Where to find it. */ + __u8 padding[3]; /* Pad to full dword. */ + __le32 offset; /* Offset within bar. */ + __le32 length; /* Length of the structure, in bytes. */ +}; + +struct virtio_pci_notify_cap { + struct virtio_pci_cap cap; + __le32 notify_off_multiplier; /* Multiplier for queue_notify_off. */ +}; + +/* Fields in VIRTIO_PCI_CAP_COMMON_CFG: */ +struct virtio_pci_common_cfg { + /* About the whole device. 
*/ + __le32 device_feature_select; /* read-write */ + __le32 device_feature; /* read-only */ + __le32 guest_feature_select; /* read-write */ + __le32 guest_feature; /* read-write */ + __le16 msix_config; /* read-write */ + __le16 num_queues; /* read-only */ + __u8 device_status; /* read-write */ + __u8 config_generation; /* read-only */ + + /* About a specific virtqueue. */ + __le16 queue_select; /* read-write */ + __le16 queue_size; /* read-write, power of 2. */ + __le16 queue_msix_vector; /* read-write */ + __le16 queue_enable; /* read-write */ + __le16 queue_notify_off; /* read-only */ + __le32 queue_desc_lo; /* read-write */ + __le32 queue_desc_hi; /* read-write */ + __le32 queue_avail_lo; /* read-write */ + __le32 queue_avail_hi; /* read-write */ + __le32 queue_used_lo; /* read-write */ + __le32 queue_used_hi; /* read-write */ +}; + +/* Fields in VIRTIO_PCI_CAP_PCI_CFG: */ +struct virtio_pci_cfg_cap { + struct virtio_pci_cap cap; + __u8 pci_cfg_data[4]; /* Data for BAR access. */ +}; + +/* Macro versions of offsets for the Old Timers! 
*/ +#define VIRTIO_PCI_CAP_VNDR 0 +#define VIRTIO_PCI_CAP_NEXT 1 +#define VIRTIO_PCI_CAP_LEN 2 +#define VIRTIO_PCI_CAP_CFG_TYPE 3 +#define VIRTIO_PCI_CAP_BAR 4 +#define VIRTIO_PCI_CAP_OFFSET 8 +#define VIRTIO_PCI_CAP_LENGTH 12 + +#define VIRTIO_PCI_NOTIFY_CAP_MULT 16 + +#define VIRTIO_PCI_COMMON_DFSELECT 0 +#define VIRTIO_PCI_COMMON_DF 4 +#define VIRTIO_PCI_COMMON_GFSELECT 8 +#define VIRTIO_PCI_COMMON_GF 12 +#define VIRTIO_PCI_COMMON_MSIX 16 +#define VIRTIO_PCI_COMMON_NUMQ 18 +#define VIRTIO_PCI_COMMON_STATUS 20 +#define VIRTIO_PCI_COMMON_CFGGENERATION 21 +#define VIRTIO_PCI_COMMON_Q_SELECT 22 +#define VIRTIO_PCI_COMMON_Q_SIZE 24 +#define VIRTIO_PCI_COMMON_Q_MSIX 26 +#define VIRTIO_PCI_COMMON_Q_ENABLE 28 +#define VIRTIO_PCI_COMMON_Q_NOFF 30 +#define VIRTIO_PCI_COMMON_Q_DESCLO 32 +#define VIRTIO_PCI_COMMON_Q_DESCHI 36 +#define VIRTIO_PCI_COMMON_Q_AVAILLO 40 +#define VIRTIO_PCI_COMMON_Q_AVAILHI 44 +#define VIRTIO_PCI_COMMON_Q_USEDLO 48 +#define VIRTIO_PCI_COMMON_Q_USEDHI 52 + +#endif /* VIRTIO_PCI_NO_MODERN */ + +#endif diff --git a/include/hermit/virtio_ring.h b/include/hermit/virtio_ring.h new file mode 100644 index 000000000..7680935df --- /dev/null +++ b/include/hermit/virtio_ring.h @@ -0,0 +1,171 @@ +#ifndef __VIRTIO_RING_H +#define __VIRTIO_RING_H +/* An interface for efficient virtio implementation, currently for use by KVM + * and lguest, but hopefully others soon. Do NOT change this since it will + * break existing servers and clients. + * + * This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Copyright Rusty Russell IBM Corporation 2007. */ +#include +#include + +/* This marks a buffer as continuing via the next field. */ +#define VRING_DESC_F_NEXT 1 +/* This marks a buffer as write-only (otherwise read-only). */ +#define VRING_DESC_F_WRITE 2 +/* This means the buffer contains a list of buffer descriptors. */ +#define VRING_DESC_F_INDIRECT 4 + +/* The Host uses this in used->flags to advise the Guest: don't kick me when + * you add a buffer. It's unreliable, so it's simply an optimization. Guest + * will still kick if it's out of buffers. */ +#define VRING_USED_F_NO_NOTIFY 1 +/* The Guest uses this in avail->flags to advise the Host: don't interrupt me + * when you consume a buffer. It's unreliable, so it's simply an + * optimization. 
*/ +#define VRING_AVAIL_F_NO_INTERRUPT 1 + +/* We support indirect buffer descriptors */ +#define VIRTIO_RING_F_INDIRECT_DESC 28 + +/* The Guest publishes the used index for which it expects an interrupt + * at the end of the avail ring. Host should ignore the avail->flags field. */ +/* The Host publishes the avail index for which it expects a kick + * at the end of the used ring. Guest should ignore the used->flags field. */ +#define VIRTIO_RING_F_EVENT_IDX 29 + +/* Virtio ring descriptors: 16 bytes. These can chain together via "next". */ +struct vring_desc { + /* Address (guest-physical). */ + __virtio64 addr; + /* Length. */ + __virtio32 len; + /* The flags as indicated above. */ + __virtio16 flags; + /* We chain unused descriptors via this, too */ + __virtio16 next; +}; + +struct vring_avail { + __virtio16 flags; + __virtio16 idx; + __virtio16 ring[]; +}; + +/* u32 is used here for ids for padding reasons. */ +struct vring_used_elem { + /* Index of start of used descriptor chain. */ + __virtio32 id; + /* Total length of the descriptor chain which was used (written to) */ + __virtio32 len; +}; + +struct vring_used { + __virtio16 flags; + __virtio16 idx; + struct vring_used_elem ring[]; +}; + +struct vring { + unsigned int num; + + struct vring_desc *desc; + + struct vring_avail *avail; + + struct vring_used *used; +}; + +/* Alignment requirements for vring elements. + * When using pre-virtio 1.0 layout, these fall out naturally. + */ +#define VRING_AVAIL_ALIGN_SIZE 2 +#define VRING_USED_ALIGN_SIZE 4 +#define VRING_DESC_ALIGN_SIZE 16 + +/* The standard layout for the ring is a continuous chunk of memory which looks + * like this. We assume num is a power of 2. + * + * struct vring + * { + * // The actual descriptors (16 bytes each) + * struct vring_desc desc[num]; + * + * // A ring of available descriptor heads with free-running index. 
+ * __virtio16 avail_flags; + * __virtio16 avail_idx; + * __virtio16 available[num]; + * __virtio16 used_event_idx; + * + * // Padding to the next align boundary. + * char pad[]; + * + * // A ring of used descriptor heads with free-running index. + * __virtio16 used_flags; + * __virtio16 used_idx; + * struct vring_used_elem used[num]; + * __virtio16 avail_event_idx; + * }; + */ +/* We publish the used event index at the end of the available ring, and vice + * versa. They are at the end for backwards compatibility. */ +#define vring_used_event(vr) ((vr)->avail->ring[(vr)->num]) +#define vring_avail_event(vr) (*(__virtio16 *)&(vr)->used->ring[(vr)->num]) + +static __inline__ void vring_init(struct vring *vr, unsigned int num, void *p, + unsigned long align) +{ + vr->num = num; + vr->desc = p; + vr->avail = p + num*sizeof(struct vring_desc); + vr->used = (void *)(((uintptr_t)&vr->avail->ring[num] + sizeof(__virtio16) + + align-1) & ~(align - 1)); +} + +static __inline__ unsigned vring_size(unsigned int num, unsigned long align) +{ + return ((sizeof(struct vring_desc) * num + sizeof(__virtio16) * (3 + num) + + align - 1) & ~(align - 1)) + + sizeof(__virtio16) * 3 + sizeof(struct vring_used_elem) * num; +} + +/* The following is used with USED_EVENT_IDX and AVAIL_EVENT_IDX */ +/* Assuming a given event_idx value from the other side, if + * we have just incremented index from old to new_idx, + * should we trigger an event? */ +static __inline__ int vring_need_event(__u16 event_idx, __u16 new_idx, __u16 old) +{ + /* Note: Xen has similar logic for notification hold-off + * in include/xen/interface/io/ring.h with req_event and req_prod + * corresponding to event_idx + 1 and new_idx respectively. + * Note also that req_event and req_prod in Xen start at 1, + * event indexes in virtio start at 0. 
*/ + return (__u16)(new_idx - event_idx - 1) < (__u16)(new_idx - old); +} + +#endif /* _LINUX_VIRTIO_RING_H */ diff --git a/include/hermit/virtio_types.h b/include/hermit/virtio_types.h new file mode 100644 index 000000000..4792dea96 --- /dev/null +++ b/include/hermit/virtio_types.h @@ -0,0 +1,57 @@ +#ifndef __VIRTIO_TYPES_H +#define __VIRTIO_TYPES_H +/* Type definitions for virtio implementations. + * + * This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Copyright (C) 2014 Red Hat, Inc. + * Author: Michael S. Tsirkin + */ +#include + +/* + * __virtio{16,32,64} have the following meaning: + * - __u{16,32,64} for virtio devices in legacy mode, accessed in native endian + * - __le{16,32,64} for standard-compliant virtio devices + */ + +typedef uint8_t __u8; +typedef uint16_t __u16; +typedef uint32_t __u32; +typedef uint64_t __u64; +typedef size_t uintptr_t; + +#define __bitwise__ + +typedef __u16 __bitwise__ __virtio16; +typedef __u32 __bitwise__ __virtio32; +typedef __u64 __bitwise__ __virtio64; +typedef __u32 __bitwise__ __le32; +typedef __u16 __bitwise__ __le16; +typedef __u8 __bitwise__ __le8; + +#endif /* __VIRTIO_TYPES_H */ diff --git a/kernel/main.c b/kernel/main.c index 1b8d151be..d2fe61327 100644 --- a/kernel/main.c +++ b/kernel/main.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -58,6 +59,7 @@ #include #include #include +#include #define HERMIT_PORT 0x494E #define HERMIT_MAGIC 0x7E317 @@ -98,20 +100,6 @@ rcce_mpb_t* rcce_mpb = NULL; extern void signal_init(); -#if 0 -static int foo(void* arg) -{ - int i; - - for(i=0; i<5; i++) { - LOG_INFO("hello from %s\n", (char*) arg); - sleep(1); - } - - return 0; -} -#endif - static int hermit_init(void) { uint32_t i; @@ -172,11 +160,15 @@ static int init_netifs(void) LOG_INFO("TCP/IP initialized.\n"); sys_sem_free(&sem); - if (is_uhyve()) + if (is_uhyve()) { + LOG_INFO("HermitCore is running on uhyve!\n"); return 
-ENODEV; + } if (!is_single_kernel()) { + LOG_INFO("HermitCore is running side-by-side to Linux!\n"); + /* Set network address variables */ IP_ADDR4(&gw, 192,168,28,1); IP_ADDR4(&ipaddr, 192,168,28,isle+2); @@ -189,16 +181,11 @@ static int init_netifs(void) * - gw : the gateway wicht should be used * - mmnif_init : the initialization which has to be done in order to use our interface * - ip_input : tells him that he should use ip_input - */ -#if LWIP_TCPIP_CORE_LOCKING_INPUT - if ((err = netifapi_netif_add(&default_netif, ip_2_ip4(&ipaddr), ip_2_ip4(&netmask), ip_2_ip4(&gw), NULL, mmnif_init, ip_input)) != ERR_OK) -#else - /* + * * Note: Our drivers guarantee that the input function will be called in the context of the tcpip thread. * => Therefore, we are able to use ip_input instead of tcpip_input */ if ((err = netifapi_netif_add(&default_netif, ip_2_ip4(&ipaddr), ip_2_ip4(&netmask), ip_2_ip4(&gw), NULL, mmnif_init, ip_input)) != ERR_OK) -#endif { LOG_ERROR("Unable to add the intra network interface: err = %d\n", err); return -ENODEV; @@ -215,6 +202,8 @@ static int init_netifs(void) /* Note: Our drivers guarantee that the input function will be called in the context of the tcpip thread. 
* => Therefore, we are able to use ethernet_input instead of tcpip_input */ + if ((err = netifapi_netif_add(&default_netif, ip_2_ip4(&ipaddr), ip_2_ip4(&netmask), ip_2_ip4(&gw), NULL, vioif_init, ethernet_input)) == ERR_OK) + goto success; if ((err = netifapi_netif_add(&default_netif, ip_2_ip4(&ipaddr), ip_2_ip4(&netmask), ip_2_ip4(&gw), NULL, rtl8139if_init, ethernet_input)) == ERR_OK) goto success; if ((err = netifapi_netif_add(&default_netif, ip_2_ip4(&ipaddr), ip_2_ip4(&netmask), ip_2_ip4(&gw), NULL, e1000if_init, ethernet_input)) == ERR_OK) @@ -291,8 +280,6 @@ int smp_main(void) while(atomic_int32_read(&cpu_online) < atomic_int32_read(&possible_cpus)) PAUSE; - //create_kernel_task(NULL, foo, "foo2", NORMAL_PRIO); - while(1) { check_workqueues(); wait_for_task(); @@ -324,43 +311,6 @@ static int init_rcce(void) return 0; } -#if 0 -// some stress tests -static void lock_test(void) -{ - uint64_t start, end; - int i; - static spinlock_t _lock = SPINLOCK_INIT; - static sem_t _sem = SEM_INIT(1); - - start = rdtsc(); - - for(i=0; i<10000; i++) - { - spinlock_lock(&_lock); - NOP; - spinlock_unlock(&_lock); - } - - end = rdtsc(); - - LOG_INFO("locks %lld (iterations %d)\n", end-start, i); - - start = rdtsc(); - - for(i=0; i<10000; i++) - { - sem_wait(&_sem, 0); - NOP; - sem_post(&_sem); - } - - end = rdtsc(); - - LOG_INFO("sem %lld (iterations %d)\n", end-start, i); -} -#endif - int libc_start(int argc, char** argv, char** env); // init task => creates all other tasks an initialize the LwIP @@ -392,17 +342,14 @@ static int initd(void* arg) } curr_task->heap->flags = VMA_HEAP|VMA_USER; - curr_task->heap->start = PAGE_FLOOR(heap); - curr_task->heap->end = PAGE_FLOOR(heap); + curr_task->heap->start = PAGE_CEIL(heap); + curr_task->heap->end = PAGE_CEIL(heap); // region is already reserved for the heap, we have to change the // property of the first page vma_free(curr_task->heap->start, curr_task->heap->start+PAGE_SIZE); vma_add(curr_task->heap->start, 
curr_task->heap->start+PAGE_SIZE, VMA_HEAP|VMA_USER); - //create_kernel_task(NULL, foo, "foo1", NORMAL_PRIO); - //create_kernel_task(NULL, foo, "foo2", NORMAL_PRIO); - // initialize network err = init_netifs(); @@ -586,6 +533,8 @@ int hermit_main(void) LOG_INFO("Current available memory: %zd MiB\n", atomic_int64_read(&total_available_pages) * PAGE_SIZE / (1024ULL*1024ULL)); LOG_INFO("Core %d is the boot processor\n", boot_processor); LOG_INFO("System is able to use %d processors\n", possible_cpus); + if (mb_info) + LOG_INFO("Kernel cmdline: %s\n", (char*) (size_t) mb_info->cmdline); if (hbmem_base) LOG_INFO("Found high bandwidth memory at 0x%zx (size 0x%zx)\n", hbmem_base, hbmem_size); diff --git a/kernel/syscall.c b/kernel/syscall.c index 719a30944..9307704b7 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -90,7 +90,7 @@ typedef struct { void NORETURN sys_exit(int arg) { if (is_uhyve()) { - uhyve_send(UHYVE_PORT_EXIT, (unsigned) (size_t) &arg); + uhyve_send(UHYVE_PORT_EXIT, (unsigned) virt_to_phys((size_t) &arg)); } else { sys_exit_t sysargs = {__NR_exit, arg}; @@ -290,11 +290,11 @@ ssize_t sys_sbrk(ssize_t incr) heap->end += incr; // reserve VMA regions - if (PAGE_CEIL(heap->end) > PAGE_CEIL(ret)) { + if (PAGE_FLOOR(heap->end) > PAGE_FLOOR(ret)) { // region is already reserved for the heap, we have to change the // property - vma_free(PAGE_CEIL(ret), PAGE_FLOOR(heap->end)); - vma_add(PAGE_CEIL(ret), PAGE_FLOOR(heap->end), VMA_HEAP|VMA_USER); + vma_free(PAGE_FLOOR(ret), PAGE_CEIL(heap->end)); + vma_add(PAGE_FLOOR(ret), PAGE_CEIL(heap->end), VMA_HEAP|VMA_USER); } } else ret = -ENOMEM; @@ -424,6 +424,56 @@ out: return ret; } +int sys_spinlock_init(spinlock_t** lock) +{ + int ret; + + if (BUILTIN_EXPECT(!lock, 0)) + return -EINVAL; + + *lock = (spinlock_t*) kmalloc(sizeof(spinlock_t)); + if (BUILTIN_EXPECT(!(*lock), 0)) + return -ENOMEM; + + ret = spinlock_init(*lock); + if (ret) { + kfree(*lock); + *lock = NULL; + } + + return ret; +} + +int 
sys_spinlock_destroy(spinlock_t* lock) +{ + int ret; + + if (BUILTIN_EXPECT(!lock, 0)) + return -EINVAL; + + ret = spinlock_destroy(lock); + if (!ret) + kfree(lock); + + return ret; +} + +int sys_spinlock_lock(spinlock_t* lock) +{ + if (BUILTIN_EXPECT(!lock, 0)) + return -EINVAL; + + return spinlock_lock(lock); +} + +int sys_spinlock_unlock(spinlock_t* lock) +{ + if (BUILTIN_EXPECT(!lock, 0)) + return -EINVAL; + + return spinlock_unlock(lock); +} + void sys_msleep(unsigned int ms) { if (ms * TIMER_FREQ / 1000 > 0) diff --git a/kernel/tasks.c b/kernel/tasks.c index cd92d1010..f886fc0a2 100644 --- a/kernel/tasks.c +++ b/kernel/tasks.c @@ -67,7 +67,6 @@ static readyqueues_t readyqueues[1] = {[0] = {task_table+0, NULL, 0, 0, 0, {[0 . #endif DEFINE_PER_CORE(task_t*, current_task, task_table+0); -DEFINE_PER_CORE(char*, kernel_stack, NULL); #if MAX_CORES > 1 DEFINE_PER_CORE(uint32_t, __core_id, 0); @@ -176,6 +175,10 @@ static void readyqueues_push_back(uint32_t core_id, task_t* task) // increase the number of ready tasks readyqueues[core_id].nr_tasks++; + + // should we wakeup the core? 
+ if (readyqueues[core_id].nr_tasks == 1) + wakeup_core(core_id); } @@ -278,9 +281,8 @@ int multitasking_init(void) task_table[0].prio = IDLE_PRIO; task_table[0].stack = (char*) ((size_t)&boot_stack + core_id * KERNEL_STACK_SIZE); task_table[0].ist_addr = (char*)&boot_ist; - set_per_core(kernel_stack, task_table[0].stack + KERNEL_STACK_SIZE - 0x10); set_per_core(current_task, task_table+0); - arch_init_task(task_table+0); + arch_init_task(task_table+0); readyqueues[core_id].idle = task_table+0; @@ -303,12 +305,11 @@ int set_idle_task(void) task_table[i].last_stack_pointer = NULL; task_table[i].stack = (char*) ((size_t)&boot_stack + core_id * KERNEL_STACK_SIZE); task_table[i].ist_addr = create_stack(KERNEL_STACK_SIZE); - set_per_core(kernel_stack, task_table[i].stack + KERNEL_STACK_SIZE - 0x10); task_table[i].prio = IDLE_PRIO; task_table[i].heap = NULL; readyqueues[core_id].idle = task_table+i; set_per_core(current_task, readyqueues[core_id].idle); - arch_init_task(task_table+i); + arch_init_task(task_table+i); ret = 0; break; @@ -483,7 +484,7 @@ int clone_task(tid_t* id, entry_point_t ep, void* arg, uint8_t prio) task_table[i].stack = stack; task_table[i].prio = prio; task_table[i].heap = curr_task->heap; - task_table[i].start_tick = get_clock_tick(); + task_table[i].start_tick = get_clock_tick(); task_table[i].last_tsc = 0; task_table[i].parent = curr_task->id; task_table[i].tls_addr = curr_task->tls_addr; @@ -513,6 +514,9 @@ int clone_task(tid_t* id, entry_point_t ep, void* arg, uint8_t prio) readyqueues[core_id].queue[prio-1].last->next = task_table+i; readyqueues[core_id].queue[prio-1].last = task_table+i; } + // should we wakeup the core? 
+ if (readyqueues[core_id].nr_tasks == 1) + wakeup_core(core_id); spinlock_irqsave_unlock(&readyqueues[core_id].lock); break; } @@ -530,11 +534,6 @@ out: destroy_stack(ist, KERNEL_STACK_SIZE); } -#if 0 - if (core_id != CORE_ID) - apic_send_ipi(core_id, 121); -#endif - return ret; } @@ -634,11 +633,6 @@ out: kfree(counter); } -#if 0 - if (core_id != CORE_ID) - apic_send_ipi(core_id, 121); -#endif - return ret; } @@ -674,6 +668,8 @@ int wakeup_task(tid_t id) core_id = task->last_core; if (task->status == TASK_BLOCKED) { + LOG_DEBUG("wakeup task %d\n", id); + task->status = TASK_READY; ret = 0; @@ -711,6 +707,8 @@ int block_task(tid_t id) core_id = task->last_core; if (task->status == TASK_RUNNING) { + LOG_DEBUG("block task %d\n", id); + task->status = TASK_BLOCKED; spinlock_irqsave_lock(&readyqueues[core_id].lock); diff --git a/lwip b/lwip index ab6d60a62..51d48fe0c 160000 --- a/lwip +++ b/lwip @@ -1 +1 @@ -Subproject commit ab6d60a6276788949b38c020a62d51564fc69a8e +Subproject commit 51d48fe0c67131da346c9ef280b2019c77f6e607 diff --git a/mm/malloc.c b/mm/malloc.c index 7bdafc67d..dac637062 100644 --- a/mm/malloc.c +++ b/mm/malloc.c @@ -134,13 +134,13 @@ void buddy_dump(void) void* palloc(size_t sz, uint32_t flags) { size_t phyaddr, viraddr, bits; - uint32_t npages = PAGE_FLOOR(sz) >> PAGE_BITS; + uint32_t npages = PAGE_CEIL(sz) >> PAGE_BITS; int err; LOG_DEBUG("palloc(%zd) (%u pages)\n", sz, npages); // get free virtual address space - viraddr = vma_alloc(PAGE_FLOOR(sz), flags); + viraddr = vma_alloc(PAGE_CEIL(sz), flags); if (BUILTIN_EXPECT(!viraddr, 0)) return NULL; @@ -168,7 +168,7 @@ void* palloc(size_t sz, uint32_t flags) void* create_stack(size_t sz) { size_t phyaddr, viraddr, bits; - uint32_t npages = PAGE_FLOOR(sz) >> PAGE_BITS; + uint32_t npages = PAGE_CEIL(sz) >> PAGE_BITS; int err; LOG_DEBUG("create_stack(0x%zx) (%u pages)\n", DEFAULT_STACK_SIZE, npages); @@ -204,7 +204,7 @@ void* create_stack(size_t sz) int destroy_stack(void* viraddr, size_t sz) { size_t 
phyaddr; - uint32_t npages = PAGE_FLOOR(sz) >> PAGE_BITS; + uint32_t npages = PAGE_CEIL(sz) >> PAGE_BITS; LOG_DEBUG("destroy_stack(0x%zx) (size 0x%zx)\n", viraddr, DEFAULT_STACK_SIZE); diff --git a/mm/vma.c b/mm/vma.c index 88f30bdd1..c89b5a3c4 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -54,12 +54,12 @@ int vma_init(void) int ret; LOG_INFO("vma_init: reserve vma region 0x%llx - 0x%llx\n", - PAGE_2M_CEIL((size_t) &kernel_start), - PAGE_2M_FLOOR((size_t) &kernel_start + image_size)); + PAGE_2M_FLOOR((size_t) &kernel_start), + PAGE_2M_CEIL((size_t) &kernel_start + image_size)); // add Kernel - ret = vma_add(PAGE_2M_CEIL((size_t) &kernel_start), - PAGE_2M_FLOOR((size_t) &kernel_start + image_size), + ret = vma_add(PAGE_2M_FLOOR((size_t) &kernel_start), + PAGE_2M_CEIL((size_t) &kernel_start + image_size), VMA_READ|VMA_WRITE|VMA_EXECUTE|VMA_CACHEABLE); if (BUILTIN_EXPECT(ret, 0)) goto out; diff --git a/test.sh b/test.sh deleted file mode 100644 index 45f43446a..000000000 --- a/test.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash -# -# do not use this script -# it is written only for internal tests via Travis CI - -FILES="usr/tests/hello usr/tests/hellof usr/tests/hello++ usr/tests/thr_hello usr/tests/pi usr/benchmarks/stream usr/benchmarks/basic usr/tests/signals" -PROXY=/opt/hermit/bin/proxy - -for f in $FILES; do echo "check $f..."; timeout --kill-after=5m 5m $PROXY $f || exit 1; done - -# test echo server at port 8000 -HERMIT_APP_PORT=8000 $PROXY usr/tests/server & -sleep 10 -curl http://127.0.0.1:8000/help -sleep 1 -curl http://127.0.0.1:8000/hello -sleep 1 - -# kill server -kill $! 
diff --git a/tests.sh b/tests.sh new file mode 100755 index 000000000..3cabbadbe --- /dev/null +++ b/tests.sh @@ -0,0 +1,21 @@ +#!/bin/bash +# +# do not use this script +# it is written only for internal tests via Travis CI + +TDIR=build/local_prefix/opt/hermit/x86_64-hermit/extra +FILES="$TDIR/tests/hello $TDIR/tests/hellof $TDIR/tests/hello++ $TDIR/tests/thr_hello $TDIR/tests/pi $TDIR/benchmarks/stream $TDIR/benchmarks/basic $TDIR/tests/signals $TDIR/tests/test-malloc $TDIR/tests/test-malloc-mt" +PROXY=build/local_prefix/opt/hermit/bin/proxy + +for f in $FILES; do echo "check $f..."; HERMIT_ISLE=qemu HERMIT_CPUS=1 HERMIT_KVM=0 HERMIT_VERBOSE=1 timeout --kill-after=5m 5m $PROXY $f || exit 1; done + +# test echo server at port 8000 +HERMIT_ISLE=qemu HERMIT_CPUS=1 HERMIT_KVM=0 HERMIT_VERBOSE=1 HERMIT_APP_PORT=8000 $PROXY $TDIR/tests/server & +sleep 10 +curl http://127.0.0.1:8000/help +sleep 1 +curl http://127.0.0.1:8000/hello +sleep 1 + +# kill server +kill $! diff --git a/tools/proxy.c b/tools/proxy.c index 3ebac52a6..8c5adf472 100644 --- a/tools/proxy.c +++ b/tools/proxy.c @@ -104,10 +104,12 @@ static void qemu_fini(void) unlink(pidname); if (id >= 0) { - int status = 0; + int ret; - kill(id, SIGINT); - wait(&status); + do { + ret = kill(id, SIGINT); + sched_yield(); + } while((ret < 0) && (errno == ESRCH)); } } @@ -268,7 +270,7 @@ static void wait_hermit_available(void) return; int fd = inotify_init(); - if ( fd < 0 ) { + if (fd < 0) { perror( "inotify_init" ); exit(1); } diff --git a/tools/uhyve.c b/tools/uhyve.c index 556f020a1..da84a3276 100644 --- a/tools/uhyve.c +++ b/tools/uhyve.c @@ -156,6 +156,7 @@ static bool cap_tsc_deadline = false; static bool cap_irqchip = false; static bool cap_adjust_clock_stable = false; static bool verbose = false; +static bool full_checkpoint = false; static uint32_t ncores = 1; static uint8_t* guest_mem = NULL; static uint8_t* klog = NULL; @@ -435,6 +436,7 @@ static int load_checkpoint(uint8_t* mem, char* path) size_t paddr = 
elf_entry; int ret; struct timeval begin, end; + uint32_t i; if (verbose) gettimeofday(&begin, NULL); @@ -457,7 +459,8 @@ static int load_checkpoint(uint8_t* mem, char* path) return ret; #endif - for(uint32_t i=0; i<=no_checkpoint; i++) + i = full_checkpoint ? no_checkpoint : 0; + for(; i<=no_checkpoint; i++) { snprintf(fname, MAX_FNAME, "checkpoint/chk%u_mem.dat", i); @@ -803,8 +806,10 @@ static int vcpu_loop(void) unsigned data = *((unsigned*)((size_t)run+run->io.data_offset)); uhyve_close_t* uhyve_close = (uhyve_close_t*) (guest_mem+data); - if (uhyve_close->ret > 2) + if (uhyve_close->fd > 2) uhyve_close->ret = close(uhyve_close->fd); + else + uhyve_close->ret = 0; break; } @@ -1064,12 +1069,16 @@ int uhyve_init(char *path) FILE* f = fopen("checkpoint/chk_config.txt", "r"); if (f != NULL) { + int tmp = 0; restart = true; fscanf(f, "number of cores: %u\n", &ncores); fscanf(f, "memory size: 0x%zx\n", &guest_size); fscanf(f, "checkpoint number: %u\n", &no_checkpoint); fscanf(f, "entry point: 0x%zx", &elf_entry); + fscanf(f, "full checkpoint: %d", &tmp); + full_checkpoint = tmp ? 
true : false; + if (verbose) fprintf(stderr, "Restart from checkpoint %u (ncores %d, mem size 0x%zx)\n", no_checkpoint, ncores, guest_size); fclose(f); @@ -1081,6 +1090,10 @@ int uhyve_init(char *path) const char* hermit_cpus = getenv("HERMIT_CPUS"); if (hermit_cpus) ncores = (uint32_t) atoi(hermit_cpus); + + const char* full_chk = getenv("HERMIT_FULLCHECKPOINT"); + if (full_chk && (strcmp(full_chk, "0") != 0)) + full_checkpoint = true; } vcpu_threads = (pthread_t*) calloc(ncores, sizeof(pthread_t)); @@ -1133,7 +1146,7 @@ int uhyve_init(char *path) mprotect(guest_mem + KVM_32BIT_GAP_START, KVM_32BIT_GAP_SIZE, PROT_NONE); } - char* merge = getenv("HERMIT_MERGEABLE"); + const char* merge = getenv("HERMIT_MERGEABLE"); if (merge && (strcmp(merge, "0") != 0)) { /* * The KSM feature is intended for applications that generate @@ -1206,7 +1219,7 @@ int uhyve_init(char *path) static void timer_handler(int signum) { struct stat st = {0}; - const size_t flag = no_checkpoint > 0 ? PG_DIRTY : PG_ACCESSED; + const size_t flag = (!full_checkpoint && (no_checkpoint > 0)) ? 
PG_DIRTY : PG_ACCESSED; char fname[MAX_FNAME]; struct timeval begin, end; @@ -1320,7 +1333,8 @@ nextslot: for(size_t l=0; l<(1 << PAGE_MAP_BITS); l++) { if ((pgt[l] & (PG_PRESENT|flag)) == (PG_PRESENT|flag)) { //printf("\t\t\t*pgt[%zd] 0x%zx, 4KB\n", l, pgt[l] & ~PG_XD); - pgt[l] = pgt[l] & ~(PG_DIRTY|PG_ACCESSED); + if (!full_checkpoint) + pgt[l] = pgt[l] & ~(PG_DIRTY|PG_ACCESSED); size_t pgt_entry = pgt[l] & ~PG_PSE; // because PAT use the same bit as PSE if (fwrite(&pgt_entry, sizeof(size_t), 1, f) != 1) err(1, "fwrite failed"); @@ -1330,7 +1344,8 @@ nextslot: } } else if ((pgd[k] & flag) == flag) { //printf("\t\t*pgd[%zd] 0x%zx, 2MB\n", k, pgd[k] & ~PG_XD); - pgd[k] = pgd[k] & ~(PG_DIRTY|PG_ACCESSED); + if (!full_checkpoint) + pgd[k] = pgd[k] & ~(PG_DIRTY|PG_ACCESSED); if (fwrite(pgd+k, sizeof(size_t), 1, f) != 1) err(1, "fwrite failed"); if (fwrite((size_t*) (guest_mem + (pgd[k] & PAGE_2M_MASK)), (1UL << PAGE_2M_BITS), 1, f) != 1) @@ -1355,6 +1370,10 @@ nextslot: fprintf(f, "memory size: 0x%zx\n", guest_size); fprintf(f, "checkpoint number: %u\n", no_checkpoint); fprintf(f, "entry point: 0x%zx", elf_entry); + if (full_checkpoint) + fprintf(f, "full checkpoint: 1"); + else + fprintf(f, "full checkpoint: 0"); fclose(f); diff --git a/usr/tests/CMakeLists.txt b/usr/tests/CMakeLists.txt index a54926f79..e910a016c 100644 --- a/usr/tests/CMakeLists.txt +++ b/usr/tests/CMakeLists.txt @@ -9,6 +9,11 @@ add_executable(hello++ hello++.cpp) add_executable(hellof hellof.f90) add_executable(pi pi.go) +add_executable(test-malloc test-malloc.c) +add_executable(test-malloc-mt test-malloc-mt.c) +target_compile_options(test-malloc-mt PRIVATE -pthread) +target_link_libraries(test-malloc-mt pthread) + add_executable(server server.go) target_link_libraries(server netgo) @@ -16,9 +21,11 @@ add_executable(RCCE_minimum RCCE_minimum.c) target_link_libraries(RCCE_minimum ircce) add_executable(thr_hello thr_hello.c) +target_compile_options(thr_hello PRIVATE -pthread) 
target_link_libraries(thr_hello pthread) add_executable(signals signals.c) +target_compile_options(signals PRIVATE -pthread) target_link_libraries(signals pthread) # deployment diff --git a/usr/tests/test-malloc-mt.c b/usr/tests/test-malloc-mt.c new file mode 100644 index 000000000..9e35d2de0 --- /dev/null +++ b/usr/tests/test-malloc-mt.c @@ -0,0 +1,67 @@ +#include +#include +#include +#include +#include +#include + +#ifndef NUM_THREADS +#define NUM_THREADS 3 +#endif + +#ifndef NUM_ITER +#define NUM_ITER 10000 +#endif + +#ifndef SIZE +#define SIZE 16384 +#endif + +__thread void* buf; + +static void* perform_work( void* argument ) +{ + int passed_in_value; + + passed_in_value = *( ( int* )argument ); + printf( "Hello World! It's me, thread %d with argument %d!\n", getpid(), passed_in_value ); + + /* optionally: insert more useful stuff here */ + for(int i=0; i +#include +#include +#include + +#ifndef NUM_ITER +#define NUM_ITER 100000 +#endif + +#ifndef SIZE +#define SIZE 16*1024 +#endif + +void* buf; + +int main(int argc, char** argv) +{ + /* optionally: insert more useful stuff here */ + + for(int i=0; i