diff --git a/.bintray_descriptor.json b/.bintray_descriptor.json index 03c3d01fb..04d4f7be2 100644 --- a/.bintray_descriptor.json +++ b/.bintray_descriptor.json @@ -13,7 +13,7 @@ }, "version": { - "name": "0.2.3", + "name": "0.2.5", "desc": "HermitCore's kernel as libOS", "gpgSign": false }, diff --git a/CMakeLists.txt b/CMakeLists.txt index cc47105a6..2e566e411 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -199,7 +199,7 @@ set(CPACK_SYSTEM_NAME all) set(CPACK_PACKAGE_VERSION_MAJOR 0) set(CPACK_PACKAGE_VERSION_MINOR 2) -set(CPACK_PACKAGE_VERSION_PATCH 3) +set(CPACK_PACKAGE_VERSION_PATCH 5) set(CPACK_PACKAGE_CONTACT "Stefan Lankes ") diff --git a/arch/x86/include/asm/gdt.h b/arch/x86/include/asm/gdt.h index 1f8c7f01d..3f8b462ca 100644 --- a/arch/x86/include/asm/gdt.h +++ b/arch/x86/include/asm/gdt.h @@ -51,26 +51,26 @@ extern "C" { #define GDT_FLAG_TSS_BUSY 0x02 #define GDT_FLAG_SEGMENT 0x10 -/// Privilege level: Ring 0 +/// Privilege level: Ring 0 #define GDT_FLAG_RING0 0x00 /// Privilege level: Ring 1 #define GDT_FLAG_RING1 0x20 -/// Privilege level: Ring 2 +/// Privilege level: Ring 2 #define GDT_FLAG_RING2 0x40 -/// Privilege level: Ring 3 +/// Privilege level: Ring 3 #define GDT_FLAG_RING3 0x60 /// Segment is present #define GDT_FLAG_PRESENT 0x80 /// Segment was accessed #define GDT_FLAG_ACCESSED 0x01 -/** - * @brief Granularity of segment limit +/** + * @brief Granularity of segment limit * - set: segment limit unit is 4 KB (page size) * - not set: unit is bytes */ #define GDT_FLAG_4K_GRAN 0x80 /** - * @brief Default operand size + * @brief Default operand size * - set: 32 bit * - not set: 16 bit */ @@ -78,7 +78,7 @@ extern "C" { #define GDT_FLAG_32_BIT 0x40 #define GDT_FLAG_64_BIT 0x20 -/** @brief Defines a GDT entry +/** @brief Defines a GDT entry * * A global descriptor table entry consists of: * - 32 bit base address (chunkwise embedded into this structure) @@ -115,16 +115,16 @@ typedef struct { #if GDT_ENTRIES > 8192 #error Too many GDT entries! 
-#endif +#endif /** @brief Installs the global descriptor table * * The installation involves the following steps: - * - set up the special GDT pointer + * - set up the special GDT pointer * - set up the entries in our GDT - * - finally call gdt_flush() in our assembler file + * - finally call gdt_flush() in our assembler file * in order to tell the processor where the new GDT is - * - update the new segment registers + * - update the new segment registers */ void gdt_install(void); @@ -143,6 +143,10 @@ void gdt_set_gate(int num, unsigned long base, unsigned long limit, void configure_gdt_entry(gdt_entry_t *dest_entry, unsigned long base, unsigned long limit, unsigned char access, unsigned char gran); +/** @brief Initialize the task state segments + */ +void tss_init(void); + #ifdef __cplusplus } #endif diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 15aa50a70..6d29d30db 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -256,6 +256,7 @@ extern "C" { #define MSR_HWP_REQUEST 0x00000774 #define MSR_HWP_STATUS 0x00000777 +#define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << 0) #define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP (1ULL << 16) #define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK (1ULL << 20) #define MSR_IA32_MISC_ENABLE_TURBO_DISABLE (1ULL << 38) diff --git a/arch/x86/kernel/gdt.c b/arch/x86/kernel/gdt.c index dca9c9f3b..2c78fd6c5 100644 --- a/arch/x86/kernel/gdt.c +++ b/arch/x86/kernel/gdt.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -41,7 +42,6 @@ gdt_ptr_t gp; // currently, our kernel has full access to the ioports static gdt_entry_t gdt[GDT_ENTRIES] = {[0 ... 
GDT_ENTRIES-1] = {0, 0, 0, 0, 0, 0}}; static tss_t task_state_segments[MAX_CORES] __attribute__ ((aligned (PAGE_SIZE))); -static uint8_t stack_table[MAX_CORES][KERNEL_STACK_SIZE*MAX_IST] __attribute__ ((aligned (PAGE_SIZE))); extern const void boot_stack; @@ -94,7 +94,8 @@ void gdt_install(void) { int i, num = 0; - memset(task_state_segments, 0x00, MAX_CORES*sizeof(tss_t)); + // part of bss => already initialized + //memset(task_state_segments, 0x00, MAX_CORES*sizeof(tss_t)); /* Setup the GDT pointer and limit */ gp.limit = (sizeof(gdt_entry_t) * GDT_ENTRIES) - 1; @@ -148,10 +149,6 @@ void gdt_install(void) */ for(i=0; i= 0x80000001) cpuid(0x80000001, &a, &b, &c, &cpu_info.feature3); - if (extended >= 0x80000008) { + if (extended >= 0x80000004) { uint32_t* bint = (uint32_t*) cpu_brand; cpuid(0x80000002, bint+0, bint+1, bint+2, bint+3); @@ -451,7 +451,7 @@ int cpu_detection(void) { kprintf("Syscall instruction: %s\n", (cpu_info.feature3 & CPU_FEATURE_SYSCALL) ? "available" : "unavailable"); } - //TODO: add check for SMEP and SMAP + //TODO: add check for SMEP, PCE and SMAP // be sure that AM, NE and MP is enabled cr0 = read_cr0(); @@ -476,7 +476,9 @@ int cpu_detection(void) { cr4 |= CR4_MCE; // enable machine check exceptions //if (has_vmx()) // cr4 |= CR4_VMXE; - cr4 &= ~CR4_TSD; // => every privilege level is able to use rdtsc + cr4 &= ~(CR4_PCE|CR4_TSD); // disable performance monitoring counter + // clear TSD => every privilege level is able + // to use rdtsc write_cr4(cr4); @@ -633,6 +635,7 @@ int cpu_detection(void) { LOG_INFO("Maximum input value for hypervisor: 0x%x\n", a); } + if (first_time) { LOG_INFO("CR0 0x%llx, CR4 0x%llx\n", read_cr0(), read_cr4()); LOG_INFO("size of xsave_t: %d\n", sizeof(xsave_t)); diff --git a/arch/x86/mm/memory.c b/arch/x86/mm/memory.c index e858616e7..f1df8327f 100644 --- a/arch/x86/mm/memory.c +++ b/arch/x86/mm/memory.c @@ -367,6 +367,9 @@ int memory_init(void) } } + // Ok, we are now able to use our memory management => update 
tss + tss_init(); + return ret; oom: diff --git a/arch/x86/mm/page.c b/arch/x86/mm/page.c index 2d78bad7a..01b08d1f3 100644 --- a/arch/x86/mm/page.c +++ b/arch/x86/mm/page.c @@ -96,7 +96,7 @@ static uint8_t expect_zeroed_pages = 0; size_t virt_to_phys(size_t addr) { if ((addr > (size_t) &kernel_start) && - (addr <= PAGE_2M_FLOOR((size_t) &kernel_start + image_size))) + (addr <= PAGE_2M_CEIL((size_t) &kernel_start + image_size))) { size_t vpn = addr >> (PAGE_2M_BITS); // virtual page number size_t entry = self[1][vpn]; // page table entry @@ -400,7 +400,8 @@ int page_init(void) while(((size_t) cmdline + i) <= ((size_t) cmdline + cmdsize)) { - page_map(((size_t) cmdline + i) & PAGE_MASK, ((size_t) cmdline + i) & PAGE_MASK, 1, PG_GLOBAL|PG_RW|PG_PRESENT); + page_map(((size_t) cmdline + i) & PAGE_MASK, ((size_t) cmdline + i) & PAGE_MASK, + 1, PG_NX|PG_GLOBAL|PG_RW|PG_PRESENT); i += PAGE_SIZE; } } else cmdline = 0; diff --git a/arch/x86/mm/vma.c b/arch/x86/mm/vma.c index ad58d1b79..ea565dea0 100644 --- a/arch/x86/mm/vma.c +++ b/arch/x86/mm/vma.c @@ -35,18 +35,20 @@ int vma_arch_init(void) int ret = 0; if (mb_info) { - ret = vma_add((size_t)mb_info & PAGE_MASK, ((size_t)mb_info & PAGE_MASK) + PAGE_SIZE, VMA_READ|VMA_WRITE); + ret = vma_add((size_t)mb_info & PAGE_MASK, ((size_t)mb_info & PAGE_MASK) + PAGE_SIZE, + VMA_READ|VMA_WRITE|VMA_CACHEABLE); if (BUILTIN_EXPECT(ret, 0)) goto out; if ((mb_info->flags & MULTIBOOT_INFO_CMDLINE) && cmdline) { - LOG_INFO("vma_arch_init: map cmdline %p (size 0x%zd)", cmdline, cmdsize); + LOG_INFO("vma_arch_init: map cmdline %p (size 0x%zd)\n", cmdline, cmdsize); size_t i = 0; while(((size_t) cmdline + i) < ((size_t) cmdline + cmdsize)) { if ((((size_t)cmdline + i) & PAGE_MASK) != ((size_t) mb_info & PAGE_MASK)) { - ret = vma_add(((size_t)cmdline + i) & PAGE_MASK, (((size_t)cmdline + i) & PAGE_MASK) + PAGE_SIZE, VMA_READ|VMA_WRITE); + ret = vma_add(((size_t)cmdline + i) & PAGE_MASK, (((size_t)cmdline + i) & PAGE_MASK) + PAGE_SIZE, + 
VMA_READ|VMA_WRITE|VMA_CACHEABLE); if (BUILTIN_EXPECT(ret, 0)) goto out; } diff --git a/cmake/HermitCore-Configuration.cmake b/cmake/HermitCore-Configuration.cmake index c38566f8c..8ca70e181 100644 --- a/cmake/HermitCore-Configuration.cmake +++ b/cmake/HermitCore-Configuration.cmake @@ -1,4 +1,4 @@ -set(PACKAGE_VERSION "0.2.2" CACHE STRING +set(PACKAGE_VERSION "0.2.5" CACHE STRING "HermitCore current version") set(MAX_CORES "512" CACHE STRING diff --git a/cmake/local-cmake.sh b/cmake/local-cmake.sh index 4dee60c2f..949f4f793 100644 --- a/cmake/local-cmake.sh +++ b/cmake/local-cmake.sh @@ -63,7 +63,7 @@ then fi echo "-- Local CMake v${MAJOR}.${MINOR} installed to ${CMAKE_DIR_REL}" - echo "-- Next time you source this script, no download will be neccessary" + echo "-- Next time you source this script, no download will be necessary" fi export PATH="${CMAKE_DIR}/bin:${PATH}" diff --git a/drivers/net/rtl8139.c b/drivers/net/rtl8139.c index 3358cfbcf..fce54da8f 100644 --- a/drivers/net/rtl8139.c +++ b/drivers/net/rtl8139.c @@ -50,6 +50,9 @@ #define TX_BUF_LEN 4096 #define MIN(a, b) (a) < (b) ? 
(a) : (b) +static uint8_t rx_buffer[RX_BUF_LEN+16 /* header size */] __attribute__ ((aligned (PAGE_SIZE))); +static uint8_t tx_buffer[4][TX_BUF_LEN] __attribute__ ((aligned (PAGE_SIZE))); + /* * To set the RTL8139 to accept only the Transmit OK (TOK) and Receive OK (ROK) * interrupts, we would have the TOK and ROK bits of the IMR high and leave the @@ -328,26 +331,15 @@ err_t rtl8139if_init(struct netif* netif) rtl8139if->irq = pci_info.irq; /* allocate the receive buffer */ - rtl8139if->rx_buffer = page_alloc(RX_BUF_LEN + 16 /* header size */, VMA_READ|VMA_WRITE); - if (!(rtl8139if->rx_buffer)) { - LOG_ERROR("rtl8139if_init: out of memory\n"); - kfree(rtl8139if); - return ERR_MEM; - } - memset(rtl8139if->rx_buffer, 0x00, RX_BUF_LEN + 16); + rtl8139if->rx_buffer = rx_buffer; + //memset(rtl8139if->rx_buffer, 0x00, RX_BUF_LEN + 16); /* allocate the send buffers */ - rtl8139if->tx_buffer[0] = page_alloc(4*TX_BUF_LEN, VMA_READ|VMA_WRITE); - if (!(rtl8139if->tx_buffer[0])) { - LOG_ERROR("rtl8139if_init: out of memory\n"); - page_free(rtl8139if->rx_buffer, RX_BUF_LEN + 16); - kfree(rtl8139if); - return ERR_MEM; - } - memset(rtl8139if->tx_buffer[0], 0x00, 4*TX_BUF_LEN); - rtl8139if->tx_buffer[1] = rtl8139if->tx_buffer[0] + 1*TX_BUF_LEN; - rtl8139if->tx_buffer[2] = rtl8139if->tx_buffer[0] + 2*TX_BUF_LEN; - rtl8139if->tx_buffer[3] = rtl8139if->tx_buffer[0] + 3*TX_BUF_LEN; + rtl8139if->tx_buffer[0] = tx_buffer[0]; + //memset(rtl8139if->tx_buffer[0], 0x00, 4*TX_BUF_LEN); + rtl8139if->tx_buffer[1] = tx_buffer[1]; + rtl8139if->tx_buffer[2] = tx_buffer[2]; + rtl8139if->tx_buffer[3] = tx_buffer[3]; netif->state = rtl8139if; mynetif = netif; @@ -355,8 +347,6 @@ err_t rtl8139if_init(struct netif* netif) tmp32 = inportl(rtl8139if->iobase + TCR); if (tmp32 == 0xFFFFFF) { LOG_ERROR("rtl8139if_init: ERROR\n"); - page_free(rtl8139if->rx_buffer, RX_BUF_LEN + 16); - page_free(rtl8139if->tx_buffer[0], 4*TX_BUF_LEN); kfree(rtl8139if); memset(netif, 0x00, sizeof(struct netif)); mynetif = 
NULL; @@ -400,8 +390,6 @@ err_t rtl8139if_init(struct netif* netif) if (!tmp16) { // it seems not to work LOG_ERROR("RTL8139 reset failed\n"); - page_free(rtl8139if->rx_buffer, RX_BUF_LEN + 16); - page_free(rtl8139if->tx_buffer[0], 4*TX_BUF_LEN); kfree(rtl8139if); memset(netif, 0x00, sizeof(struct netif)); mynetif = NULL; diff --git a/include/hermit/semaphore.h b/include/hermit/semaphore.h index 9a731ad3b..5cd2708c9 100644 --- a/include/hermit/semaphore.h +++ b/include/hermit/semaphore.h @@ -56,14 +56,15 @@ extern "C" { * - 0 on success * - -EINVAL on invalid argument */ -inline static int sem_init(sem_t* s, unsigned int v) { +inline static int sem_init(sem_t* s, unsigned int v) +{ unsigned int i; if (BUILTIN_EXPECT(!s, 0)) return -EINVAL; s->value = v; - s->pos = 0; + s->rpos = s->wpos = 0; for(i=0; i<MAX_TASKS; i++) s->queue[i] = MAX_TASKS; spinlock_irqsave_init(&s->lock); @@ -76,7 +77,8 @@ inline static int sem_init(sem_t* s, unsigned int v) { * - 0 on success * - -EINVAL on invalid argument */ -inline static int sem_destroy(sem_t* s) { +inline static int sem_destroy(sem_t* s) +{ if (BUILTIN_EXPECT(!s, 0)) return -EINVAL; @@ -94,7 +96,8 @@ inline static int sem_destroy(sem_t* s) { * - -EINVAL on invalid argument * - -ECANCELED on failure (You still have to wait) */ -inline static int sem_trywait(sem_t* s) { +inline static int sem_trywait(sem_t* s) +{ int ret = -ECANCELED; if (BUILTIN_EXPECT(!s, 0)) @@ -114,12 +117,13 @@ inline static int sem_trywait(sem_t* s) { * * @param s Address of the according sem_t structure * @param ms Timeout in milliseconds - * @return + * @return * - 0 on success * - -EINVAL on invalid argument * - -ETIME on timer expired */ -inline static int sem_wait(sem_t* s, uint32_t ms) { +inline static int sem_wait(sem_t* s, uint32_t ms) +{ task_t* curr_task = per_core(current_task); if (BUILTIN_EXPECT(!s, 0)) @@ -132,8 +136,8 @@ next_try1: s->value--; spinlock_irqsave_unlock(&s->lock); } else { - s->queue[s->pos] = curr_task->id; - s->pos = (s->pos + 1) %
MAX_TASKS; + s->queue[s->wpos] = curr_task->id; + s->wpos = (s->wpos + 1) % MAX_TASKS; block_current_task(); spinlock_irqsave_unlock(&s->lock); reschedule(); @@ -157,8 +161,8 @@ next_try2: spinlock_irqsave_unlock(&s->lock); goto timeout; } - s->queue[s->pos] = curr_task->id; - s->pos = (s->pos + 1) % MAX_TASKS; + s->queue[s->wpos] = curr_task->id; + s->wpos = (s->wpos + 1) % MAX_TASKS; set_timer(deadline); spinlock_irqsave_unlock(&s->lock); reschedule(); @@ -181,28 +185,23 @@ timeout: return 0; } -/** @brief Give back resource +/** @brief Give back resource * @return * - 0 on success * - -EINVAL on invalid argument */ -inline static int sem_post(sem_t* s) { - unsigned int k, i; - +inline static int sem_post(sem_t* s) +{ if (BUILTIN_EXPECT(!s, 0)) return -EINVAL; spinlock_irqsave_lock(&s->lock); s->value++; - i = s->pos; - for(k=0; k<MAX_TASKS; k++) { - if (s->queue[i] < MAX_TASKS) { - wakeup_task(s->queue[i]); - s->queue[i] = MAX_TASKS; - break; - } - i = (i + 1) % MAX_TASKS; + if (s->queue[s->rpos] < MAX_TASKS) { + wakeup_task(s->queue[s->rpos]); + s->queue[s->rpos] = MAX_TASKS; + s->rpos = (s->rpos + 1) % MAX_TASKS; } spinlock_irqsave_unlock(&s->lock); diff --git a/include/hermit/semaphore_types.h b/include/hermit/semaphore_types.h index 0a511493c..b97e6694f 100644 --- a/include/hermit/semaphore_types.h +++ b/include/hermit/semaphore_types.h @@ -46,8 +46,10 @@ typedef struct sem { unsigned int value; /// Queue of waiting tasks tid_t queue[MAX_TASKS]; - /// Position in queue - unsigned int pos; + /// Position in queue to add a task + unsigned int wpos; + /// Position in queue to get a task + unsigned int rpos; /// Access lock spinlock_irqsave_t lock; } sem_t; diff --git a/mm/vma.c b/mm/vma.c index 4c99cec1d..e55f0356f 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -91,6 +91,8 @@ size_t vma_alloc(size_t size, uint32_t flags) size_t base = VMA_MIN; size_t limit = VMA_MAX; + size = PAGE_CEIL(size); + spinlock_irqsave_lock(lock); // first fit search for free memory area @@ -237,7 +239,7 @@ int
vma_add(size_t start, size_t end, uint32_t flags) if (pred && (pred->end == start) && (pred->flags == flags)) { pred->end = end; // resize VMA - LOG_DEBUG("vma_alloc: resize vma, start 0x%zx, pred->start 0x%zx, pred->end 0x%zx\n", start, pred->start, pred->end); + LOG_DEBUG("vma_add: resize vma, start 0x%zx, pred->start 0x%zx, pred->end 0x%zx\n", start, pred->start, pred->end); } else { // insert new VMA vma_t* new = kmalloc(sizeof(vma_t)); @@ -251,9 +253,11 @@ int vma_add(size_t start, size_t end, uint32_t flags) new->flags = flags; new->next = succ; new->prev = pred; + LOG_DEBUG("vma_add: create new vma, new->start 0x%zx, new->end 0x%zx\n", new->start, new->end); if (succ) succ->prev = new; + if (pred) pred->next = new; else @@ -266,21 +270,22 @@ fail: return ret; } +static void print_vma(vma_t *vma) +{ + while (vma) { + LOG_INFO("0x%lx - 0x%lx: size=0x%x, flags=%c%c%c%s\n", vma->start, vma->end, vma->end - vma->start, + (vma->flags & VMA_READ) ? 'r' : '-', + (vma->flags & VMA_WRITE) ? 'w' : '-', + (vma->flags & VMA_EXECUTE) ? 'x' : '-', + (vma->flags & VMA_CACHEABLE) ? "" : " (uncached)"); + vma = vma->next; + } +} + void vma_dump(void) { - void print_vma(vma_t *vma) { - while (vma) { - LOG_INFO("0x%lx - 0x%lx: size=0x%x, flags=%c%c%c%s\n", vma->start, vma->end, vma->end - vma->start, - (vma->flags & VMA_READ) ? 'r' : '-', - (vma->flags & VMA_WRITE) ? 'w' : '-', - (vma->flags & VMA_EXECUTE) ? 'x' : '-', - (vma->flags & VMA_CACHEABLE) ? 
"" : " (uncached)"); - vma = vma->next; - } - } - LOG_INFO("VMAs:\n"); spinlock_irqsave_lock(&hermit_mm_lock); - print_vma(&vma_boot); + print_vma(vma_list); spinlock_irqsave_unlock(&hermit_mm_lock); } diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 63d957156..4e77645ef 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -5,7 +5,7 @@ include(../cmake/HermitCore-Paths.cmake) add_compile_options(-std=c99) -add_executable(proxy proxy.c uhyve.c uhyve-ibv.c uhyve-net.c) +add_executable(proxy proxy.c uhyve-ibv.c utils.c uhyve.c uhyve-net.c) target_compile_options(proxy PUBLIC -pthread) target_link_libraries(proxy pthread ibverbs) diff --git a/tools/proxy.c b/tools/proxy.c index 951917d6d..ca5a6471e 100644 --- a/tools/proxy.c +++ b/tools/proxy.c @@ -129,33 +129,13 @@ static void exit_handler(int sig) static char* get_append_string(void) { - char line[2048]; - char* match; - char* point; + uint32_t freq = get_cpufreq(); + if (freq == 0) + return "-freq0 -proxy"; - FILE* fp = fopen("/proc/cpuinfo", "r"); - if (!fp) - return "-freq0"; + snprintf(cmdline, MAX_PATH, "\"-freq%u -proxy\"", freq); - while(fgets(line, 2048, fp)) { - if ((match = strstr(line, "cpu MHz")) == NULL) - continue; - - // scan strinf for the next number - for(; (*match < 0x30) || (*match > 0x39); match++) - ; - - for(point = match; ((*point != '.') && (*point != '\0')); point++) - ; - *point = '\0'; - - snprintf(cmdline, MAX_PATH, "\"-freq%s -proxy\"", match); - fclose(fp); - - return cmdline; - } - - return "-freq0"; + return cmdline; } static int env_init(char *path) diff --git a/tools/proxy.h b/tools/proxy.h index e82def4a8..8cfd8caf3 100644 --- a/tools/proxy.h +++ b/tools/proxy.h @@ -28,6 +28,12 @@ #ifndef __PROXY_H__ #define __PROXY_H__ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include +#include #include #define HERMIT_ELFOSABI 0x42 @@ -42,4 +48,8 @@ int uhyve_init(char *path); int uhyve_loop(void); +// define some helper functions +uint32_t get_cpufreq(void); 
+ssize_t pread_in_full(int fd, void *buf, size_t count, off_t offset); + #endif diff --git a/tools/uhyve.c b/tools/uhyve.c index 65750f516..e2a316f55 100644 --- a/tools/uhyve.c +++ b/tools/uhyve.c @@ -32,7 +32,7 @@ * remove memory limit */ -#define _GNU_SOURCE + #define _GNU_SOURCE #include #include @@ -291,75 +291,6 @@ static void uhyve_atexit(void) close_fd(&kvm); } -static uint32_t get_cpufreq(void) -{ - char line[128]; - uint32_t freq = 0; - char* match; - - FILE* fp = fopen("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", "r"); - if (fp != NULL) { - if (fgets(line, sizeof(line), fp) != NULL) { - // cpuinfo_max_freq is in kHz - freq = (uint32_t) atoi(line) / 1000; - } - - fclose(fp); - } else if( (fp = fopen("/proc/cpuinfo", "r")) ) { - // Resorting to /proc/cpuinfo, however on most systems this will only - // return the current frequency that might change over time. - // Currently only needed when running inside a VM - - // read until we find the line indicating cpu frequency - while(fgets(line, sizeof(line), fp) != NULL) { - match = strstr(line, "cpu MHz"); - - if(match != NULL) { - // advance pointer to beginning of number - while( ((*match < '0') || (*match > '9')) && (*match != '\0') ) - match++; - - freq = (uint32_t) atoi(match); - break; - } - } - - fclose(fp); - } - - return freq; -} - -static ssize_t pread_in_full(int fd, void *buf, size_t count, off_t offset) -{ - ssize_t total = 0; - char *p = buf; - - if (count > SSIZE_MAX) { - errno = E2BIG; - return -1; - } - - while (count > 0) { - ssize_t nr; - - nr = pread(fd, p, count, offset); - if (nr == 0) - return total; - else if (nr == -1 && errno == EINTR) - continue; - else if (nr == -1) - return -1; - - count -= nr; - total += nr; - p += nr; - offset += nr; - } - - return total; -} - static int load_kernel(uint8_t* mem, char* path) { Elf64_Ehdr hdr; @@ -1289,9 +1220,21 @@ static int vcpu_init(void) kvm_ioctl(vcpufd, KVM_SET_XSAVE, &xsave); kvm_ioctl(vcpufd, KVM_SET_VCPU_EVENTS, &events); } 
else { + struct { + struct kvm_msrs info; + struct kvm_msr_entry entries[MAX_MSR_ENTRIES]; + } msr_data; + struct kvm_msr_entry *msrs = msr_data.entries; + // be sure that the multiprocessor is runable kvm_ioctl(vcpufd, KVM_SET_MP_STATE, &mp_state); + // enable fast string operations + msrs[0].index = MSR_IA32_MISC_ENABLE; + msrs[0].data = 1; + msr_data.info.nmsrs = 1; + kvm_ioctl(vcpufd, KVM_SET_MSRS, &msr_data); + /* Setup registers and memory. */ setup_system(vcpufd, guest_mem, cpuid); kvm_ioctl(vcpufd, KVM_SET_REGS, &regs); diff --git a/tools/utils.c b/tools/utils.c new file mode 100644 index 000000000..043ff2384 --- /dev/null +++ b/tools/utils.c @@ -0,0 +1,171 @@ +/* +* Copyright (c) 2017, Stefan Lankes, RWTH Aachen University +* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* * Redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in the +* documentation and/or other materials provided with the distribution. +* * Neither the name of the University nor the names of its contributors +* may be used to endorse or promote products derived from this +* software without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +* DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include + +#include "proxy.h" + +inline static void __cpuid(uint32_t code, uint32_t* a, uint32_t* b, uint32_t* c, uint32_t* d) +{ + __asm volatile ("cpuid" : "=a"(*a), "=b"(*b), "=c"(*c), "=d"(*d) : "0"(code), "2"(*c)); +} + +// Try to determine the frequency from the CPU brand. +// Code is derived from the manual "Intel Processor +// Identification and the CPUID Instruction". +static uint32_t get_frequency_from_brand(void) +{ + char cpu_brand[4*3*sizeof(uint32_t)+1] = {[0 ... 
4*3*sizeof(uint32_t)] = 0}; + uint32_t* bint = (uint32_t*) cpu_brand; + uint32_t index, multiplier = 0; + uint32_t cpu_freq = 0; + uint32_t extended; + + __cpuid(0x80000000, &extended, bint+1, bint+2, bint+3); + if (extended < 0x80000004) + return 0; + + __cpuid(0x80000002, bint+0, bint+1, bint+2, bint+3); + __cpuid(0x80000003, bint+4, bint+5, bint+6, bint+7); + __cpuid(0x80000004, bint+8, bint+9, bint+10, bint+11); + + for(index=0; index 0) { + uint32_t freq; + + // Compute frequency (in MHz) from brand string + if (cpu_brand[index-3] == '.') { // If format is “x.xx” + freq = (uint32_t)(cpu_brand[index-4] - '0') * multiplier; + freq += (uint32_t)(cpu_brand[index-2] - '0') * (multiplier / 10); + freq += (uint32_t)(cpu_brand[index-1] - '0') * (multiplier / 100); + } else { // If format is xxxx + freq = (uint32_t)(cpu_brand[index-4] - '0') * 1000; + freq += (uint32_t)(cpu_brand[index-3] - '0') * 100; + freq += (uint32_t)(cpu_brand[index-2] - '0') * 10; + freq += (uint32_t)(cpu_brand[index-1] - '0'); + freq *= multiplier; + } + + return freq; + } + } + + return 0; +} + +uint32_t get_cpufreq(void) +{ + char line[128]; + uint32_t freq = 0; + char* match; + + freq = get_frequency_from_brand(); + if (freq > 0) + return freq; + + // TODO: fallback solution, on some systems is cpuinfo_max_freq the turbo frequency + // => wrong value + FILE* fp = fopen("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", "r"); + if (fp != NULL) { + if (fgets(line, sizeof(line), fp) != NULL) { + // cpuinfo_max_freq is in kHz + freq = (uint32_t) atoi(line) / 1000; + } + + fclose(fp); + } else if( (fp = fopen("/proc/cpuinfo", "r")) ) { + // Resorting to /proc/cpuinfo, however on most systems this will only + // return the current frequency that might change over time. 
+ // Currently only needed when running inside a VM + + // read until we find the line indicating cpu frequency + while(fgets(line, sizeof(line), fp) != NULL) { + match = strstr(line, "cpu MHz"); + + if(match != NULL) { + // advance pointer to beginning of number + while( ((*match < '0') || (*match > '9')) && (*match != '\0') ) + match++; + + freq = (uint32_t) atoi(match); + break; + } + } + + fclose(fp); + } + + return freq; +} + +ssize_t pread_in_full(int fd, void *buf, size_t count, off_t offset) +{ + ssize_t total = 0; + char *p = buf; + + if (count > SSIZE_MAX) { + errno = E2BIG; + return -1; + } + + while (count > 0) { + ssize_t nr; + + nr = pread(fd, p, count, offset); + if (nr == 0) + return total; + else if (nr == -1 && errno == EINTR) + continue; + else if (nr == -1) + return -1; + + count -= nr; + total += nr; + p += nr; + offset += nr; + } + + return total; +}