mirror of
https://github.com/hermitcore/libhermit.git
synced 2025-03-09 00:00:03 +01:00
add draft to integrate a hypervisor directly in the proxy
- this reduces the overhead because the boot time is smaller in comparison to qemu - furthermore, a more direct communication via VM_EXISTS is possible - the current version doesn't support a network interface. The hypervisor is based on KVM and is called uhyve. You can test the hypervisor with the following command: HERMIT_ISLE=uhyve HERMIT_VERBOSE=1 tools/proxy usr/tests/hello
This commit is contained in:
parent
82eeb6f076
commit
5c648f22c1
12 changed files with 978 additions and 52 deletions
|
@ -205,6 +205,7 @@ typedef struct {
|
|||
size_t ss_size; /* Stack size. */
|
||||
} stack_t;
|
||||
|
||||
const int32_t is_uhyve(void);
|
||||
const int32_t is_single_kernel(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
@ -69,6 +69,7 @@ align 4
|
|||
global mb_info
|
||||
global hbmem_base
|
||||
global hbmem_size
|
||||
global uhyve
|
||||
base dq 0
|
||||
limit dq 0
|
||||
cpu_freq dd 0
|
||||
|
@ -92,6 +93,7 @@ align 4
|
|||
mb_info dq 0
|
||||
hbmem_base dq 0
|
||||
hbmem_size dq 0
|
||||
uhyve dd 0
|
||||
|
||||
; Bootstrap page tables are used during the initialization.
|
||||
align 4096
|
||||
|
@ -666,6 +668,12 @@ Lgo3:
|
|||
add rsp, 16
|
||||
iretq
|
||||
|
||||
; int32_t is_uhyve(void)
; Returns the 32-bit value stored in the `uhyve` variable of the boot header
; in eax.
global is_uhyve
align 64
is_uhyve:
    mov eax, DWORD [uhyve]
    ret
|
||||
|
||||
global is_single_kernel
|
||||
align 64
|
||||
is_single_kernel:
|
||||
|
|
|
@ -154,7 +154,7 @@ int pci_get_device_info(uint32_t vendor_id, uint32_t device_id, pci_info_t* info
|
|||
if (!info)
|
||||
return -EINVAL;
|
||||
|
||||
if (!mechanism)
|
||||
if (!mechanism && !is_uhyve())
|
||||
pci_init();
|
||||
|
||||
for (bus = 0; bus < MAX_BUS; bus++) {
|
||||
|
|
|
@ -230,6 +230,8 @@ extern const void kernel_start;
|
|||
|
||||
int uart_early_init(char* cmdline)
|
||||
{
|
||||
if (is_uhyve())
|
||||
return 0;
|
||||
#if 1
|
||||
// default value of our QEMU configuration
|
||||
iobase = 0xc110;
|
||||
|
@ -275,6 +277,9 @@ int uart_early_init(char* cmdline)
|
|||
|
||||
int uart_init(void)
|
||||
{
|
||||
if (is_uhyve())
|
||||
return 0;
|
||||
|
||||
#ifdef CONFIG_PCI
|
||||
pci_info_t pci_info;
|
||||
uint32_t bar = 0;
|
||||
|
|
|
@ -56,6 +56,13 @@ extern "C" {
|
|||
|
||||
#define DYNAMIC_TICKS
|
||||
|
||||
#define UHYVE_PORT_WRITE 0x499
|
||||
#define UHYVE_PORT_OPEN 0x500
|
||||
#define UHYVE_PORT_CLOSE 0x501
|
||||
#define UHYVE_PORT_READ 0x502
|
||||
#define UHYVE_PORT_EXIT 0x503
|
||||
#define UHYVE_PORT_LSEEK 0x504
|
||||
|
||||
#define BUILTIN_EXPECT(exp, b) __builtin_expect((exp), (b))
|
||||
//#define BUILTIN_EXPECT(exp, b) (exp)
|
||||
#define NORETURN __attribute__((noreturn))
|
||||
|
|
|
@ -177,6 +177,9 @@ static int init_netifs(void)
|
|||
LOG_INFO("TCP/IP initialized.\n");
|
||||
sys_sem_free(&sem);
|
||||
|
||||
if (is_uhyve())
|
||||
return -ENODEV;
|
||||
|
||||
if (!is_single_kernel())
|
||||
{
|
||||
/* Set network address variables */
|
||||
|
@ -403,6 +406,17 @@ static int initd(void* arg)
|
|||
// initialize network
|
||||
init_netifs();
|
||||
|
||||
if (is_uhyve())
|
||||
{
|
||||
char* dummy[] = {"app_name", NULL};
|
||||
|
||||
LOG_INFO("Boot time: %d ms\n", (get_clock_tick() * 1000) / TIMER_FREQ);
|
||||
// call user code
|
||||
libc_start(1, dummy, NULL); //argc, argv, environ);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if 0
|
||||
if (is_single_kernel()) {
|
||||
char* dummy[] = {"app_name", NULL};
|
||||
|
|
119
kernel/syscall.c
119
kernel/syscall.c
|
@ -37,6 +37,7 @@
|
|||
#include <hermit/memory.h>
|
||||
#include <hermit/signal.h>
|
||||
#include <hermit/logging.h>
|
||||
#include <asm/io.h>
|
||||
#include <sys/poll.h>
|
||||
|
||||
#include <lwip/sockets.h>
|
||||
|
@ -89,24 +90,28 @@ typedef struct {
|
|||
/** @brief To be called by the systemcall to exit tasks */
|
||||
void NORETURN sys_exit(int arg)
|
||||
{
|
||||
sys_exit_t sysargs = {__NR_exit, arg};
|
||||
|
||||
spinlock_irqsave_lock(&lwip_lock);
|
||||
if (libc_sd >= 0)
|
||||
{
|
||||
int s = libc_sd;
|
||||
|
||||
lwip_write(s, &sysargs, sizeof(sysargs));
|
||||
libc_sd = -1;
|
||||
|
||||
spinlock_irqsave_unlock(&lwip_lock);
|
||||
|
||||
// switch to LwIP thread
|
||||
reschedule();
|
||||
|
||||
lwip_close(s);
|
||||
if (is_uhyve()) {
|
||||
outportl(UHYVE_PORT_EXIT, (unsigned) (size_t) &arg);
|
||||
} else {
|
||||
spinlock_irqsave_unlock(&lwip_lock);
|
||||
sys_exit_t sysargs = {__NR_exit, arg};
|
||||
|
||||
spinlock_irqsave_lock(&lwip_lock);
|
||||
if (libc_sd >= 0)
|
||||
{
|
||||
int s = libc_sd;
|
||||
|
||||
lwip_write(s, &sysargs, sizeof(sysargs));
|
||||
libc_sd = -1;
|
||||
|
||||
spinlock_irqsave_unlock(&lwip_lock);
|
||||
|
||||
// switch to LwIP thread
|
||||
reschedule();
|
||||
|
||||
lwip_close(s);
|
||||
} else {
|
||||
spinlock_irqsave_unlock(&lwip_lock);
|
||||
}
|
||||
}
|
||||
|
||||
do_exit(arg);
|
||||
|
@ -118,8 +123,23 @@ typedef struct {
|
|||
size_t len;
|
||||
} __attribute__((packed)) sys_read_t;
|
||||
|
||||
typedef struct {
|
||||
int fd;
|
||||
char* buf;
|
||||
size_t len;
|
||||
ssize_t ret;
|
||||
} __attribute__((packed)) uhyve_read_t;
|
||||
|
||||
ssize_t sys_read(int fd, char* buf, size_t len)
|
||||
{
|
||||
if (is_uhyve()) {
|
||||
uhyve_read_t uhyve_args = {fd, (char*) virt_to_phys((size_t) buf), len, -1};
|
||||
|
||||
outportl(UHYVE_PORT_READ, (unsigned)virt_to_phys((size_t)&uhyve_args));
|
||||
|
||||
return uhyve_args.ret;
|
||||
}
|
||||
|
||||
sys_read_t sysargs = {__NR_read, fd, len};
|
||||
ssize_t j, ret;
|
||||
int s;
|
||||
|
@ -175,15 +195,29 @@ typedef struct {
|
|||
size_t len;
|
||||
} __attribute__((packed)) sys_write_t;
|
||||
|
||||
typedef struct {
|
||||
int fd;
|
||||
const char* buf;
|
||||
size_t len;
|
||||
} __attribute__((packed)) uhyve_write_t;
|
||||
|
||||
ssize_t sys_write(int fd, const char* buf, size_t len)
|
||||
{
|
||||
ssize_t i, ret;
|
||||
sys_write_t sysargs = {__NR_write, fd, len};
|
||||
int s;
|
||||
|
||||
if (BUILTIN_EXPECT(!buf, 0))
|
||||
return -1;
|
||||
|
||||
if (is_uhyve()) {
|
||||
uhyve_write_t uhyve_args = {fd, (const char*) virt_to_phys((size_t) buf), len};
|
||||
|
||||
outportl(UHYVE_PORT_WRITE, (unsigned)virt_to_phys((size_t)&uhyve_args));
|
||||
|
||||
return uhyve_args.len;
|
||||
}
|
||||
|
||||
ssize_t i, ret;
|
||||
int s;
|
||||
sys_write_t sysargs = {__NR_write, fd, len};
|
||||
|
||||
// do we have an LwIP file descriptor?
|
||||
if (fd & LWIP_FD_BIT) {
|
||||
ret = lwip_write(fd & ~LWIP_FD_BIT, buf, len);
|
||||
|
@ -273,8 +307,24 @@ ssize_t sys_sbrk(ssize_t incr)
|
|||
return ret;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
const char* name;
|
||||
int flags;
|
||||
int mode;
|
||||
int ret;
|
||||
} __attribute__((packed)) uhyve_open_t;
|
||||
|
||||
int sys_open(const char* name, int flags, int mode)
|
||||
{
|
||||
if (is_uhyve()) {
|
||||
uhyve_open_t uhyve_open = {(const char*)virt_to_phys((size_t)name), flags, mode, -1};
|
||||
|
||||
kprintf("name %s, %p, 0x%zx\n", name, name, uhyve_open.name);
|
||||
outportl(UHYVE_PORT_OPEN, (unsigned)virt_to_phys((size_t) &uhyve_open));
|
||||
|
||||
return uhyve_open.ret;
|
||||
}
|
||||
|
||||
int s, i, ret, sysnr = __NR_open;
|
||||
size_t len;
|
||||
|
||||
|
@ -331,8 +381,21 @@ typedef struct {
|
|||
int fd;
|
||||
} __attribute__((packed)) sys_close_t;
|
||||
|
||||
typedef struct {
|
||||
int fd;
|
||||
int ret;
|
||||
} __attribute__((packed)) uhyve_close_t;
|
||||
|
||||
int sys_close(int fd)
|
||||
{
|
||||
if (is_uhyve()) {
|
||||
uhyve_close_t uhyve_close = {fd, -1};
|
||||
|
||||
outportl(UHYVE_PORT_CLOSE, (unsigned)virt_to_phys((size_t) &uhyve_close));
|
||||
|
||||
return uhyve_close.ret;
|
||||
}
|
||||
|
||||
int ret, s;
|
||||
sys_close_t sysargs = {__NR_close, fd};
|
||||
|
||||
|
@ -449,8 +512,22 @@ typedef struct {
|
|||
int whence;
|
||||
} __attribute__((packed)) sys_lseek_t;
|
||||
|
||||
typedef struct {
|
||||
int fd;
|
||||
off_t offset;
|
||||
int whence;
|
||||
} __attribute__((packed)) uhyve_lseek_t;
|
||||
|
||||
off_t sys_lseek(int fd, off_t offset, int whence)
|
||||
{
|
||||
if (is_uhyve()) {
|
||||
uhyve_lseek_t uhyve_lseek = { fd, offset, whence };
|
||||
|
||||
outportl(UHYVE_PORT_LSEEK, (unsigned)virt_to_phys((size_t) &uhyve_lseek));
|
||||
|
||||
return uhyve_lseek.offset;
|
||||
}
|
||||
|
||||
off_t off;
|
||||
sys_lseek_t sysargs = {__NR_lseek, fd, offset, whence};
|
||||
int s;
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
MAKE = make
|
||||
CC = gcc
|
||||
CP = cp
|
||||
CFLAGS = -O2 -Wall -std=gnu99 $(ARCH_OPT)
|
||||
PROXYFILES = proxy init.sh $(shell find ../usr/tests ../usr/benchmarks ../usr/openmpbench -type f -executable)
|
||||
|
||||
|
@ -13,13 +14,13 @@ endif
|
|||
# other implicit rules
|
||||
%.o : %.c
|
||||
@echo [CC] $@
|
||||
$Q$(CC) -c $(CFLAGS) -o $@ $<
|
||||
$Q$(CC) -c $(CFLAGS) -pthread -o $@ $<
|
||||
|
||||
all: proxy
|
||||
|
||||
proxy: proxy.o
|
||||
proxy: proxy.o uhyve.o
|
||||
@echo [LD] $@
|
||||
$Q$(CC) $(CFLAGS) -o $@ $<
|
||||
$Q$(CC) $(CFLAGS) -pthread -o $@ $< uhyve.o
|
||||
|
||||
clean:
|
||||
@echo Cleaning tools
|
||||
|
|
|
@ -47,6 +47,8 @@
|
|||
#include <sys/ioctl.h>
|
||||
#include <net/if.h>
|
||||
|
||||
#include "proxy.h"
|
||||
|
||||
#define MAX_PATH 255
|
||||
#define MAX_ARGS 1024
|
||||
#define INADDR(a, b, c, d) (struct in_addr) { .s_addr = ((((((d) << 8) | (c)) << 8) | (b)) << 8) | (a) }
|
||||
|
@ -54,14 +56,6 @@
|
|||
#define HERMIT_PORT 0x494E
|
||||
#define HERMIT_IP(isle) INADDR(192, 168, 28, isle + 2)
|
||||
#define HERMIT_MAGIC 0x7E317
|
||||
#define HERMIT_ELFOSABI 0x42
|
||||
|
||||
#define __HERMIT_exit 0
|
||||
#define __HERMIT_write 1
|
||||
#define __HERMIT_open 2
|
||||
#define __HERMIT_close 3
|
||||
#define __HERMIT_read 4
|
||||
#define __HERMIT_lseek 5
|
||||
|
||||
#define EVENT_SIZE (sizeof (struct inotify_event))
|
||||
#define BUF_LEN (1024 * (EVENT_SIZE + 16))
|
||||
|
@ -80,24 +74,27 @@ static void stop_hermit(void);
|
|||
static void dump_log(void);
|
||||
static int init_multi(char *path);
|
||||
static int init_qemu(char *path);
|
||||
int init_uhyve(char *path);
|
||||
|
||||
static void fini_env(void)
|
||||
static void fini_qemu(void)
|
||||
{
|
||||
if (qemu) {
|
||||
int status = 0;
|
||||
int status = 0;
|
||||
|
||||
if (id) {
|
||||
kill(id, SIGINT);
|
||||
wait(&status);
|
||||
}
|
||||
if (id) {
|
||||
kill(id, SIGINT);
|
||||
wait(&status);
|
||||
}
|
||||
|
||||
dump_log();
|
||||
puts("");
|
||||
unlink(tmpname);
|
||||
} else {
|
||||
dump_log();
|
||||
stop_hermit();
|
||||
} }
|
||||
dump_log();
|
||||
puts("");
|
||||
unlink(tmpname);
|
||||
}
|
||||
|
||||
/*
 * Exit hook for the case that is neither QEMU nor uhyve: init_env() registers
 * it with atexit() right before calling init_multi(). It calls dump_log()
 * followed by stop_hermit().
 */
static void fini_multi(void)
|
||||
{
|
||||
dump_log();
|
||||
stop_hermit();
|
||||
}
|
||||
|
||||
static void exit_handler(int sig)
|
||||
{
|
||||
|
@ -126,7 +123,7 @@ static char* cpufreq(void)
|
|||
;
|
||||
*point = '\0';
|
||||
|
||||
snprintf(cmdline, MAX_PATH, "-freq%s", match);
|
||||
snprintf(cmdline, MAX_PATH, "-freq%s", match);
|
||||
fclose(fp);
|
||||
|
||||
return cmdline;
|
||||
|
@ -139,6 +136,7 @@ static int init_env(char *path)
|
|||
{
|
||||
char* str;
|
||||
struct sigaction sINT, sTERM;
|
||||
unsigned int uhyve = 0;
|
||||
|
||||
// define action for SIGINT
|
||||
sINT.sa_handler = exit_handler;
|
||||
|
@ -163,6 +161,11 @@ static int init_env(char *path)
|
|||
{
|
||||
if (strncmp(str, "qemu", 4) == 0) {
|
||||
qemu = 1;
|
||||
uhyve = 0;
|
||||
isle_nr = 0;
|
||||
} else if (strncmp(str, "uhyve", 5) == 0) {
|
||||
uhyve = 1;
|
||||
qemu = 0;
|
||||
isle_nr = 0;
|
||||
} else {
|
||||
isle_nr = atoi(str);
|
||||
|
@ -179,10 +182,15 @@ static int init_env(char *path)
|
|||
port = HERMIT_PORT;
|
||||
}
|
||||
|
||||
if (qemu)
|
||||
if (qemu) {
|
||||
atexit(fini_qemu);
|
||||
return init_qemu(path);
|
||||
else
|
||||
} else if (uhyve) {
|
||||
return init_uhyve(path);
|
||||
} else {
|
||||
atexit(fini_multi);
|
||||
return init_multi(path);
|
||||
}
|
||||
}
|
||||
|
||||
static int is_hermit_available(void)
|
||||
|
@ -820,7 +828,9 @@ int main(int argc, char **argv)
|
|||
struct sockaddr_in serv_name;
|
||||
|
||||
init_env(argv[1]);
|
||||
atexit(fini_env);
|
||||
|
||||
// in case of uhyve, we will never reach this point
|
||||
// => we could now establish an IP connection to HermitCore
|
||||
|
||||
#if 0
|
||||
// check if mmnif interface is available
|
||||
|
@ -831,7 +841,7 @@ int main(int argc, char **argv)
|
|||
strncpy(ethreq.ifr_name, "mmnif", IFNAMSIZ);
|
||||
|
||||
while(1) {
|
||||
/* this socket doesn't really matter, we just need a descriptor
|
||||
/* this socket doesn't really matter, we just need a descriptor
|
||||
* to perform the ioctl on */
|
||||
s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
|
||||
ioctl(s, SIOCGIFFLAGS, ðreq);
|
||||
|
|
42
tools/proxy.h
Normal file
42
tools/proxy.h
Normal file
|
@ -0,0 +1,42 @@
|
|||
/*
|
||||
* Copyright (c) 2017, Stefan Lankes, RWTH Aachen University
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Definitions shared by the proxy sources (proxy.c and uhyve.c both include this header). */
#ifndef __PROXY_H__
|
||||
#define __PROXY_H__
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/* Value load_kernel() in uhyve.c requires in e_ident[EI_OSABI] of a HermitCore ELF image. */
#define HERMIT_ELFOSABI 0x42
|
||||
|
||||
/* NOTE(review): presumably the request identifiers of the socket-based proxy protocol — confirm against proxy.c. */
#define __HERMIT_exit 0
|
||||
#define __HERMIT_write 1
|
||||
#define __HERMIT_open 2
|
||||
#define __HERMIT_close 3
|
||||
#define __HERMIT_read 4
|
||||
#define __HERMIT_lseek 5
|
||||
|
||||
#endif
|
108
tools/uhyve-cpu.h
Normal file
108
tools/uhyve-cpu.h
Normal file
|
@ -0,0 +1,108 @@
|
|||
/*
 * x86 control-register, page-table and GDT helper definitions used by uhyve
 * to build the initial guest state.
 */
#ifndef __UHYVE_CPU_H__
#define __UHYVE_CPU_H__

/* __u8/__u16/__u32/__u64 are used below; include their definition so that
 * the header does not depend on the include order of its users. */
#include <linux/types.h>

/* Fallback for <linux/const.h>: only defined if nobody provided _BITUL yet. */
#ifndef _BITUL

#ifdef __ASSEMBLY__
#define _AC(X,Y)	X
#define _AT(T,X)	X
#else
#define __AC(X,Y)	(X##Y)
#define _AC(X,Y)	__AC(X,Y)
#define _AT(T,X)	((T)(X))
#endif

#define _BITUL(x)	(_AC(1,UL) << (x))
#define _BITULL(x)	(_AC(1,ULL) << (x))

#endif

/*
 * EFLAGS bits
 */
#define X86_EFLAGS_CF	0x00000001 /* Carry Flag */

/*
 * Basic CPU control in CR0
 */
#define X86_CR0_PE_BIT	0 /* Protection Enable */
#define X86_CR0_PE	_BITUL(X86_CR0_PE_BIT)
#define X86_CR0_PG_BIT	31 /* Paging */
#define X86_CR0_PG	_BITUL(X86_CR0_PG_BIT)

/*
 * Intel CPU features in CR4
 */
#define X86_CR4_PAE_BIT	5 /* enable physical address extensions */
#define X86_CR4_PAE	_BITUL(X86_CR4_PAE_BIT)

/*
 * Intel long mode page directory/table entries
 */
#define X86_PDPT_P_BIT	0 /* Present */
#define X86_PDPT_P	_BITUL(X86_PDPT_P_BIT)
#define X86_PDPT_RW_BIT	1 /* Writable */
#define X86_PDPT_RW	_BITUL(X86_PDPT_RW_BIT)
#define X86_PDPT_PS_BIT	7 /* Page size */
#define X86_PDPT_PS	_BITUL(X86_PDPT_PS_BIT)

/*
 * GDT and KVM segment manipulation
 */

/* Byte offset of descriptor n inside the GDT (8 bytes per descriptor) */
#define GDT_DESC_OFFSET(n) ((n) * 0x8)

/* Reassemble the base from descriptor bits 63..56, 39..32 and 31..16 */
#define GDT_GET_BASE(x) ( \
	(((x) & 0xFF00000000000000) >> 32) | \
	(((x) & 0x000000FF00000000) >> 16) | \
	(((x) & 0x00000000FFFF0000) >> 16))

/* Reassemble the 20-bit limit from descriptor bits 51..48 and 15..0 */
#define GDT_GET_LIMIT(x) (__u32)( \
	(((x) & 0x000F000000000000) >> 32) | \
	(((x) & 0x000000000000FFFF)))

/* Constructor for a conventional segment GDT (or LDT) entry */
/* This is a macro so it can be used in initializers */
#define GDT_ENTRY(flags, base, limit) \
	((((base) & _AC(0xff000000, ULL)) << (56-24)) | \
	 (((flags) & _AC(0x0000f0ff, ULL)) << 40) | \
	 (((limit) & _AC(0x000f0000, ULL)) << (48-16)) | \
	 (((base) & _AC(0x00ffffff, ULL)) << 16) | \
	 (((limit) & _AC(0x0000ffff, ULL))))

struct _kvm_segment {
	__u64 base;
	__u32 limit;
	__u16 selector;
	__u8 type;
	__u8 present, dpl, db, s, l, g, avl;
	__u8 unusable;
	__u8 padding;
};

/* Single-field extractors for an 8-byte descriptor */
#define GDT_GET_G(x)   (__u8)(((x) & 0x0080000000000000) >> 55)
#define GDT_GET_DB(x)  (__u8)(((x) & 0x0040000000000000) >> 54)
#define GDT_GET_L(x)   (__u8)(((x) & 0x0020000000000000) >> 53)
#define GDT_GET_AVL(x) (__u8)(((x) & 0x0010000000000000) >> 52)
#define GDT_GET_P(x)   (__u8)(((x) & 0x0000800000000000) >> 47)
#define GDT_GET_DPL(x) (__u8)(((x) & 0x0000600000000000) >> 45)
#define GDT_GET_S(x)   (__u8)(((x) & 0x0000100000000000) >> 44)
#define GDT_GET_TYPE(x)(__u8)(((x) & 0x00000F0000000000) >> 40)

/*
 * Fill the segment `seg` from entry `sel` of `gdt_table`.
 * The fields `unusable` and `padding` of `seg` are NOT written; the caller
 * has to initialise them. The arguments are parenthesised so that
 * expressions such as `idx + 1` can be passed for `sel`.
 */
#define GDT_TO_KVM_SEGMENT(seg, gdt_table, sel) \
	do { \
		__u64 gdt_ent = (gdt_table)[(sel)]; \
		(seg).base = GDT_GET_BASE(gdt_ent); \
		(seg).limit = GDT_GET_LIMIT(gdt_ent); \
		(seg).selector = (sel) * 8; \
		(seg).type = GDT_GET_TYPE(gdt_ent); \
		(seg).present = GDT_GET_P(gdt_ent); \
		(seg).dpl = GDT_GET_DPL(gdt_ent); \
		(seg).db = GDT_GET_DB(gdt_ent); \
		(seg).s = GDT_GET_S(gdt_ent); \
		(seg).l = GDT_GET_L(gdt_ent); \
		(seg).g = GDT_GET_G(gdt_ent); \
		(seg).avl = GDT_GET_AVL(gdt_ent); \
	} while (0)

#endif
|
653
tools/uhyve.c
Normal file
653
tools/uhyve.c
Normal file
|
@ -0,0 +1,653 @@
|
|||
/* Copyright (c) 2015, IBM
|
||||
* Author(s): Dan Williams <djwillia@us.ibm.com>
|
||||
* Ricardo Koller <kollerr@us.ibm.com>
|
||||
* Copyright (c) 2017, RWTH Aachen University
|
||||
* Author(s): Stefan Lankes <slankes@eonerc.rwth-aachen.de>
|
||||
*
|
||||
* Permission to use, copy, modify, and/or distribute this software
|
||||
* for any purpose with or without fee is hereby granted, provided
|
||||
* that the above copyright notice and this permission notice appear
|
||||
* in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
|
||||
* WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
|
||||
* AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
|
||||
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
||||
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
/* We used several existing projects as guides
|
||||
* kvmtest.c: http://lwn.net/Articles/658512/
|
||||
* lkvm: http://github.com/clearlinux/kvmtool
|
||||
*/
|
||||
|
||||
/*
|
||||
* 15.1.2017: extend original version (https://github.com/Solo5/solo5)
|
||||
* for HermitCore
|
||||
*/
|
||||
|
||||
#define _GNU_SOURCE
|
||||
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <sched.h>
|
||||
#include <signal.h>
|
||||
#include <limits.h>
|
||||
#include <assert.h>
|
||||
#include <pthread.h>
|
||||
#include <elf.h>
|
||||
#include <err.h>
|
||||
#include <sys/wait.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <linux/const.h>
|
||||
#include <linux/kvm.h>
|
||||
#include <asm/msr-index.h>
|
||||
|
||||
#include "uhyve-cpu.h"
|
||||
#include "proxy.h"
|
||||
|
||||
#define GUEST_OFFSET 0x0
|
||||
#define CPUID_FUNC_PERFMON 0x0A
|
||||
#define GUEST_PAGE_SIZE 0x200000 /* 2 MB pages in guest */
|
||||
|
||||
#define BOOT_GDT 0x1000
|
||||
#define BOOT_INFO 0x2000
|
||||
#define BOOT_PML4 0x10000
|
||||
#define BOOT_PDPTE 0x11000
|
||||
#define BOOT_PDE 0x12000
|
||||
|
||||
#define BOOT_GDT_NULL 0
|
||||
#define BOOT_GDT_CODE 1
|
||||
#define BOOT_GDT_DATA 2
|
||||
#define BOOT_GDT_MAX 3
|
||||
|
||||
#define KVM_32BIT_MAX_MEM_SIZE (1ULL << 32)
|
||||
#define KVM_32BIT_GAP_SIZE (768 << 20)
|
||||
#define KVM_32BIT_GAP_START (KVM_32BIT_MAX_MEM_SIZE - KVM_32BIT_GAP_SIZE)
|
||||
|
||||
#define UHYVE_PORT_WRITE 0x499
|
||||
#define UHYVE_PORT_OPEN 0x500
|
||||
#define UHYVE_PORT_CLOSE 0x501
|
||||
#define UHYVE_PORT_READ 0x502
|
||||
#define UHYVE_PORT_EXIT 0x503
|
||||
#define UHYVE_PORT_LSEEK 0x504
|
||||
|
||||
/* KVM file descriptors; -1 means "not opened", which is what uhyve_exit()
 * tests before calling close() (vcpufd used to start at 1, i.e. stdout). */
static int kvm = -1, vmfd = -1, vcpufd = -1;
|
||||
static uint8_t* guest_mem = NULL;
|
||||
static uint8_t* klog = NULL;
|
||||
static size_t guest_size = 0x20000000ULL;
|
||||
static uint64_t elf_entry;
|
||||
//static pthread_t vcpu_thread;
|
||||
static volatile uint8_t done = 0;
|
||||
|
||||
/*
 * Argument block of a UHYVE_PORT_WRITE request. vcpu_loop() treats the 32-bit
 * value written to the port as the offset of this struct inside guest_mem,
 * adds guest_mem to `buf` as well, calls write(fd, buf, len) on the host and
 * stores the result back into `len`.
 */
typedef struct {
|
||||
int fd;
|
||||
const char* buf;
|
||||
size_t len;
|
||||
} __attribute__((packed)) uhyve_write_t;
|
||||
|
||||
/*
 * Argument block of a UHYVE_PORT_OPEN request. vcpu_loop() adds guest_mem to
 * `name`, calls open(name, flags, mode) on the host and stores the result in
 * `ret`.
 */
typedef struct {
|
||||
const char* name;
|
||||
int flags;
|
||||
int mode;
|
||||
int ret;
|
||||
} __attribute__((packed)) uhyve_open_t;
|
||||
|
||||
/*
 * Argument block of a UHYVE_PORT_CLOSE request. The guest's sys_close() fills
 * in `fd`, presets `ret` to -1 and returns `ret` after the port write.
 * NOTE(review): the host-side handler is not fully visible here — presumably
 * it stores the result of close(fd) in `ret`; confirm.
 */
typedef struct {
|
||||
int fd;
|
||||
int ret;
|
||||
} __attribute__((packed)) uhyve_close_t;
|
||||
|
||||
/*
 * Argument block of a UHYVE_PORT_READ request. vcpu_loop() adds guest_mem to
 * `buf`, calls read(fd, buf, len) on the host and stores the result in `ret`.
 */
typedef struct {
|
||||
int fd;
|
||||
char* buf;
|
||||
size_t len;
|
||||
ssize_t ret;
|
||||
} __attribute__((packed)) uhyve_read_t;
|
||||
|
||||
/*
 * Argument block of a UHYVE_PORT_LSEEK request. The guest's sys_lseek() fills
 * in all three fields and returns `offset` after the port write.
 * NOTE(review): the host-side handler is not visible here — presumably it
 * overwrites `offset` with the result of lseek(); confirm.
 */
typedef struct {
|
||||
int fd;
|
||||
off_t offset;
|
||||
int whence;
|
||||
} __attribute__((packed)) uhyve_lseek_t;
|
||||
|
||||
static void uhyve_exit(void)
|
||||
{
|
||||
char* str = getenv("HERMIT_VERBOSE");
|
||||
|
||||
if (done == 0) {
|
||||
done = 1;
|
||||
//pthread_kill(vcpu_thread, SIGINT);
|
||||
}
|
||||
|
||||
if (klog && str && (strcmp(str, "0") != 0))
|
||||
{
|
||||
puts("\nDump kernel log:");
|
||||
puts("================\n");
|
||||
printf("%s\n", klog);
|
||||
}
|
||||
|
||||
if (vcpufd != -1)
|
||||
close(vcpufd);
|
||||
if (vmfd != -1)
|
||||
close(vmfd);
|
||||
if (kvm != -1)
|
||||
close(kvm);
|
||||
}
|
||||
|
||||
/*
 * Read the host CPU frequency that is handed to the guest kernel.
 *
 * The active variant reads cpu0's cpuinfo_max_freq from sysfs and divides the
 * value by 1000. Returns 0 if the value cannot be obtained.
 */
static uint32_t get_cpufreq(void)
{
#if 1
	char line[2048];
	uint32_t freq = 0;

	FILE* fp = fopen("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", "r");
	if (!fp) {
		// no trailing newline: perror() appends ": <reason>\n" itself
		perror("Unable to open /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq");
		return freq;
	}

	if (fgets(line, sizeof(line), fp))
		freq = (uint32_t) (strtoul(line, NULL, 10) / 1000);

	// the stream was leaked before
	fclose(fp);

	return freq;
#else
	/* Alternative: take the integer part of the first "cpu MHz" line of /proc/cpuinfo. */
	uint32_t freq = 0;
	char line[2048];

	FILE* fp = fopen("/proc/cpuinfo", "r");
	if (!fp)
		return freq;

	while (fgets(line, sizeof(line), fp)) {
		char* match = strstr(line, "cpu MHz");

		if (!match)
			continue;

		// scan string for the next number, but never past the end of the line
		while (*match && ((*match < '0') || (*match > '9')))
			match++;

		// strtoul() stops at the decimal point
		freq = (uint32_t) strtoul(match, NULL, 10);
		break;
	}

	fclose(fp);

	return freq;
#endif
}
|
||||
|
||||
static ssize_t pread_in_full(int fd, void *buf, size_t count, off_t offset)
|
||||
{
|
||||
ssize_t total = 0;
|
||||
char *p = buf;
|
||||
|
||||
if (count > SSIZE_MAX) {
|
||||
errno = E2BIG;
|
||||
return -1;
|
||||
}
|
||||
|
||||
while (count > 0) {
|
||||
ssize_t nr;
|
||||
|
||||
nr = pread(fd, p, count, offset);
|
||||
if (nr == 0)
|
||||
return total;
|
||||
else if (nr == -1 && errno == EINTR)
|
||||
continue;
|
||||
else if (nr == -1)
|
||||
return -1;
|
||||
|
||||
count -= nr;
|
||||
total += nr;
|
||||
p += nr;
|
||||
offset += nr;
|
||||
}
|
||||
|
||||
return total;
|
||||
}
|
||||
|
||||
static int load_kernel(uint8_t* mem, char* path)
|
||||
{
|
||||
Elf64_Ehdr hdr;
|
||||
Elf64_Phdr *phdr = NULL;
|
||||
size_t buflen;
|
||||
int fd, ret;
|
||||
|
||||
fd = open(path, O_RDONLY);
|
||||
if (fd == -1)
|
||||
{
|
||||
perror("Unable to open file");
|
||||
return -1;
|
||||
}
|
||||
|
||||
ret = pread_in_full(fd, &hdr, sizeof(hdr), 0);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
// check if the program is a HermitCore file
|
||||
if (hdr.e_ident[EI_MAG0] != ELFMAG0
|
||||
|| hdr.e_ident[EI_MAG1] != ELFMAG1
|
||||
|| hdr.e_ident[EI_MAG2] != ELFMAG2
|
||||
|| hdr.e_ident[EI_MAG3] != ELFMAG3
|
||||
|| hdr.e_ident[EI_CLASS] != ELFCLASS64
|
||||
|| hdr.e_ident[EI_OSABI] != HERMIT_ELFOSABI
|
||||
|| hdr.e_type != ET_EXEC || hdr.e_machine != EM_X86_64) {
|
||||
fprintf(stderr, "Inavlide HermitCore file!\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
elf_entry = hdr.e_entry;
|
||||
|
||||
buflen = hdr.e_phentsize * hdr.e_phnum;
|
||||
phdr = malloc(buflen);
|
||||
if (!phdr) {
|
||||
fprintf(stderr, "Not enough memory\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = pread_in_full(fd, phdr, buflen, hdr.e_phoff);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* Load all segments with type "LOAD" from the file at offset
|
||||
* p_offset, and copy that into in memory.
|
||||
*/
|
||||
for (Elf64_Half ph_i = 0; ph_i < hdr.e_phnum; ph_i++)
|
||||
{
|
||||
uint64_t paddr = phdr[ph_i].p_paddr;
|
||||
size_t offset = phdr[ph_i].p_offset;
|
||||
size_t filesz = phdr[ph_i].p_filesz;
|
||||
size_t memsz = phdr[ph_i].p_memsz;
|
||||
|
||||
if (phdr[ph_i].p_type != PT_LOAD)
|
||||
continue;
|
||||
|
||||
//printf("Kernel location 0x%zx, file size 0x%zx\n", paddr, filesz);
|
||||
|
||||
ret = pread_in_full(fd, mem+paddr-GUEST_OFFSET, filesz, offset);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
memset(mem+paddr+filesz-GUEST_OFFSET, 0x00, memsz - filesz);
|
||||
if (!klog)
|
||||
klog = mem+paddr+0x5000-GUEST_OFFSET;
|
||||
|
||||
// initialize kernel
|
||||
*((uint64_t*) (mem+paddr-GUEST_OFFSET + 0x08)) = paddr; // physical start address
|
||||
*((uint64_t*) (mem+paddr-GUEST_OFFSET + 0x10)) = guest_size; // physical limit
|
||||
*((uint32_t*) (mem+paddr-GUEST_OFFSET + 0x18)) = get_cpufreq();
|
||||
*((uint32_t*) (mem+paddr-GUEST_OFFSET + 0x24)) = 1; // number of used cpus
|
||||
*((uint32_t*) (mem+paddr-GUEST_OFFSET + 0x30)) = 0; // apicid
|
||||
*((uint64_t*) (mem+paddr-GUEST_OFFSET + 0x38)) = filesz;
|
||||
*((uint32_t*) (mem+paddr-GUEST_OFFSET + 0x60)) = 1; // numa nodes
|
||||
*((uint32_t*) (mem+paddr-GUEST_OFFSET + 0x94)) = 1; // announce uhyve
|
||||
}
|
||||
out:
|
||||
if (phdr)
|
||||
free(phdr);
|
||||
|
||||
close(fd);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void filter_cpuid(struct kvm_cpuid2 *kvm_cpuid)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
/*
|
||||
* Filter CPUID functions that are not supported by the hypervisor.
|
||||
*/
|
||||
for (i = 0; i < kvm_cpuid->nent; i++) {
|
||||
struct kvm_cpuid_entry2 *entry = &kvm_cpuid->entries[i];
|
||||
|
||||
switch (entry->function) {
|
||||
case 1: // CPUID to define basic cpu features
|
||||
entry->ecx = entry->ecx | (1 << 31); // propagate that we are running on a hypervisor
|
||||
entry->ecx = entry->ecx & ~(1 << 21); // disable X2APIC support
|
||||
entry->edx = entry->edx | (1 << 5); // enable msr support
|
||||
break;
|
||||
case CPUID_FUNC_PERFMON:
|
||||
entry->eax = 0x00; /* disable it */
|
||||
break;
|
||||
default:
|
||||
/* Keep the CPUID function as -is */
|
||||
break;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
static void setup_system_64bit(struct kvm_sregs *sregs)
|
||||
{
|
||||
sregs->cr0 |= X86_CR0_PE;
|
||||
sregs->efer |= EFER_LME;
|
||||
}
|
||||
|
||||
|
||||
static void setup_system_page_tables(struct kvm_sregs *sregs, uint8_t *mem)
|
||||
{
|
||||
uint64_t *pml4 = (uint64_t *) (mem + BOOT_PML4);
|
||||
uint64_t *pdpte = (uint64_t *) (mem + BOOT_PDPTE);
|
||||
uint64_t *pde = (uint64_t *) (mem + BOOT_PDE);
|
||||
uint64_t paddr;
|
||||
|
||||
/*
|
||||
* For simplicity we currently use 2MB pages and only a single
|
||||
* PML4/PDPTE/PDE. Sanity check that the guest size is a multiple of the
|
||||
* page size and will fit in a single PDE (512 entries).
|
||||
*/
|
||||
assert((guest_size & (GUEST_PAGE_SIZE - 1)) == 0);
|
||||
assert(guest_size <= (GUEST_PAGE_SIZE * 512));
|
||||
|
||||
memset(pml4, 0x00, 4096);
|
||||
memset(pdpte, 0x00, 4096);
|
||||
memset(pde, 0x00, 4096);
|
||||
|
||||
*pml4 = BOOT_PDPTE | (X86_PDPT_P | X86_PDPT_RW);
|
||||
*pdpte = BOOT_PDE | (X86_PDPT_P | X86_PDPT_RW);
|
||||
for (paddr = 0; paddr < guest_size; paddr += GUEST_PAGE_SIZE, pde++)
|
||||
*pde = paddr | (X86_PDPT_P | X86_PDPT_RW | X86_PDPT_PS);
|
||||
|
||||
sregs->cr3 = BOOT_PML4;
|
||||
sregs->cr4 |= X86_CR4_PAE;
|
||||
sregs->cr0 |= X86_CR0_PG;
|
||||
}
|
||||
|
||||
static void setup_system_gdt(struct kvm_sregs *sregs,
|
||||
uint8_t *mem,
|
||||
uint64_t off)
|
||||
{
|
||||
uint64_t *gdt = (uint64_t *) (mem + off);
|
||||
struct kvm_segment data_seg, code_seg;
|
||||
|
||||
/* flags, base, limit */
|
||||
gdt[BOOT_GDT_NULL] = GDT_ENTRY(0, 0, 0);
|
||||
gdt[BOOT_GDT_CODE] = GDT_ENTRY(0xA09B, 0, 0xFFFFF);
|
||||
gdt[BOOT_GDT_DATA] = GDT_ENTRY(0xC093, 0, 0xFFFFF);
|
||||
|
||||
sregs->gdt.base = off;
|
||||
sregs->gdt.limit = (sizeof(uint64_t) * BOOT_GDT_MAX) - 1;
|
||||
|
||||
GDT_TO_KVM_SEGMENT(code_seg, gdt, BOOT_GDT_CODE);
|
||||
GDT_TO_KVM_SEGMENT(data_seg, gdt, BOOT_GDT_DATA);
|
||||
|
||||
sregs->cs = code_seg;
|
||||
sregs->ds = data_seg;
|
||||
sregs->es = data_seg;
|
||||
sregs->fs = data_seg;
|
||||
sregs->gs = data_seg;
|
||||
sregs->ss = data_seg;
|
||||
}
|
||||
|
||||
static void setup_system(int vcpufd, uint8_t *mem)
|
||||
{
|
||||
struct kvm_sregs sregs;
|
||||
int ret;
|
||||
|
||||
/* Set all cpu/mem system structures */
|
||||
ret = ioctl(vcpufd, KVM_GET_SREGS, &sregs);
|
||||
if (ret == -1)
|
||||
err(1, "KVM: ioctl (GET_SREGS) failed");
|
||||
|
||||
setup_system_gdt(&sregs, mem, BOOT_GDT);
|
||||
setup_system_page_tables(&sregs, mem);
|
||||
setup_system_64bit(&sregs);
|
||||
|
||||
ret = ioctl(vcpufd, KVM_SET_SREGS, &sregs);
|
||||
if (ret == -1)
|
||||
err(1, "KVM: ioctl (SET_SREGS) failed");
|
||||
}
|
||||
|
||||
|
||||
/*
 * Query the CPUID leaves supported by KVM (`kvm` is the /dev/kvm handle),
 * adjust them with filter_cpuid() and install them on the vcpu `vcpufd`.
 * Any failure terminates the process via err().
 */
static void setup_cpuid(int kvm, int vcpufd)
{
	struct kvm_cpuid2 *kvm_cpuid;
	const unsigned int max_entries = 100;

	kvm_cpuid = calloc(1, sizeof(*kvm_cpuid) + max_entries * sizeof(*kvm_cpuid->entries));
	if (!kvm_cpuid)
		err(1, "Not enough memory for the CPUID table");
	kvm_cpuid->nent = max_entries;

	if (ioctl(kvm, KVM_GET_SUPPORTED_CPUID, kvm_cpuid) < 0)
		err(1, "KVM: ioctl (GET_SUPPORTED_CPUID) failed");

	filter_cpuid(kvm_cpuid);

	if (ioctl(vcpufd, KVM_SET_CPUID2, kvm_cpuid) < 0)
		err(1, "KVM: ioctl (SET_CPUID2) failed");

	// the table is only needed for the two ioctls above
	free(kvm_cpuid);
}
|
||||
|
||||
static void* vcpu_loop(struct kvm_run *run)
|
||||
{
|
||||
int ret;
|
||||
|
||||
while (!done) {
|
||||
ret = ioctl(vcpufd, KVM_RUN, NULL);
|
||||
if (ret == -1 && errno == EINTR)
|
||||
continue;
|
||||
if (ret == -1) {
|
||||
if (errno == EFAULT) {
|
||||
struct kvm_regs regs;
|
||||
|
||||
ret = ioctl(vcpufd, KVM_GET_REGS, ®s);
|
||||
if (ret == -1)
|
||||
err(1, "KVM: ioctl (GET_REGS) failed after guest fault");
|
||||
err(1, "KVM: host/guest translation fault: rip=0x%llx", regs.rip);
|
||||
} else err(1, "KVM: ioctl in vcpu_loop failed");
|
||||
}
|
||||
|
||||
/* TODO: handle requests */
|
||||
|
||||
switch (run->exit_reason) {
|
||||
case KVM_EXIT_HLT:
|
||||
fprintf(stderr, "KVM: unhandled KVM_EXIT_HLT\n");
|
||||
/* Guest has halted the CPU, this is considered as a normal exit. */
|
||||
return NULL;
|
||||
|
||||
case KVM_EXIT_MMIO:
|
||||
err(1, "KVM: unhandled KVM_EXIT_MMIO at 0x%llx", run->mmio.phys_addr);
|
||||
break;
|
||||
|
||||
case KVM_EXIT_IO:
|
||||
//printf("port 0x%x\n", run->io.port);
|
||||
switch (run->io.port) {
|
||||
case UHYVE_PORT_WRITE: {
|
||||
unsigned data = *((unsigned*)((size_t)run+run->io.data_offset));
|
||||
uhyve_write_t* uhyve_write = (uhyve_write_t*) (guest_mem+data);
|
||||
|
||||
uhyve_write->len = write(uhyve_write->fd, guest_mem+(size_t)uhyve_write->buf, uhyve_write->len);
|
||||
break;
|
||||
}
|
||||
|
||||
case UHYVE_PORT_READ: {
|
||||
unsigned data = *((unsigned*)((size_t)run+run->io.data_offset));
|
||||
uhyve_read_t* uhyve_read = (uhyve_read_t*) (guest_mem+data);
|
||||
|
||||
uhyve_read->ret = read(uhyve_read->fd, guest_mem+(size_t)uhyve_read->buf, uhyve_read->len);
|
||||
break;
|
||||
}
|
||||
|
||||
case UHYVE_PORT_EXIT: {
|
||||
unsigned data = *((unsigned*)((size_t)run+run->io.data_offset));
|
||||
|
||||
done = 1;
|
||||
exit(*(int*)(guest_mem+data));
|
||||
break;
|
||||
}
|
||||
|
||||
case UHYVE_PORT_OPEN: {
|
||||
unsigned data = *((unsigned*)((size_t)run+run->io.data_offset));
|
||||
uhyve_open_t* uhyve_open = (uhyve_open_t*) (guest_mem+data);
|
||||
|
||||
uhyve_open->ret = open((const char*)guest_mem+(size_t)uhyve_open->name, uhyve_open->flags, uhyve_open->mode);
|
||||
break;
|
||||
}
|
||||
|
||||
case UHYVE_PORT_CLOSE: {
|
||||
unsigned data = *((unsigned*)((size_t)run+run->io.data_offset));
|
||||
uhyve_close_t* uhyve_close = (uhyve_close_t*) (guest_mem+data);
|
||||
|
||||
uhyve_close->ret = close(uhyve_close->fd);
|
||||
break;
|
||||
}
|
||||
|
||||
case UHYVE_PORT_LSEEK: {
|
||||
unsigned data = *((unsigned*)((size_t)run+run->io.data_offset));
|
||||
uhyve_lseek_t* uhyve_lseek = (uhyve_lseek_t*) (guest_mem+data);
|
||||
|
||||
uhyve_lseek->offset = lseek(uhyve_lseek->fd, uhyve_lseek->offset, uhyve_lseek->whence);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
err(1, "KVM: unhandled KVM_EXIT_IO at port 0x%x, direction %d", run->io.port, run->io.direction);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case KVM_EXIT_FAIL_ENTRY:
|
||||
err(1, "KVM: entry failure: hw_entry_failure_reason=0x%llx",
|
||||
run->fail_entry.hardware_entry_failure_reason);
|
||||
break;
|
||||
|
||||
case KVM_EXIT_INTERNAL_ERROR:
|
||||
err(1, "KVM: internal error exit: suberror = 0x%x", run->internal.suberror);
|
||||
break;
|
||||
|
||||
case KVM_EXIT_SHUTDOWN:
|
||||
err(1, "KVM: receive shutdown command");
|
||||
break;
|
||||
|
||||
default:
|
||||
fprintf(stderr, "KVM: unhandled exit: exit_reason = 0x%x\n", run->exit_reason);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void* uhyve_thread(void* arg)
|
||||
{
|
||||
char* path = (char*) arg;
|
||||
int ret;
|
||||
size_t mmap_size;
|
||||
struct kvm_run *run;
|
||||
|
||||
// register routine to close the VM
|
||||
atexit(uhyve_exit);
|
||||
|
||||
char* str = getenv("HERMIT_MEM");
|
||||
if (str)
|
||||
printf("We want to use %s memory\n", str);
|
||||
|
||||
kvm = open("/dev/kvm", O_RDWR | O_CLOEXEC);
|
||||
if (kvm < 0)
|
||||
err(1, "Could not open: /dev/kvm");
|
||||
|
||||
/* Make sure we have the stable version of the API */
|
||||
ret = ioctl(kvm, KVM_GET_API_VERSION, NULL);
|
||||
if (ret < 0)
|
||||
err(1, "KVM: ioctl (GET_API_VERSION) failed");
|
||||
if (ret != 12)
|
||||
err(1, "KVM: API version is %d, uhyve requires version 12", ret);
|
||||
|
||||
vmfd = ioctl(kvm, KVM_CREATE_VM, 0);
|
||||
if (vmfd == -1)
|
||||
err(1, "KVM: unable to create VM");
|
||||
|
||||
// TODO: we have to create a gap for PCI
|
||||
assert(guest_size < KVM_32BIT_GAP_SIZE);
|
||||
|
||||
/* Allocate page-aligned guest memory. */
|
||||
guest_mem = mmap(NULL, guest_size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
|
||||
if (guest_mem == MAP_FAILED)
|
||||
err(1, "mmap failed");
|
||||
|
||||
ret = load_kernel(guest_mem, path);
|
||||
if (ret)
|
||||
exit(EXIT_FAILURE);
|
||||
|
||||
/* Map it to the second page frame (to avoid the real-mode IDT at 0). */
|
||||
struct kvm_userspace_memory_region kvm_region = {
|
||||
.slot = 0,
|
||||
.guest_phys_addr = GUEST_OFFSET,
|
||||
.memory_size = guest_size,
|
||||
.userspace_addr = (uint64_t) guest_mem,
|
||||
};
|
||||
|
||||
ret = ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &kvm_region);
|
||||
if (ret == -1)
|
||||
err(1, "KVM: set user memory failed");
|
||||
|
||||
ret = ioctl(vmfd, KVM_CREATE_IRQCHIP);
|
||||
if (ret < 0)
|
||||
err(1, "KVM_CREATE_IRQCHIP ioctl");
|
||||
|
||||
vcpufd = ioctl(vmfd, KVM_CREATE_VCPU, 0);
|
||||
if (vcpufd == -1)
|
||||
err(1, "KVM: create vcpu failed");
|
||||
|
||||
/* Setup registers and memory. */
|
||||
setup_system(vcpufd, guest_mem);
|
||||
|
||||
/*
|
||||
* Initialize registers: instruction pointer for our code, addends,
|
||||
* and initial flags required by x86 architecture.
|
||||
* Arguments to the kernel main are passed using the x86_64 calling
|
||||
* convention: RDI, RSI, RDX, RCX, R8, and R9
|
||||
*/
|
||||
struct kvm_regs regs = {
|
||||
.rip = elf_entry,
|
||||
.rax = 2,
|
||||
.rbx = 2,
|
||||
.rdx = 0,
|
||||
.rflags = 0x2,
|
||||
};
|
||||
ret = ioctl(vcpufd, KVM_SET_REGS, ®s);
|
||||
if (ret == -1)
|
||||
err(1, "KVM: ioctl (SET_REGS) failed");
|
||||
|
||||
/* Map the shared kvm_run structure and following data. */
|
||||
ret = ioctl(kvm, KVM_GET_VCPU_MMAP_SIZE, NULL);
|
||||
if (ret == -1)
|
||||
err(1, "KVM: ioctl get VCPU_MMAP_SIZE failed");
|
||||
mmap_size = ret;
|
||||
if (mmap_size < sizeof(*run))
|
||||
err(1, "KVM: invalid VCPU_MMAP_SIZE: %zd", mmap_size);
|
||||
|
||||
run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpufd, 0);
|
||||
if (run == MAP_FAILED)
|
||||
err(1, "KVM: VCPU mmap failed");
|
||||
|
||||
setup_cpuid(kvm, vcpufd);
|
||||
|
||||
return vcpu_loop(run);
|
||||
}
|
||||
|
||||
/*
 * Entry point of the uhyve backend.
 *
 * path: path of the kernel image, forwarded to uhyve_thread()
 *
 * The hypervisor runs on the calling thread. As soon as uhyve_thread()
 * returns, the process is terminated with EXIT_SUCCESS. Consequently,
 * the trailing return statement is never reached.
 */
int init_uhyve(char *path)
{
	/* currently no separate vcpu thread is created via pthread_create */
	(void) uhyve_thread(path);

	exit(EXIT_SUCCESS);
	return 0;
}
|
Loading…
Add table
Reference in a new issue