diff --git a/.gitmodules b/.gitmodules index e0035903d..d74200d9c 100644 --- a/.gitmodules +++ b/.gitmodules @@ -6,3 +6,6 @@ path = usr/libomp url = https://github.com/hermitcore/libomp_oss.git branch = hermit +[submodule "caves"] + path = caves + url = https://github.com/hermitcore/hermit-caves.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 0db5eed67..090812927 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -127,6 +127,9 @@ install(DIRECTORY include/hermit FILES_MATCHING PATTERN *.h) +install(FILES tools/init.sh + DESTINATION tools) + # provide custom target to only install libhermit without its runtimes which is # needed during the compilation of the cross toolchain add_custom_target(hermit-bootstrap-install @@ -150,7 +153,7 @@ add_custom_target(hermit # be relocated for installation ## HermitCore's own tools such as Qemu/KVM proxy -build_external(tools ${HERMIT_ROOT}/tools "") +build_external(caves ${HERMIT_ROOT}/caves "") if("${TARGET_ARCH}" STREQUAL "x86_64-hermit") diff --git a/caves b/caves new file mode 160000 index 000000000..155b31e13 --- /dev/null +++ b/caves @@ -0,0 +1 @@ +Subproject commit 155b31e13779b8d2446781b779bfa6a6ae46748c diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt deleted file mode 100644 index 3bf3892d7..000000000 --- a/tools/CMakeLists.txt +++ /dev/null @@ -1,53 +0,0 @@ -cmake_minimum_required(VERSION 3.7) -project(hermit_tools) - -include(CheckIncludeFiles) -include(../cmake/HermitCore-Paths.cmake) - -option(ENABLE_RDMA_MIGRATION "Migration support via RDMA" OFF) - - -add_compile_options(-std=c99) - -list(APPEND LIBS "-pthread") -set(SRC proxy.c - utils.c - uhyve.c - uhyve-net.c - uhyve-migration.c - uhyve-x86_64.c - uhyve-aarch64.c - uhyve-gdb-x86_64.c - uhyve-gdb-aarch64.c -) - -### Optional migration via RDMA -if(ENABLE_RDMA_MIGRATION) - add_definitions(-D__RDMA_MIGRATION__) - list(APPEND LIBS "-libverbs") - set(SRC ${SRC} uhyve-migration-rdma.c) -else() - remove_definitions(-D__RDMA_MIGRATION__) -endif() - -check_include_files(asm/msr-index.h HAVE_MSR_INDEX_H) - -if(HAVE_MSR_INDEX_H) -add_definitions(-DHAVE_MSR_INDEX_H=1) -endif() - -add_executable(proxy ${SRC}) - -target_compile_options(proxy PUBLIC ${LIBS}) -target_compile_options(proxy PUBLIC -DMAX_ARGC_ENVC=${MAX_ARGC_ENVC}) -target_link_libraries(proxy ${LIBS}) - -install(TARGETS proxy - DESTINATION bin) - -install(FILES init.sh - DESTINATION tools) - -# Show include files in IDE -file(GLOB_RECURSE TOOLS_INCLUDES "*.h") -add_custom_target(tools_includes_ide SOURCES ${TOOLS_INCLUDES}) diff --git a/tools/proxy.c b/tools/proxy.c deleted file mode 100644 index 408ebd846..000000000 --- a/tools/proxy.c +++ /dev/null @@ -1,1064 +0,0 @@ -/* - * Copyright (c) 2015, Stefan Lankes, RWTH Aachen University - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this - * software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#define _GNU_SOURCE - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "proxy.h" - -#define MAX_PATH 255 -#define MAX_ARGS 1024 -#define INADDR(a, b, c, d) (struct in_addr) { .s_addr = ((((((d) << 8) | (c)) << 8) | (b)) << 8) | (a) } - -#define HERMIT_PORT 0x494E -#define HERMIT_IP(isle) INADDR(192, 168, 28, isle + 2) -#define HERMIT_MAGIC 0x7E317 - -#define EVENT_SIZE (sizeof (struct inotify_event)) -#define BUF_LEN (1024 * (EVENT_SIZE + 16)) - -#if 0 -#define PROXY_DEBUG(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__); -#else -#define PROXY_DEBUG(fmt, ...) {} -#endif - -typedef enum { - BAREMETAL = 0, - QEMU, - UHYVE -} monitor_t; - -bool verbose = false; - -static monitor_t monitor = UHYVE; -static int sobufsize = 131072; -static unsigned int isle_nr = 0; -static unsigned int port = HERMIT_PORT; -static char pidname[] = "/tmp/hpid-XXXXXX"; -static char tmpname[] = "/tmp/hermit-XXXXXX"; -static char cmdline[MAX_PATH] = ""; - -extern char **environ; - -static void stop_hermit(void); -static void dump_log(void); -static int multi_init(char *path); -static int qemu_init(char *path); - -static void qemu_fini(void) -{ - FILE* fp = NULL; - - // try to kill qemu - if (monitor == QEMU) - fp = fopen(pidname, "r"); - if (fp) { - pid_t id = -1; - - int ret = fscanf(fp, "%d", &id); - if (ret <= 0) - fprintf(stderr, "Unable to read Qemu's pid\n"); - fclose(fp); - unlink(pidname); - - if (id >= 0) { - int ret; - - do { - ret = kill(id, SIGINT); - sched_yield(); - } while((ret < 0) && (errno == ESRCH)); - } - } - - dump_log(); - unlink(tmpname); -} - -static void multi_fini(void) -{ - dump_log(); - stop_hermit(); -} - -static void exit_handler(int sig) -{ - exit(0); -} - -static char* get_append_string(void) -{ - uint32_t freq = get_cpufreq(); - if (freq == 0) - return "-freq0 -proxy"; - - snprintf(cmdline, MAX_PATH, "\"-freq%u -proxy\"", freq); - - return cmdline; -} - -static int env_init(char *path) -{ - char* str; - struct sigaction sINT, sTERM; - - // define action for SIGINT - sINT.sa_handler = exit_handler; - sINT.sa_flags = 0; - if (sigaction(SIGINT, &sINT, NULL) < 0) - { - perror("sigaction"); - exit(1); - } - - // define action for SIGTERM - sTERM.sa_handler = exit_handler; - sTERM.sa_flags = 0; - if (sigaction(SIGTERM, &sTERM, NULL) < 0) - { - perror("sigaction"); - exit(1); - } - - str = getenv("HERMIT_ISLE"); - if (str) - { - if (strncmp(str, "qemu", 4) == 0) { - monitor = QEMU; - isle_nr = 0; - } else if (strncmp(str, "uhyve", 5) == 0) { - monitor = UHYVE; - isle_nr = 0; - } else { - isle_nr = atoi(str); - if (isle_nr > 254) - isle_nr = 0; - } - } - - str = getenv("HERMIT_PORT"); - if (str) - { - port = atoi(str); - if ((port == 0) || (port >= UINT16_MAX)) - port = HERMIT_PORT; - } - - if (monitor == QEMU) { - atexit(qemu_fini); - return qemu_init(path); - } else if (monitor == UHYVE) { - return uhyve_init(path); - } else { - atexit(multi_fini); - return multi_init(path); - } -} - -static int is_hermit_available(void) -{ - char* line = (char*) malloc(2048); - size_t n = 2048; - int ret = 0; - FILE* file; - - if (!line) { - fprintf(stderr, "Not enough memory\n"); - exit(1); - } - - if (monitor == QEMU) { - file = fopen(tmpname, "r"); - if (!file) { - PROXY_DEBUG("%s isn't available\n", tmpname); - } - } else { - char logname[MAX_PATH]; - - snprintf(logname, MAX_PATH, "/sys/hermit/isle%d/log", isle_nr); - file = fopen(logname, "r"); - } - - if (!file) - goto err; - - //PROXY_DEBUG("Open log file\n"); - - while(getline(&line, &n, file) > 0) { - if (strstr(line, "TCP server is listening.") != NULL) { - ret = 1; - break; - } - //PROXY_DEBUG("%s\n", line); - } - - fclose(file); - - err: - free(line); - return ret; -} - -// wait until HermitCore is sucessfully booted -static void wait_hermit_available(void) -{ - char buffer[BUF_LEN]; - int wd; - - if (is_hermit_available()) - return; - - int fd = inotify_init(); - if (fd < 0) { - perror( "inotify_init" ); - exit(1); - } - - if (monitor == QEMU) - wd = inotify_add_watch(fd, "/tmp", IN_MODIFY|IN_CREATE); - else - wd = inotify_add_watch(fd, "/sys/hermit", IN_MODIFY|IN_CREATE); - - if (wd < 0) { - perror("inotify_add_watch"); - exit(1); - } - - while(1) { - int length = read(fd, buffer, BUF_LEN); - - if (length < 0) { - perror("read"); - break; - } - - if (length != 0 && is_hermit_available()) - break; - } - - //printf("HermitCore is available\n"); - if (inotify_rm_watch(fd, wd) < 0) { - perror("inotify_rm_watch"); - exit(1); - } - close(fd); -} - -static int qemu_init(char *path) -{ - int kvm, i = 0; - char* str; - char loader_path[MAX_PATH]; - char hostfwd[MAX_PATH]; - char monitor_str[MAX_PATH]; - char chardev_file[MAX_PATH]; - char port_str[MAX_PATH]; - pid_t qemu_pid; - char* qemu_str = "qemu-system-x86_64"; - char* qemu_argv[] = {qemu_str, "-daemonize", "-display", "none", "-smp", "1", - "-m", "2G", "-pidfile", pidname, "-net", "nic,model=rtl8139", "-net", - hostfwd, "-chardev", chardev_file, "-device", "pci-serial,chardev=gnc0", - "-kernel", loader_path, "-initrd", path, "-append", get_append_string(), - "-no-acpi", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL}; - -#ifdef __aarch64__ - fprintf(stderr, "QEMU as hypervisor is currently not supported for aarch64\n"); - exit(1); -#endif - - str = getenv("HERMIT_CPUS"); - if (str) - qemu_argv[5] = str; - - str = getenv("HERMIT_MEM"); - if (str) - qemu_argv[7] = str; - - str = getenv("HERMIT_QEMU"); - if (str) - qemu_argv[0] = qemu_str = str; - - snprintf(hostfwd, MAX_PATH, "user,hostfwd=tcp:127.0.0.1:%u-:%u", port, port); - snprintf(monitor_str, MAX_PATH, "telnet:127.0.0.1:%d,server,nowait", port+1); - - if (mkstemp(pidname) < 0) - { - perror("mkstemp"); - exit(1); - } - - if (mkstemp(tmpname) < 0) - { - perror("mkstemp"); - exit(1); - } - snprintf(chardev_file, MAX_PATH, "file,id=gnc0,path=%s", tmpname); - - if (readlink("/proc/self/exe", loader_path, MAX_PATH) < 0) - { - perror("readlink"); - exit(1); - } - str = strstr(loader_path, "proxy"); - strncpy(str, "ldhermit.elf", MAX_PATH-strlen(loader_path)+5); - - str = getenv("HERMIT_APP_PORT"); - if (str) - { - int app_port = atoi(str); - - if (app_port > 0) { - for(; qemu_argv[i] != NULL; i++) - ; - - snprintf(port_str, MAX_PATH, "tcp:%u::%u", app_port, app_port); - - qemu_argv[i] = "-redir"; - qemu_argv[i+1] = port_str; - } - } - - str = getenv("HERMIT_KVM"); - if (str && (strcmp(str, "0") == 0)) - kvm = 0; - else - kvm = 1; - - if (kvm) - { - for(; qemu_argv[i] != NULL; i++) - ; - - qemu_argv[i] = "-machine"; - qemu_argv[i+1] = "accel=kvm"; - qemu_argv[i+2] = "-cpu"; - qemu_argv[i+3] = "host"; - } /*else { - for(; qemu_argv[i] != NULL; i++) - ; - - qemu_argv[i] = "-cpu"; - qemu_argv[i+1] = "SandyBridge"; - }*/ - - str = getenv("HERMIT_MONITOR"); - if (str && (strcmp(str, "0") != 0)) - { - for(; qemu_argv[i] != NULL; i++) - ; - - qemu_argv[i] = "-monitor"; - qemu_argv[i+1] = monitor_str; - } - - str = getenv("HERMIT_DEBUG"); - if (str && (strcmp(str, "0") != 0)) - { - for(; qemu_argv[i] != NULL; i++) - ; - - // add flag to start gdbserver on TCP port 1234 - qemu_argv[i] = "-s"; - } - - str = getenv("HERMIT_CAPTURE_NET"); - if (str && (strcmp(str, "0") != 0)) - { - for(; qemu_argv[i] != NULL; i++) - ; - - // add flags to capture the network traffic - qemu_argv[i] = "-net"; - qemu_argv[i+1] = "dump"; - } - - if (verbose) - { - printf("qemu startup command: "); - - for(i=0; qemu_argv[i] != NULL; i++) - printf("%s ", qemu_argv[i]); - printf("\n"); - fflush(stdout); - } - - qemu_pid = fork(); - if (qemu_pid == 0) - { - execvp(qemu_str, qemu_argv); - - fprintf(stderr, "Didn't find qemu\n"); - exit(1); - } else if (qemu_pid < 0) { - perror("fork"); - exit(1); - } - - PROXY_DEBUG("Create VM with pid %d\n", qemu_pid); - - // move the parent process to the end of the queue - // => child would be scheduled next - sched_yield(); - - // wait until HermitCore is sucessfully booted - wait_hermit_available(); - - PROXY_DEBUG("VM is available\n"); - - return 0; -} - -static int multi_init(char *path) -{ - int ret; - char* str; - FILE* file; - char isle_path[MAX_PATH]; - char* result; - -#ifdef __aarch64__ - fprintf(stderr, "The multi-kernel version is currently not supported for aarch64\n"); - exit(1); -#endif - - // set path to temporary file - snprintf(isle_path, MAX_PATH, "/sys/hermit/isle%d/path", isle_nr); - file = fopen(isle_path, "w"); - if (!file) { - perror("fopen"); - exit(1); - } - - fprintf(file, "%s", path); - fclose(file); - - // start application - snprintf(isle_path, MAX_PATH, "/sys/hermit/isle%d/cpus", isle_nr); - file = fopen(isle_path, "w"); - if (!file) { - perror("fopen"); - exit(1); - } - - str = getenv("HERMIT_CPUS"); - if (str) - fprintf(file, "%s", str); - else - fprintf(file, "%s", "1"); - - fclose(file); - - // check result - file = fopen(isle_path, "r"); - if (!file) { - perror("fopen"); - exit(1); - } - - result = NULL; - ret = fscanf(file, "%ms", &result); - - fclose(file); - - if (ret <= 0) { - fprintf(stderr, "Unable to check the boot process!\n"); - exit(1); - } - - if (strcmp(result, "-1") == 0) { - free(result); - fprintf(stderr, "Unable to boot cores %s\n", str ? str : "1"); - exit(1); - } - - free(result); - - // wait until HermitCore is sucessfully booted - //wait_hermit_available(); - - return 0; -} - -static void dump_log(void) -{ - FILE* file; - char line[2048]; - - if (!verbose) - return; - - if (monitor == BAREMETAL) - { - char isle_path[MAX_PATH]; - - snprintf(isle_path, MAX_PATH, "/sys/hermit/isle%d/log", isle_nr); - file = fopen(isle_path, "r"); - } else file = fopen(tmpname, "r"); - - if (!file) { - perror("fopen"); - return; - } - - puts("\nDump kernel log:"); - puts("================\n"); - - while(fgets(line, 2048, file)) { - printf("%s", line); - } - - fclose(file); -} - -static void stop_hermit(void) -{ - FILE* file; - char isle_path[MAX_PATH]; - - fflush(stdout); - fflush(stderr); - - snprintf(isle_path, MAX_PATH, "/sys/hermit/isle%d/cpus", isle_nr); - - file = fopen(isle_path, "w"); - if (!file) { - perror("fopen"); - return; - } - - fprintf(file, "-1"); - - fclose(file); -} - -/* - * in principle, HermitCore forwards basic system calls to - * this proxy, which mapped these call to Linux system calls. - */ -int handle_syscalls(int s) -{ - int sysnr; - ssize_t sret; - size_t j; - - while(1) - { - j = 0; - while(j < sizeof(sysnr)) { - sret = read(s, ((char*)&sysnr)+j, sizeof(sysnr)-j); - if (sret < 0) - goto out; - j += sret; - } - - switch(sysnr) - { - case __HERMIT_exit: { - int arg = 0; - - j = 0; - while(j < sizeof(arg)) { - sret = read(s, ((char*)&arg)+j, sizeof(arg)-j); - if (sret < 0) - goto out; - j += sret; - } - close(s); - - // already called by fini_env - //dump_log(); - //stop_hermit(); - - if (arg == -14) - fprintf(stderr, "Did HermitCore receive an exception?\n"); - exit(arg); - break; - } - case __HERMIT_write: { - int fd; - size_t len; - char* buff; - - j = 0; - while (j < sizeof(fd)) { - sret = read(s, ((char*)&fd)+j, sizeof(fd)-j); - if (sret < 0) - goto out; - j += sret; - } - - j = 0; - while (j < sizeof(len)) { - sret = read(s, ((char*)&len)+j, sizeof(len)-j); - if (sret < 0) - goto out; - j += sret; - } - - buff = malloc(len); - if (!buff) { - fprintf(stderr,"Proxy: not enough memory"); - return 1; - } - - j=0; - while(j < len) - { - sret = read(s, buff+j, len-j); - if (sret < 0) - goto out; - j += sret; - } - - if (fd > 2) { - sret = write(fd, buff, len); - j = 0; - while(j < sizeof(sret)) - { - int l = write(s, ((char*)&sret)+j, sizeof(sret)-j); - if (l < 0) - goto out; - j += l; - } - } else { - j = 0; - while(j < len) - { - sret = write(fd, buff+j, len-j); - if (sret < 0) - goto out; - j += sret; - } - } - - free(buff); - break; - } - case __HERMIT_open: { - size_t len; - char* fname; - int flags, mode, ret; - - j = 0; - while (j < sizeof(len)) - { - sret = read(s, ((char*)&len)+j, sizeof(len)-j); - if (sret < 0) - goto out; - j += sret; - } - - fname = malloc(len); - if (!fname) - goto out; - - j = 0; - while (j < len) - { - sret = read(s, fname+j, len-j); - if (sret < 0) - goto out; - j += sret; - } - - j = 0; - while (j < sizeof(flags)) - { - sret = read(s, ((char*)&flags)+j, sizeof(flags)-j); - if (sret < 0) - goto out; - j += sret; - } - - j = 0; - while (j < sizeof(mode)) - { - sret = read(s, ((char*)&mode)+j, sizeof(mode)-j); - if (sret < 0) - goto out; - j += sret; - } - - //printf("flags 0x%x, mode 0x%x\n", flags, mode); - - ret = open(fname, flags, mode); - j = 0; - while(j < sizeof(ret)) - { - sret = write(s, ((char*)&ret)+j, sizeof(ret)-j); - if (sret < 0) - goto out; - j += sret; - } - - free(fname); - break; - } - case __HERMIT_close: { - int fd, ret; - - j = 0; - while(j < sizeof(fd)) - { - sret = read(s, ((char*)&fd)+j, sizeof(fd)-j); - if (sret < 0) - goto out; - j += sret; - } - - if (fd > 2) - ret = close(fd); - else - ret = 0; - - j = 0; - while (j < sizeof(ret)) - { - sret = write(s, ((char*)&ret)+j, sizeof(ret)-j); - if (sret < 0) - goto out; - j += sret; - } - break; - } - case __HERMIT_read: { - int fd, flag; - size_t len; - ssize_t sj; - char* buff; - - j = 0; - while(j < sizeof(fd)) - { - sret = read(s, ((char*)&fd)+j, sizeof(fd)-j); - if (sret < 0) - goto out; - j += sret; - } - - j = 0; - while(j < sizeof(len)) - { - sret = read(s, ((char*)&len)+j, sizeof(len)-j); - if (sret < 0) - goto out; - j += sret; - } - - buff = malloc(len); - if (!buff) - goto out; - - sj = read(fd, buff, len); - - flag = 0; - setsockopt(s, IPPROTO_TCP, TCP_NODELAY, (char *) &flag, sizeof(int)); - - j = 0; - while (j < sizeof(sj)) - { - sret = write(s, ((char*)&sj)+j, sizeof(sj)-j); - if (sret < 0) - goto out; - j += sret; - } - - if (sj > 0) - { - size_t i = 0; - - while (i < sj) - { - sret = write(s, buff+i, sj-i); - if (sret < 0) - goto out; - - i += sret; - } - } - - flag = 1; - setsockopt(s, IPPROTO_TCP, TCP_NODELAY, (char *) &flag, sizeof(int)); - - free(buff); - break; - } - case __HERMIT_lseek: { - int fd, whence; - off_t offset; - - j = 0; - while (j < sizeof(fd)) - { - sret = read(s, ((char*)&fd)+j, sizeof(fd)-j); - if (sret < 0) - goto out; - j += sret; - } - - j = 0; - while (j < sizeof(offset)) - { - sret = read(s, ((char*)&offset)+j, sizeof(offset)-j); - if (sret < 0) - goto out; - j += sret; - } - - j = 0; - while (j < sizeof(whence)) - { - sret = read(s, ((char*)&whence)+j, sizeof(whence)-j); - if (sret < 0) - goto out; - j += sret; - } - - offset = lseek(fd, offset, whence); - - j = 0; - while (j < sizeof(offset)) - { - sret = write(s, ((char*)&offset)+j, sizeof(offset)-j); - if (sret < 0) - goto out; - j += sret; - } - break; - } - default: - fprintf(stderr, "Proxy: invalid syscall number %d, errno %d, ret %zd\n", sysnr, errno, sret); - close(s); - exit(1); - break; - } - } - -out: - perror("Proxy -- communication error"); - - return 1; -} - -int socket_loop(int argc, char **argv) -{ - int i, j, ret, s; - int32_t magic = HERMIT_MAGIC; - struct sockaddr_in serv_name; - -#if 0 - // check if mmnif interface is available - if (!qemu) { - struct ifreq ethreq; - - memset(ðreq, 0, sizeof(ethreq)); - strncpy(ethreq.ifr_name, "mmnif", IFNAMSIZ); - - while(1) { - /* this socket doesn't really matter, we just need a descriptor - * to perform the ioctl on */ - s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); - ioctl(s, SIOCGIFFLAGS, ðreq); - close(s); - - if (ethreq.ifr_flags & (IFF_UP|IFF_RUNNING)) - break; - } - sched_yield(); - } -#endif - - /* create a socket */ - s = socket(PF_INET, SOCK_STREAM, 0); - if (s < 0) - { - perror("Proxy: socket creation error"); - exit(1); - } - - setsockopt(s, SOL_SOCKET, SO_RCVBUF, (char *) &sobufsize, sizeof(sobufsize)); - setsockopt(s, SOL_SOCKET, SO_SNDBUF, (char *) &sobufsize, sizeof(sobufsize)); - i = 1; - setsockopt(s, IPPROTO_TCP, TCP_NODELAY, (char *) &i, sizeof(i)); - i = 0; - setsockopt(s, SOL_SOCKET, SO_KEEPALIVE, (char *) &i, sizeof(i)); - - /* server address */ - memset((char *) &serv_name, 0x00, sizeof(serv_name)); - serv_name.sin_family = AF_INET; - if (monitor == QEMU) - serv_name.sin_addr = INADDR(127, 0, 0, 1); - else - serv_name.sin_addr = HERMIT_IP(isle_nr); - serv_name.sin_port = htons(port); - - i = 0; -retry: - ret = connect(s, (struct sockaddr*)&serv_name, sizeof(serv_name)); - if (ret < 0) - { - i++; - if (i <= 10) { - usleep(10000); - goto retry; - } - perror("Proxy -- connection error"); - close(s); - exit(1); - } - - ret = write(s, &magic, sizeof(magic)); - if (ret < 0) - goto out; - - // forward program arguments to HermitCore - // argv[0] is path of this proxy so we strip it - - argv++; - argc--; - - ret = write(s, &argc, sizeof(argc)); - if (ret < 0) - goto out; - - for(i=0; i -#include -#include - -#define HERMIT_ELFOSABI 0x42 - -#define __HERMIT_exit 0 -#define __HERMIT_write 1 -#define __HERMIT_open 2 -#define __HERMIT_close 3 -#define __HERMIT_read 4 -#define __HERMIT_lseek 5 - -int uhyve_init(char *path); -int uhyve_loop(int argc, char **argv); - -// define some helper functions -uint32_t get_cpufreq(void); -ssize_t pread_in_full(int fd, void *buf, size_t count, off_t offset); - -#endif diff --git a/tools/queue.h b/tools/queue.h deleted file mode 100644 index b8fda4265..000000000 --- a/tools/queue.h +++ /dev/null @@ -1,638 +0,0 @@ -/* $NetBSD: queue.h,v 1.68 2014/11/19 08:10:01 uebayasi Exp $ */ - -/* - * Copyright (c) 1991, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)queue.h 8.5 (Berkeley) 8/20/94 - */ - -#ifndef _UHYVE_QUEUE_H_ -#define _UHYVE_QUEUE_H_ - -/* - * This file defines five types of data structures: singly-linked lists, - * lists, simple queues, tail queues, and circular queues. - * - * A singly-linked list is headed by a single forward pointer. The - * elements are singly linked for minimum space and pointer manipulation - * overhead at the expense of O(n) removal for arbitrary elements. New - * elements can be added to the list after an existing element or at the - * head of the list. Elements being removed from the head of the list - * should use the explicit macro for this purpose for optimum - * efficiency. A singly-linked list may only be traversed in the forward - * direction. Singly-linked lists are ideal for applications with large - * datasets and few or no removals or for implementing a LIFO queue. - * - * A list is headed by a single forward pointer (or an array of forward - * pointers for a hash table header). The elements are doubly linked - * so that an arbitrary element can be removed without a need to - * traverse the list. New elements can be added to the list before - * or after an existing element or at the head of the list. A list - * may only be traversed in the forward direction. - * - * A simple queue is headed by a pair of pointers, one the head of the - * list and the other to the tail of the list. The elements are singly - * linked to save space, so elements can only be removed from the - * head of the list. New elements can be added to the list after - * an existing element, at the head of the list, or at the end of the - * list. A simple queue may only be traversed in the forward direction. - * - * A tail queue is headed by a pair of pointers, one to the head of the - * list and the other to the tail of the list. The elements are doubly - * linked so that an arbitrary element can be removed without a need to - * traverse the list. New elements can be added to the list before or - * after an existing element, at the head of the list, or at the end of - * the list. A tail queue may be traversed in either direction. - * - * A circle queue is headed by a pair of pointers, one to the head of the - * list and the other to the tail of the list. The elements are doubly - * linked so that an arbitrary element can be removed without a need to - * traverse the list. New elements can be added to the list before or after - * an existing element, at the head of the list, or at the end of the list. - * A circle queue may be traversed in either direction, but has a more - * complex end of list detection. - * - * For details on the use of these macros, see the queue(3) manual page. - */ - -/* - * Singly-linked List definitions. - */ -#define SLIST_HEAD(name, type) \ -struct name { \ - struct type *slh_first; /* first element */ \ -} - -#define SLIST_HEAD_INITIALIZER(head) \ - { NULL } - -#define SLIST_ENTRY(type) \ -struct { \ - struct type *sle_next; /* next element */ \ -} - -/* - * Singly-linked List access methods. - */ -#define SLIST_FIRST(head) ((head)->slh_first) -#define SLIST_END(head) NULL -#define SLIST_EMPTY(head) ((head)->slh_first == NULL) -#define SLIST_NEXT(elm, field) ((elm)->field.sle_next) - -#define SLIST_FOREACH(var, head, field) \ - for((var) = (head)->slh_first; \ - (var) != SLIST_END(head); \ - (var) = (var)->field.sle_next) - -#define SLIST_FOREACH_SAFE(var, head, field, tvar) \ - for ((var) = SLIST_FIRST((head)); \ - (var) != SLIST_END(head) && \ - ((tvar) = SLIST_NEXT((var), field), 1); \ - (var) = (tvar)) - -/* - * Singly-linked List functions. - */ -#define SLIST_INIT(head) do { \ - (head)->slh_first = SLIST_END(head); \ -} while (/*CONSTCOND*/0) - -#define SLIST_INSERT_AFTER(slistelm, elm, field) do { \ - (elm)->field.sle_next = (slistelm)->field.sle_next; \ - (slistelm)->field.sle_next = (elm); \ -} while (/*CONSTCOND*/0) - -#define SLIST_INSERT_HEAD(head, elm, field) do { \ - (elm)->field.sle_next = (head)->slh_first; \ - (head)->slh_first = (elm); \ -} while (/*CONSTCOND*/0) - -#define SLIST_REMOVE_AFTER(slistelm, field) do { \ - (slistelm)->field.sle_next = \ - SLIST_NEXT(SLIST_NEXT((slistelm), field), field); \ -} while (/*CONSTCOND*/0) - -#define SLIST_REMOVE_HEAD(head, field) do { \ - (head)->slh_first = (head)->slh_first->field.sle_next; \ -} while (/*CONSTCOND*/0) - -#define SLIST_REMOVE(head, elm, type, field) do { \ - if ((head)->slh_first == (elm)) { \ - SLIST_REMOVE_HEAD((head), field); \ - } \ - else { \ - struct type *curelm = (head)->slh_first; \ - while(curelm->field.sle_next != (elm)) \ - curelm = curelm->field.sle_next; \ - curelm->field.sle_next = \ - curelm->field.sle_next->field.sle_next; \ - } \ -} while (/*CONSTCOND*/0) - - -/* - * List definitions. - */ -#define LIST_HEAD(name, type) \ -struct name { \ - struct type *lh_first; /* first element */ \ -} - -#define LIST_HEAD_INITIALIZER(head) \ - { NULL } - -#define LIST_ENTRY(type) \ -struct { \ - struct type *le_next; /* next element */ \ - struct type **le_prev; /* address of previous next element */ \ -} - -/* - * List access methods. - */ -#define LIST_FIRST(head) ((head)->lh_first) -#define LIST_END(head) NULL -#define LIST_EMPTY(head) ((head)->lh_first == LIST_END(head)) -#define LIST_NEXT(elm, field) ((elm)->field.le_next) - -#define LIST_FOREACH(var, head, field) \ - for ((var) = ((head)->lh_first); \ - (var) != LIST_END(head); \ - (var) = ((var)->field.le_next)) - -#define LIST_FOREACH_SAFE(var, head, field, tvar) \ - for ((var) = LIST_FIRST((head)); \ - (var) != LIST_END(head) && \ - ((tvar) = LIST_NEXT((var), field), 1); \ - (var) = (tvar)) - -#define LIST_MOVE(head1, head2) do { \ - LIST_INIT((head2)); \ - if (!LIST_EMPTY((head1))) { \ - (head2)->lh_first = (head1)->lh_first; \ - LIST_INIT((head1)); \ - } \ -} while (/*CONSTCOND*/0) - -/* - * List functions. - */ -#if defined(QUEUEDEBUG) -#define QUEUEDEBUG_LIST_INSERT_HEAD(head, elm, field) \ - if ((head)->lh_first && \ - (head)->lh_first->field.le_prev != &(head)->lh_first) \ - QUEUEDEBUG_ABORT("LIST_INSERT_HEAD %p %s:%d", (head), \ - __FILE__, __LINE__); -#define QUEUEDEBUG_LIST_OP(elm, field) \ - if ((elm)->field.le_next && \ - (elm)->field.le_next->field.le_prev != \ - &(elm)->field.le_next) \ - QUEUEDEBUG_ABORT("LIST_* forw %p %s:%d", (elm), \ - __FILE__, __LINE__); \ - if (*(elm)->field.le_prev != (elm)) \ - QUEUEDEBUG_ABORT("LIST_* back %p %s:%d", (elm), \ - __FILE__, __LINE__); -#define QUEUEDEBUG_LIST_POSTREMOVE(elm, field) \ - (elm)->field.le_next = (void *)1L; \ - (elm)->field.le_prev = (void *)1L; -#else -#define QUEUEDEBUG_LIST_INSERT_HEAD(head, elm, field) -#define QUEUEDEBUG_LIST_OP(elm, field) -#define QUEUEDEBUG_LIST_POSTREMOVE(elm, field) -#endif - -#define LIST_INIT(head) do { \ - (head)->lh_first = LIST_END(head); \ -} while (/*CONSTCOND*/0) - -#define LIST_INSERT_AFTER(listelm, elm, field) do { \ - QUEUEDEBUG_LIST_OP((listelm), field) \ - if (((elm)->field.le_next = (listelm)->field.le_next) != \ - LIST_END(head)) \ - (listelm)->field.le_next->field.le_prev = \ - &(elm)->field.le_next; \ - (listelm)->field.le_next = (elm); \ - (elm)->field.le_prev = &(listelm)->field.le_next; \ -} while (/*CONSTCOND*/0) - -#define LIST_INSERT_BEFORE(listelm, elm, field) do { \ - QUEUEDEBUG_LIST_OP((listelm), field) \ - (elm)->field.le_prev = (listelm)->field.le_prev; \ - (elm)->field.le_next = (listelm); \ - *(listelm)->field.le_prev = (elm); \ - (listelm)->field.le_prev = &(elm)->field.le_next; \ -} while (/*CONSTCOND*/0) - -#define LIST_INSERT_HEAD(head, elm, field) do { \ - QUEUEDEBUG_LIST_INSERT_HEAD((head), (elm), field) \ - if (((elm)->field.le_next = (head)->lh_first) != LIST_END(head))\ - (head)->lh_first->field.le_prev = &(elm)->field.le_next;\ - (head)->lh_first = (elm); \ - (elm)->field.le_prev = &(head)->lh_first; \ -} while (/*CONSTCOND*/0) - -#define LIST_REMOVE(elm, field) do { \ - QUEUEDEBUG_LIST_OP((elm), field) \ - if ((elm)->field.le_next != NULL) \ - (elm)->field.le_next->field.le_prev = \ - (elm)->field.le_prev; \ - *(elm)->field.le_prev = (elm)->field.le_next; \ - QUEUEDEBUG_LIST_POSTREMOVE((elm), field) \ -} while (/*CONSTCOND*/0) - -#define LIST_REPLACE(elm, elm2, field) do { \ - if (((elm2)->field.le_next = (elm)->field.le_next) != NULL) \ - (elm2)->field.le_next->field.le_prev = \ - &(elm2)->field.le_next; \ - (elm2)->field.le_prev = (elm)->field.le_prev; \ - *(elm2)->field.le_prev = (elm2); \ - QUEUEDEBUG_LIST_POSTREMOVE((elm), field) \ -} while (/*CONSTCOND*/0) - -/* - * Simple queue definitions. - */ -#define SIMPLEQ_HEAD(name, type) \ -struct name { \ - struct type *sqh_first; /* first element */ \ - struct type **sqh_last; /* addr of last next element */ \ -} - -#define SIMPLEQ_HEAD_INITIALIZER(head) \ - { NULL, &(head).sqh_first } - -#define SIMPLEQ_ENTRY(type) \ -struct { \ - struct type *sqe_next; /* next element */ \ -} - -/* - * Simple queue access methods. - */ -#define SIMPLEQ_FIRST(head) ((head)->sqh_first) -#define SIMPLEQ_END(head) NULL -#define SIMPLEQ_EMPTY(head) ((head)->sqh_first == SIMPLEQ_END(head)) -#define SIMPLEQ_NEXT(elm, field) ((elm)->field.sqe_next) - -#define SIMPLEQ_FOREACH(var, head, field) \ - for ((var) = ((head)->sqh_first); \ - (var) != SIMPLEQ_END(head); \ - (var) = ((var)->field.sqe_next)) - -#define SIMPLEQ_FOREACH_SAFE(var, head, field, next) \ - for ((var) = ((head)->sqh_first); \ - (var) != SIMPLEQ_END(head) && \ - ((next = ((var)->field.sqe_next)), 1); \ - (var) = (next)) - -/* - * Simple queue functions. - */ -#define SIMPLEQ_INIT(head) do { \ - (head)->sqh_first = NULL; \ - (head)->sqh_last = &(head)->sqh_first; \ -} while (/*CONSTCOND*/0) - -#define SIMPLEQ_INSERT_HEAD(head, elm, field) do { \ - if (((elm)->field.sqe_next = (head)->sqh_first) == NULL) \ - (head)->sqh_last = &(elm)->field.sqe_next; \ - (head)->sqh_first = (elm); \ -} while (/*CONSTCOND*/0) - -#define SIMPLEQ_INSERT_TAIL(head, elm, field) do { \ - (elm)->field.sqe_next = NULL; \ - *(head)->sqh_last = (elm); \ - (head)->sqh_last = &(elm)->field.sqe_next; \ -} while (/*CONSTCOND*/0) - -#define SIMPLEQ_INSERT_AFTER(head, listelm, elm, field) do { \ - if (((elm)->field.sqe_next = (listelm)->field.sqe_next) == NULL)\ - (head)->sqh_last = &(elm)->field.sqe_next; \ - (listelm)->field.sqe_next = (elm); \ -} while (/*CONSTCOND*/0) - -#define SIMPLEQ_REMOVE_HEAD(head, field) do { \ - if (((head)->sqh_first = (head)->sqh_first->field.sqe_next) == NULL) \ - (head)->sqh_last = &(head)->sqh_first; \ -} while (/*CONSTCOND*/0) - -#define SIMPLEQ_REMOVE_AFTER(head, elm, field) do { \ - if (((elm)->field.sqe_next = (elm)->field.sqe_next->field.sqe_next) \ - == NULL) \ - (head)->sqh_last = &(elm)->field.sqe_next; \ -} while (/*CONSTCOND*/0) - -#define SIMPLEQ_REMOVE(head, elm, type, field) do { \ - if ((head)->sqh_first == (elm)) { \ - SIMPLEQ_REMOVE_HEAD((head), field); \ - } else { \ - struct type *curelm = (head)->sqh_first; \ - while (curelm->field.sqe_next != (elm)) \ - curelm = curelm->field.sqe_next; \ - if ((curelm->field.sqe_next = \ - curelm->field.sqe_next->field.sqe_next) == NULL) \ - (head)->sqh_last = &(curelm)->field.sqe_next; \ - } \ -} while (/*CONSTCOND*/0) - -#define SIMPLEQ_CONCAT(head1, head2) do { \ - if (!SIMPLEQ_EMPTY((head2))) { \ - *(head1)->sqh_last = (head2)->sqh_first; \ - (head1)->sqh_last = (head2)->sqh_last; \ - SIMPLEQ_INIT((head2)); \ - } \ -} while (/*CONSTCOND*/0) - -#define SIMPLEQ_LAST(head, type, field) \ - (SIMPLEQ_EMPTY((head)) ? \ - NULL : \ - ((struct type *)(void *) \ - ((char *)((head)->sqh_last) - offsetof(struct type, field)))) - -/* - * Tail queue definitions. - */ -#define _TAILQ_HEAD(name, type, qual) \ -struct name { \ - qual type *tqh_first; /* first element */ \ - qual type *qual *tqh_last; /* addr of last next element */ \ -} -#define TAILQ_HEAD(name, type) _TAILQ_HEAD(name, struct type,) - -#define TAILQ_HEAD_INITIALIZER(head) \ - { TAILQ_END(head), &(head).tqh_first } - -#define _TAILQ_ENTRY(type, qual) \ -struct { \ - qual type *tqe_next; /* next element */ \ - qual type *qual *tqe_prev; /* address of previous next element */\ -} -#define TAILQ_ENTRY(type) _TAILQ_ENTRY(struct type,) - -/* - * Tail queue access methods. - */ -#define TAILQ_FIRST(head) ((head)->tqh_first) -#define TAILQ_END(head) (NULL) -#define TAILQ_NEXT(elm, field) ((elm)->field.tqe_next) -#define TAILQ_LAST(head, headname) \ - (*(((struct headname *)((head)->tqh_last))->tqh_last)) -#define TAILQ_PREV(elm, headname, field) \ - (*(((struct headname *)((elm)->field.tqe_prev))->tqh_last)) -#define TAILQ_EMPTY(head) (TAILQ_FIRST(head) == TAILQ_END(head)) - - -#define TAILQ_FOREACH(var, head, field) \ - for ((var) = ((head)->tqh_first); \ - (var) != TAILQ_END(head); \ - (var) = ((var)->field.tqe_next)) - -#define TAILQ_FOREACH_SAFE(var, head, field, next) \ - for ((var) = ((head)->tqh_first); \ - (var) != TAILQ_END(head) && \ - ((next) = TAILQ_NEXT(var, field), 1); (var) = (next)) - -#define TAILQ_FOREACH_REVERSE(var, head, headname, field) \ - for ((var) = (*(((struct headname *)((head)->tqh_last))->tqh_last));\ - (var) != TAILQ_END(head); \ - (var) = (*(((struct headname *)((var)->field.tqe_prev))->tqh_last))) - -#define TAILQ_FOREACH_REVERSE_SAFE(var, head, headname, field, prev) \ - for ((var) = TAILQ_LAST((head), headname); \ - (var) != TAILQ_END(head) && \ - ((prev) = TAILQ_PREV((var), headname, field), 1); (var) = (prev)) - -/* - * Tail queue functions. - */ -#if defined(QUEUEDEBUG) -#define QUEUEDEBUG_TAILQ_INSERT_HEAD(head, elm, field) \ - if ((head)->tqh_first && \ - (head)->tqh_first->field.tqe_prev != &(head)->tqh_first) \ - QUEUEDEBUG_ABORT("TAILQ_INSERT_HEAD %p %s:%d", (head), \ - __FILE__, __LINE__); -#define QUEUEDEBUG_TAILQ_INSERT_TAIL(head, elm, field) \ - if (*(head)->tqh_last != NULL) \ - QUEUEDEBUG_ABORT("TAILQ_INSERT_TAIL %p %s:%d", (head), \ - __FILE__, __LINE__); -#define QUEUEDEBUG_TAILQ_OP(elm, field) \ - if ((elm)->field.tqe_next && \ - (elm)->field.tqe_next->field.tqe_prev != \ - &(elm)->field.tqe_next) \ - QUEUEDEBUG_ABORT("TAILQ_* forw %p %s:%d", (elm), \ - __FILE__, __LINE__); \ - if (*(elm)->field.tqe_prev != (elm)) \ - QUEUEDEBUG_ABORT("TAILQ_* back %p %s:%d", (elm), \ - __FILE__, __LINE__); -#define QUEUEDEBUG_TAILQ_PREREMOVE(head, elm, field) \ - if ((elm)->field.tqe_next == NULL && \ - (head)->tqh_last != &(elm)->field.tqe_next) \ - QUEUEDEBUG_ABORT("TAILQ_PREREMOVE head %p elm %p %s:%d",\ - (head), (elm), __FILE__, __LINE__); -#define QUEUEDEBUG_TAILQ_POSTREMOVE(elm, field) \ - (elm)->field.tqe_next = (void *)1L; \ - (elm)->field.tqe_prev = (void *)1L; -#else -#define QUEUEDEBUG_TAILQ_INSERT_HEAD(head, elm, field) -#define QUEUEDEBUG_TAILQ_INSERT_TAIL(head, elm, field) -#define QUEUEDEBUG_TAILQ_OP(elm, field) -#define QUEUEDEBUG_TAILQ_PREREMOVE(head, elm, field) -#define QUEUEDEBUG_TAILQ_POSTREMOVE(elm, field) -#endif - -#define TAILQ_INIT(head) do { \ - (head)->tqh_first = TAILQ_END(head); \ - (head)->tqh_last = &(head)->tqh_first; \ -} while (/*CONSTCOND*/0) - -#define TAILQ_INSERT_HEAD(head, elm, field) do { \ - QUEUEDEBUG_TAILQ_INSERT_HEAD((head), (elm), field) \ - if (((elm)->field.tqe_next = (head)->tqh_first) != TAILQ_END(head))\ - (head)->tqh_first->field.tqe_prev = \ - &(elm)->field.tqe_next; \ - else \ - (head)->tqh_last = &(elm)->field.tqe_next; \ - (head)->tqh_first = (elm); \ - (elm)->field.tqe_prev = &(head)->tqh_first; \ -} while (/*CONSTCOND*/0) - -#define TAILQ_INSERT_TAIL(head, elm, field) do { \ - QUEUEDEBUG_TAILQ_INSERT_TAIL((head), (elm), field) \ - (elm)->field.tqe_next = TAILQ_END(head); \ - (elm)->field.tqe_prev = (head)->tqh_last; \ - *(head)->tqh_last = (elm); \ - (head)->tqh_last = &(elm)->field.tqe_next; \ -} while (/*CONSTCOND*/0) - -#define TAILQ_INSERT_AFTER(head, listelm, elm, field) do { \ - QUEUEDEBUG_TAILQ_OP((listelm), field) \ - if (((elm)->field.tqe_next = (listelm)->field.tqe_next) != \ - TAILQ_END(head)) \ - (elm)->field.tqe_next->field.tqe_prev = \ - &(elm)->field.tqe_next; \ - else \ - (head)->tqh_last = &(elm)->field.tqe_next; \ - (listelm)->field.tqe_next = (elm); \ - (elm)->field.tqe_prev = &(listelm)->field.tqe_next; \ -} while (/*CONSTCOND*/0) - -#define TAILQ_INSERT_BEFORE(listelm, elm, field) do { \ - QUEUEDEBUG_TAILQ_OP((listelm), field) \ - (elm)->field.tqe_prev = (listelm)->field.tqe_prev; \ - (elm)->field.tqe_next = (listelm); \ - *(listelm)->field.tqe_prev = (elm); \ - (listelm)->field.tqe_prev = &(elm)->field.tqe_next; \ -} while (/*CONSTCOND*/0) - -#define TAILQ_REMOVE(head, elm, field) do { \ - QUEUEDEBUG_TAILQ_PREREMOVE((head), (elm), field) \ - QUEUEDEBUG_TAILQ_OP((elm), field) \ - if (((elm)->field.tqe_next) != TAILQ_END(head)) \ - (elm)->field.tqe_next->field.tqe_prev = \ - (elm)->field.tqe_prev; \ - else \ - (head)->tqh_last = (elm)->field.tqe_prev; \ - *(elm)->field.tqe_prev = (elm)->field.tqe_next; \ - QUEUEDEBUG_TAILQ_POSTREMOVE((elm), field); \ -} while (/*CONSTCOND*/0) - -#define TAILQ_REPLACE(head, elm, elm2, field) do { \ - if (((elm2)->field.tqe_next = (elm)->field.tqe_next) != \ - TAILQ_END(head)) \ - (elm2)->field.tqe_next->field.tqe_prev = \ - &(elm2)->field.tqe_next; \ - else \ - (head)->tqh_last = &(elm2)->field.tqe_next; \ - (elm2)->field.tqe_prev = (elm)->field.tqe_prev; \ - *(elm2)->field.tqe_prev = (elm2); \ - QUEUEDEBUG_TAILQ_POSTREMOVE((elm), field); \ -} while (/*CONSTCOND*/0) - -#define TAILQ_CONCAT(head1, head2, field) do { \ - if (!TAILQ_EMPTY(head2)) { \ - *(head1)->tqh_last = (head2)->tqh_first; \ - (head2)->tqh_first->field.tqe_prev = (head1)->tqh_last; \ - (head1)->tqh_last = (head2)->tqh_last; \ - TAILQ_INIT((head2)); \ - } \ -} while (/*CONSTCOND*/0) - -/* - * Singly-linked Tail queue declarations. - */ -#define STAILQ_HEAD(name, type) \ -struct name { \ - struct type *stqh_first; /* first element */ \ - struct type **stqh_last; /* addr of last next element */ \ -} - -#define STAILQ_HEAD_INITIALIZER(head) \ - { NULL, &(head).stqh_first } - -#define STAILQ_ENTRY(type) \ -struct { \ - struct type *stqe_next; /* next element */ \ -} - -/* - * Singly-linked Tail queue access methods. - */ -#define STAILQ_FIRST(head) ((head)->stqh_first) -#define STAILQ_END(head) NULL -#define STAILQ_NEXT(elm, field) ((elm)->field.stqe_next) -#define STAILQ_EMPTY(head) (STAILQ_FIRST(head) == STAILQ_END(head)) - -/* - * Singly-linked Tail queue functions. - */ -#define STAILQ_INIT(head) do { \ - (head)->stqh_first = NULL; \ - (head)->stqh_last = &(head)->stqh_first; \ -} while (/*CONSTCOND*/0) - -#define STAILQ_INSERT_HEAD(head, elm, field) do { \ - if (((elm)->field.stqe_next = (head)->stqh_first) == NULL) \ - (head)->stqh_last = &(elm)->field.stqe_next; \ - (head)->stqh_first = (elm); \ -} while (/*CONSTCOND*/0) - -#define STAILQ_INSERT_TAIL(head, elm, field) do { \ - (elm)->field.stqe_next = NULL; \ - *(head)->stqh_last = (elm); \ - (head)->stqh_last = &(elm)->field.stqe_next; \ -} while (/*CONSTCOND*/0) - -#define STAILQ_INSERT_AFTER(head, listelm, elm, field) do { \ - if (((elm)->field.stqe_next = (listelm)->field.stqe_next) == NULL)\ - (head)->stqh_last = &(elm)->field.stqe_next; \ - (listelm)->field.stqe_next = (elm); \ -} while (/*CONSTCOND*/0) - -#define STAILQ_REMOVE_HEAD(head, field) do { \ - if (((head)->stqh_first = (head)->stqh_first->field.stqe_next) == NULL) \ - (head)->stqh_last = &(head)->stqh_first; \ -} while (/*CONSTCOND*/0) - -#define STAILQ_REMOVE(head, elm, type, field) do { \ - if ((head)->stqh_first == (elm)) { \ - STAILQ_REMOVE_HEAD((head), field); \ - } else { \ - struct type *curelm = (head)->stqh_first; \ - while (curelm->field.stqe_next != (elm)) \ - curelm = curelm->field.stqe_next; \ - if ((curelm->field.stqe_next = \ - curelm->field.stqe_next->field.stqe_next) == NULL) \ - (head)->stqh_last = &(curelm)->field.stqe_next; \ - } \ -} while (/*CONSTCOND*/0) - -#define STAILQ_FOREACH(var, head, field) \ - for ((var) = ((head)->stqh_first); \ - (var); \ - (var) = ((var)->field.stqe_next)) - -#define STAILQ_FOREACH_SAFE(var, head, field, tvar) \ - for ((var) = STAILQ_FIRST((head)); \ - (var) && ((tvar) = STAILQ_NEXT((var), field), 1); \ - (var) = (tvar)) - -#define STAILQ_CONCAT(head1, head2) do { \ - if (!STAILQ_EMPTY((head2))) { \ - *(head1)->stqh_last = (head2)->stqh_first; \ - (head1)->stqh_last = (head2)->stqh_last; \ - STAILQ_INIT((head2)); \ - } \ -} while (/*CONSTCOND*/0) - -#define STAILQ_LAST(head, type, field) \ - (STAILQ_EMPTY((head)) ? \ - NULL : \ - ((struct type *)(void *) \ - ((char *)((head)->stqh_last) - offsetof(struct type, field)))) - -#endif /* !_UHYVE_QUEUE_H_ */ diff --git a/tools/uhyve-aarch64.c b/tools/uhyve-aarch64.c deleted file mode 100644 index 7bc48b44e..000000000 --- a/tools/uhyve-aarch64.c +++ /dev/null @@ -1,503 +0,0 @@ -/* - * Copyright (c) 2018, Stefan Lankes, RWTH Aachen University - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this - * software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifdef __aarch64__ - -#define _GNU_SOURCE - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "uhyve.h" -#include "proxy.h" - -#define GUEST_OFFSET 0x0 - -#define GIC_SPI_IRQ_BASE 32 -#define GICD_BASE (1ULL << 39) -#define GICC_BASE (GICD_BASE + GICD_SIZE) -#define GIC_SIZE (GICD_SIZE + GICC_SIZE) -#define GICD_SIZE 0x10000ULL -#define GICC_SIZE 0x20000ULL - -#define KVM_GAP_SIZE (GIC_SIZE) -#define KVM_GAP_START GICD_BASE - -#define PAGE_SIZE 0x1000 - -#ifndef offsetof -#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) -#endif - -#define ARM64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 |\ - KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x)) -#define ARM_CPU_ID 3, 0, 0, 0 -#define ARM_CPU_ID_MPIDR 5 - -static bool cap_irqfd = false; -static bool cap_read_only = false; -static int gic_fd = -1; - -extern size_t guest_size; -extern uint64_t elf_entry; -extern uint8_t* klog; -extern bool verbose; -extern uint32_t ncores; -extern uint8_t* guest_mem; -extern size_t guest_size; -extern int kvm, vmfd, netfd, efd; -extern uint8_t* mboot; -extern __thread struct kvm_run *run; -extern __thread int vcpufd; -extern __thread uint32_t cpuid; - -void print_registers(void) -{ - struct kvm_one_reg reg; - uint64_t data; - - fprintf(stderr, "\n Dump state of CPU %d\n\n", cpuid); - fprintf(stderr, " Registers\n"); - fprintf(stderr, " =========\n"); - - reg.addr = (uint64_t)&data; - reg.id = ARM64_CORE_REG(regs.pc); - kvm_ioctl(vcpufd, KVM_GET_ONE_REG, ®); - fprintf(stderr, " PC: 0x%016lx\n", data); - - reg.id = ARM64_CORE_REG(regs.pstate); - kvm_ioctl(vcpufd, KVM_GET_ONE_REG, ®); - fprintf(stderr, " PSTATE: 0x%016lx\n", data); - - reg.id = ARM64_CORE_REG(sp_el1); - kvm_ioctl(vcpufd, KVM_GET_ONE_REG, ®); - fprintf(stderr, " SP_EL1: 0x%016lx\n", data); - - reg.id = ARM64_CORE_REG(regs.regs[30]); - kvm_ioctl(vcpufd, KVM_GET_ONE_REG, ®); - fprintf(stderr, " LR: 0x%016lx\n", data); - - reg.id = ARM64_SYS_REG(ARM_CPU_ID, ARM_CPU_ID_MPIDR); - kvm_ioctl(vcpufd, KVM_GET_ONE_REG, ®); - fprintf(stderr, " MPIDR: 0x%016lx\n", data); - - for(int i=0; i<=29; i+=2) - { - reg.id = ARM64_CORE_REG(regs.regs[i]); - kvm_ioctl(vcpufd, KVM_GET_ONE_REG, ®); - fprintf(stderr, " X%d:\t 0x%016lx\t", i, data); - - reg.id = ARM64_CORE_REG(regs.regs[i+1]); - kvm_ioctl(vcpufd, KVM_GET_ONE_REG, ®); - fprintf(stderr, " X%d:\t0x%016lx\n", i+1, data); - } -} - - -vcpu_state_t read_cpu_state() -{ - err(1, "Migration is currently not supported!"); -} - -void* migration_handler(void* arg) -{ - err(1, "Migration is currently not supported!"); -} - -void timer_handler(int signum) -{ - err(1, "Checkpointing is currently not supported!"); -} - -void restore_cpu_state(vcpu_state_t state) -{ - err(1, "Checkpointing is currently not supported!"); -} - -vcpu_state_t save_cpu_state(void) -{ - err(1, "Checkpointing is currently not supported!"); -} - - -void write_cpu_state(void) -{ - err(1, "Checkpointing is currently not supported!"); -} - -int load_checkpoint(uint8_t* mem, char* path) -{ - err(1, "Checkpointing is currently not supported!"); -} - -int load_migration_data(uint8_t* mem) -{ - err(1, "Checkpointing is currently not supported!"); -} - -void wait_for_incomming_migration(migration_metadata_t *metadata, uint16_t listen_portno) -{ - err(1, "Checkpointing is currently not supported!"); -} - -void init_cpu_state(uint64_t elf_entry) -{ - struct kvm_vcpu_init vcpu_init = { - .features = 0, - }; - struct kvm_vcpu_init preferred_init; - - if (!ioctl(vmfd, KVM_ARM_PREFERRED_TARGET, &preferred_init)) { - if ((preferred_init.target == KVM_ARM_TARGET_CORTEX_A57) || - (preferred_init.target == KVM_ARM_TARGET_CORTEX_A53)) { - vcpu_init.target = preferred_init.target; - } else { - vcpu_init.target = KVM_ARM_TARGET_GENERIC_V8; - } - } else { - vcpu_init.target = KVM_ARM_TARGET_GENERIC_V8; - } - - kvm_ioctl(vcpufd, KVM_ARM_VCPU_INIT, &vcpu_init); - - // be sure that the multiprocessor is runable - struct kvm_mp_state mp_state = { KVM_MP_STATE_RUNNABLE }; - kvm_ioctl(vcpufd, KVM_SET_MP_STATE, &mp_state); - - struct kvm_one_reg reg; - uint64_t data; - - /* pstate = all interrupts masked */ - data = PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | PSR_MODE_EL1h; - reg.id = ARM64_CORE_REG(regs.pstate); - reg.addr = (uint64_t)&data; - kvm_ioctl(vcpufd, KVM_SET_ONE_REG, ®); - -#if 0 - /* x0...x3 = 0 */ - data = 0; - reg.id = ARM64_CORE_REG(regs.regs[0]); - kvm_ioctl(vcpufd, KVM_SET_ONE_REG, ®); - - reg.id = ARM64_CORE_REG(regs.regs[1]); - kvm_ioctl(vcpufd, KVM_SET_ONE_REG, ®); - - reg.id = ARM64_CORE_REG(regs.regs[2]); - kvm_ioctl(vcpufd, KVM_SET_ONE_REG, ®); - - reg.id = ARM64_CORE_REG(regs.regs[3]); - kvm_ioctl(vcpufd, KVM_SET_ONE_REG, ®); -#endif - - /* set start address */ - data = elf_entry; - reg.id = ARM64_CORE_REG(regs.pc); - kvm_ioctl(vcpufd, KVM_SET_ONE_REG, ®); - - if (gic_fd > 0) { - int lines = 1; - uint32_t nr_irqs = lines * 32 + GIC_SPI_IRQ_BASE; - struct kvm_device_attr nr_irqs_attr = { - .group = KVM_DEV_ARM_VGIC_GRP_NR_IRQS, - .addr = (uint64_t)&nr_irqs, - }; - struct kvm_device_attr vgic_init_attr = { - .group = KVM_DEV_ARM_VGIC_GRP_CTRL, - .attr = KVM_DEV_ARM_VGIC_CTRL_INIT, - }; - - kvm_ioctl(gic_fd, KVM_SET_DEVICE_ATTR, &nr_irqs_attr); - kvm_ioctl(gic_fd, KVM_SET_DEVICE_ATTR, &vgic_init_attr); - } - - // only one core is able to enter startup code - // => the wait for the predecessor core - while (*((volatile uint32_t*) (mboot + 0x120)) < cpuid) - pthread_yield(); - *((volatile uint32_t*) (mboot + 0x130)) = cpuid; -} - -void init_kvm_arch(void) -{ - guest_mem = mmap(NULL, guest_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (guest_mem == MAP_FAILED) - err(1, "mmap failed"); - - const char* merge = getenv("HERMIT_MERGEABLE"); - if (merge && (strcmp(merge, "0") != 0)) { - /* - * The KSM feature is intended for applications that generate - * many instances of the same data (e.g., virtualization systems - * such as KVM). It can consume a lot of processing power! - */ - madvise(guest_mem, guest_size, MADV_MERGEABLE); - if (verbose) - fprintf(stderr, "VM uses KSN feature \"mergeable\" to reduce the memory footprint.\n"); - } - - const char* hugepage = getenv("HERMIT_HUGEPAGE"); - if (merge && (strcmp(merge, "0") != 0)) { - madvise(guest_mem, guest_size, MADV_HUGEPAGE); - if (verbose) - fprintf(stderr, "VM uses huge pages to improve the performance.\n"); - } - - cap_read_only = kvm_ioctl(vmfd, KVM_CHECK_EXTENSION, KVM_CAP_READONLY_MEM) <= 0 ? false : true; - if (!cap_read_only) - err(1, "the support of KVM_CAP_READONLY_MEM is curently required"); - - struct kvm_userspace_memory_region kvm_region = { - .slot = 0, - .guest_phys_addr = 0, - .memory_size = PAGE_SIZE, - .userspace_addr = (uint64_t) guest_mem, - .flags = KVM_MEM_READONLY, - }; - kvm_ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &kvm_region); - - kvm_region = (struct kvm_userspace_memory_region) { - .slot = 1, - .guest_phys_addr = PAGE_SIZE, - .memory_size = guest_size - PAGE_SIZE, - .userspace_addr = (uint64_t) guest_mem + PAGE_SIZE, - #ifdef USE_DIRTY_LOG - .flags = KVM_MEM_LOG_DIRTY_PAGES, - #else - .flags = 0, - #endif - }; - kvm_ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &kvm_region); - -#if 0 - /* Create interrupt controller GICv2 */ - uint64_t cpu_if_addr = GICC_BASE; - uint64_t dist_addr = GICD_BASE; - struct kvm_device_attr cpu_if_attr = { - .group = KVM_DEV_ARM_VGIC_GRP_ADDR, - .attr = KVM_VGIC_V2_ADDR_TYPE_CPU, - .addr = (uint64_t)&cpu_if_addr, - }; - struct kvm_create_device gic_device = { - .flags = 0, - .type = KVM_DEV_TYPE_ARM_VGIC_V2, - }; - struct kvm_device_attr dist_attr = { - .group = KVM_DEV_ARM_VGIC_GRP_ADDR, - .attr = KVM_VGIC_V2_ADDR_TYPE_DIST, - .addr = (uint64_t)&dist_addr, - }; - kvm_ioctl(vmfd, KVM_CREATE_DEVICE, &gic_device); - - gic_fd = gic_device.fd; - kvm_ioctl(gic_fd, KVM_SET_DEVICE_ATTR, &cpu_if_attr); - kvm_ioctl(gic_fd, KVM_SET_DEVICE_ATTR, &dist_attr); -#else - /* Create interrupt controller GICv2 */ - struct kvm_arm_device_addr gic_addr[] = { - [0] = { - .id = KVM_VGIC_V2_ADDR_TYPE_DIST | - (KVM_ARM_DEVICE_VGIC_V2 << KVM_ARM_DEVICE_ID_SHIFT), - .addr = GICD_BASE, - }, - [1] = { - .id = KVM_VGIC_V2_ADDR_TYPE_CPU | - (KVM_ARM_DEVICE_VGIC_V2 << KVM_ARM_DEVICE_ID_SHIFT), - .addr = GICC_BASE, - } - }; - - kvm_ioctl(vmfd, KVM_CREATE_IRQCHIP, NULL); - kvm_ioctl(vmfd, KVM_ARM_SET_DEVICE_ADDR, &gic_addr[0]); - kvm_ioctl(vmfd, KVM_ARM_SET_DEVICE_ADDR, &gic_addr[1]); -#endif - - //fprintf(stderr, "Create gicd at 0x%llx\n", GICD_BASE); - //fprintf(stderr, "Create gicc at 0x%llx\n", GICC_BASE); - - cap_irqfd = ioctl(vmfd, KVM_CHECK_EXTENSION, KVM_CAP_IRQFD) <= 0 ? false : true; - if (!cap_irqfd) - err(1, "the support of KVM_CAP_IRQFD is curently required"); -} - -int load_kernel(uint8_t* mem, char* path) -{ - Elf64_Ehdr hdr; - Elf64_Phdr *phdr = NULL; - size_t buflen; - size_t pstart = 0; - int fd, ret; - - fd = open(path, O_RDONLY); - if (fd == -1) - { - perror("Unable to open file"); - return -1; - } - - ret = pread_in_full(fd, &hdr, sizeof(hdr), 0); - if (ret < 0) - goto out; - - // check if the program is a HermitCore file - if (hdr.e_ident[EI_MAG0] != ELFMAG0 - || hdr.e_ident[EI_MAG1] != ELFMAG1 - || hdr.e_ident[EI_MAG2] != ELFMAG2 - || hdr.e_ident[EI_MAG3] != ELFMAG3 - || hdr.e_ident[EI_CLASS] != ELFCLASS64 - || hdr.e_ident[EI_OSABI] != HERMIT_ELFOSABI - || hdr.e_type != ET_EXEC || hdr.e_machine != EM_AARCH64) { - fprintf(stderr, "Invalid HermitCore file!\n"); - ret = -1; - goto out; - } - - elf_entry = hdr.e_entry; - - buflen = hdr.e_phentsize * hdr.e_phnum; - phdr = malloc(buflen); - if (!phdr) { - fprintf(stderr, "Not enough memory\n"); - ret = -1; - goto out; - } - - ret = pread_in_full(fd, phdr, buflen, hdr.e_phoff); - if (ret < 0) - goto out; - - /* - * Load all segments with type "LOAD" from the file at offset - * p_offset, and copy that into in memory. - */ - for (Elf64_Half ph_i = 0; ph_i < hdr.e_phnum; ph_i++) - { - uint64_t paddr = phdr[ph_i].p_paddr; - size_t offset = phdr[ph_i].p_offset; - size_t filesz = phdr[ph_i].p_filesz; - size_t memsz = phdr[ph_i].p_memsz; - - if (phdr[ph_i].p_type != PT_LOAD) - continue; - - //fprintf(stderr, "Kernel location 0x%zx, file size 0x%zx, memory size 0x%zx\n", paddr, filesz, memsz); - - ret = pread_in_full(fd, mem+paddr-GUEST_OFFSET, filesz, offset); - if (ret < 0) - goto out; - if (!klog) - klog = mem+paddr+0x1000-GUEST_OFFSET; - if (!mboot) - mboot = mem+paddr-GUEST_OFFSET; - //fprintf(stderr, "mboot at %p, klog at %p\n", mboot, klog); - - if (!pstart) { - pstart = paddr; - - // initialize kernel - *((uint64_t*) (mem+paddr-GUEST_OFFSET + 0x100)) = paddr; // physical start address - *((uint64_t*) (mem+paddr-GUEST_OFFSET + 0x108)) = guest_size - PAGE_SIZE; // physical limit - *((uint32_t*) (mem+paddr-GUEST_OFFSET + 0x110)) = get_cpufreq(); - *((uint32_t*) (mem+paddr-GUEST_OFFSET + 0x128)) = ncores; // number of used cpus - *((uint32_t*) (mem+paddr-GUEST_OFFSET + 0x130)) = 0; // cpuid - *((uint32_t*) (mem+paddr-GUEST_OFFSET + 0x148)) = 1; // announce uhyve - - - char* str = getenv("HERMIT_IP"); - if (str) { - uint32_t ip[4]; - - sscanf(str, "%u.%u.%u.%u", ip+0, ip+1, ip+2, ip+3); - *((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB0)) = (uint8_t) ip[0]; - *((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB1)) = (uint8_t) ip[1]; - *((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB2)) = (uint8_t) ip[2]; - *((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB3)) = (uint8_t) ip[3]; - } - - str = getenv("HERMIT_GATEWAY"); - if (str) { - uint32_t ip[4]; - - sscanf(str, "%u.%u.%u.%u", ip+0, ip+1, ip+2, ip+3); - *((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB4)) = (uint8_t) ip[0]; - *((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB5)) = (uint8_t) ip[1]; - *((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB6)) = (uint8_t) ip[2]; - *((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB7)) = (uint8_t) ip[3]; - } - str = getenv("HERMIT_MASK"); - if (str) { - uint32_t ip[4]; - - sscanf(str, "%u.%u.%u.%u", ip+0, ip+1, ip+2, ip+3); - *((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB8)) = (uint8_t) ip[0]; - *((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB9)) = (uint8_t) ip[1]; - *((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xBA)) = (uint8_t) ip[2]; - *((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xBB)) = (uint8_t) ip[3]; - } - - *((uint64_t*) (mem+paddr-GUEST_OFFSET + 0xbc)) = (uint64_t) guest_mem; - if (verbose) - *((uint32_t*) (mem+paddr-GUEST_OFFSET + 0x174)) = (uint32_t) UHYVE_UART_PORT; - } - *((uint64_t*) (mem+pstart-GUEST_OFFSET + 0x158)) = paddr + memsz - pstart; // total kernel size - } - - ret = 0; - -out: - if (phdr) - free(phdr); - - close(fd); - - return ret; -} -#endif diff --git a/tools/uhyve-gdb-aarch64.c b/tools/uhyve-gdb-aarch64.c deleted file mode 100644 index 236582adf..000000000 --- a/tools/uhyve-gdb-aarch64.c +++ /dev/null @@ -1,72 +0,0 @@ -/* - * This file was adapted from the solo5/ukvm code base, initial copyright block - * follows: - */ - -/* - * Copyright (c) 2015-2017 Contributors as noted in the AUTHORS file - * - * This file is part of ukvm, a unikernel monitor. - * - * Permission to use, copy, modify, and/or distribute this software - * for any purpose with or without fee is hereby granted, provided - * that the above copyright notice and this permission notice appear - * in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL - * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE - * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR - * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS - * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -/* - * Based on binutils-gdb/gdb/stubs/i386-stub.c, which is: - * Not copyrighted. - */ - -#ifdef __aarch64__ -#define _GNU_SOURCE - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "uhyve.h" -#include "uhyve-gdb.h" -#include "queue.h" - -void uhyve_gdb_handle_exception(int vcpufd, int sigval) -{ - -} - -void uhyve_gdb_handle_term(void) -{ - -} - -int uhyve_gdb_init(int vcpufd) -{ - return -1; -} - -#endif diff --git a/tools/uhyve-gdb-aarch64.h b/tools/uhyve-gdb-aarch64.h deleted file mode 100644 index 5384bfbec..000000000 --- a/tools/uhyve-gdb-aarch64.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * This file was adapted from the solo5/ukvm code base, initial copyright block - * follows: - */ - -/* - * Copyright (c) 2015-2017 Contributors as noted in the AUTHORS file - * - * This file is part of ukvm, a unikernel monitor. - * - * Permission to use, copy, modify, and/or distribute this software - * for any purpose with or without fee is hereby granted, provided - * that the above copyright notice and this permission notice appear - * in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL - * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE - * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR - * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS - * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#ifndef UHYVE_GDB_AARCH64_H -#define UHYVE_GDB_AARCH64_H - -#include -#include - - struct uhyve_gdb_regs { - uint64_t regs[31]; - uint64_t lr; - uint64_t pc; - uint64_t pstate; - uint64_t sp; - }; - -#endif /* UHYVE_GDB_AARCH64_H */ diff --git a/tools/uhyve-gdb-x86_64.c b/tools/uhyve-gdb-x86_64.c deleted file mode 100644 index 83544243c..000000000 --- a/tools/uhyve-gdb-x86_64.c +++ /dev/null @@ -1,993 +0,0 @@ -/* - * This file was adapted from the solo5/ukvm code base, initial copyright block - * follows: - */ - -/* - * Copyright (c) 2015-2017 Contributors as noted in the AUTHORS file - * - * This file is part of ukvm, a unikernel monitor. - * - * Permission to use, copy, modify, and/or distribute this software - * for any purpose with or without fee is hereby granted, provided - * that the above copyright notice and this permission notice appear - * in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL - * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE - * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR - * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS - * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -/* - * Based on binutils-gdb/gdb/stubs/i386-stub.c, which is: - * Not copyrighted. - */ - -#ifdef __x86_64__ -#define _GNU_SOURCE - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "uhyve.h" -#include "uhyve-gdb.h" -#include "queue.h" - -struct breakpoint_t { - gdb_breakpoint_type type; - uint64_t addr; - size_t len; - uint32_t refcount; - uint8_t saved_insn; /* for software breakpoints */ - - SLIST_ENTRY(breakpoint_t) entries; -}; - -SLIST_HEAD(breakpoints_head, breakpoint_t); -static struct breakpoints_head sw_breakpoints; -static struct breakpoints_head hw_breakpoints; - -/* The Intel SDM specifies that the DR7 has space for 4 breakpoints. */ -#define MAX_HW_BREAKPOINTS 4 -static uint32_t nr_hw_breakpoints = 0; - -/* Stepping is disabled by default. */ -static bool stepping = false; -/* This is the trap instruction used for software breakpoints. */ -static const uint8_t int3 = 0xcc; - -static int socket_fd = 0; -static int portno = 1234; /* Default port number */ -static const char hexchars[] = "0123456789abcdef"; - -#define BUFMAX 4096 -static char in_buffer[BUFMAX]; -static unsigned char registers[BUFMAX]; - -/* uhyve variables */ -extern size_t guest_size; -extern uint8_t *guest_mem; - -void *uhyve_checked_gpa_p(uint64_t gpa, size_t sz, uint8_t * chk_guest_mem, - size_t chk_guest_size, const char *file, int line); - -/* The actual error code is ignored by GDB, so any number will do. */ -#define GDB_ERROR_MSG "E01" - -static int hex(unsigned char ch) -{ - if ((ch >= 'a') && (ch <= 'f')) - return (ch - 'a' + 10); - if ((ch >= '0') && (ch <= '9')) - return (ch - '0'); - if ((ch >= 'A') && (ch <= 'F')) - return (ch - 'A' + 10); - - return -1; -} - -/* - * Converts the (count) bytes of memory pointed to by mem into an hex string in - * buf. Returns a pointer to the last char put in buf (null). - */ -static char *mem2hex(const unsigned char *mem, char *buf, size_t count) -{ - size_t i; - unsigned char ch; - - for (i = 0; i < count; i++) { - ch = *mem++; - *buf++ = hexchars[ch >> 4]; - *buf++ = hexchars[ch % 16]; - } - *buf = 0; - return buf; -} - -/* - * Converts the hex string in buf into binary in mem. - * Returns a pointer to the character AFTER the last byte written. - */ -static unsigned char *hex2mem(const char *buf, unsigned char *mem, size_t count) -{ - size_t i; - unsigned char ch; - - assert(strlen(buf) >= (2 * count)); - - for (i = 0; i < count; i++) { - ch = hex(*buf++) << 4; - ch = ch + hex(*buf++); - *mem++ = ch; - } - return mem; -} - -static int wait_for_connect(void) -{ - int listen_socket_fd; - struct sockaddr_in server_addr, client_addr; - struct protoent *protoent; - struct in_addr ip_addr; - socklen_t len; - int opt; - - listen_socket_fd = socket(AF_INET, SOCK_STREAM, 0); - if (listen_socket_fd == -1) { - err(1, "Could not create socket"); - return -1; - } - - opt = 1; - if (setsockopt(listen_socket_fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) == -1) - err(1, "setsockopt(SO_REUSEADDR) failed"); - - server_addr.sin_family = AF_INET; - server_addr.sin_addr.s_addr = htonl(INADDR_ANY); - server_addr.sin_port = htons(portno); - - if (bind(listen_socket_fd, (struct sockaddr *)&server_addr, - sizeof(server_addr)) == -1) { - err(1, "bind failed"); - return -1; - } - - if (listen(listen_socket_fd, 0) == -1) { - err(1, "listen failed"); - return -1; - } - - warnx("Waiting for a debugger. Connect to it like this:"); - warnx("\tgdb --ex=\"target remote localhost:%d\" UNIKERNEL", portno); - - len = sizeof(client_addr); - socket_fd = - accept(listen_socket_fd, (struct sockaddr *)&client_addr, &len); - if (socket_fd == -1) { - err(1, "accept failed"); - return -1; - } - - close(listen_socket_fd); - - protoent = getprotobyname("tcp"); - if (!protoent) { - err(1, "getprotobyname (\"tcp\") failed"); - return -1; - } - - opt = 1; - if (setsockopt(socket_fd, protoent->p_proto, TCP_NODELAY, &opt, - sizeof(opt)) == -1) - err(1, "setsockopt(TCP_NODELAY) failed"); - - ip_addr.s_addr = client_addr.sin_addr.s_addr; - warnx("Connection from debugger at %s", inet_ntoa(ip_addr)); - - return 0; -} - -static inline int send_char(char ch) -{ - /* TCP is already buffering, so no need to buffer here as well. */ - return send(socket_fd, &ch, 1, 0); -} - -static char recv_char(void) -{ - unsigned char ch; - int ret; - - ret = recv(socket_fd, &ch, 1, 0); - if (ret < 0) { - return -1; - } else if (ret == 0) { - /* The peer has performed an orderly shutdown (from "man recv"). */ - warnx("GDB: Connection closed from client"); - close(socket_fd); - socket_fd = -1; - return -1; - } else { - assert(ret == 1); - } - - /* All GDB remote packets are encoded in ASCII. */ - assert(isascii(ch)); - - return (char)ch; -} - -/* - * Scan for the sequence $# - * Returns a null terminated string. - */ -static char *recv_packet(void) -{ - char *buffer = &in_buffer[0]; - unsigned char checksum; - unsigned char xmitcsum; - char ch; - int count; - - while (1) { - /* wait around for the start character, ignore all other characters */ - do { - ch = recv_char(); - if (ch == -1) - return NULL; - } - while (ch != '$'); - - retry: - checksum = 0; - xmitcsum = -1; - count = 0; - - /* now, read until a # or end of buffer is found */ - while (count < BUFMAX - 1) { - ch = recv_char(); - if (ch == -1) - return NULL; - if (ch == '$') - goto retry; - if (ch == '#') - break; - checksum = checksum + ch; - buffer[count] = ch; - count = count + 1; - } - /* Let's make this a C string. */ - buffer[count] = '\0'; - - if (ch == '#') { - ch = recv_char(); - if (ch == -1) - return NULL; - xmitcsum = hex(ch) << 4; - ch = recv_char(); - if (ch == -1) - return NULL; - xmitcsum += hex(ch); - - if (checksum != xmitcsum) { - warnx("Failed checksum from GDB. " - "My count = 0x%x, sent=0x%x. buf=%s", - checksum, xmitcsum, buffer); - if (send_char('-') == -1) - /* Unsuccessful reply to a failed checksum */ - err(1, - "GDB: Could not send an ACK to the debugger."); - } else { - if (send_char('+') == -1) - /* Unsuccessful reply to a successful transfer */ - err(1, - "GDB: Could not send an ACK to the debugger."); - - /* if a sequence char is present, reply the sequence ID */ - if (buffer[2] == ':') { - send_char(buffer[0]); - send_char(buffer[1]); - - return &buffer[3]; - } - - return &buffer[0]; - } - } - } -} - -/* - * Send packet of the form $# without waiting for an ACK - * from the debugger. Only send_response - */ -static void send_packet_no_ack(char *buffer) -{ - unsigned char checksum; - int count; - char ch; - - /* - * We ignore all send_char errors as we either: (1) care about sending our - * packet and we will keep sending it until we get a good ACK from the - * debugger, or (2) not care and just send it as a best-effort notification - * when dying. - */ - - send_char('$'); - checksum = 0; - count = 0; - - ch = buffer[count]; - while (ch) { - send_char(ch); - checksum += ch; - count += 1; - ch = buffer[count]; - } - - send_char('#'); - send_char(hexchars[checksum >> 4]); - send_char(hexchars[checksum % 16]); -} - -/* - * Send a packet and wait for a successful ACK of '+' from the debugger. - * An ACK of '-' means that we have to resend. - */ -static void send_packet(char *buffer) -{ - char ch; - - for (;;) { - send_packet_no_ack(buffer); - ch = recv_char(); - if (ch == -1) - return; - if (ch == '+') - break; - } -} - -#define send_error_msg() do { send_packet(GDB_ERROR_MSG); } while (0) - -#define send_not_supported_msg() do { send_packet(""); } while (0) - -#define send_okay_msg() do { send_packet("OK"); } while (0) - -/* - * This is a response to 'c' and 's'. In other words, the VM was - * running and it stopped for some reason. This message is to tell the - * debugger that whe stopped (and why). The argument code can take these - * and some other values: - * - 'S AA' received signal AA - * - 'W AA' exited with return code AA - * - 'X AA' exited with signal AA - * https://sourceware.org/gdb/onlinedocs/gdb/Stop-Reply-Packets.html - */ -static void send_response(char code, int sigval, bool wait_for_ack) -{ - char obuf[BUFMAX]; - snprintf(obuf, sizeof(obuf), "%c%02x", code, sigval); - if (wait_for_ack) - send_packet(obuf); - else - send_packet_no_ack(obuf); -} - -static void gdb_handle_exception(int vcpufd, int sigval) -{ - char *packet; - char obuf[BUFMAX]; - - /* Notify the debugger of our last signal */ - send_response('S', sigval, true); - - for (;;) { - uint64_t addr = 0, result; - gdb_breakpoint_type type; - size_t len; - int command, ret; - - packet = recv_packet(); - if (packet == NULL) - /* Without a packet with instructions with what to do next there is - * really nothing we can do to recover. So, dying. */ - errx(1, - "GDB: Exiting as we could not receive the next command from " - "the debugger."); - - /* - * From the GDB manual: - * "At a minimum, a stub is required to support the ‘g’ and ‘G’ - * commands for register access, and the ‘m’ and ‘M’ commands - * for memory access. Stubs that only control single-threaded - * targets can implement run control with the ‘c’ (continue), - * and ‘s’ (step) commands." - */ - command = packet[0]; - switch (command) { - case 's': - { - /* Step */ - if (sscanf(packet, "s%" PRIx64, &addr) == 1) { - /* not supported, but that's OK as GDB will retry with the - * slower version of this: update all registers. */ - send_not_supported_msg(); - break; /* Wait for another command. */ - } - if (uhyve_gdb_enable_ss(vcpufd) == -1) { - send_error_msg(); - break; /* Wait for another command. */ - } - return; /* Continue with program */ - } - - case 'c': - { - /* Continue (and disable stepping for the next instruction) */ - if (sscanf(packet, "c%" PRIx64, &addr) == 1) { - /* not supported, but that's OK as GDB will retry with the - * slower version of this: update all registers. */ - send_not_supported_msg(); - break; /* Wait for another command. */ - } - if (uhyve_gdb_disable_ss(vcpufd) == -1) { - send_error_msg(); - break; /* Wait for another command. */ - } - return; /* Continue with program */ - } - - case 'm': - { - /* Read memory content */ - if (sscanf(packet, "m%" PRIx64 ",%zx", &addr, &len) != 2) { - send_error_msg(); - break; - } - /* translate addr into guest phys first. it is - * needed if the address falls into the non directly mapped - * part of the virtual address space (ex: heap/stack) */ - uint64_t phys_addr; - - if (uhyve_gdb_guest_virt_to_phys(vcpufd, addr, &phys_addr)) { - send_error_msg(); - } else { - mem2hex(guest_mem + phys_addr, obuf, len); - send_packet(obuf); - } - break; /* Wait for another command. */ - } - - case 'M': - { - /* Write memory content */ - uint64_t phys_addr; - - assert(strlen(packet) <= sizeof(obuf)); - if (sscanf(packet, "M%" PRIx64 ",%zx:%s", &addr, &len, obuf) != 3) { - send_error_msg(); - break; - } - - /* translate to guest physical address first */ - if (uhyve_gdb_guest_virt_to_phys(vcpufd, addr, &phys_addr)) { - send_error_msg(); - } else { - hex2mem(obuf, guest_mem + phys_addr, - len); - send_okay_msg(); - } - break; /* Wait for another command. */ - } - - case 'g': - { - /* Read general registers */ - len = BUFMAX; - if (uhyve_gdb_read_registers(vcpufd, registers, &len) == -1) { - send_error_msg(); - } else { - mem2hex(registers, obuf, len); - send_packet(obuf); - } - break; /* Wait for another command. */ - } - - case 'G': - { - /* Write general registers */ - len = BUFMAX; - /* Call read_registers just to get len (not very efficient). */ - if (uhyve_gdb_read_registers(vcpufd, registers, &len) == -1) { - send_error_msg(); - break; - } - /* Packet looks like 'Gxxxxx', so we have to skip the first char */ - hex2mem(packet + 1, registers, len); - if (uhyve_gdb_write_registers(vcpufd, registers, len) == -1) { - send_error_msg(); - break; - } - send_okay_msg(); - break; /* Wait for another command. */ - } - - case '?': - { - /* Return last signal */ - send_response('S', sigval, true); - break; /* Wait for another command. */ - } - - case 'Z': - /* Insert a breakpoint */ - case 'z': - { - /* Remove a breakpoint */ - packet++; - if (sscanf(packet, "%" PRIx32 ",%" PRIx64 ",%zx", - &type, &addr, &len) != 3) { - send_error_msg(); - break; - } - uint64_t phys_addr; - if (uhyve_gdb_guest_virt_to_phys(vcpufd, addr, &phys_addr)) { - send_error_msg(); - } else { - if (command == 'Z') - ret = uhyve_gdb_add_breakpoint(vcpufd, type, phys_addr, len); - else - ret = uhyve_gdb_remove_breakpoint(vcpufd, type, phys_addr, len); - - if (ret == -1) - send_error_msg(); - else - send_okay_msg(); - } - break; - } - - case 'k': - { - warnx("Debugger asked us to quit"); - send_okay_msg(); - break; - } - - case 'D': - { - warnx("Debugger detached"); - send_okay_msg(); - return; - } - - default: - send_not_supported_msg(); - break; - } - } -} - -void uhyve_gdb_handle_exception(int vcpufd, int sigval) -{ - gdb_handle_exception(vcpufd, sigval); -} - -static void gdb_stub_start(int vcpufd) -{ - wait_for_connect(); - gdb_handle_exception(vcpufd, GDB_SIGNAL_FIRST); -} - -int uhyve_gdb_init(int vcpufd) -{ - /* - * GDB clients can change memory, and software breakpoints work by - * replacing instructions with int3's. - */ - if (mprotect(guest_mem, guest_size, PROT_READ | PROT_WRITE | PROT_EXEC) == -1) - err(1, "GDB: Cannot remove guest memory protection"); - - /* Notify the debugger that we are dying. */ - atexit(uhyve_gdb_handle_term); - - gdb_stub_start(vcpufd); - - return 0; -} - -void uhyve_gdb_handle_term(void) -{ - /* TODO: this is graceful shutdown forcing the return value to zero, - * any way to pass an error code when things go wrong ? */ - send_response('W', 0, true); -} - -static int kvm_arch_insert_sw_breakpoint(struct breakpoint_t *bp) -{ - uint8_t *insn = bp->addr + guest_mem; - bp->saved_insn = *insn; - /* - * We just modify the first byte even if the instruction is multi-byte. - * The debugger keeps track of the length of the instruction. The - * consequence of this is that we don't have to set all other bytes as - * NOP's. - */ - *insn = int3; - return 0; -} - -static int kvm_arch_remove_sw_breakpoint(struct breakpoint_t *bp) -{ - uint8_t *insn = bp->addr + guest_mem; - assert(*insn == int3); - *insn = bp->saved_insn; - return 0; -} - -static int uhyve_gdb_update_guest_debug(int vcpufd) -{ - struct kvm_guest_debug dbg = { 0 }; - struct breakpoint_t *bp; - const uint8_t type_code[] = { - /* Break on instruction execution only. */ - [GDB_BREAKPOINT_HW] = 0x0, - /* Break on data writes only. */ - [GDB_WATCHPOINT_WRITE] = 0x1, - /* Break on data reads only. */ - [GDB_WATCHPOINT_READ] = 0x2, - /* Break on data reads or writes but not instruction fetches. */ - [GDB_WATCHPOINT_ACCESS] = 0x3 - }; - const uint8_t len_code[] = { - /* - * 00 — 1-byte length. - * 01 — 2-byte length. - * 10 — 8-byte length. - * 11 — 4-byte length. - */ - [1] = 0x0,[2] = 0x1,[4] = 0x3,[8] = 0x2 - }; - int n = 0; - - if (stepping) - dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP; - - if (!SLIST_EMPTY(&sw_breakpoints)) - dbg.control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP; - - if (!SLIST_EMPTY(&hw_breakpoints)) { - dbg.control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP; - - /* Enable global breakpointing (across all threads) on the control - * debug register. */ - dbg.arch.debugreg[7] = 1 << 9; - dbg.arch.debugreg[7] |= 1 << 10; - SLIST_FOREACH(bp, &hw_breakpoints, entries) { - assert(bp->type != GDB_BREAKPOINT_SW); - dbg.arch.debugreg[n] = bp->addr; - /* global breakpointing */ - dbg.arch.debugreg[7] |= (2 << (n * 2)); - /* read/write fields */ - dbg.arch.debugreg[7] |= - (type_code[bp->type] << (16 + n * 4)); - /* Length fields */ - dbg.arch.debugreg[7] |= - ((uint32_t) len_code[bp->len] << (18 + n * 4)); - n++; - } - } - - kvm_ioctl(vcpufd, KVM_SET_GUEST_DEBUG, &dbg); - - return 0; -} - -static struct breakpoint_t *bp_list_find(gdb_breakpoint_type type, - uint64_t addr, size_t len) -{ - struct breakpoint_t *bp; - - switch (type) { - case GDB_BREAKPOINT_SW: - SLIST_FOREACH(bp, &sw_breakpoints, entries) { - if (bp->addr == addr && bp->len == len) - return bp; - } - break; - - case GDB_BREAKPOINT_HW: - case GDB_WATCHPOINT_WRITE: - case GDB_WATCHPOINT_READ: - case GDB_WATCHPOINT_ACCESS: - /* We only support hardware watchpoints. */ - SLIST_FOREACH(bp, &hw_breakpoints, entries) { - if (bp->addr == addr && bp->len == len) - return bp; - } - break; - - default: - assert(0); - } - - return NULL; -} - -/* - * Adds a new breakpoint to the list of breakpoints. Returns the found or - * created breakpoint. Returns NULL in case of failure or if we reached the max - * number of allowed hardware breakpoints (4). - */ -static struct breakpoint_t *bp_list_insert(gdb_breakpoint_type type, - uint64_t addr, size_t len) -{ - struct breakpoint_t *bp; - - bp = bp_list_find(type, addr, len); - if (bp) { - bp->refcount++; - return bp; - } - - bp = malloc(sizeof(struct breakpoint_t)); - if (bp == NULL) - return NULL; - - bp->addr = addr; - bp->type = type; - bp->len = len; - bp->refcount = 1; - - switch (type) { - case GDB_BREAKPOINT_SW: - SLIST_INSERT_HEAD(&sw_breakpoints, bp, entries); - break; - - case GDB_BREAKPOINT_HW: - case GDB_WATCHPOINT_WRITE: - case GDB_WATCHPOINT_READ: - case GDB_WATCHPOINT_ACCESS: - /* We only support hardware watchpoints. */ - if (nr_hw_breakpoints == MAX_HW_BREAKPOINTS) - return NULL; - nr_hw_breakpoints++; - SLIST_INSERT_HEAD(&hw_breakpoints, bp, entries); - break; - - default: - assert(0); - } - - return bp; -} - -/* - * Removes a breakpoint from the list of breakpoints. - * Returns -1 if the breakpoint is not in the list. - */ -static int bp_list_remove(gdb_breakpoint_type type, uint64_t addr, size_t len) -{ - struct breakpoint_t *bp = NULL; - - bp = bp_list_find(type, addr, len); - if (!bp) - return -1; - - bp->refcount--; - if (bp->refcount > 0) - return 0; - - switch (type) { - case GDB_BREAKPOINT_SW: - SLIST_REMOVE(&sw_breakpoints, bp, breakpoint_t, entries); - break; - - case GDB_BREAKPOINT_HW: - case GDB_WATCHPOINT_WRITE: - case GDB_WATCHPOINT_READ: - case GDB_WATCHPOINT_ACCESS: - /* We only support hardware watchpoints. */ - SLIST_REMOVE(&hw_breakpoints, bp, breakpoint_t, entries); - nr_hw_breakpoints--; - break; - - default: - assert(0); - } - - free(bp); - - return 0; -} - -int uhyve_gdb_read_registers(int vcpufd, uint8_t * registers, size_t * len) -{ - struct kvm_regs kregs; - struct kvm_sregs sregs; - struct uhyve_gdb_regs *gregs = (struct uhyve_gdb_regs *)registers; - int ret; - - kvm_ioctl(vcpufd, KVM_GET_REGS, &kregs); - kvm_ioctl(vcpufd, KVM_GET_SREGS, &sregs); - - if (*len < sizeof(struct uhyve_gdb_regs)) - return -1; - - *len = sizeof(struct uhyve_gdb_regs); - - gregs->rax = kregs.rax; - gregs->rbx = kregs.rbx; - gregs->rcx = kregs.rcx; - gregs->rdx = kregs.rdx; - - gregs->rsi = kregs.rsi; - gregs->rdi = kregs.rdi; - gregs->rbp = kregs.rbp; - gregs->rsp = kregs.rsp; - - gregs->r8 = kregs.r8; - gregs->r9 = kregs.r9; - gregs->r10 = kregs.r10; - gregs->r11 = kregs.r11; - - gregs->rip = kregs.rip; - gregs->eflags = kregs.rflags; - - gregs->cs = sregs.cs.selector; - gregs->ss = sregs.ss.selector; - gregs->ds = sregs.ds.selector; - gregs->es = sregs.es.selector; - gregs->fs = sregs.fs.selector; - gregs->gs = sregs.gs.selector; - - return 0; -} - -int uhyve_gdb_write_registers(int vcpufd, uint8_t * registers, size_t len) -{ - struct kvm_regs kregs; - struct kvm_sregs sregs; - struct uhyve_gdb_regs *gregs = (struct uhyve_gdb_regs *)registers; - int ret; - - /* Let's read all registers just in case we miss filling one of them. */ - kvm_ioctl(vcpufd, KVM_GET_REGS, &kregs); - kvm_ioctl(vcpufd, KVM_GET_SREGS, &sregs); - - if (len < sizeof(struct uhyve_gdb_regs)) - return -1; - - kregs.rax = gregs->rax; - kregs.rbx = gregs->rbx; - kregs.rcx = gregs->rcx; - kregs.rdx = gregs->rdx; - - kregs.rsi = gregs->rsi; - kregs.rdi = gregs->rdi; - kregs.rbp = gregs->rbp; - kregs.rsp = gregs->rsp; - - kregs.r8 = gregs->r8; - kregs.r9 = gregs->r9; - kregs.r10 = gregs->r10; - kregs.r11 = gregs->r11; - - kregs.rip = gregs->rip; - kregs.rflags = gregs->eflags; - - /* XXX: not sure if just setting .selector is enough. */ - sregs.cs.selector = gregs->cs; - sregs.ss.selector = gregs->ss; - sregs.ds.selector = gregs->ds; - sregs.es.selector = gregs->es; - sregs.fs.selector = gregs->fs; - sregs.gs.selector = gregs->gs; - - kvm_ioctl(vcpufd, KVM_SET_REGS, &kregs); - kvm_ioctl(vcpufd, KVM_SET_SREGS, &sregs); - - return 0; -} - -int uhyve_gdb_add_breakpoint(int vcpufd, gdb_breakpoint_type type, - uint64_t addr, size_t len) -{ - struct breakpoint_t *bp; - - assert(type < GDB_BREAKPOINT_MAX); - - if (bp_list_find(type, addr, len)) - return 0; - - bp = bp_list_insert(type, addr, len); - if (bp == NULL) - return -1; - - if (type == GDB_BREAKPOINT_SW) - kvm_arch_insert_sw_breakpoint(bp); - - if (uhyve_gdb_update_guest_debug(vcpufd) == -1) - return -1; - - return 0; -} - -int uhyve_gdb_remove_breakpoint(int vcpufd, gdb_breakpoint_type type, - uint64_t addr, size_t len) -{ - struct breakpoint_t *bp; - - assert(type < GDB_BREAKPOINT_MAX); - - if (type == GDB_BREAKPOINT_SW) { - bp = bp_list_find(type, addr, len); - if (bp) - kvm_arch_remove_sw_breakpoint(bp); - } - - if (bp_list_remove(type, addr, len) == -1) - return -1; - - if (uhyve_gdb_update_guest_debug(vcpufd) == -1) - return -1; - - return 0; -} - -int uhyve_gdb_enable_ss(int vcpufd) -{ - stepping = true; - - if (uhyve_gdb_update_guest_debug(vcpufd) == -1) - return -1; - - return 0; -} - -int uhyve_gdb_disable_ss(int vcpufd) -{ - stepping = false; - - if (uhyve_gdb_update_guest_debug(vcpufd) == -1) - return -1; - - return 0; -} - -/* Convert a guest virtual address into the correspondign physical address */ -int uhyve_gdb_guest_virt_to_phys(int vcpufd, const uint64_t virt, uint64_t * phys) -{ - struct kvm_translation kt; - - kt.linear_address = virt; - kvm_ioctl(vcpufd, KVM_TRANSLATE, &kt); - - *phys = kt.physical_address; - return 0; -} - -#endif diff --git a/tools/uhyve-gdb-x86_64.h b/tools/uhyve-gdb-x86_64.h deleted file mode 100644 index fddee9569..000000000 --- a/tools/uhyve-gdb-x86_64.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * This file was adapted from the solo5/ukvm code base, initial copyright block - * follows: - */ - -/* - * Copyright (c) 2015-2017 Contributors as noted in the AUTHORS file - * - * This file is part of ukvm, a unikernel monitor. - * - * Permission to use, copy, modify, and/or distribute this software - * for any purpose with or without fee is hereby granted, provided - * that the above copyright notice and this permission notice appear - * in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL - * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE - * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR - * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS - * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#ifndef UHYVE_GDB_X86_64_H -#define UHYVE_GDB_X86_64_H - -#include -#include - -/* - * X86_64 - * XXX: Can't find any gdb include file with the list of registers per - * architecture (something like ia64_regs.h). The closest I can get is a - * list of the registers from gdb (debugging an ordinary x86_64 binary): - * - * (gdb) info registers - * rax 0x0 0 - * rbx 0x0 0 - * rcx 0x0 0 - * ... - * fs 0x0 0 - * gs 0x0 0 - * (gdb) - */ - -struct uhyve_gdb_regs { - uint64_t rax; - uint64_t rbx; - uint64_t rcx; - uint64_t rdx; - uint64_t rsi; - uint64_t rdi; - uint64_t rbp; - uint64_t rsp; - uint64_t r8; - uint64_t r9; - uint64_t r10; - uint64_t r11; - uint64_t r12; - uint64_t r13; - uint64_t r14; - uint64_t r15; - uint64_t rip; - - uint32_t eflags; - uint32_t cs; - uint32_t ss; - uint32_t ds; - uint32_t es; - uint32_t fs; - uint32_t gs; - uint8_t st[8][10]; -}; - -#endif /* UHYVE_GDB_X86_64_H */ diff --git a/tools/uhyve-gdb.h b/tools/uhyve-gdb.h deleted file mode 100644 index 8bf693a82..000000000 --- a/tools/uhyve-gdb.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * This file was adapted from the solo5/ukvm code base, initial copyright block - * follows: - */ - -/* - * Copyright (c) 2015-2017 Contributors as noted in the AUTHORS file - * - * This file is part of ukvm, a unikernel monitor. - * - * Permission to use, copy, modify, and/or distribute this software - * for any purpose with or without fee is hereby granted, provided - * that the above copyright notice and this permission notice appear - * in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL - * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE - * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR - * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS - * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#ifndef UHYVE_GDB_H -#define UHYVE_GDB_H - -#include -#include - -/* GDB breakpoint/watchpoint types */ -typedef enum _gdb_breakpoint_type { - /* Do not change these. The values have to match on the GDB client - * side. */ - GDB_BREAKPOINT_SW = 0, - GDB_BREAKPOINT_HW, - GDB_WATCHPOINT_WRITE, - GDB_WATCHPOINT_READ, - GDB_WATCHPOINT_ACCESS, - GDB_BREAKPOINT_MAX -} gdb_breakpoint_type; - -#define GDB_SIGNAL_FIRST 0 -#define GDB_SIGNAL_QUIT 3 -#define GDB_SIGNAL_KILL 9 -#define GDB_SIGNAL_TRAP 5 -#define GDB_SIGNAL_SEGV 11 -#define GDB_SIGNAL_TERM 15 -#define GDB_SIGNAL_IO 23 -#define GDB_SIGNAL_DEFAULT 144 - -/* prototypes */ -int uhyve_gdb_enable_ss(int vcpufd); -int uhyve_gdb_disable_ss(int vcpufd); -int uhyve_gdb_read_registers(int vcpufd, uint8_t *reg, size_t *len); -int uhyve_gdb_write_registers(int vcpufd, uint8_t *reg, size_t len); -int uhyve_gdb_add_breakpoint(int vcpufd, gdb_breakpoint_type type, - uint64_t addr, size_t len); -int uhyve_gdb_remove_breakpoint(int vcpufd, gdb_breakpoint_type type, - uint64_t addr, size_t len); -int uhyve_gdb_guest_virt_to_phys(int vcpufd, const uint64_t virt, - uint64_t *phys); - -/* interface with uhyve.c */ -void uhyve_gdb_handle_exception(int vcpufd, int sigval); -void uhyve_gdb_handle_term(void); -int uhyve_gdb_init(int vcpufd); - -#ifdef __x86_64__ -#include "uhyve-gdb-x86_64.h" -#else -#include "uhyve-gdb-aarch64.h" -#endif - -#endif /* UHYVE_GDB_H */ diff --git a/tools/uhyve-migration-rdma.c b/tools/uhyve-migration-rdma.c deleted file mode 100644 index 5c3242c7f..000000000 --- a/tools/uhyve-migration-rdma.c +++ /dev/null @@ -1,873 +0,0 @@ -/* - * Copyright (c) 2018, Simon Pickartz, RWTH Aachen University - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this - * software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#define _GNU_SOURCE - -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -#include "uhyve-migration.h" -#include "uhyve.h" - - -#ifdef __RDMA_MIGRATION__ -#define IB_USE_ODP (0) - -#define IB_CQ_ENTRIES (1) -#define IB_MAX_INLINE_DATA (0) -#define IB_MAX_DEST_RD_ATOMIC (1) -#define IB_MIN_RNR_TIMER (1) -#define IB_MAX_SEND_WR (8192) // TODO: should be - // com_hndl.dev_attr_ex.orig_attr.max_qp_wr - // fix for mlx_5 adapter -#define IB_MAX_RECV_WR (1) -#define IB_MAX_SEND_SGE (1) -#define IB_MAX_RECV_SGE (1) - -typedef enum ib_wr_ids { - IB_WR_NO_ID = 0, - IB_WR_WRITE_LAST_PAGE_ID, - IB_WR_RECV_LAST_PAGE_ID, - IB_WR_BASE_ID -} ib_wr_ids_t; - -uint64_t cur_wr_id = IB_WR_BASE_ID; - -typedef struct qp_info { - uint32_t qpn; - uint16_t lid; - uint16_t psn; - uint32_t *keys; - uint64_t addr; -} qp_info_t; - -typedef struct com_hndl { - struct ibv_context *ctx; /* device context */ - struct ibv_device_attr_ex dev_attr_ex; /* extended device attributes */ - struct ibv_port_attr port_attr; /* port attributes */ - struct ibv_pd *pd; /* protection domain */ - struct ibv_mr **mrs; /* memory regions */ - struct ibv_cq *cq; /* completion queue */ - struct ibv_qp *qp; /* queue pair */ - struct ibv_comp_channel *comp_chan; /* comp. event channel */ - qp_info_t loc_qp_info; - qp_info_t rem_qp_info; - uint8_t used_port; /* port of the IB device */ - uint8_t *buf; /* the guest memory (with potential gaps!) */ - size_t mr_cnt; /* number of memory regions */ -} com_hndl_t; - - -static com_hndl_t com_hndl; -static struct ibv_send_wr *send_list = NULL; -static struct ibv_send_wr *send_list_last = NULL; -static size_t send_list_length = 0; - -/** - * \brief Prints info of a send_wr - * - * \param id the ID of the send_wr - */ -static inline -void print_send_wr_info(uint64_t id) -{ - struct ibv_send_wr *search_wr = send_list; - - /* find send_wr with id */ - while(search_wr) { - if (search_wr->wr_id == id) { - fprintf(stderr, "[INFO] WR_ID: %llu; LADDR: 0x%llx; RADDR: 0x%llx; SIZE: %llu\n", - search_wr->wr_id, - search_wr->sg_list->addr, - search_wr->wr.rdma.remote_addr, - search_wr->sg_list->length); - - break; - } - - search_wr = search_wr->next; - } - - if (search_wr == NULL) { - fprintf(stderr, "[ERROR] Could not find send_wr with ID %llu\n", id); - } -} - - -/** - * \brief Initializes the IB communication structures - * - * \param com_hndl the structure containing all communication relevant infos - * \param buf the buffer that should be registrered with the QP - * - * This function sets up the IB communication channel. It registers the 'buf' - * with a new protection domain. On its termination there is a QP in the INIT - * state ready to be connected with the remote side. - */ -static void -init_com_hndl(size_t mem_chunk_cnt, mem_chunk_t *mem_chunks) -{ - /* initialize com_hndl */ - memset(&com_hndl, 0, sizeof(com_hndl)); - - /* the guest physical memory is the communication buffer */ - com_hndl.buf = guest_mem; - com_hndl.mr_cnt = mem_chunk_cnt; - - struct ibv_device **device_list = NULL; - int num_devices = 0; - bool active_port_found = false; - - /* determine first available device */ - if ((device_list = ibv_get_device_list(&num_devices)) == NULL) { - fprintf(stderr, - "[ERROR] Could not determine available IB devices " - "- %d (%s). Abort!\n", - errno, - strerror(errno)); - exit(EXIT_FAILURE); - } - - /* find device with active port */ - size_t cur_dev = 0; - for (cur_dev=0; cur_devaddr = 0x%llx; com_hndl->mrs[%d].length = %llu\n", - i, - com_hndl.mrs[i]->addr, - i, - com_hndl.mrs[i]->length); - } - - /* create completion event channel */ - if ((com_hndl.comp_chan = - ibv_create_comp_channel(com_hndl.ctx)) == NULL) { - fprintf(stderr, - "[ERROR] Could not create the completion channel " - "- %d (%s). Abort!\n", - errno, - strerror(errno)); - exit(EXIT_FAILURE); - } - - /* create the completion queue */ - if ((com_hndl.cq = ibv_create_cq(com_hndl.ctx, - IB_CQ_ENTRIES, - NULL, - com_hndl.comp_chan, - 0)) == NULL) { - fprintf(stderr, - "[ERROR] Could not create the completion queue " - "- %d (%s). Abort!\n", - errno, - strerror(errno)); - exit(EXIT_FAILURE); - } - - /* create send and recv queue pair and initialize it */ - struct ibv_qp_init_attr init_attr = { - .send_cq = com_hndl.cq, - .recv_cq = com_hndl.cq, - .cap = { - .max_send_wr = IB_MAX_SEND_WR, - .max_recv_wr = IB_MAX_RECV_WR, - .max_send_sge = IB_MAX_SEND_SGE, - .max_recv_sge = IB_MAX_RECV_SGE, - .max_inline_data = IB_MAX_INLINE_DATA - }, - .qp_type = IBV_QPT_RC, - .sq_sig_all = 0 /* we do not want a CQE for each WR */ - }; - if ((com_hndl.qp = ibv_create_qp(com_hndl.pd, &init_attr)) == NULL) { - fprintf(stderr, - "[ERROR] Could not create the queue pair " - "- %d (%s). Abort!\n", - errno, - strerror(errno)); - exit(EXIT_FAILURE); - } - - struct ibv_qp_attr attr = { - .qp_state = IBV_QPS_INIT, - .pkey_index = 0, - .port_num = com_hndl.used_port, - .qp_access_flags = (IBV_ACCESS_REMOTE_WRITE) - }; - if (ibv_modify_qp(com_hndl.qp, - &attr, - IBV_QP_STATE | - IBV_QP_PKEY_INDEX | - IBV_QP_PORT | - IBV_QP_ACCESS_FLAGS) < 0) { - fprintf(stderr, - "[ERROR] Could not set QP into init state " - "- %d (%s). Abort!\n", - errno, - strerror(errno)); - exit(EXIT_FAILURE); - } - - /* fill in local qp_info */ - com_hndl.loc_qp_info.qpn = com_hndl.qp->qp_num; - com_hndl.loc_qp_info.psn = lrand48() & 0xffffff; - com_hndl.loc_qp_info.addr = (uint64_t)com_hndl.buf; - com_hndl.loc_qp_info.lid = com_hndl.port_attr.lid; - - com_hndl.loc_qp_info.keys = (uint32_t*)malloc(sizeof(uint32_t)*com_hndl.mr_cnt); - for (i=0; irkey; - } -} - -/** - * \brief Frees IB related resources - * - * \param com_hndl the structure containing all communication relevant infos - */ -static void -destroy_com_hndl(void) -{ - if (ibv_destroy_qp(com_hndl.qp) < 0) { - fprintf(stderr, - "[ERROR] Could not destroy the queue pair " - "- %d (%s). Abort!\n", - errno, - strerror(errno)); - exit(EXIT_FAILURE); - } - - if (ibv_destroy_cq(com_hndl.cq) < 0) { - fprintf(stderr, - "[ERROR] Could not deallocate the protection domain " - "- %d (%s). Abort!\n", - errno, - strerror(errno)); - exit(EXIT_FAILURE); - } - - if (ibv_destroy_comp_channel(com_hndl.comp_chan) < 0) { - fprintf(stderr, - "[ERROR] Could not destroy the completion channel " - "- %d (%s). Abort!\n", - errno, - strerror(errno)); - exit(EXIT_FAILURE); - } - - int i = 0; - for (i=0; inext = NULL; - send_wr->sg_list = sge; - send_wr->num_sge = 1; - send_wr->wr_id = ++cur_wr_id; - send_wr->opcode = IBV_WR_RDMA_WRITE; - - return send_wr; - -} - -/** - * \brief Creates an 'ibv_send_wr' and appends it to the send_list - * - * \param addr the page table entry of the memory page - * \param addr_size the size of the page table entry - * \param page the buffer to be send in this WR - * \param page_size the size of the buffer - * - * This function creates an 'ibv_send_wr' structure and appends this to the - * global send_list. It sets the source/destination information and sets the - * IBV_SEND_SIGNALED flag as appropriate. - */ -static void -create_send_list_entry (void *addr, size_t addr_size, void *page, size_t page_size) -{ - /* create work request */ - struct ibv_send_wr *send_wr = prepare_send_list_elem(); - - /* configure source buffer */ - int i = 0; - for (i=0; i= (uint64_t)com_hndl.mrs[i]->addr) && - ((uint64_t)page < ((uint64_t)com_hndl.mrs[i]->addr + (uint64_t)com_hndl.mrs[i]->length))) { - send_wr->sg_list->addr = (uintptr_t)page; - send_wr->sg_list->length = page_size; - send_wr->sg_list->lkey = com_hndl.mrs[i]->lkey; - - send_wr->wr.rdma.rkey = com_hndl.rem_qp_info.keys[i]; - - break; - } - } - - /* did we find the correct memory region? */ - if (i == com_hndl.mr_cnt) { - fprintf(stderr, "[ERROR] Could not find a valid MR for address 0x%llx!\n", page); - return; - } - - /* configure destination buffer */ - if (addr) { - send_wr->wr.rdma.remote_addr = com_hndl.rem_qp_info.addr + determine_dest_offset(*(size_t*)addr); - } else { - send_wr->wr.rdma.remote_addr = com_hndl.rem_qp_info.addr; - } - - /* apped work request to send list */ - if (send_list == NULL) { - send_list = send_list_last = send_wr; - } else { - send_list_last->next = send_wr; - send_list_last = send_list_last->next; - } - /* we have to request a CQE if max_send_wr is reached to avoid overflows */ - if ((++send_list_length%com_hndl.dev_attr_ex.orig_attr.max_qp_wr) == 0) { - send_list_last->send_flags = IBV_SEND_SIGNALED; - } -} - - -/** - * \brief Prepares a send_list containing all memory defined by com_hndl.mrs - * - * This function creates as many send_wr items as required to cover all - * com_hndl.mrs in accordance with the maximum message size that can be - * transmitted per send_sr (com_hndl.port_attr.max_msg_sz). - */ -static inline -void enqueue_all_mrs(void) -{ - uint64_t max_msg_sz = com_hndl.port_attr.max_msg_sz; - int i = 0; - - /* send all MRs */ - for (i=0; ilength; - - /* split the MR if it exceed the max_msg_sz */ - size_t cur_chunk = 0, max_chunks = cur_mr_length/max_msg_sz; - for (cur_chunk; cur_chunk < max_chunks; ++cur_chunk) { - size_t cur_offset = cur_chunk*max_msg_sz; - size_t cur_glob_offset = cur_offset + (uint64_t)com_hndl.mrs[i]->addr - (uint64_t)guest_mem; - create_send_list_entry((void*)&cur_glob_offset, 0, (void*)((uint64_t)com_hndl.mrs[i]->addr+cur_offset), max_msg_sz); - } - - /* do we have a remainder? */ - uint64_t remainder = cur_mr_length%max_msg_sz; - if (remainder) { - size_t cur_offset = cur_mr_length-remainder; - size_t cur_glob_offset = cur_offset + (uint64_t)com_hndl.mrs[i]->addr - (uint64_t)guest_mem; - create_send_list_entry((void*)&cur_glob_offset, 0, (void*)((uint64_t)com_hndl.mrs[i]->addr+cur_offset), remainder); - } - } -} - - -/** - * \brief Sends the guest memory to the destination - * - * \param mode MIG_MODE_COMPLETE_DUMP sends the complete memory and - * MIG_MODE_INCREMENTAL_DUMP only the mapped guest pages - */ -void send_guest_mem(mig_mode_t mode, bool final_dump, size_t mem_chunk_cnt, mem_chunk_t *mem_chunks) -{ - int res = 0, i = 0; - static bool ib_initialized = false; - - /* prepare IB channel */ - if (!ib_initialized) { - init_com_hndl(mem_chunk_cnt, mem_chunks); - exchange_qp_info(false); - con_com_buf(); - - ib_initialized = true; - } - - /* determine migration mode */ - switch (mode) { - case MIG_MODE_COMPLETE_DUMP: - enqueue_all_mrs(); - break; - case MIG_MODE_INCREMENTAL_DUMP: - /* iterate guest page tables */ - determine_dirty_pages(create_send_list_entry); - break; - default: - fprintf(stderr, "[ERROR] Unknown migration mode. Abort!\n"); - exit(EXIT_FAILURE); - } - - /* create a dumy WR request if there is nothing to send */ - if (send_list_length == 0) - create_send_list_entry(NULL, 0, NULL, 0); - - /* we have to wait for the last WR before informing dest */ - if ((mode == MIG_MODE_COMPLETE_DUMP) || final_dump) { - send_list_last->wr_id = IB_WR_WRITE_LAST_PAGE_ID; - send_list_last->opcode = IBV_WR_RDMA_WRITE_WITH_IMM; - send_list_last->send_flags = IBV_SEND_SIGNALED | IBV_SEND_SOLICITED; - send_list_last->imm_data = htonl(0x1); - } else { - send_list_last->wr_id = IB_WR_WRITE_LAST_PAGE_ID; - send_list_last->send_flags = IBV_SEND_SIGNALED; - } - - printf("DEBUG: Send list length %d\n", send_list_length); - - /* we have to call ibv_post_send() as long as 'send_list' contains elements */ - struct ibv_wc wc; - struct ibv_send_wr *remaining_send_wr = NULL; - do { - /* send data */ - remaining_send_wr = NULL; - if (ibv_post_send(com_hndl.qp, send_list, &remaining_send_wr) && (errno != ENOMEM)) { - fprintf(stderr, - "[ERROR] Could not post send" - "- %d (%s). Abort!\n", - errno, - strerror(errno)); - exit(EXIT_FAILURE); - } - - /* wait for send WRs if CQ is full */ - do { - if ((res = ibv_poll_cq(com_hndl.cq, 1, &wc)) < 0) { - fprintf(stderr, - "[ERROR] Could not poll on CQ" - "- %d (%s). Abort!\n", - errno, - strerror(errno)); - exit(EXIT_FAILURE); - } - } while (res < 1); - if (wc.status != IBV_WC_SUCCESS) { - fprintf(stderr, - "[ERROR] WR failed status %s (%d) for wr_id %llu\n", - ibv_wc_status_str(wc.status), - wc.status, - wc.wr_id); - - print_send_wr_info(wc.wr_id); - } - send_list = remaining_send_wr; - } while (remaining_send_wr); - - - /* ensure that we receive the CQE for the last page */ - if (wc.wr_id != IB_WR_WRITE_LAST_PAGE_ID) { - fprintf(stderr, - "[ERROR] WR failed status %s (%d) for wr_id %d\n", - ibv_wc_status_str(wc.status), - wc.status, - (int)wc.wr_id); - } - - /* cleanup send_list */ - struct ibv_send_wr *cur_send_wr = send_list; - struct ibv_send_wr *tmp_send_wr = NULL; - while (cur_send_wr != NULL) { - free(cur_send_wr->sg_list); - tmp_send_wr = cur_send_wr; - cur_send_wr = cur_send_wr->next; - free(tmp_send_wr); - } - send_list_length = 0; - - /* do not close the channel in a pre-dump */ - if (!final_dump) - return; - - /* free IB-related resources */ - destroy_com_hndl(); - ib_initialized = false; - - fprintf(stderr, "Guest memory sent!\n"); -} - - - -/** - * \brief Receives the guest memory from the source - * - * The receive participates in the IB connection setup and waits for the - * 'solicited' event sent with the last WR issued by the sender. - */ -void recv_guest_mem(size_t mem_chunk_cnt, mem_chunk_t *mem_chunks) -{ - int res = 0; - - /* prepare IB channel */ - init_com_hndl(mem_chunk_cnt, mem_chunks); - exchange_qp_info(true); - con_com_buf(); - - /* request notification on the event channel */ - if (ibv_req_notify_cq(com_hndl.cq, 1) < 0) { - fprintf(stderr, - "[ERROR] Could request notify for completion queue " - "- %d (%s). Abort!\n", - errno, - strerror(errno)); - exit(EXIT_FAILURE); - } - - /* post recv matching IBV_RDMA_WRITE_WITH_IMM */ - struct ibv_cq *ev_cq; - void *ev_ctx; - struct ibv_sge sg; - struct ibv_recv_wr recv_wr; - struct ibv_recv_wr *bad_wr; - uint32_t recv_buf = 0; - - memset(&sg, 0, sizeof(sg)); - sg.addr = (uintptr_t)&recv_buf; - sg.length = sizeof(recv_buf); - sg.lkey = com_hndl.mrs[0]->lkey; - - memset(&recv_wr, 0, sizeof(recv_wr)); - recv_wr.wr_id = 0; - recv_wr.sg_list = &sg; - recv_wr.num_sge = 1; - - if (ibv_post_recv(com_hndl.qp, &recv_wr, &bad_wr) < 0) { - fprintf(stderr, - "[ERROR] Could post recv - %d (%s). Abort!\n", - errno, - strerror(errno)); - exit(EXIT_FAILURE); - } - - /* wait for requested event */ - if (ibv_get_cq_event(com_hndl.comp_chan, &ev_cq, &ev_ctx) < 0) { - fprintf(stderr, - "[ERROR] Could get event from completion channel " - "- %d (%s). Abort!\n", - errno, - strerror(errno)); - exit(EXIT_FAILURE); - } - - /* acknowledge the event */ - ibv_ack_cq_events(com_hndl.cq, 1); - - /* free IB-related resources */ - destroy_com_hndl(); - - fprintf(stderr, "Guest memory received!\n"); -} -#endif /* __RDMA_MIGRATION__ */ diff --git a/tools/uhyve-migration.c b/tools/uhyve-migration.c deleted file mode 100644 index a8d1d098b..000000000 --- a/tools/uhyve-migration.c +++ /dev/null @@ -1,277 +0,0 @@ -/* - * Copyright (c) 2018, Simon Pickartz, RWTH Aachen University - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this - * software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifdef __x86_64__ -#define _GNU_SOURCE - -#include -#include -#include -#include -#include -#include - -#include "uhyve-migration.h" -#include "uhyve.h" - -static struct sockaddr_in mig_server; -static int com_sock = 0; -static int listen_sock = 0; - -static mig_type_t mig_type = MIG_TYPE_COLD; - -/** - * \brief Returns the configured migration type - */ -mig_type_t -get_migration_type(void) -{ - return mig_type; -} - -/** - * \brief Sets the migration type - * - * \param mig_type_str A string defining the migration type - */ -void -set_migration_type(const char *mig_type_str) -{ - if (mig_type_str == NULL) - return; - - int i; - bool found_type = false; - for (i=0; i - -extern size_t guest_size; -extern uint8_t* guest_mem; - -#define MIGRATION_PORT 1337 - -typedef enum { - MIG_MODE_COMPLETE_DUMP = 1, - MIG_MODE_INCREMENTAL_DUMP, -} mig_mode_t; - -typedef enum { - MIG_TYPE_COLD = 0, - MIG_TYPE_LIVE, -} mig_type_t; - -const static struct { - mig_type_t mig_type; - const char *str; -} mig_type_conv [] = { - {MIG_TYPE_COLD, "cold"}, - {MIG_TYPE_LIVE, "live"}, -}; - -typedef struct _mem_chunk { - size_t size; - uint8_t *ptr; -} mem_chunk_t; - -typedef struct _migration_metadata { - uint32_t ncores; - size_t guest_size; - uint32_t no_checkpoint; - uint64_t elf_entry; - bool full_checkpoint; -} migration_metadata_t; - -void set_migration_type(const char *mig_type_str); -mig_type_t get_migration_type(void); - -void wait_for_client(uint16_t listen_portno); -void set_migration_target(const char *ip_str, int port); -void connect_to_server(void); -void close_migration_channel(void); - -int recv_data(void *buffer, size_t length); -int send_data(void *buffer, size_t length); - -void send_guest_mem(mig_mode_t mode, bool final_dump, size_t mem_chunk_cnt, mem_chunk_t *mem_chunks); -void recv_guest_mem(size_t mem_chunk_cnt, mem_chunk_t *mem_chunks); -#endif /* __UHYVE_MIGRATION_H__ */ - - - diff --git a/tools/uhyve-net.c b/tools/uhyve-net.c deleted file mode 100755 index fbd1bf980..000000000 --- a/tools/uhyve-net.c +++ /dev/null @@ -1,189 +0,0 @@ -/* Copyright (c) 2015, IBM - * Author(s): Dan Williams - * Ricardo Koller - * Copyright (c) 2017, RWTH Aachen University - * Author(s): Tim van de Kamp - * - * Permission to use, copy, modify, and/or distribute this software - * for any purpose with or without fee is hereby granted, provided - * that the above copyright notice and this permission notice appear - * in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL - * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE - * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR - * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS - * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -/* We used several existing projects as guides - * kvmtest.c: http://lwn.net/Articles/658512/ - * lkvm: http://github.com/clearlinux/kvmtool - */ - -/* - * 15.1.2017: extend original version (https://github.com/Solo5/solo5) - * for HermitCore - */ - -#include "uhyve-net.h" -#include - -/* TODO: create an array or equal for more then one netif */ -static uhyve_netinfo_t netinfo; - -//-------------------------------------- ATTACH LINUX TAP -----------------------------------------// -int attach_linux_tap(const char *dev) -{ - struct ifreq ifr; - int fd, err; - - // @ indicates a pre-existing open fd onto the correct device. - if (dev[0] == '@') { - fd = atoi(&dev[1]); - - if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) - return -1; - return fd; - } - - fd = open("/dev/net/tun", O_RDWR | O_NONBLOCK); - - // Initialize interface request for TAP interface - memset(&ifr, 0x00, sizeof(ifr)); - - ifr.ifr_flags = IFF_TAP | IFF_NO_PI; - if (strlen(dev) > IFNAMSIZ) { - errno = EINVAL; - return -1; - } - strncpy(ifr.ifr_name, dev, IFNAMSIZ); - - // Try to create OR attach to an existing device. The Linux API has no way - // to differentiate between the two - - // create before a tap device with these commands: - // - // sudo ip tuntap add mode tap user - // sudo ip addr add 10.0.5.1/24 broadcast 10.0.5.255 - // sudo ip link set dev up - // - - if (ioctl(fd, TUNSETIFF, (void *)&ifr) < 0) { - err = errno; - close(fd); - errno = err; - return -1; - } - - // If we got back a different device than the one requested, e.g. because - // the caller mistakenly passed in '%d' (yes, that's really in the Linux API) - // then fail - - if (strncmp(ifr.ifr_name, dev, IFNAMSIZ) != 0) { - close(fd); - errno = ENODEV; - return -1; - } - - // Attempt a zero-sized write to the device. If the device was freshly created - // (as opposed to attached to an existing ine) this will fail with EIO. Ignore - // any other error return since that may indicate the device is up - // - // If this check produces a false positive then caller's later writes to fd will - // fali with EIO, which is not great but at least we tried - - char buf[1] = { 0 }; - if (write(fd, buf, 0) == -1 && errno == EIO) { - close(fd); - errno = ENODEV; - return -1; - } - - return fd; -} - -//---------------------------------- GET MAC ----------------------------------------------// -char* uhyve_get_mac(void) -{ - return netinfo.mac_str; -} - -//---------------------------------- SET MAC ----------------------------------------------// - -int uhyve_set_mac(void) -{ - int mac_is_set = 0; - uint8_t guest_mac[6]; - - char* str = getenv("HERMIT_NETIF_MAC"); - if (str) - { - const char *macptr = str; - const char *v_macptr = macptr; - // checking str is a valid MAC address - int i = 0; - int s = 0; - while(*v_macptr) { - if(isxdigit(*v_macptr)) { - i++; - } else if (*v_macptr == ':') { - if (i / 2 - 1 != s++) - break; - } else { - s = -1; - } - v_macptr++; - } - if (i != 12 || s != 5) { - warnx("Malformed mac address: %s\n", macptr); - } else { - snprintf(netinfo.mac_str, sizeof(netinfo.mac_str), "%s", macptr); - mac_is_set = 1; - } - } - - if (!mac_is_set) { - int rfd = open("/dev/urandom", O_RDONLY); - if(rfd == -1) - err(1, "Could not open /dev/urandom\n"); - int ret; - ret = read(rfd, guest_mac, sizeof(guest_mac)); - // compare the number of bytes read with the size of guest_mac - assert(ret == sizeof(guest_mac)); - close(rfd); - - guest_mac[0] &= 0xfe; // creats a random MAC-address in the locally administered - guest_mac[0] |= 0x02; // address range which can be used without conflict with other public devices - // save the MAC address in the netinfo - snprintf(netinfo.mac_str, sizeof(netinfo.mac_str), - "%02x:%02x:%02x:%02x:%02x:%02x", - guest_mac[0], guest_mac[1], guest_mac[2], - guest_mac[3], guest_mac[4], guest_mac[5]); - } - - return 0; -} - -//-------------------------------------- SETUP NETWORK ---------------------------------------------// -int uhyve_net_init(const char *netif) -{ - if (netif == NULL) { - err(1, "ERROR: no netif defined\n"); - return -1; - } - - // attaching netif - netfd = attach_linux_tap(netif); - if (netfd < 0) { - err(1, "Could not attach interface: %s\n", netif); - exit(1); - } - - uhyve_set_mac(); - - return netfd; -} diff --git a/tools/uhyve-net.h b/tools/uhyve-net.h deleted file mode 100755 index 540616cf7..000000000 --- a/tools/uhyve-net.h +++ /dev/null @@ -1,60 +0,0 @@ -#ifndef __UHYVE_NET_H__ -#define __UHYVE_NET_H__ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -/* network interface */ -#include -#include -#include -#include -#include -#include - -extern int netfd; - -// UHYVE_PORT_NETINFO -typedef struct { - /* OUT */ - char mac_str[18]; -} __attribute__((packed)) uhyve_netinfo_t; - -// UHYVE_PORT_NETWRITE -typedef struct { - /* IN */ - const void* data; - size_t len; - /* OUT */ - int ret; -} __attribute__((packed)) uhyve_netwrite_t; - -// UHYVE_PORT_NETREAD -typedef struct { - /* IN */ - void* data; - /* IN / OUT */ - size_t len; - /* OUT */ - int ret; -} __attribute__((packed)) uhyve_netread_t; - -// UHYVE_PORT_NETSTAT -typedef struct { - /* IN */ - int status; -} __attribute__((packed)) uhyve_netstat_t; - -int uhyve_net_init(const char *hermit_netif); -char* uhyve_get_mac(void); - -#endif diff --git a/tools/uhyve-syscalls.h b/tools/uhyve-syscalls.h deleted file mode 100644 index 7b83cdc01..000000000 --- a/tools/uhyve-syscalls.h +++ /dev/null @@ -1,55 +0,0 @@ -/* Copyright (c) 2017, RWTH Aachen University - * Author(s): Daniel Krebs - * - * Permission to use, copy, modify, and/or distribute this software - * for any purpose with or without fee is hereby granted, provided - * that the above copyright notice and this permission notice appear - * in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL - * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE - * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR - * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS - * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ -#ifndef UHYVE_SYSCALLS_H -#define UHYVE_SYSCALLS_H - -#include -#include - -typedef struct { - int fd; - const char* buf; - size_t len; -} __attribute__((packed)) uhyve_write_t; - -typedef struct { - const char* name; - int flags; - int mode; - int ret; -} __attribute__((packed)) uhyve_open_t; - -typedef struct { - int fd; - int ret; -} __attribute__((packed)) uhyve_close_t; - -typedef struct { - int fd; - char* buf; - size_t len; - ssize_t ret; -} __attribute__((packed)) uhyve_read_t; - -typedef struct { - int fd; - off_t offset; - int whence; -} __attribute__((packed)) uhyve_lseek_t; - -#endif // UHYVE_SYSCALLS_H diff --git a/tools/uhyve-x86_64.c b/tools/uhyve-x86_64.c deleted file mode 100644 index ca0c724cd..000000000 --- a/tools/uhyve-x86_64.c +++ /dev/null @@ -1,1255 +0,0 @@ -/* - * Copyright (c) 2018, Stefan Lankes, RWTH Aachen University - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this - * software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifdef __x86_64__ -#define _GNU_SOURCE - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef HAVE_MSR_INDEX_H -#include -#else -/* x86-64 specific MSRs */ -#define MSR_EFER 0xc0000080 /* extended feature register */ -#define MSR_STAR 0xc0000081 /* legacy mode SYSCALL target */ -#define MSR_LSTAR 0xc0000082 /* long mode SYSCALL target */ -#define MSR_CSTAR 0xc0000083 /* compat mode SYSCALL target */ -#define MSR_SYSCALL_MASK 0xc0000084 /* EFLAGS mask for syscall */ -#define MSR_FS_BASE 0xc0000100 /* 64bit FS base */ -#define MSR_GS_BASE 0xc0000101 /* 64bit GS base */ -#define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow */ -#define MSR_TSC_AUX 0xc0000103 /* Auxiliary TSC */ - -#define MSR_IA32_CR_PAT 0x00000277 -#define MSR_PEBS_FRONTEND 0x000003f7 - -#define MSR_IA32_POWER_CTL 0x000001fc - -#define MSR_IA32_MC0_CTL 0x00000400 -#define MSR_IA32_MC0_STATUS 0x00000401 -#define MSR_IA32_MC0_ADDR 0x00000402 -#define MSR_IA32_MC0_MISC 0x00000403 - -#define MSR_IA32_SYSENTER_CS 0x00000174 -#define MSR_IA32_SYSENTER_ESP 0x00000175 -#define MSR_IA32_SYSENTER_EIP 0x00000176 - -#define MSR_IA32_APICBASE 0x0000001b -#define MSR_IA32_APICBASE_BSP (1<<8) -#define MSR_IA32_APICBASE_ENABLE (1<<11) -#define MSR_IA32_APICBASE_BASE (0xfffff<<12) - -#define MSR_IA32_MISC_ENABLE 0x000001a0 -#define MSR_IA32_TSC 0x00000010 - -/* EFER bits: */ -#define _EFER_SCE 0 /* SYSCALL/SYSRET */ -#define _EFER_LME 8 /* Long mode enable */ -#define _EFER_LMA 10 /* Long mode active (read-only) */ -#define _EFER_NX 11 /* No execute enable */ -#define _EFER_SVME 12 /* Enable virtualization */ -#define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */ -#define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */ - -#define EFER_SCE (1<<_EFER_SCE) -#define EFER_LME (1<<_EFER_LME) -#define EFER_LMA (1<<_EFER_LMA) -#define EFER_NX (1<<_EFER_NX) -#define EFER_SVME (1<<_EFER_SVME) -#define EFER_LMSLE (1<<_EFER_LMSLE) -#define EFER_FFXSR (1<<_EFER_FFXSR) -#endif -#include - -#include "uhyve.h" -#include "uhyve-x86_64.h" -#include "uhyve-syscalls.h" -#include "uhyve-migration.h" -#include "uhyve-net.h" -#include "proxy.h" - -// define this macro to create checkpoints with KVM's dirty log -//#define USE_DIRTY_LOG -#define MIG_ITERS 4 - -#define MAX_FNAME 256 - -#define GUEST_OFFSET 0x0 -#define CPUID_FUNC_PERFMON 0x0A -#define GUEST_PAGE_SIZE 0x200000 /* 2 MB pages in guest */ - -#define BOOT_GDT 0x1000 -#define BOOT_INFO 0x2000 -#define BOOT_PML4 0x10000 -#define BOOT_PDPTE 0x11000 -#define BOOT_PDE 0x12000 - -#define BOOT_GDT_NULL 0 -#define BOOT_GDT_CODE 1 -#define BOOT_GDT_DATA 2 -#define BOOT_GDT_MAX 3 - -#define KVM_32BIT_MAX_MEM_SIZE (1ULL << 32) -#define KVM_32BIT_GAP_SIZE (768 << 20) -#define KVM_32BIT_GAP_START (KVM_32BIT_MAX_MEM_SIZE - KVM_32BIT_GAP_SIZE) - -/// Page offset bits -#define PAGE_BITS 12 -#define PAGE_2M_BITS 21 -#define PAGE_SIZE (1L << PAGE_BITS) -/// Mask the page address without page map flags and XD flag -#if 0 -#define PAGE_MASK ((~0L) << PAGE_BITS) -#define PAGE_2M_MASK (~0L) << PAGE_2M_BITS) -#else -#define PAGE_MASK (((~0UL) << PAGE_BITS) & ~PG_XD) -#define PAGE_2M_MASK (((~0UL) << PAGE_2M_BITS) & ~PG_XD) -#endif - -// Page is present -#define PG_PRESENT (1 << 0) -// Page is read- and writable -#define PG_RW (1 << 1) -// Page is addressable from userspace -#define PG_USER (1 << 2) -// Page write through is activated -#define PG_PWT (1 << 3) -// Page cache is disabled -#define PG_PCD (1 << 4) -// Page was recently accessed (set by CPU) -#define PG_ACCESSED (1 << 5) -// Page is dirty due to recent write-access (set by CPU) -#define PG_DIRTY (1 << 6) -// Huge page: 4MB (or 2MB, 1GB) -#define PG_PSE (1 << 7) -// Page attribute table -#define PG_PAT PG_PSE -#if 1 -/* @brief Global TLB entry (Pentium Pro and later) - * - * HermitCore is a single-address space operating system - * => CR3 never changed => The flag isn't required for HermitCore - */ -#define PG_GLOBAL 0 -#else -#define PG_GLOBAL (1 << 8) -#endif -// This table is a self-reference and should skipped by page_map_copy() -#define PG_SELF (1 << 9) - -/// Disable execution for this page -#define PG_XD (1L << 63) - -#define BITS 64 -#define PHYS_BITS 52 -#define VIRT_BITS 48 -#define PAGE_MAP_BITS 9 -#define PAGE_LEVELS 4 - -#define IOAPIC_DEFAULT_BASE 0xfec00000 -#define APIC_DEFAULT_BASE 0xfee00000 - - -static bool cap_tsc_deadline = false; -static bool cap_irqchip = false; -static bool cap_adjust_clock_stable = false; -static bool cap_irqfd = false; -static bool cap_vapic = false; - -FILE *chk_file = NULL; - -extern size_t guest_size; -extern pthread_barrier_t barrier; -extern pthread_barrier_t migration_barrier; -extern pthread_t* vcpu_threads; -extern uint64_t elf_entry; -extern uint8_t* klog; -extern bool verbose; -extern bool full_checkpoint; -extern uint32_t no_checkpoint; -extern uint32_t ncores; -extern uint8_t* guest_mem; -extern size_t guest_size; -extern int kvm, vmfd, netfd, efd; -extern uint8_t* mboot; -extern __thread struct kvm_run *run; -extern __thread int vcpufd; -extern __thread uint32_t cpuid; - -extern vcpu_state_t *vcpu_thread_states; - -static inline void show_dtable(const char *name, struct kvm_dtable *dtable) -{ - fprintf(stderr, " %s %016zx %08hx\n", name, (size_t) dtable->base, (uint16_t) dtable->limit); -} - -static inline void show_segment(const char *name, struct kvm_segment *seg) -{ - fprintf(stderr, " %s %04hx %016zx %08x %02hhx %x %x %x %x %x %x %x\n", - name, (uint16_t) seg->selector, (size_t) seg->base, (uint32_t) seg->limit, - (uint8_t) seg->type, seg->present, seg->dpl, seg->db, seg->s, seg->l, seg->g, seg->avl); -} - -static void show_registers(int id, struct kvm_regs* regs, struct kvm_sregs* sregs) -{ - size_t cr0, cr2, cr3; - size_t cr4, cr8; - size_t rax, rbx, rcx; - size_t rdx, rsi, rdi; - size_t rbp, r8, r9; - size_t r10, r11, r12; - size_t r13, r14, r15; - size_t rip, rsp; - size_t rflags; - int i; - - rflags = regs->rflags; - rip = regs->rip; rsp = regs->rsp; - rax = regs->rax; rbx = regs->rbx; rcx = regs->rcx; - rdx = regs->rdx; rsi = regs->rsi; rdi = regs->rdi; - rbp = regs->rbp; r8 = regs->r8; r9 = regs->r9; - r10 = regs->r10; r11 = regs->r11; r12 = regs->r12; - r13 = regs->r13; r14 = regs->r14; r15 = regs->r15; - - fprintf(stderr, "\n Dump state of CPU %d\n", id); - fprintf(stderr, "\n Registers:\n"); - fprintf(stderr, " ----------\n"); - fprintf(stderr, " rip: %016zx rsp: %016zx flags: %016zx\n", rip, rsp, rflags); - fprintf(stderr, " rax: %016zx rbx: %016zx rcx: %016zx\n", rax, rbx, rcx); - fprintf(stderr, " rdx: %016zx rsi: %016zx rdi: %016zx\n", rdx, rsi, rdi); - fprintf(stderr, " rbp: %016zx r8: %016zx r9: %016zx\n", rbp, r8, r9); - fprintf(stderr, " r10: %016zx r11: %016zx r12: %016zx\n", r10, r11, r12); - fprintf(stderr, " r13: %016zx r14: %016zx r15: %016zx\n", r13, r14, r15); - - cr0 = sregs->cr0; cr2 = sregs->cr2; cr3 = sregs->cr3; - cr4 = sregs->cr4; cr8 = sregs->cr8; - - fprintf(stderr, " cr0: %016zx cr2: %016zx cr3: %016zx\n", cr0, cr2, cr3); - fprintf(stderr, " cr4: %016zx cr8: %016zx\n", cr4, cr8); - fprintf(stderr, "\n Segment registers:\n"); - fprintf(stderr, " ------------------\n"); - fprintf(stderr, " register selector base limit type p dpl db s l g avl\n"); - show_segment("cs ", &sregs->cs); - show_segment("ss ", &sregs->ss); - show_segment("ds ", &sregs->ds); - show_segment("es ", &sregs->es); - show_segment("fs ", &sregs->fs); - show_segment("gs ", &sregs->gs); - show_segment("tr ", &sregs->tr); - show_segment("ldt", &sregs->ldt); - show_dtable("gdt", &sregs->gdt); - show_dtable("idt", &sregs->idt); - - fprintf(stderr, "\n APIC:\n"); - fprintf(stderr, " -----\n"); - fprintf(stderr, " efer: %016zx apic base: %016zx\n", - (size_t) sregs->efer, (size_t) sregs->apic_base); - - fprintf(stderr, "\n Interrupt bitmap:\n"); - fprintf(stderr, " -----------------\n"); - for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) - fprintf(stderr, " %016zx", (size_t) sregs->interrupt_bitmap[i]); - fprintf(stderr, "\n"); -} - -void print_registers(void) -{ - struct kvm_regs regs; - struct kvm_sregs sregs; - - kvm_ioctl(vcpufd, KVM_GET_SREGS, &sregs); - kvm_ioctl(vcpufd, KVM_GET_REGS, ®s); - - show_registers(cpuid, ®s, &sregs); -} - -/// Filter CPUID functions that are not supported by the hypervisor and enable -/// features according to our needs. -static void filter_cpuid(struct kvm_cpuid2 *kvm_cpuid) -{ - for (uint32_t i = 0; i < kvm_cpuid->nent; i++) { - struct kvm_cpuid_entry2 *entry = &kvm_cpuid->entries[i]; - - switch (entry->function) { - case 1: - // CPUID to define basic cpu features - entry->ecx |= (1U << 31); // propagate that we are running on a hypervisor - if (cap_tsc_deadline) - entry->ecx |= (1U << 24); // enable TSC deadline feature - entry->edx |= (1U << 5); // enable msr support - break; - - case CPUID_FUNC_PERFMON: - // disable it - entry->eax = 0x00; - break; - - default: - // Keep the CPUID function as-is - break; - }; - } -} - -static void setup_system_64bit(struct kvm_sregs *sregs) -{ - sregs->cr0 |= X86_CR0_PE; - sregs->cr4 |= X86_CR4_PAE; - sregs->efer |= EFER_LME|EFER_LMA; -} - -static void setup_system_page_tables(struct kvm_sregs *sregs, uint8_t *mem) -{ - uint64_t *pml4 = (uint64_t *) (mem + BOOT_PML4); - uint64_t *pdpte = (uint64_t *) (mem + BOOT_PDPTE); - uint64_t *pde = (uint64_t *) (mem + BOOT_PDE); - uint64_t paddr; - - /* - * For simplicity we currently use 2MB pages and only a single - * PML4/PDPTE/PDE. - */ - - memset(pml4, 0x00, 4096); - memset(pdpte, 0x00, 4096); - memset(pde, 0x00, 4096); - - *pml4 = BOOT_PDPTE | (X86_PDPT_P | X86_PDPT_RW); - *pdpte = BOOT_PDE | (X86_PDPT_P | X86_PDPT_RW); - for (paddr = 0; paddr < 0x20000000ULL; paddr += GUEST_PAGE_SIZE, pde++) - *pde = paddr | (X86_PDPT_P | X86_PDPT_RW | X86_PDPT_PS); - - sregs->cr3 = BOOT_PML4; - sregs->cr4 |= X86_CR4_PAE; - sregs->cr0 |= X86_CR0_PG; -} - -static void setup_system_gdt(struct kvm_sregs *sregs, - uint8_t *mem, - uint64_t off) -{ - uint64_t *gdt = (uint64_t *) (mem + off); - struct kvm_segment data_seg, code_seg; - - /* flags, base, limit */ - gdt[BOOT_GDT_NULL] = GDT_ENTRY(0, 0, 0); - gdt[BOOT_GDT_CODE] = GDT_ENTRY(0xA09B, 0, 0xFFFFF); - gdt[BOOT_GDT_DATA] = GDT_ENTRY(0xC093, 0, 0xFFFFF); - - sregs->gdt.base = off; - sregs->gdt.limit = (sizeof(uint64_t) * BOOT_GDT_MAX) - 1; - - GDT_TO_KVM_SEGMENT(code_seg, gdt, BOOT_GDT_CODE); - GDT_TO_KVM_SEGMENT(data_seg, gdt, BOOT_GDT_DATA); - - sregs->cs = code_seg; - sregs->ds = data_seg; - sregs->es = data_seg; - sregs->fs = data_seg; - sregs->gs = data_seg; - sregs->ss = data_seg; -} - -static void setup_system(int vcpufd, uint8_t *mem, uint32_t id) -{ - static struct kvm_sregs sregs; - - // all cores use the same startup code - // => all cores use the same sregs - // => only the boot processor has to initialize sregs - if (id == 0) { - kvm_ioctl(vcpufd, KVM_GET_SREGS, &sregs); - - /* Set all cpu/mem system structures */ - setup_system_gdt(&sregs, mem, BOOT_GDT); - setup_system_page_tables(&sregs, mem); - setup_system_64bit(&sregs); - } - - kvm_ioctl(vcpufd, KVM_SET_SREGS, &sregs); -} - -static void setup_cpuid(int kvm, int vcpufd) -{ - struct kvm_cpuid2 *kvm_cpuid; - unsigned int max_entries = 100; - - // allocate space for cpuid we get from KVM - kvm_cpuid = calloc(1, sizeof(*kvm_cpuid) + (max_entries * sizeof(kvm_cpuid->entries[0]))); - kvm_cpuid->nent = max_entries; - - kvm_ioctl(kvm, KVM_GET_SUPPORTED_CPUID, kvm_cpuid); - - // set features - filter_cpuid(kvm_cpuid); - kvm_ioctl(vcpufd, KVM_SET_CPUID2, kvm_cpuid); - - free(kvm_cpuid); -} - -static size_t prepare_mem_chunk_info(mem_chunk_t **mem_chunks) { - size_t mem_chunk_cnt = 0; - if (guest_size < KVM_32BIT_GAP_START) { - mem_chunk_cnt = 1; - *mem_chunks = (mem_chunk_t*)malloc(sizeof(mem_chunk_t)*mem_chunk_cnt); - (*mem_chunks)[0].ptr = guest_mem; - (*mem_chunks)[0].size = guest_size; - } else { - mem_chunk_cnt = 2; - *mem_chunks = (mem_chunk_t*)malloc(sizeof(mem_chunk_t)*mem_chunk_cnt); - (*mem_chunks)[0].ptr = guest_mem; - (*mem_chunks)[0].size = KVM_32BIT_GAP_START; - (*mem_chunks)[1].ptr = (uint8_t*)((uint64_t)guest_mem + (KVM_32BIT_GAP_START + KVM_32BIT_GAP_SIZE)); - (*mem_chunks)[1].size = (uint64_t)guest_size - (KVM_32BIT_GAP_START + KVM_32BIT_GAP_SIZE); - } - - return mem_chunk_cnt; -} - -size_t determine_dest_offset(size_t src_addr) -{ - size_t ret = 0; - if (src_addr & PG_PSE) { - ret = src_addr & PAGE_2M_MASK; - } else { - ret = src_addr & PAGE_MASK; - } - return ret; -} - -void init_cpu_state(uint64_t elf_entry) -{ - struct kvm_regs regs = { - .rip = elf_entry, // entry point to HermitCore - .rflags = 0x2, // POR value required by x86 architecture - }; - struct kvm_mp_state mp_state = { KVM_MP_STATE_RUNNABLE }; - struct { - struct kvm_msrs info; - struct kvm_msr_entry entries[MAX_MSR_ENTRIES]; - } msr_data; - struct kvm_msr_entry *msrs = msr_data.entries; - - run->apic_base = APIC_DEFAULT_BASE; - setup_cpuid(kvm, vcpufd); - - // be sure that the multiprocessor is runable - kvm_ioctl(vcpufd, KVM_SET_MP_STATE, &mp_state); - - // enable fast string operations - msrs[0].index = MSR_IA32_MISC_ENABLE; - msrs[0].data = 1; - msr_data.info.nmsrs = 1; - kvm_ioctl(vcpufd, KVM_SET_MSRS, &msr_data); - - // only one core is able to enter startup code - // => the wait for the predecessor core - while (*((volatile uint32_t*) (mboot + 0x20)) < cpuid) - pthread_yield(); - *((volatile uint32_t*) (mboot + 0x30)) = cpuid; - - /* Setup registers and memory. */ - setup_system(vcpufd, guest_mem, cpuid); - kvm_ioctl(vcpufd, KVM_SET_REGS, ®s); -} - -vcpu_state_t read_cpu_state(void) -{ - vcpu_state_t cpu_state; - char fname[MAX_FNAME]; - snprintf(fname, MAX_FNAME, "checkpoint/chk%u_core%u.dat", no_checkpoint, cpuid); - - FILE* f = fopen(fname, "r"); - if (f == NULL) - err(1, "fopen: unable to open file"); - - if (fread(&cpu_state, sizeof(cpu_state), 1, f) != 1) - err(1, "fread failed\n"); - - fclose(f); - - return cpu_state; -} - -void restore_cpu_state(vcpu_state_t cpu_state) -{ - cpu_state.mp_state.mp_state = KVM_MP_STATE_RUNNABLE; - - run->apic_base = APIC_DEFAULT_BASE; - setup_cpuid(kvm, vcpufd); - - - kvm_ioctl(vcpufd, KVM_SET_SREGS, &cpu_state.sregs); - kvm_ioctl(vcpufd, KVM_SET_REGS, &cpu_state.regs); - kvm_ioctl(vcpufd, KVM_SET_MSRS, &cpu_state.msr_data); - kvm_ioctl(vcpufd, KVM_SET_XCRS, &cpu_state.xcrs); - kvm_ioctl(vcpufd, KVM_SET_MP_STATE, &cpu_state.mp_state); - kvm_ioctl(vcpufd, KVM_SET_LAPIC, &cpu_state.lapic); - kvm_ioctl(vcpufd, KVM_SET_FPU, &cpu_state.fpu); - kvm_ioctl(vcpufd, KVM_SET_XSAVE, &cpu_state.xsave); - kvm_ioctl(vcpufd, KVM_SET_VCPU_EVENTS, &cpu_state.events); -} - -vcpu_state_t save_cpu_state(void) -{ - int n = 0; - vcpu_state_t cpu_state; - - /* define the list of required MSRs */ - cpu_state.msr_data.entries[n++].index = MSR_IA32_APICBASE; - cpu_state.msr_data.entries[n++].index = MSR_IA32_SYSENTER_CS; - cpu_state.msr_data.entries[n++].index = MSR_IA32_SYSENTER_ESP; - cpu_state.msr_data.entries[n++].index = MSR_IA32_SYSENTER_EIP; - cpu_state.msr_data.entries[n++].index = MSR_IA32_CR_PAT; - cpu_state.msr_data.entries[n++].index = MSR_IA32_MISC_ENABLE; - cpu_state.msr_data.entries[n++].index = MSR_IA32_TSC; - cpu_state.msr_data.entries[n++].index = MSR_CSTAR; - cpu_state.msr_data.entries[n++].index = MSR_STAR; - cpu_state.msr_data.entries[n++].index = MSR_EFER; - cpu_state.msr_data.entries[n++].index = MSR_LSTAR; - cpu_state.msr_data.entries[n++].index = MSR_GS_BASE; - cpu_state.msr_data.entries[n++].index = MSR_FS_BASE; - cpu_state.msr_data.entries[n++].index = MSR_KERNEL_GS_BASE; - //msrs[n++].index = MSR_IA32_FEATURE_CONTROL; - cpu_state.msr_data.info.nmsrs = n; - - kvm_ioctl(vcpufd, KVM_GET_SREGS, &cpu_state.sregs); - kvm_ioctl(vcpufd, KVM_GET_REGS, &cpu_state.regs); - kvm_ioctl(vcpufd, KVM_GET_MSRS, &cpu_state.msr_data); - kvm_ioctl(vcpufd, KVM_GET_XCRS, &cpu_state.xcrs); - kvm_ioctl(vcpufd, KVM_GET_LAPIC, &cpu_state.lapic); - kvm_ioctl(vcpufd, KVM_GET_FPU, &cpu_state.fpu); - kvm_ioctl(vcpufd, KVM_GET_XSAVE, &cpu_state.xsave); - kvm_ioctl(vcpufd, KVM_GET_VCPU_EVENTS, &cpu_state.events); - kvm_ioctl(vcpufd, KVM_GET_MP_STATE, &cpu_state.mp_state); - - return cpu_state; -} - -void write_cpu_state(void) -{ - vcpu_state_t cpu_state = save_cpu_state(); - char fname[MAX_FNAME]; - snprintf(fname, MAX_FNAME, "checkpoint/chk%u_core%u.dat", no_checkpoint, cpuid); - - FILE* f = fopen(fname, "w"); - if (f == NULL) { - err(1, "fopen: unable to open file\n"); - } - - if (fwrite(&cpu_state, sizeof(cpu_state), 1, f) != 1) - err(1, "fwrite failed\n"); - - fclose(f); -} - -void scan_dirty_log(void (*save_page)(void*, size_t, void*, size_t)) -{ - size_t slot_offset = 0; - static struct kvm_dirty_log dlog = { - .slot = 0, - .dirty_bitmap = NULL - }; - size_t dirty_log_size = (guest_size >> PAGE_BITS) / sizeof(size_t); - - // do we create our first checkpoint - if (dlog.dirty_bitmap == NULL) { - // besure that all paddings are zero - memset(&dlog, 0x00, sizeof(dlog)); - - dlog.dirty_bitmap = malloc(dirty_log_size * sizeof(size_t)); - if (dlog.dirty_bitmap == NULL) - err(1, "malloc failed!\n"); - } - memset(dlog.dirty_bitmap, 0x00, dirty_log_size * sizeof(size_t)); - - dlog.slot = 0; -nextslot: - kvm_ioctl(vmfd, KVM_GET_DIRTY_LOG, &dlog); - - for(size_t i=0; i KVM_32BIT_GAP_START - GUEST_OFFSET)) { - slot_offset = KVM_32BIT_MAX_MEM_SIZE; - dlog.slot = 1; - memset(dlog.dirty_bitmap, 0x00, dirty_log_size * sizeof(size_t)); - goto nextslot; - } -} - -void scan_page_tables(void (*save_page)(void*, size_t, void*, size_t)) -{ - const size_t flag = (!full_checkpoint && (no_checkpoint > 0)) ? PG_DIRTY : PG_ACCESSED; - - size_t* pml4 = (size_t*) (guest_mem+elf_entry+PAGE_SIZE); - for(size_t i=0; i<(1 << PAGE_MAP_BITS); i++) { - if ((pml4[i] & PG_PRESENT) != PG_PRESENT) - continue; - //printf("pml[%zd] 0x%zx\n", i, pml4[i]); - size_t* pdpt = (size_t*) (guest_mem+(pml4[i] & PAGE_MASK)); - for(size_t j=0; j<(1 << PAGE_MAP_BITS); j++) { - if ((pdpt[j] & PG_PRESENT) != PG_PRESENT) - continue; - //printf("\tpdpt[%zd] 0x%zx\n", j, pdpt[j]); - size_t* pgd = (size_t*) (guest_mem+(pdpt[j] & PAGE_MASK)); - for(size_t k=0; k<(1 << PAGE_MAP_BITS); k++) { - if ((pgd[k] & PG_PRESENT) != PG_PRESENT) - continue; - //printf("\t\tpgd[%zd] 0x%zx\n", k, pgd[k] & ~PG_XD); - if ((pgd[k] & PG_PSE) != PG_PSE) { - size_t* pgt = (size_t*) (guest_mem+(pgd[k] & PAGE_MASK)); - for(size_t l=0; l<(1 << PAGE_MAP_BITS); l++) { - if ((pgt[l] & (PG_PRESENT|flag)) == (PG_PRESENT|flag)) { - //printf("\t\t\t*pgt[%zd] 0x%zx, 4KB\n", l, pgt[l] & ~PG_XD); - if (!full_checkpoint) - pgt[l] = pgt[l] & ~(PG_DIRTY|PG_ACCESSED); - size_t pgt_entry = pgt[l] & ~PG_PSE; // because PAT use the same bit as PSE - - save_page(&pgt_entry, sizeof(size_t), (void*) (guest_mem + (pgt[l] & PAGE_MASK)), (1UL << PAGE_BITS)); - } - } - } else if ((pgd[k] & flag) == flag) { - //printf("\t\t*pgd[%zd] 0x%zx, 2MB\n", k, pgd[k] & ~PG_XD); - if (!full_checkpoint) - pgd[k] = pgd[k] & ~(PG_DIRTY|PG_ACCESSED); - - save_page(pgd+k, sizeof(size_t), (void*) (guest_mem + (pgd[k] & PAGE_2M_MASK)), (1UL << PAGE_2M_BITS)); - } - } - } - } -} -void open_chk_file(char *fname) -{ - chk_file = fopen(fname, "w"); - if (chk_file == NULL) { - err(1, "fopen: unable to open file"); - } -} - -void close_chk_file(void) -{ - fclose(chk_file); -} - -void write_chk_file(void *addr, size_t bytes) -{ - if (fwrite(addr, bytes, 1, chk_file) != 1) { - err(1, "fwrite failed"); - } -} - -void write_mem_page_to_chk_file(void *entry, size_t entry_size, void *page, size_t page_size) -{ - write_chk_file(entry, entry_size); - write_chk_file(page, page_size); -} - -void determine_dirty_pages(void (*save_page_handler)(void*, size_t, void*, size_t)) -{ -#ifdef USE_DIRTY_LOG - scan_dirty_log(save_page_handler); -#else - scan_page_tables(save_page_handler); -#endif - -} - -void timer_handler(int signum) -{ - - struct stat st = {0}; - char fname[MAX_FNAME]; - struct timeval begin, end; - - if (verbose) - gettimeofday(&begin, NULL); - - if (stat("checkpoint", &st) == -1) - mkdir("checkpoint", 0700); - - for(size_t i = 0; i < ncores; i++) - if (vcpu_threads[i] != pthread_self()) - pthread_kill(vcpu_threads[i], SIGTHRCHKP); - - pthread_barrier_wait(&barrier); - - write_cpu_state(); - - snprintf(fname, MAX_FNAME, "checkpoint/chk%u_mem.dat", no_checkpoint); - - open_chk_file(fname); - - /*struct kvm_irqchip irqchip = {}; - if (cap_irqchip) - kvm_ioctl(vmfd, KVM_GET_IRQCHIP, &irqchip); - else - memset(&irqchip, 0x00, sizeof(irqchip)); - if (fwrite(&irqchip, sizeof(irqchip), 1, f) != 1) - err(1, "fwrite failed");*/ - - struct kvm_clock_data clock = {}; - kvm_ioctl(vmfd, KVM_GET_CLOCK, &clock); - write_chk_file(&clock, sizeof(clock)); - -#if 0 - if (fwrite(guest_mem, guest_size, 1, f) != 1) - err(1, "fwrite failed"); -#else - determine_dirty_pages(write_mem_page_to_chk_file); -#endif - close_chk_file(); - pthread_barrier_wait(&barrier); - - // update configuration file - FILE *f = fopen("checkpoint/chk_config.txt", "w"); - if (f == NULL) { - err(1, "fopen: unable to open file"); - } - - fprintf(f, "number of cores: %u\n", ncores); - fprintf(f, "memory size: 0x%zx\n", guest_size); - fprintf(f, "checkpoint number: %u\n", no_checkpoint); - fprintf(f, "entry point: 0x%zx\n", elf_entry); - if (full_checkpoint) - fprintf(f, "full checkpoint: 1"); - else - fprintf(f, "full checkpoint: 0"); - - fclose(f); - - if (verbose) { - gettimeofday(&end, NULL); - size_t msec = (end.tv_sec - begin.tv_sec) * 1000; - msec += (end.tv_usec - begin.tv_usec) / 1000; - fprintf(stderr, "Create checkpoint %u in %zd ms\n", no_checkpoint, msec); - } - - no_checkpoint++; -} - -void *migration_handler(void *arg) -{ - sigset_t *signal_mask = (sigset_t *)arg; - int res = 0; - size_t i = 0; - - int sig_caught; /* signal caught */ - - /* Use same mask as the set of signals that we'd like to know about! */ - sigwait(signal_mask, &sig_caught); - connect_to_server(); - - /* send metadata */ - migration_metadata_t metadata = { - ncores, - guest_size, - 0, /* no_checkpoint */ - elf_entry, - full_checkpoint}; - - /* the guest size is calculated at the destination again */ - if ((guest_size-KVM_32BIT_GAP_SIZE) >= KVM_32BIT_GAP_START) { - metadata.guest_size -= KVM_32BIT_GAP_SIZE; - } - - res = send_data(&metadata, sizeof(migration_metadata_t)); - fprintf(stderr, "Metadata sent! (%d bytes)\n", res); - - /* prepare info concerning memory chunks */ - mem_chunk_t *mem_chunks = NULL; - size_t mem_chunk_cnt = prepare_mem_chunk_info(&mem_chunks); - - if (get_migration_type() == MIG_TYPE_LIVE) { - /* resend rounds */ - for (i=0; incores, metadata->guest_size, metadata->no_checkpoint, metadata->elf_entry, metadata->full_checkpoint); -} - -void init_kvm_arch(void) -{ - uint64_t identity_base = 0xfffbc000; - if (ioctl(vmfd, KVM_CHECK_EXTENSION, KVM_CAP_SYNC_MMU) > 0) { - /* Allows up to 16M BIOSes. */ - identity_base = 0xfeffc000; - - kvm_ioctl(vmfd, KVM_SET_IDENTITY_MAP_ADDR, &identity_base); - } - kvm_ioctl(vmfd, KVM_SET_TSS_ADDR, identity_base + 0x1000); - - /* - * Allocate page-aligned guest memory. - * - * TODO: support of huge pages - */ - if (guest_size < KVM_32BIT_GAP_START) { - guest_mem = mmap(NULL, guest_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (guest_mem == MAP_FAILED) - err(1, "mmap failed"); - } else { - guest_size += KVM_32BIT_GAP_SIZE; - guest_mem = mmap(NULL, guest_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (guest_mem == MAP_FAILED) - err(1, "mmap failed"); - - /* - * We mprotect the gap PROT_NONE so that if we accidently write to it, we will know. - */ - mprotect(guest_mem + KVM_32BIT_GAP_START, KVM_32BIT_GAP_SIZE, PROT_NONE); - } - - const char* merge = getenv("HERMIT_MERGEABLE"); - if (merge && (strcmp(merge, "0") != 0)) { - /* - * The KSM feature is intended for applications that generate - * many instances of the same data (e.g., virtualization systems - * such as KVM). It can consume a lot of processing power! - */ - madvise(guest_mem, guest_size, MADV_MERGEABLE); - if (verbose) - fprintf(stderr, "VM uses KSN feature \"mergeable\" to reduce the memory footprint.\n"); - } - - const char* hugepage = getenv("HERMIT_HUGEPAGE"); - if (merge && (strcmp(merge, "0") != 0)) { - madvise(guest_mem, guest_size, MADV_HUGEPAGE); - if (verbose) - fprintf(stderr, "VM uses huge pages to improve the performance.\n"); - } - - struct kvm_userspace_memory_region kvm_region = { - .slot = 0, - .guest_phys_addr = GUEST_OFFSET, - .memory_size = guest_size, - .userspace_addr = (uint64_t) guest_mem, -#ifdef USE_DIRTY_LOG - .flags = KVM_MEM_LOG_DIRTY_PAGES, -#else - .flags = 0, -#endif - }; - - if (guest_size <= KVM_32BIT_GAP_START - GUEST_OFFSET) { - kvm_ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &kvm_region); - } else { - kvm_region.memory_size = KVM_32BIT_GAP_START - GUEST_OFFSET; - kvm_ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &kvm_region); - - kvm_region.slot = 1; - kvm_region.guest_phys_addr = KVM_32BIT_GAP_START + KVM_32BIT_GAP_SIZE; - kvm_region.userspace_addr = (uint64_t) guest_mem + KVM_32BIT_GAP_START + KVM_32BIT_GAP_SIZE; - kvm_region.memory_size = guest_size - KVM_32BIT_GAP_SIZE - KVM_32BIT_GAP_START + GUEST_OFFSET; - kvm_ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &kvm_region); - } - - kvm_ioctl(vmfd, KVM_CREATE_IRQCHIP, NULL); - -#ifdef KVM_CAP_X2APIC_API - // enable x2APIC support - struct kvm_enable_cap cap = { - .cap = KVM_CAP_X2APIC_API, - .flags = 0, - .args[0] = KVM_X2APIC_API_USE_32BIT_IDS|KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK, - }; - kvm_ioctl(vmfd, KVM_ENABLE_CAP, &cap); -#endif - - // initialited IOAPIC with HermitCore's default settings - struct kvm_irqchip chip; - chip.chip_id = KVM_IRQCHIP_IOAPIC; - kvm_ioctl(vmfd, KVM_GET_IRQCHIP, &chip); - for(int i=0; i> 32) | \ - (((x) & 0x000000FF00000000) >> 16) | \ - (((x) & 0x00000000FFFF0000) >> 16)) - -#define GDT_GET_LIMIT(x) (__u32)( \ - (((x) & 0x000F000000000000) >> 32) | \ - (((x) & 0x000000000000FFFF))) - -/* Constructor for a conventional segment GDT (or LDT) entry */ -/* This is a macro so it can be used in initializers */ -#define GDT_ENTRY(flags, base, limit) \ - ((((base) & _AC(0xff000000, ULL)) << (56-24)) | \ - (((flags) & _AC(0x0000f0ff, ULL)) << 40) | \ - (((limit) & _AC(0x000f0000, ULL)) << (48-16)) | \ - (((base) & _AC(0x00ffffff, ULL)) << 16) | \ - (((limit) & _AC(0x0000ffff, ULL)))) - -#define GDT_GET_G(x) (__u8)(((x) & 0x0080000000000000) >> 55) -#define GDT_GET_DB(x) (__u8)(((x) & 0x0040000000000000) >> 54) -#define GDT_GET_L(x) (__u8)(((x) & 0x0020000000000000) >> 53) -#define GDT_GET_AVL(x) (__u8)(((x) & 0x0010000000000000) >> 52) -#define GDT_GET_P(x) (__u8)(((x) & 0x0000800000000000) >> 47) -#define GDT_GET_DPL(x) (__u8)(((x) & 0x0000600000000000) >> 45) -#define GDT_GET_S(x) (__u8)(((x) & 0x0000100000000000) >> 44) -#define GDT_GET_TYPE(x)(__u8)(((x) & 0x00000F0000000000) >> 40) - -#define GDT_TO_KVM_SEGMENT(seg, gdt_table, sel) \ - do { \ - __u64 gdt_ent = gdt_table[sel]; \ - seg.base = GDT_GET_BASE(gdt_ent); \ - seg.limit = GDT_GET_LIMIT(gdt_ent); \ - seg.selector = sel * 8; \ - seg.type = GDT_GET_TYPE(gdt_ent); \ - seg.present = GDT_GET_P(gdt_ent); \ - seg.dpl = GDT_GET_DPL(gdt_ent); \ - seg.db = GDT_GET_DB(gdt_ent); \ - seg.s = GDT_GET_S(gdt_ent); \ - seg.l = GDT_GET_L(gdt_ent); \ - seg.g = GDT_GET_G(gdt_ent); \ - seg.avl = GDT_GET_AVL(gdt_ent); \ - } while (0) - -#endif diff --git a/tools/uhyve.c b/tools/uhyve.c deleted file mode 100644 index 9a1a31790..000000000 --- a/tools/uhyve.c +++ /dev/null @@ -1,813 +0,0 @@ -/* Copyright (c) 2015, IBM - * Author(s): Dan Williams - * Ricardo Koller - * Copyright (c) 2017, RWTH Aachen University - * Author(s): Stefan Lankes - * - * Permission to use, copy, modify, and/or distribute this software - * for any purpose with or without fee is hereby granted, provided - * that the above copyright notice and this permission notice appear - * in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL - * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE - * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR - * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS - * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, - * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -/* We used several existing projects as guides - * kvmtest.c: http://lwn.net/Articles/658512/ - * Solo5: https://github.com/Solo5/solo5 - */ - -/* - * 15.1.2017: extend original version (https://github.com/Solo5/solo5) - * for HermitCore - * 25.2.2017: add SMP support to enable more than one core - * 24.4.2017: add checkpoint/restore support, - * remove memory limit - */ - -#define _GNU_SOURCE - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "uhyve.h" -#include "uhyve-syscalls.h" -#include "uhyve-migration.h" -#include "uhyve-net.h" -#include "uhyve-gdb.h" -#include "proxy.h" - -static bool restart = false; -static bool migration = false; -static pthread_t net_thread; -static int* vcpu_fds = NULL; -static pthread_mutex_t kvm_lock = PTHREAD_MUTEX_INITIALIZER; - -extern bool verbose; - -static char* guest_path = NULL; -static bool uhyve_gdb_enabled = false; -size_t guest_size = 0x20000000ULL; -bool full_checkpoint = false; -pthread_barrier_t barrier; -pthread_barrier_t migration_barrier; -pthread_t* vcpu_threads = NULL; -uint8_t* klog = NULL; -uint8_t* guest_mem = NULL; -uint32_t no_checkpoint = 0; -uint32_t ncores = 1; -uint64_t elf_entry; -int kvm = -1, vmfd = -1, netfd = -1, efd = -1; -uint8_t* mboot = NULL; -__thread struct kvm_run *run = NULL; -__thread int vcpufd = -1; -__thread uint32_t cpuid = 0; -static sem_t net_sem; - -int uhyve_argc = -1; -int uhyve_envc = -1; -char **uhyve_argv = NULL; -extern char **environ; -char **uhyve_envp = NULL; - -vcpu_state_t *vcpu_thread_states = NULL; -static sigset_t signal_mask; - -typedef struct { - int argc; - int argsz[MAX_ARGC_ENVC]; - int envc; - int envsz[MAX_ARGC_ENVC]; -} __attribute__ ((packed)) uhyve_cmdsize_t; - -typedef struct { - char **argv; - char **envp; -} __attribute__ ((packed)) uhyve_cmdval_t; - -static uint64_t memparse(const char *ptr) -{ - // local pointer to end of parsed string - char *endptr; - - // parse number - uint64_t size = strtoull(ptr, &endptr, 0); - - // parse size extension, intentional fall-through - switch (*endptr) { - case 'E': - case 'e': - size <<= 10; - case 'P': - case 'p': - size <<= 10; - case 'T': - case 't': - size <<= 10; - case 'G': - case 'g': - size <<= 10; - case 'M': - case 'm': - size <<= 10; - case 'K': - case 'k': - size <<= 10; - endptr++; - default: - break; - } - - return size; -} - -// Just close file descriptor if not already done -static void close_fd(int* fd) -{ - if (*fd != -1) { - close(*fd); - *fd = -1; - } -} - -static void uhyve_exit(void* arg) -{ - //print_registers(); - - if (pthread_mutex_trylock(&kvm_lock)) - { - close_fd(&vcpufd); - return; - } - - // only the main thread will execute this - if (vcpu_threads) { - for(uint32_t i=0; i 0) - pthread_kill(net_thread, SIGTERM); - } - - close_fd(&vcpufd); -} - -static void uhyve_atexit(void) -{ - uhyve_exit(NULL); - - if (vcpu_threads) { - for(uint32_t i = 0; i < ncores; i++) { - if (pthread_self() == vcpu_threads[i]) - continue; - pthread_join(vcpu_threads[i], NULL); - } - - free(vcpu_threads); - } - - if (vcpu_fds) - free(vcpu_fds); - - // clean up and close KVM - close_fd(&vmfd); - close_fd(&kvm); -} - -static void* wait_for_packet(void* arg) -{ - int ret; - struct pollfd fds = { .fd = netfd, - .events = POLLIN, - .revents = 0}; - - while(1) - { - fds.revents = 0; - - ret = poll(&fds, 1, -1000); - - if (ret < 0 && errno == EINTR) - continue; - - if (ret < 0) - perror("poll()"); - else if (ret) { - uint64_t event_counter = 1; - write(efd, &event_counter, sizeof(event_counter)); - sem_wait(&net_sem); - } - } - - return NULL; -} - -static inline void check_network(void) -{ - // should we start the network thread? - if ((efd < 0) && (getenv("HERMIT_NETIF"))) { - struct kvm_irqfd irqfd = {}; - - efd = eventfd(0, 0); - irqfd.fd = efd; - irqfd.gsi = UHYVE_IRQ; - kvm_ioctl(vmfd, KVM_IRQFD, &irqfd); - - sem_init(&net_sem, 0, 0); - - if (pthread_create(&net_thread, NULL, wait_for_packet, NULL)) - err(1, "unable to create thread"); - } -} - -static int vcpu_loop(void) -{ - int ret; - - pthread_barrier_wait(&barrier); - - if (restart) { - vcpu_state_t cpu_state = read_cpu_state(); - restore_cpu_state(cpu_state); - } else if (vcpu_thread_states) { - restore_cpu_state(vcpu_thread_states[cpuid]); - } else { - init_cpu_state(elf_entry); - } - - if (cpuid == 0) { - if (restart) { - no_checkpoint++; - } else if (migration) { - free(vcpu_thread_states); - vcpu_thread_states = NULL; - } - } - - /* init uhyve gdb support */ - if (uhyve_gdb_enabled) { - if (cpuid == 0) - uhyve_gdb_init(vcpufd); - - pthread_barrier_wait(&barrier); - } - - while (1) { - ret = ioctl(vcpufd, KVM_RUN, NULL); - - if(ret == -1) { - switch(errno) { - case EINTR: - continue; - - case EFAULT: { - struct kvm_regs regs; - kvm_ioctl(vcpufd, KVM_GET_REGS, ®s); -#ifdef __x86_64__ - err(1, "KVM: host/guest translation fault: rip=0x%llx", regs.rip); -#else - err(1, "KVM: host/guest translation fault: elr_el1=0x%llx", regs.elr_el1); -#endif - } - - default: - err(1, "KVM: ioctl KVM_RUN in vcpu_loop for cpuid %d failed", cpuid); - break; - } - } - - uint64_t port = 0; - unsigned raddr = 0; - - /* handle requests */ - switch (run->exit_reason) { - case KVM_EXIT_HLT: - fprintf(stderr, "Guest has halted the CPU, this is considered as a normal exit.\n"); - if (uhyve_gdb_enabled) - uhyve_gdb_handle_term(); - return 0; - - case KVM_EXIT_MMIO: - port = run->mmio.phys_addr; - if (run->mmio.is_write) - memcpy(&raddr, run->mmio.data, sizeof(raddr) /*run->mmio.len*/); - //printf("KVM: handled KVM_EXIT_MMIO at 0x%lx (data %u)\n", port, raddr); - - case KVM_EXIT_IO: - if (!port) { - port = run->io.port; - raddr = *((unsigned*)((size_t)run+run->io.data_offset)); - } - - //printf("port 0x%x\n", run->io.port); - switch (port) { - case UHYVE_UART_PORT: - if (verbose) - putc((unsigned char) raddr, stderr); - break; - case UHYVE_PORT_WRITE: { - uhyve_write_t* uhyve_write = (uhyve_write_t*) (guest_mem+raddr); - - uhyve_write->len = write(uhyve_write->fd, guest_mem+(size_t)uhyve_write->buf, uhyve_write->len); - break; - } - - case UHYVE_PORT_READ: { - uhyve_read_t* uhyve_read = (uhyve_read_t*) (guest_mem+raddr); - - uhyve_read->ret = read(uhyve_read->fd, guest_mem+(size_t)uhyve_read->buf, uhyve_read->len); - break; - } - - case UHYVE_PORT_EXIT: { - if (cpuid) - pthread_exit((int*)(guest_mem+raddr)); - else - exit(*(int*)(guest_mem+raddr)); - break; - } - - case UHYVE_PORT_OPEN: { - uhyve_open_t* uhyve_open = (uhyve_open_t*) (guest_mem+raddr); - char rpath[PATH_MAX]; - - // forbid to open the kvm device - if (realpath((const char*)guest_mem+(size_t)uhyve_open->name, rpath) < 0) - uhyve_open->ret = -1; - else if (strcmp(rpath, "/dev/kvm") == 0) - uhyve_open->ret = -1; - else - uhyve_open->ret = open((const char*)guest_mem+(size_t)uhyve_open->name, uhyve_open->flags, uhyve_open->mode); - break; - } - - case UHYVE_PORT_CLOSE: { - uhyve_close_t* uhyve_close = (uhyve_close_t*) (guest_mem+raddr); - - if (uhyve_close->fd > 2) - uhyve_close->ret = close(uhyve_close->fd); - else - uhyve_close->ret = 0; - break; - } - - case UHYVE_PORT_NETINFO: { - uhyve_netinfo_t* uhyve_netinfo = (uhyve_netinfo_t*)(guest_mem+raddr); - memcpy(uhyve_netinfo->mac_str, uhyve_get_mac(), 18); - // guest configure the ethernet device => start network thread - check_network(); - break; - } - - case UHYVE_PORT_NETWRITE: { - uhyve_netwrite_t* uhyve_netwrite = (uhyve_netwrite_t*)(guest_mem + raddr); - uhyve_netwrite->ret = 0; - ret = write(netfd, guest_mem + (size_t)uhyve_netwrite->data, uhyve_netwrite->len); - if (ret >= 0) { - uhyve_netwrite->ret = 0; - uhyve_netwrite->len = ret; - } else { - uhyve_netwrite->ret = -1; - } - break; - } - - case UHYVE_PORT_NETREAD: { - uhyve_netread_t* uhyve_netread = (uhyve_netread_t*)(guest_mem + raddr); - ret = read(netfd, guest_mem + (size_t)uhyve_netread->data, uhyve_netread->len); - if (ret > 0) { - uhyve_netread->len = ret; - uhyve_netread->ret = 0; - } else { - uhyve_netread->ret = -1; - sem_post(&net_sem); - } - break; - } - - case UHYVE_PORT_NETSTAT: { - uhyve_netstat_t* uhyve_netstat = (uhyve_netstat_t*)(guest_mem + raddr); - char* str = getenv("HERMIT_NETIF"); - if (str) - uhyve_netstat->status = 1; - else - uhyve_netstat->status = 0; - break; - } - - case UHYVE_PORT_LSEEK: { - uhyve_lseek_t* uhyve_lseek = (uhyve_lseek_t*) (guest_mem+raddr); - - uhyve_lseek->offset = lseek(uhyve_lseek->fd, uhyve_lseek->offset, uhyve_lseek->whence); - break; - } - - case UHYVE_PORT_CMDSIZE: { - int i; - uhyve_cmdsize_t *val = (uhyve_cmdsize_t *) (guest_mem+raddr); - - val->argc = uhyve_argc; - for(i=0; iargsz[i] = strlen(uhyve_argv[i]) + 1; - - val->envc = uhyve_envc; - for(i=0; ienvsz[i] = strlen(uhyve_envp[i]) + 1; - - break; - } - - case UHYVE_PORT_CMDVAL: { - int i; - char **argv_ptr, **env_ptr; - uhyve_cmdval_t *val = (uhyve_cmdval_t *) (guest_mem+raddr); - - /* argv */ - argv_ptr = (char **)(guest_mem + (size_t)val->argv); - for(i=0; ienvp); - for(i=0; ifail_entry.hardware_entry_failure_reason); - break; - - case KVM_EXIT_INTERNAL_ERROR: - if (uhyve_gdb_enabled) - uhyve_gdb_handle_exception(vcpufd, GDB_SIGNAL_SEGV); - err(1, "KVM: internal error exit: suberror = 0x%x\n", run->internal.suberror); - break; - - case KVM_EXIT_SHUTDOWN: - fprintf(stderr, "KVM: receive shutdown command\n"); - - case KVM_EXIT_DEBUG: - if (uhyve_gdb_enabled) { - uhyve_gdb_handle_exception(vcpufd, GDB_SIGNAL_TRAP); - break; - } else print_registers(); - exit(EXIT_FAILURE); - - default: - fprintf(stderr, "KVM: unhandled exit: exit_reason = 0x%x\n", run->exit_reason); - exit(EXIT_FAILURE); - } - } - - close(vcpufd); - vcpufd = -1; - - return 0; -} - -static int vcpu_init(void) -{ - vcpu_fds[cpuid] = vcpufd = kvm_ioctl(vmfd, KVM_CREATE_VCPU, cpuid); - - /* Map the shared kvm_run structure and following data. */ - size_t mmap_size = (size_t) kvm_ioctl(kvm, KVM_GET_VCPU_MMAP_SIZE, NULL); - - if (mmap_size < sizeof(*run)) - err(1, "KVM: invalid VCPU_MMAP_SIZE: %zd", mmap_size); - - run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpufd, 0); - if (run == MAP_FAILED) - err(1, "KVM: VCPU mmap failed"); - - return 0; -} - -static void sigusr_handler(int signum) -{ - pthread_barrier_wait(&barrier); - write_cpu_state(); - - pthread_barrier_wait(&barrier); -} - -static void vcpu_thread_mig_handler(int signum) -{ - /* memory should be allocated at this point */ - assert(vcpu_thread_states != NULL); - - /* ensure consistency among VCPUs */ - pthread_barrier_wait(&barrier); - - /* save state */ - vcpu_thread_states[cpuid] = save_cpu_state(); - - /* synchronize with migration thread */ - pthread_barrier_wait(&migration_barrier); - - /* wait to be killed */ - pthread_barrier_wait(&migration_barrier); -} - -static void* uhyve_thread(void* arg) -{ - size_t ret; - struct sigaction sa; - - pthread_cleanup_push(uhyve_exit, NULL); - - cpuid = (size_t) arg; - - /* install signal handler for checkpoint */ - memset(&sa, 0x00, sizeof(sa)); - sa.sa_handler = &sigusr_handler; - sigaction(SIGTHRCHKP, &sa, NULL); - - /* install signal handler for migration */ - memset(&sa, 0x00, sizeof(sa)); - sa.sa_handler = &vcpu_thread_mig_handler; - sigaction(SIGTHRMIG, &sa, NULL); - - // create new cpu - vcpu_init(); - - pthread_barrier_wait(&barrier); - - // run cpu loop until thread gets killed - ret = vcpu_loop(); - - pthread_cleanup_pop(1); - - return (void*) ret; -} - -void sigterm_handler(int signum) -{ - pthread_exit(0); -} - -int uhyve_init(char *path) -{ - FILE *f = NULL; - guest_path = path; - - signal(SIGTERM, sigterm_handler); - - // register routine to close the VM - atexit(uhyve_atexit); - - const char *start_mig_server = getenv("HERMIT_MIGRATION_SERVER"); - - /* - * Three startups - * a) incoming migration - * b) load existing checkpoint - * c) normal run - */ - if (start_mig_server) { - migration = true; - migration_metadata_t metadata; - wait_for_incomming_migration(&metadata, MIGRATION_PORT); - - ncores = metadata.ncores; - guest_size = metadata.guest_size; - elf_entry = metadata.elf_entry; - full_checkpoint = metadata.full_checkpoint; - } else if ((f = fopen("checkpoint/chk_config.txt", "r")) != NULL) { - int tmp = 0; - restart = true; - - fscanf(f, "number of cores: %u\n", &ncores); - fscanf(f, "memory size: 0x%zx\n", &guest_size); - fscanf(f, "checkpoint number: %u\n", &no_checkpoint); - fscanf(f, "entry point: 0x%zx", &elf_entry); - fscanf(f, "full checkpoint: %d", &tmp); - full_checkpoint = tmp ? true : false; - - if (verbose) - fprintf(stderr, - "Restart from checkpoint %u " - "(ncores %d, mem size 0x%zx)\n", - no_checkpoint, ncores, guest_size); - fclose(f); - } else { - const char* hermit_memory = getenv("HERMIT_MEM"); - if (hermit_memory) - guest_size = memparse(hermit_memory); - - const char* hermit_cpus = getenv("HERMIT_CPUS"); - if (hermit_cpus) - ncores = (uint32_t) atoi(hermit_cpus); - - const char* full_chk = getenv("HERMIT_FULLCHECKPOINT"); - if (full_chk && (strcmp(full_chk, "0") != 0)) - full_checkpoint = true; - } - - vcpu_threads = (pthread_t*) calloc(ncores, sizeof(pthread_t)); - if (!vcpu_threads) - err(1, "Not enough memory"); - - vcpu_fds = (int*) calloc(ncores, sizeof(int)); - if (!vcpu_fds) - err(1, "Not enough memory"); - - kvm = open("/dev/kvm", O_RDWR | O_CLOEXEC); - if (kvm < 0) - err(1, "Could not open: /dev/kvm"); - - /* Make sure we have the stable version of the API */ - int kvm_api_version = kvm_ioctl(kvm, KVM_GET_API_VERSION, NULL); - if (kvm_api_version != 12) - err(1, "KVM: API version is %d, uhyve requires version 12", kvm_api_version); - - /* Create the virtual machine */ - vmfd = kvm_ioctl(kvm, KVM_CREATE_VM, 0); - -#ifdef __x86_64__ - init_kvm_arch(); - if (restart) { - if (load_checkpoint(guest_mem, path) != 0) - exit(EXIT_FAILURE); - } else if (start_mig_server) { - load_migration_data(guest_mem); - close_migration_channel(); - } else { - if (load_kernel(guest_mem, path) != 0) - exit(EXIT_FAILURE); - } -#endif - - pthread_barrier_init(&barrier, NULL, ncores); - pthread_barrier_init(&migration_barrier, NULL, ncores+1); - cpuid = 0; - - // create first CPU, it will be the boot processor by default - int ret = vcpu_init(); - - const char* netif_str = getenv("HERMIT_NETIF"); - if (netif_str) - { - // TODO: strncmp for different network interfaces - // for example tun/tap device or uhyvetap device - netfd = uhyve_net_init(netif_str); - if (netfd < 0) - err(1, "unable to initialized network"); - } - - return ret; -} - -int uhyve_loop(int argc, char **argv) -{ - const char* hermit_check = getenv("HERMIT_CHECKPOINT"); - const char* hermit_mig_support = getenv("HERMIT_MIGRATION_SUPPORT"); - const char* hermit_mig_type = getenv("HERMIT_MIGRATION_TYPE"); - const char* hermit_debug = getenv("HERMIT_DEBUG"); - int ts = 0, i = 0; - - if (hermit_debug && (atoi(hermit_debug) != 0)) - uhyve_gdb_enabled = true; - - /* argv[0] is 'proxy', do not count it */ - uhyve_argc = argc-1; - uhyve_argv = &argv[1]; - uhyve_envp = environ; - while(uhyve_envp[i] != NULL) - i++; - uhyve_envc = i; - - if (uhyve_argc > MAX_ARGC_ENVC) { - fprintf(stderr, "uhyve downsiize envc from %d to %d\n", uhyve_argc, MAX_ARGC_ENVC); - uhyve_argc = MAX_ARGC_ENVC; - } - - if (uhyve_envc > MAX_ARGC_ENVC-1) { - fprintf(stderr, "uhyve downsiize envc from %d to %d\n", uhyve_envc, MAX_ARGC_ENVC-1); - uhyve_envc = MAX_ARGC_ENVC-1; - } - - if (uhyve_argc > MAX_ARGC_ENVC || uhyve_envc > MAX_ARGC_ENVC) { - fprintf(stderr, "uhyve cannot forward more than %d command line " - "arguments or environment variables, please consider increasing " - "the MAX_ARGC_ENVP cmake argument\n", MAX_ARGC_ENVC); - return -1; - } - - if (hermit_check) - ts = atoi(hermit_check); - - if (hermit_mig_support) { - set_migration_target(hermit_mig_support, MIGRATION_PORT); - set_migration_type(hermit_mig_type); - - /* block SIGUSR1 in main thread */ - sigemptyset (&signal_mask); - sigaddset (&signal_mask, SIGUSR1); - pthread_sigmask (SIG_BLOCK, &signal_mask, NULL); - - /* start migration thread; handles SIGUSR1 */ - pthread_t sig_thr_id; - pthread_create (&sig_thr_id, NULL, migration_handler, (void *)&signal_mask); - - /* install signal handler for migration */ - struct sigaction sa; - memset(&sa, 0x00, sizeof(sa)); - sa.sa_handler = &vcpu_thread_mig_handler; - sigaction(SIGTHRMIG, &sa, NULL); - } - - - // First CPU is special because it will boot the system. Other CPUs will - // be booted linearily after the first one. - vcpu_threads[0] = pthread_self(); - - // start threads to create VCPUs - for(size_t i = 1; i < ncores; i++) - pthread_create(&vcpu_threads[i], NULL, uhyve_thread, (void*) i); - - pthread_barrier_wait(&barrier); - -#ifdef __aarch64__ - init_kvm_arch(); - if (restart) { - if (load_checkpoint(guest_mem, guest_path) != 0) - exit(EXIT_FAILURE); - } else { - if (load_kernel(guest_mem, guest_path) != 0) - exit(EXIT_FAILURE); - } -#endif - - *((uint32_t*) (mboot+0x24)) = ncores; - - if (ts > 0) - { - struct sigaction sa; - struct itimerval timer; - - /* Install timer_handler as the signal handler for SIGVTALRM. */ - memset(&sa, 0x00, sizeof(sa)); - sa.sa_handler = &timer_handler; - sigaction(SIGALRM, &sa, NULL); - - /* Configure the timer to expire after "ts" sec... */ - timer.it_value.tv_sec = ts; - timer.it_value.tv_usec = 0; - /* ... and every "ts" sec after that. */ - timer.it_interval.tv_sec = ts; - timer.it_interval.tv_usec = 0; - /* Start a virtual timer. It counts down whenever this process is executing. */ - setitimer(ITIMER_REAL, &timer, NULL); - } - - // Run first CPU - return vcpu_loop(); -} diff --git a/tools/uhyve.h b/tools/uhyve.h deleted file mode 100644 index addaed989..000000000 --- a/tools/uhyve.h +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright (c) 2018, Stefan Lankes, RWTH Aachen University - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this - * software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __UHYVE_H__ -#define __UHYVE_H__ - -#include -#include - -#define UHYVE_PORT_WRITE 0x400 -#define UHYVE_PORT_OPEN 0x440 -#define UHYVE_PORT_CLOSE 0x480 -#define UHYVE_PORT_READ 0x500 -#define UHYVE_PORT_EXIT 0x540 -#define UHYVE_PORT_LSEEK 0x580 - -// Networkports -#define UHYVE_PORT_NETINFO 0x600 -#define UHYVE_PORT_NETWRITE 0x640 -#define UHYVE_PORT_NETREAD 0x680 -#define UHYVE_PORT_NETSTAT 0x700 - -/* Ports and data structures for uhyve command line arguments and envp - * forwarding */ -#define UHYVE_PORT_CMDSIZE 0x740 -#define UHYVE_PORT_CMDVAL 0x780 - -#define UHYVE_UART_PORT 0x800 - -#define UHYVE_IRQ 11 - -#define SIGTHRCHKP (SIGRTMIN+0) -#define SIGTHRMIG (SIGRTMIN+1) - -#define kvm_ioctl(fd, cmd, arg) ({ \ - const int ret = ioctl(fd, cmd, arg); \ - if(ret == -1) \ - err(1, "KVM: ioctl " #cmd " failed"); \ - ret; \ - }) - -#ifdef __x86_64__ -#define MAX_MSR_ENTRIES 25 -struct msr_data { - struct kvm_msrs info; - struct kvm_msr_entry entries[MAX_MSR_ENTRIES]; -}; - -typedef struct _vcpu_state { - struct msr_data msr_data; - struct kvm_regs regs; - struct kvm_sregs sregs; - struct kvm_fpu fpu; - struct kvm_lapic_state lapic; - struct kvm_xsave xsave; - struct kvm_xcrs xcrs; - struct kvm_vcpu_events events; - struct kvm_mp_state mp_state; -} vcpu_state_t; -#else -typedef struct _vcpu_state { - int dummy; -} vcpu_state_t; -#endif - -typedef struct _migration_metadata migration_metadata_t; - -void print_registers(void); -void timer_handler(int signum); -void *migration_handler(void *arg); -void restore_cpu_state(vcpu_state_t cpu_state); -vcpu_state_t read_cpu_state(void); -vcpu_state_t save_cpu_state(void); -void write_cpu_state(void); -void init_cpu_state(uint64_t elf_entry); -int load_kernel(uint8_t* mem, char* path); -int load_checkpoint(uint8_t* mem, char* path); -int load_migration_data(uint8_t* mem); -void wait_for_incomming_migration(migration_metadata_t *metadata, uint16_t listen_portno); -void init_kvm_arch(void); -int load_kernel(uint8_t* mem, char* path); -size_t determine_dest_offset(size_t src_addr); -void determine_dirty_pages(void (*save_page_handler)(void*, size_t, void*, size_t)); - -#endif diff --git a/tools/utils.c b/tools/utils.c deleted file mode 100644 index 6ddd7b19d..000000000 --- a/tools/utils.c +++ /dev/null @@ -1,175 +0,0 @@ -/* -* Copyright (c) 2017, Stefan Lankes, RWTH Aachen University -* All rights reserved. -* -* Redistribution and use in source and binary forms, with or without -* modification, are permitted provided that the following conditions are met: -* * Redistributions of source code must retain the above copyright -* notice, this list of conditions and the following disclaimer. -* * Redistributions in binary form must reproduce the above copyright -* notice, this list of conditions and the following disclaimer in the -* documentation and/or other materials provided with the distribution. -* * Neither the name of the University nor the names of its contributors -* may be used to endorse or promote products derived from this -* software without specific prior written permission. -* -* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY -* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#define _GNU_SOURCE - -#include -#include -#include -#include -#include -#include - -#include "proxy.h" - -#ifdef __x86_64__ -inline static void __cpuid(uint32_t code, uint32_t* a, uint32_t* b, uint32_t* c, uint32_t* d) -{ - __asm volatile ("cpuid" : "=a"(*a), "=b"(*b), "=c"(*c), "=d"(*d) : "0"(code), "2"(*c)); -} - -// Try to determine the frequency from the CPU brand. -// Code is derived from the manual "Intel Processor -// Identification and the CPUID Instruction". -static uint32_t get_frequency_from_brand(void) -{ - char cpu_brand[4*3*sizeof(uint32_t)+1] = {[0 ... 4*3*sizeof(uint32_t)] = 0}; - uint32_t* bint = (uint32_t*) cpu_brand; - uint32_t index, multiplier = 0; - uint32_t cpu_freq = 0; - uint32_t extended; - - __cpuid(0x80000000, &extended, bint+1, bint+2, bint+3); - if (extended < 0x80000004) - return 0; - - __cpuid(0x80000002, bint+0, bint+1, bint+2, bint+3); - __cpuid(0x80000003, bint+4, bint+5, bint+6, bint+7); - __cpuid(0x80000004, bint+8, bint+9, bint+10, bint+11); - - for(index=0; index 0) { - uint32_t freq; - - // Compute frequency (in MHz) from brand string - if (cpu_brand[index-3] == '.') { // If format is “x.xx” - freq = (uint32_t)(cpu_brand[index-4] - '0') * multiplier; - freq += (uint32_t)(cpu_brand[index-2] - '0') * (multiplier / 10); - freq += (uint32_t)(cpu_brand[index-1] - '0') * (multiplier / 100); - } else { // If format is xxxx - freq = (uint32_t)(cpu_brand[index-4] - '0') * 1000; - freq += (uint32_t)(cpu_brand[index-3] - '0') * 100; - freq += (uint32_t)(cpu_brand[index-2] - '0') * 10; - freq += (uint32_t)(cpu_brand[index-1] - '0'); - freq *= multiplier; - } - - return freq; - } - } - - return 0; -} -#endif - -uint32_t get_cpufreq(void) -{ - char line[128]; - uint32_t freq = 0; - char* match; - -#ifdef __x86_64__ - freq = get_frequency_from_brand(); - if (freq > 0) - return freq; -#endif - - // TODO: fallback solution, on some systems is cpuinfo_max_freq the turbo frequency - // => wrong value - FILE* fp = fopen("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", "r"); - if (fp != NULL) { - if (fgets(line, sizeof(line), fp) != NULL) { - // cpuinfo_max_freq is in kHz - freq = (uint32_t) atoi(line) / 1000; - } - - fclose(fp); - } else if( (fp = fopen("/proc/cpuinfo", "r")) ) { - // Resorting to /proc/cpuinfo, however on most systems this will only - // return the current frequency that might change over time. - // Currently only needed when running inside a VM - - // read until we find the line indicating cpu frequency - while(fgets(line, sizeof(line), fp) != NULL) { - match = strstr(line, "cpu MHz"); - - if(match != NULL) { - // advance pointer to beginning of number - while( ((*match < '0') || (*match > '9')) && (*match != '\0') ) - match++; - - freq = (uint32_t) atoi(match); - break; - } - } - - fclose(fp); - } - - return freq; -} - -ssize_t pread_in_full(int fd, void *buf, size_t count, off_t offset) -{ - ssize_t total = 0; - char *p = buf; - - if (count > SSIZE_MAX) { - errno = E2BIG; - return -1; - } - - while (count > 0) { - ssize_t nr; - - nr = pread(fd, p, count, offset); - if (nr == 0) - return total; - else if (nr == -1 && errno == EINTR) - continue; - else if (nr == -1) - return -1; - - count -= nr; - total += nr; - p += nr; - offset += nr; - } - - return total; -}