move proxy and hypervisor to a new submodule

Stefan Lankes 2018-07-08 22:21:07 +02:00
parent 69ed9f5354
commit d62fb9f762
24 changed files with 8 additions and 7567 deletions

.gitmodules

@@ -6,3 +6,6 @@
path = usr/libomp
url = https://github.com/hermitcore/libomp_oss.git
branch = hermit
[submodule "caves"]
path = caves
url = https://github.com/hermitcore/hermit-caves.git

CMakeLists.txt

@@ -127,6 +127,9 @@ install(DIRECTORY include/hermit
FILES_MATCHING
PATTERN *.h)
install(FILES tools/init.sh
DESTINATION tools)
# provide custom target to only install libhermit without its runtimes which is
# needed during the compilation of the cross toolchain
add_custom_target(hermit-bootstrap-install
@@ -150,7 +153,7 @@ add_custom_target(hermit
# be relocated for installation
## HermitCore's own tools such as Qemu/KVM proxy
build_external(tools ${HERMIT_ROOT}/tools "")
build_external(caves ${HERMIT_ROOT}/caves "")
if("${TARGET_ARCH}" STREQUAL "x86_64-hermit")

caves Submodule

@@ -0,0 +1 @@
Subproject commit 155b31e13779b8d2446781b779bfa6a6ae46748c

tools/CMakeLists.txt

@@ -1,53 +0,0 @@
cmake_minimum_required(VERSION 3.7)
project(hermit_tools)
include(CheckIncludeFiles)
include(../cmake/HermitCore-Paths.cmake)
option(ENABLE_RDMA_MIGRATION "Migration support via RDMA" OFF)
add_compile_options(-std=c99)
list(APPEND LIBS "-pthread")
set(SRC proxy.c
utils.c
uhyve.c
uhyve-net.c
uhyve-migration.c
uhyve-x86_64.c
uhyve-aarch64.c
uhyve-gdb-x86_64.c
uhyve-gdb-aarch64.c
)
### Optional migration via RDMA
if(ENABLE_RDMA_MIGRATION)
add_definitions(-D__RDMA_MIGRATION__)
list(APPEND LIBS "-libverbs")
set(SRC ${SRC} uhyve-migration-rdma.c)
else()
remove_definitions(-D__RDMA_MIGRATION__)
endif()
check_include_files(asm/msr-index.h HAVE_MSR_INDEX_H)
if(HAVE_MSR_INDEX_H)
add_definitions(-DHAVE_MSR_INDEX_H=1)
endif()
add_executable(proxy ${SRC})
target_compile_options(proxy PUBLIC ${LIBS})
target_compile_options(proxy PUBLIC -DMAX_ARGC_ENVC=${MAX_ARGC_ENVC})
target_link_libraries(proxy ${LIBS})
install(TARGETS proxy
DESTINATION bin)
install(FILES init.sh
DESTINATION tools)
# Show include files in IDE
file(GLOB_RECURSE TOOLS_INCLUDES "*.h")
add_custom_target(tools_includes_ide SOURCES ${TOOLS_INCLUDES})

tools/proxy.c: file diff suppressed because it is too large

tools/proxy.h

@@ -1,55 +0,0 @@
/*
* Copyright (c) 2017, Stefan Lankes, RWTH Aachen University
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __PROXY_H__
#define __PROXY_H__
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <unistd.h>
#include <stdlib.h>
#include <stdint.h>
#define HERMIT_ELFOSABI 0x42
#define __HERMIT_exit 0
#define __HERMIT_write 1
#define __HERMIT_open 2
#define __HERMIT_close 3
#define __HERMIT_read 4
#define __HERMIT_lseek 5
int uhyve_init(char *path);
int uhyve_loop(int argc, char **argv);
// define some helper functions
uint32_t get_cpufreq(void);
ssize_t pread_in_full(int fd, void *buf, size_t count, off_t offset);
#endif
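Among the helpers declared above, pread_in_full() reads exactly the requested number of bytes, retrying short reads. A minimal sketch, assuming only POSIX pread(2) (the implementation actually shipped with the removed tools, presumably in utils.c, may differ):

#include <errno.h>
#include <sys/types.h>
#include <unistd.h>

/* Read exactly count bytes at offset, retrying short reads and EINTR. */
ssize_t pread_in_full(int fd, void *buf, size_t count, off_t offset)
{
	ssize_t total = 0;

	while ((size_t)total < count) {
		ssize_t n = pread(fd, (char *)buf + total,
		                  count - total, offset + total);
		if (n < 0) {
			if (errno == EINTR)
				continue;	/* interrupted, retry */
			return -1;	/* hard error */
		}
		if (n == 0)
			break;		/* unexpected EOF */
		total += n;
	}
	return total;
}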

tools/queue.h

@@ -1,638 +0,0 @@
/* $NetBSD: queue.h,v 1.68 2014/11/19 08:10:01 uebayasi Exp $ */
/*
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)queue.h 8.5 (Berkeley) 8/20/94
*/
#ifndef _UHYVE_QUEUE_H_
#define _UHYVE_QUEUE_H_
/*
* This file defines five types of data structures: singly-linked lists,
* lists, simple queues, tail queues, and circular queues.
*
* A singly-linked list is headed by a single forward pointer. The
* elements are singly linked for minimum space and pointer manipulation
* overhead at the expense of O(n) removal for arbitrary elements. New
* elements can be added to the list after an existing element or at the
* head of the list. Elements being removed from the head of the list
* should use the explicit macro for this purpose for optimum
* efficiency. A singly-linked list may only be traversed in the forward
* direction. Singly-linked lists are ideal for applications with large
* datasets and few or no removals or for implementing a LIFO queue.
*
* A list is headed by a single forward pointer (or an array of forward
* pointers for a hash table header). The elements are doubly linked
* so that an arbitrary element can be removed without a need to
* traverse the list. New elements can be added to the list before
* or after an existing element or at the head of the list. A list
* may only be traversed in the forward direction.
*
* A simple queue is headed by a pair of pointers, one the head of the
* list and the other to the tail of the list. The elements are singly
* linked to save space, so elements can only be removed from the
* head of the list. New elements can be added to the list after
* an existing element, at the head of the list, or at the end of the
* list. A simple queue may only be traversed in the forward direction.
*
* A tail queue is headed by a pair of pointers, one to the head of the
* list and the other to the tail of the list. The elements are doubly
* linked so that an arbitrary element can be removed without a need to
* traverse the list. New elements can be added to the list before or
* after an existing element, at the head of the list, or at the end of
* the list. A tail queue may be traversed in either direction.
*
* A circle queue is headed by a pair of pointers, one to the head of the
* list and the other to the tail of the list. The elements are doubly
* linked so that an arbitrary element can be removed without a need to
* traverse the list. New elements can be added to the list before or after
* an existing element, at the head of the list, or at the end of the list.
* A circle queue may be traversed in either direction, but has a more
* complex end of list detection.
*
* For details on the use of these macros, see the queue(3) manual page.
*/
/*
* Singly-linked List definitions.
*/
#define SLIST_HEAD(name, type) \
struct name { \
struct type *slh_first; /* first element */ \
}
#define SLIST_HEAD_INITIALIZER(head) \
{ NULL }
#define SLIST_ENTRY(type) \
struct { \
struct type *sle_next; /* next element */ \
}
/*
* Singly-linked List access methods.
*/
#define SLIST_FIRST(head) ((head)->slh_first)
#define SLIST_END(head) NULL
#define SLIST_EMPTY(head) ((head)->slh_first == NULL)
#define SLIST_NEXT(elm, field) ((elm)->field.sle_next)
#define SLIST_FOREACH(var, head, field) \
for((var) = (head)->slh_first; \
(var) != SLIST_END(head); \
(var) = (var)->field.sle_next)
#define SLIST_FOREACH_SAFE(var, head, field, tvar) \
for ((var) = SLIST_FIRST((head)); \
(var) != SLIST_END(head) && \
((tvar) = SLIST_NEXT((var), field), 1); \
(var) = (tvar))
/*
* Singly-linked List functions.
*/
#define SLIST_INIT(head) do { \
(head)->slh_first = SLIST_END(head); \
} while (/*CONSTCOND*/0)
#define SLIST_INSERT_AFTER(slistelm, elm, field) do { \
(elm)->field.sle_next = (slistelm)->field.sle_next; \
(slistelm)->field.sle_next = (elm); \
} while (/*CONSTCOND*/0)
#define SLIST_INSERT_HEAD(head, elm, field) do { \
(elm)->field.sle_next = (head)->slh_first; \
(head)->slh_first = (elm); \
} while (/*CONSTCOND*/0)
#define SLIST_REMOVE_AFTER(slistelm, field) do { \
(slistelm)->field.sle_next = \
SLIST_NEXT(SLIST_NEXT((slistelm), field), field); \
} while (/*CONSTCOND*/0)
#define SLIST_REMOVE_HEAD(head, field) do { \
(head)->slh_first = (head)->slh_first->field.sle_next; \
} while (/*CONSTCOND*/0)
#define SLIST_REMOVE(head, elm, type, field) do { \
if ((head)->slh_first == (elm)) { \
SLIST_REMOVE_HEAD((head), field); \
} \
else { \
struct type *curelm = (head)->slh_first; \
while(curelm->field.sle_next != (elm)) \
curelm = curelm->field.sle_next; \
curelm->field.sle_next = \
curelm->field.sle_next->field.sle_next; \
} \
} while (/*CONSTCOND*/0)
/*
* List definitions.
*/
#define LIST_HEAD(name, type) \
struct name { \
struct type *lh_first; /* first element */ \
}
#define LIST_HEAD_INITIALIZER(head) \
{ NULL }
#define LIST_ENTRY(type) \
struct { \
struct type *le_next; /* next element */ \
struct type **le_prev; /* address of previous next element */ \
}
/*
* List access methods.
*/
#define LIST_FIRST(head) ((head)->lh_first)
#define LIST_END(head) NULL
#define LIST_EMPTY(head) ((head)->lh_first == LIST_END(head))
#define LIST_NEXT(elm, field) ((elm)->field.le_next)
#define LIST_FOREACH(var, head, field) \
for ((var) = ((head)->lh_first); \
(var) != LIST_END(head); \
(var) = ((var)->field.le_next))
#define LIST_FOREACH_SAFE(var, head, field, tvar) \
for ((var) = LIST_FIRST((head)); \
(var) != LIST_END(head) && \
((tvar) = LIST_NEXT((var), field), 1); \
(var) = (tvar))
#define LIST_MOVE(head1, head2) do { \
LIST_INIT((head2)); \
if (!LIST_EMPTY((head1))) { \
(head2)->lh_first = (head1)->lh_first; \
LIST_INIT((head1)); \
} \
} while (/*CONSTCOND*/0)
/*
* List functions.
*/
#if defined(QUEUEDEBUG)
#define QUEUEDEBUG_LIST_INSERT_HEAD(head, elm, field) \
if ((head)->lh_first && \
(head)->lh_first->field.le_prev != &(head)->lh_first) \
QUEUEDEBUG_ABORT("LIST_INSERT_HEAD %p %s:%d", (head), \
__FILE__, __LINE__);
#define QUEUEDEBUG_LIST_OP(elm, field) \
if ((elm)->field.le_next && \
(elm)->field.le_next->field.le_prev != \
&(elm)->field.le_next) \
QUEUEDEBUG_ABORT("LIST_* forw %p %s:%d", (elm), \
__FILE__, __LINE__); \
if (*(elm)->field.le_prev != (elm)) \
QUEUEDEBUG_ABORT("LIST_* back %p %s:%d", (elm), \
__FILE__, __LINE__);
#define QUEUEDEBUG_LIST_POSTREMOVE(elm, field) \
(elm)->field.le_next = (void *)1L; \
(elm)->field.le_prev = (void *)1L;
#else
#define QUEUEDEBUG_LIST_INSERT_HEAD(head, elm, field)
#define QUEUEDEBUG_LIST_OP(elm, field)
#define QUEUEDEBUG_LIST_POSTREMOVE(elm, field)
#endif
#define LIST_INIT(head) do { \
(head)->lh_first = LIST_END(head); \
} while (/*CONSTCOND*/0)
#define LIST_INSERT_AFTER(listelm, elm, field) do { \
QUEUEDEBUG_LIST_OP((listelm), field) \
if (((elm)->field.le_next = (listelm)->field.le_next) != \
LIST_END(head)) \
(listelm)->field.le_next->field.le_prev = \
&(elm)->field.le_next; \
(listelm)->field.le_next = (elm); \
(elm)->field.le_prev = &(listelm)->field.le_next; \
} while (/*CONSTCOND*/0)
#define LIST_INSERT_BEFORE(listelm, elm, field) do { \
QUEUEDEBUG_LIST_OP((listelm), field) \
(elm)->field.le_prev = (listelm)->field.le_prev; \
(elm)->field.le_next = (listelm); \
*(listelm)->field.le_prev = (elm); \
(listelm)->field.le_prev = &(elm)->field.le_next; \
} while (/*CONSTCOND*/0)
#define LIST_INSERT_HEAD(head, elm, field) do { \
QUEUEDEBUG_LIST_INSERT_HEAD((head), (elm), field) \
if (((elm)->field.le_next = (head)->lh_first) != LIST_END(head))\
(head)->lh_first->field.le_prev = &(elm)->field.le_next;\
(head)->lh_first = (elm); \
(elm)->field.le_prev = &(head)->lh_first; \
} while (/*CONSTCOND*/0)
#define LIST_REMOVE(elm, field) do { \
QUEUEDEBUG_LIST_OP((elm), field) \
if ((elm)->field.le_next != NULL) \
(elm)->field.le_next->field.le_prev = \
(elm)->field.le_prev; \
*(elm)->field.le_prev = (elm)->field.le_next; \
QUEUEDEBUG_LIST_POSTREMOVE((elm), field) \
} while (/*CONSTCOND*/0)
#define LIST_REPLACE(elm, elm2, field) do { \
if (((elm2)->field.le_next = (elm)->field.le_next) != NULL) \
(elm2)->field.le_next->field.le_prev = \
&(elm2)->field.le_next; \
(elm2)->field.le_prev = (elm)->field.le_prev; \
*(elm2)->field.le_prev = (elm2); \
QUEUEDEBUG_LIST_POSTREMOVE((elm), field) \
} while (/*CONSTCOND*/0)
/*
* Simple queue definitions.
*/
#define SIMPLEQ_HEAD(name, type) \
struct name { \
struct type *sqh_first; /* first element */ \
struct type **sqh_last; /* addr of last next element */ \
}
#define SIMPLEQ_HEAD_INITIALIZER(head) \
{ NULL, &(head).sqh_first }
#define SIMPLEQ_ENTRY(type) \
struct { \
struct type *sqe_next; /* next element */ \
}
/*
* Simple queue access methods.
*/
#define SIMPLEQ_FIRST(head) ((head)->sqh_first)
#define SIMPLEQ_END(head) NULL
#define SIMPLEQ_EMPTY(head) ((head)->sqh_first == SIMPLEQ_END(head))
#define SIMPLEQ_NEXT(elm, field) ((elm)->field.sqe_next)
#define SIMPLEQ_FOREACH(var, head, field) \
for ((var) = ((head)->sqh_first); \
(var) != SIMPLEQ_END(head); \
(var) = ((var)->field.sqe_next))
#define SIMPLEQ_FOREACH_SAFE(var, head, field, next) \
for ((var) = ((head)->sqh_first); \
(var) != SIMPLEQ_END(head) && \
((next = ((var)->field.sqe_next)), 1); \
(var) = (next))
/*
* Simple queue functions.
*/
#define SIMPLEQ_INIT(head) do { \
(head)->sqh_first = NULL; \
(head)->sqh_last = &(head)->sqh_first; \
} while (/*CONSTCOND*/0)
#define SIMPLEQ_INSERT_HEAD(head, elm, field) do { \
if (((elm)->field.sqe_next = (head)->sqh_first) == NULL) \
(head)->sqh_last = &(elm)->field.sqe_next; \
(head)->sqh_first = (elm); \
} while (/*CONSTCOND*/0)
#define SIMPLEQ_INSERT_TAIL(head, elm, field) do { \
(elm)->field.sqe_next = NULL; \
*(head)->sqh_last = (elm); \
(head)->sqh_last = &(elm)->field.sqe_next; \
} while (/*CONSTCOND*/0)
#define SIMPLEQ_INSERT_AFTER(head, listelm, elm, field) do { \
if (((elm)->field.sqe_next = (listelm)->field.sqe_next) == NULL)\
(head)->sqh_last = &(elm)->field.sqe_next; \
(listelm)->field.sqe_next = (elm); \
} while (/*CONSTCOND*/0)
#define SIMPLEQ_REMOVE_HEAD(head, field) do { \
if (((head)->sqh_first = (head)->sqh_first->field.sqe_next) == NULL) \
(head)->sqh_last = &(head)->sqh_first; \
} while (/*CONSTCOND*/0)
#define SIMPLEQ_REMOVE_AFTER(head, elm, field) do { \
if (((elm)->field.sqe_next = (elm)->field.sqe_next->field.sqe_next) \
== NULL) \
(head)->sqh_last = &(elm)->field.sqe_next; \
} while (/*CONSTCOND*/0)
#define SIMPLEQ_REMOVE(head, elm, type, field) do { \
if ((head)->sqh_first == (elm)) { \
SIMPLEQ_REMOVE_HEAD((head), field); \
} else { \
struct type *curelm = (head)->sqh_first; \
while (curelm->field.sqe_next != (elm)) \
curelm = curelm->field.sqe_next; \
if ((curelm->field.sqe_next = \
curelm->field.sqe_next->field.sqe_next) == NULL) \
(head)->sqh_last = &(curelm)->field.sqe_next; \
} \
} while (/*CONSTCOND*/0)
#define SIMPLEQ_CONCAT(head1, head2) do { \
if (!SIMPLEQ_EMPTY((head2))) { \
*(head1)->sqh_last = (head2)->sqh_first; \
(head1)->sqh_last = (head2)->sqh_last; \
SIMPLEQ_INIT((head2)); \
} \
} while (/*CONSTCOND*/0)
#define SIMPLEQ_LAST(head, type, field) \
(SIMPLEQ_EMPTY((head)) ? \
NULL : \
((struct type *)(void *) \
((char *)((head)->sqh_last) - offsetof(struct type, field))))
/*
* Tail queue definitions.
*/
#define _TAILQ_HEAD(name, type, qual) \
struct name { \
qual type *tqh_first; /* first element */ \
qual type *qual *tqh_last; /* addr of last next element */ \
}
#define TAILQ_HEAD(name, type) _TAILQ_HEAD(name, struct type,)
#define TAILQ_HEAD_INITIALIZER(head) \
{ TAILQ_END(head), &(head).tqh_first }
#define _TAILQ_ENTRY(type, qual) \
struct { \
qual type *tqe_next; /* next element */ \
qual type *qual *tqe_prev; /* address of previous next element */\
}
#define TAILQ_ENTRY(type) _TAILQ_ENTRY(struct type,)
/*
* Tail queue access methods.
*/
#define TAILQ_FIRST(head) ((head)->tqh_first)
#define TAILQ_END(head) (NULL)
#define TAILQ_NEXT(elm, field) ((elm)->field.tqe_next)
#define TAILQ_LAST(head, headname) \
(*(((struct headname *)((head)->tqh_last))->tqh_last))
#define TAILQ_PREV(elm, headname, field) \
(*(((struct headname *)((elm)->field.tqe_prev))->tqh_last))
#define TAILQ_EMPTY(head) (TAILQ_FIRST(head) == TAILQ_END(head))
#define TAILQ_FOREACH(var, head, field) \
for ((var) = ((head)->tqh_first); \
(var) != TAILQ_END(head); \
(var) = ((var)->field.tqe_next))
#define TAILQ_FOREACH_SAFE(var, head, field, next) \
for ((var) = ((head)->tqh_first); \
(var) != TAILQ_END(head) && \
((next) = TAILQ_NEXT(var, field), 1); (var) = (next))
#define TAILQ_FOREACH_REVERSE(var, head, headname, field) \
for ((var) = (*(((struct headname *)((head)->tqh_last))->tqh_last));\
(var) != TAILQ_END(head); \
(var) = (*(((struct headname *)((var)->field.tqe_prev))->tqh_last)))
#define TAILQ_FOREACH_REVERSE_SAFE(var, head, headname, field, prev) \
for ((var) = TAILQ_LAST((head), headname); \
(var) != TAILQ_END(head) && \
((prev) = TAILQ_PREV((var), headname, field), 1); (var) = (prev))
/*
* Tail queue functions.
*/
#if defined(QUEUEDEBUG)
#define QUEUEDEBUG_TAILQ_INSERT_HEAD(head, elm, field) \
if ((head)->tqh_first && \
(head)->tqh_first->field.tqe_prev != &(head)->tqh_first) \
QUEUEDEBUG_ABORT("TAILQ_INSERT_HEAD %p %s:%d", (head), \
__FILE__, __LINE__);
#define QUEUEDEBUG_TAILQ_INSERT_TAIL(head, elm, field) \
if (*(head)->tqh_last != NULL) \
QUEUEDEBUG_ABORT("TAILQ_INSERT_TAIL %p %s:%d", (head), \
__FILE__, __LINE__);
#define QUEUEDEBUG_TAILQ_OP(elm, field) \
if ((elm)->field.tqe_next && \
(elm)->field.tqe_next->field.tqe_prev != \
&(elm)->field.tqe_next) \
QUEUEDEBUG_ABORT("TAILQ_* forw %p %s:%d", (elm), \
__FILE__, __LINE__); \
if (*(elm)->field.tqe_prev != (elm)) \
QUEUEDEBUG_ABORT("TAILQ_* back %p %s:%d", (elm), \
__FILE__, __LINE__);
#define QUEUEDEBUG_TAILQ_PREREMOVE(head, elm, field) \
if ((elm)->field.tqe_next == NULL && \
(head)->tqh_last != &(elm)->field.tqe_next) \
QUEUEDEBUG_ABORT("TAILQ_PREREMOVE head %p elm %p %s:%d",\
(head), (elm), __FILE__, __LINE__);
#define QUEUEDEBUG_TAILQ_POSTREMOVE(elm, field) \
(elm)->field.tqe_next = (void *)1L; \
(elm)->field.tqe_prev = (void *)1L;
#else
#define QUEUEDEBUG_TAILQ_INSERT_HEAD(head, elm, field)
#define QUEUEDEBUG_TAILQ_INSERT_TAIL(head, elm, field)
#define QUEUEDEBUG_TAILQ_OP(elm, field)
#define QUEUEDEBUG_TAILQ_PREREMOVE(head, elm, field)
#define QUEUEDEBUG_TAILQ_POSTREMOVE(elm, field)
#endif
#define TAILQ_INIT(head) do { \
(head)->tqh_first = TAILQ_END(head); \
(head)->tqh_last = &(head)->tqh_first; \
} while (/*CONSTCOND*/0)
#define TAILQ_INSERT_HEAD(head, elm, field) do { \
QUEUEDEBUG_TAILQ_INSERT_HEAD((head), (elm), field) \
if (((elm)->field.tqe_next = (head)->tqh_first) != TAILQ_END(head))\
(head)->tqh_first->field.tqe_prev = \
&(elm)->field.tqe_next; \
else \
(head)->tqh_last = &(elm)->field.tqe_next; \
(head)->tqh_first = (elm); \
(elm)->field.tqe_prev = &(head)->tqh_first; \
} while (/*CONSTCOND*/0)
#define TAILQ_INSERT_TAIL(head, elm, field) do { \
QUEUEDEBUG_TAILQ_INSERT_TAIL((head), (elm), field) \
(elm)->field.tqe_next = TAILQ_END(head); \
(elm)->field.tqe_prev = (head)->tqh_last; \
*(head)->tqh_last = (elm); \
(head)->tqh_last = &(elm)->field.tqe_next; \
} while (/*CONSTCOND*/0)
#define TAILQ_INSERT_AFTER(head, listelm, elm, field) do { \
QUEUEDEBUG_TAILQ_OP((listelm), field) \
if (((elm)->field.tqe_next = (listelm)->field.tqe_next) != \
TAILQ_END(head)) \
(elm)->field.tqe_next->field.tqe_prev = \
&(elm)->field.tqe_next; \
else \
(head)->tqh_last = &(elm)->field.tqe_next; \
(listelm)->field.tqe_next = (elm); \
(elm)->field.tqe_prev = &(listelm)->field.tqe_next; \
} while (/*CONSTCOND*/0)
#define TAILQ_INSERT_BEFORE(listelm, elm, field) do { \
QUEUEDEBUG_TAILQ_OP((listelm), field) \
(elm)->field.tqe_prev = (listelm)->field.tqe_prev; \
(elm)->field.tqe_next = (listelm); \
*(listelm)->field.tqe_prev = (elm); \
(listelm)->field.tqe_prev = &(elm)->field.tqe_next; \
} while (/*CONSTCOND*/0)
#define TAILQ_REMOVE(head, elm, field) do { \
QUEUEDEBUG_TAILQ_PREREMOVE((head), (elm), field) \
QUEUEDEBUG_TAILQ_OP((elm), field) \
if (((elm)->field.tqe_next) != TAILQ_END(head)) \
(elm)->field.tqe_next->field.tqe_prev = \
(elm)->field.tqe_prev; \
else \
(head)->tqh_last = (elm)->field.tqe_prev; \
*(elm)->field.tqe_prev = (elm)->field.tqe_next; \
QUEUEDEBUG_TAILQ_POSTREMOVE((elm), field); \
} while (/*CONSTCOND*/0)
#define TAILQ_REPLACE(head, elm, elm2, field) do { \
if (((elm2)->field.tqe_next = (elm)->field.tqe_next) != \
TAILQ_END(head)) \
(elm2)->field.tqe_next->field.tqe_prev = \
&(elm2)->field.tqe_next; \
else \
(head)->tqh_last = &(elm2)->field.tqe_next; \
(elm2)->field.tqe_prev = (elm)->field.tqe_prev; \
*(elm2)->field.tqe_prev = (elm2); \
QUEUEDEBUG_TAILQ_POSTREMOVE((elm), field); \
} while (/*CONSTCOND*/0)
#define TAILQ_CONCAT(head1, head2, field) do { \
if (!TAILQ_EMPTY(head2)) { \
*(head1)->tqh_last = (head2)->tqh_first; \
(head2)->tqh_first->field.tqe_prev = (head1)->tqh_last; \
(head1)->tqh_last = (head2)->tqh_last; \
TAILQ_INIT((head2)); \
} \
} while (/*CONSTCOND*/0)
/*
* Singly-linked Tail queue declarations.
*/
#define STAILQ_HEAD(name, type) \
struct name { \
struct type *stqh_first; /* first element */ \
struct type **stqh_last; /* addr of last next element */ \
}
#define STAILQ_HEAD_INITIALIZER(head) \
{ NULL, &(head).stqh_first }
#define STAILQ_ENTRY(type) \
struct { \
struct type *stqe_next; /* next element */ \
}
/*
* Singly-linked Tail queue access methods.
*/
#define STAILQ_FIRST(head) ((head)->stqh_first)
#define STAILQ_END(head) NULL
#define STAILQ_NEXT(elm, field) ((elm)->field.stqe_next)
#define STAILQ_EMPTY(head) (STAILQ_FIRST(head) == STAILQ_END(head))
/*
* Singly-linked Tail queue functions.
*/
#define STAILQ_INIT(head) do { \
(head)->stqh_first = NULL; \
(head)->stqh_last = &(head)->stqh_first; \
} while (/*CONSTCOND*/0)
#define STAILQ_INSERT_HEAD(head, elm, field) do { \
if (((elm)->field.stqe_next = (head)->stqh_first) == NULL) \
(head)->stqh_last = &(elm)->field.stqe_next; \
(head)->stqh_first = (elm); \
} while (/*CONSTCOND*/0)
#define STAILQ_INSERT_TAIL(head, elm, field) do { \
(elm)->field.stqe_next = NULL; \
*(head)->stqh_last = (elm); \
(head)->stqh_last = &(elm)->field.stqe_next; \
} while (/*CONSTCOND*/0)
#define STAILQ_INSERT_AFTER(head, listelm, elm, field) do { \
if (((elm)->field.stqe_next = (listelm)->field.stqe_next) == NULL)\
(head)->stqh_last = &(elm)->field.stqe_next; \
(listelm)->field.stqe_next = (elm); \
} while (/*CONSTCOND*/0)
#define STAILQ_REMOVE_HEAD(head, field) do { \
if (((head)->stqh_first = (head)->stqh_first->field.stqe_next) == NULL) \
(head)->stqh_last = &(head)->stqh_first; \
} while (/*CONSTCOND*/0)
#define STAILQ_REMOVE(head, elm, type, field) do { \
if ((head)->stqh_first == (elm)) { \
STAILQ_REMOVE_HEAD((head), field); \
} else { \
struct type *curelm = (head)->stqh_first; \
while (curelm->field.stqe_next != (elm)) \
curelm = curelm->field.stqe_next; \
if ((curelm->field.stqe_next = \
curelm->field.stqe_next->field.stqe_next) == NULL) \
(head)->stqh_last = &(curelm)->field.stqe_next; \
} \
} while (/*CONSTCOND*/0)
#define STAILQ_FOREACH(var, head, field) \
for ((var) = ((head)->stqh_first); \
(var); \
(var) = ((var)->field.stqe_next))
#define STAILQ_FOREACH_SAFE(var, head, field, tvar) \
for ((var) = STAILQ_FIRST((head)); \
(var) && ((tvar) = STAILQ_NEXT((var), field), 1); \
(var) = (tvar))
#define STAILQ_CONCAT(head1, head2) do { \
if (!STAILQ_EMPTY((head2))) { \
*(head1)->stqh_last = (head2)->stqh_first; \
(head1)->stqh_last = (head2)->stqh_last; \
STAILQ_INIT((head2)); \
} \
} while (/*CONSTCOND*/0)
#define STAILQ_LAST(head, type, field) \
(STAILQ_EMPTY((head)) ? \
NULL : \
((struct type *)(void *) \
((char *)((head)->stqh_last) - offsetof(struct type, field))))
#endif /* !_UHYVE_QUEUE_H_ */
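The removed queue.h is the classic NetBSD intrusive-list header. Here is a minimal, self-contained usage sketch for the singly-linked variant; the struct and field names are illustrative, not taken from the HermitCore sources:

#include <stdio.h>
#include <stdlib.h>
#include "queue.h"

struct node {
	int value;
	SLIST_ENTRY(node) entries;	/* embeds the next pointer in the element */
};

SLIST_HEAD(node_list, node);		/* declares the head type */

int main(void)
{
	struct node_list head = SLIST_HEAD_INITIALIZER(head);
	struct node *n, *tmp;

	for (int i = 0; i < 3; i++) {
		n = malloc(sizeof(*n));
		n->value = i;
		SLIST_INSERT_HEAD(&head, n, entries);
	}

	/* the _SAFE variant allows freeing the current element */
	SLIST_FOREACH_SAFE(n, &head, entries, tmp) {
		printf("%d\n", n->value);	/* prints 2, 1, 0 */
		free(n);
	}
	return 0;
}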

tools/uhyve-aarch64.c

@@ -1,503 +0,0 @@
/*
* Copyright (c) 2018, Stefan Lankes, RWTH Aachen University
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef __aarch64__
#define _GNU_SOURCE
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include <errno.h>
#include <fcntl.h>
#include <sched.h>
#include <signal.h>
#include <limits.h>
#include <pthread.h>
#include <semaphore.h>
#include <elf.h>
#include <err.h>
#include <poll.h>
#include <sys/wait.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/eventfd.h>
#include <linux/const.h>
#include <linux/kvm.h>
#include "uhyve.h"
#include "proxy.h"
#define GUEST_OFFSET 0x0
#define GIC_SPI_IRQ_BASE 32
#define GICD_BASE (1ULL << 39)
#define GICC_BASE (GICD_BASE + GICD_SIZE)
#define GIC_SIZE (GICD_SIZE + GICC_SIZE)
#define GICD_SIZE 0x10000ULL
#define GICC_SIZE 0x20000ULL
#define KVM_GAP_SIZE (GIC_SIZE)
#define KVM_GAP_START GICD_BASE
#define PAGE_SIZE 0x1000
#ifndef offsetof
#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
#endif
#define ARM64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 |\
KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
#define ARM_CPU_ID 3, 0, 0, 0
#define ARM_CPU_ID_MPIDR 5
static bool cap_irqfd = false;
static bool cap_read_only = false;
static int gic_fd = -1;
extern size_t guest_size;
extern uint64_t elf_entry;
extern uint8_t* klog;
extern bool verbose;
extern uint32_t ncores;
extern uint8_t* guest_mem;
extern int kvm, vmfd, netfd, efd;
extern uint8_t* mboot;
extern __thread struct kvm_run *run;
extern __thread int vcpufd;
extern __thread uint32_t cpuid;
void print_registers(void)
{
struct kvm_one_reg reg;
uint64_t data;
fprintf(stderr, "\n Dump state of CPU %d\n\n", cpuid);
fprintf(stderr, " Registers\n");
fprintf(stderr, " =========\n");
reg.addr = (uint64_t)&data;
reg.id = ARM64_CORE_REG(regs.pc);
kvm_ioctl(vcpufd, KVM_GET_ONE_REG, &reg);
fprintf(stderr, " PC: 0x%016lx\n", data);
reg.id = ARM64_CORE_REG(regs.pstate);
kvm_ioctl(vcpufd, KVM_GET_ONE_REG, &reg);
fprintf(stderr, " PSTATE: 0x%016lx\n", data);
reg.id = ARM64_CORE_REG(sp_el1);
kvm_ioctl(vcpufd, KVM_GET_ONE_REG, &reg);
fprintf(stderr, " SP_EL1: 0x%016lx\n", data);
reg.id = ARM64_CORE_REG(regs.regs[30]);
kvm_ioctl(vcpufd, KVM_GET_ONE_REG, &reg);
fprintf(stderr, " LR: 0x%016lx\n", data);
reg.id = ARM64_SYS_REG(ARM_CPU_ID, ARM_CPU_ID_MPIDR);
kvm_ioctl(vcpufd, KVM_GET_ONE_REG, &reg);
fprintf(stderr, " MPIDR: 0x%016lx\n", data);
for(int i=0; i<=29; i+=2)
{
reg.id = ARM64_CORE_REG(regs.regs[i]);
kvm_ioctl(vcpufd, KVM_GET_ONE_REG, &reg);
fprintf(stderr, " X%d:\t 0x%016lx\t", i, data);
reg.id = ARM64_CORE_REG(regs.regs[i+1]);
kvm_ioctl(vcpufd, KVM_GET_ONE_REG, &reg);
fprintf(stderr, " X%d:\t0x%016lx\n", i+1, data);
}
}
vcpu_state_t read_cpu_state()
{
err(1, "Migration is currently not supported!");
}
void* migration_handler(void* arg)
{
err(1, "Migration is currently not supported!");
}
void timer_handler(int signum)
{
err(1, "Checkpointing is currently not supported!");
}
void restore_cpu_state(vcpu_state_t state)
{
err(1, "Checkpointing is currently not supported!");
}
vcpu_state_t save_cpu_state(void)
{
err(1, "Checkpointing is currently not supported!");
}
void write_cpu_state(void)
{
err(1, "Checkpointing is currently not supported!");
}
int load_checkpoint(uint8_t* mem, char* path)
{
err(1, "Checkpointing is currently not supported!");
}
int load_migration_data(uint8_t* mem)
{
err(1, "Checkpointing is currently not supported!");
}
void wait_for_incomming_migration(migration_metadata_t *metadata, uint16_t listen_portno)
{
err(1, "Checkpointing is currently not supported!");
}
void init_cpu_state(uint64_t elf_entry)
{
struct kvm_vcpu_init vcpu_init = {
.features = 0,
};
struct kvm_vcpu_init preferred_init;
if (!ioctl(vmfd, KVM_ARM_PREFERRED_TARGET, &preferred_init)) {
if ((preferred_init.target == KVM_ARM_TARGET_CORTEX_A57) ||
(preferred_init.target == KVM_ARM_TARGET_CORTEX_A53)) {
vcpu_init.target = preferred_init.target;
} else {
vcpu_init.target = KVM_ARM_TARGET_GENERIC_V8;
}
} else {
vcpu_init.target = KVM_ARM_TARGET_GENERIC_V8;
}
kvm_ioctl(vcpufd, KVM_ARM_VCPU_INIT, &vcpu_init);
// make sure that the vCPU's MP state is runnable
struct kvm_mp_state mp_state = { KVM_MP_STATE_RUNNABLE };
kvm_ioctl(vcpufd, KVM_SET_MP_STATE, &mp_state);
struct kvm_one_reg reg;
uint64_t data;
/* pstate = all interrupts masked */
data = PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | PSR_MODE_EL1h;
reg.id = ARM64_CORE_REG(regs.pstate);
reg.addr = (uint64_t)&data;
kvm_ioctl(vcpufd, KVM_SET_ONE_REG, &reg);
#if 0
/* x0...x3 = 0 */
data = 0;
reg.id = ARM64_CORE_REG(regs.regs[0]);
kvm_ioctl(vcpufd, KVM_SET_ONE_REG, &reg);
reg.id = ARM64_CORE_REG(regs.regs[1]);
kvm_ioctl(vcpufd, KVM_SET_ONE_REG, &reg);
reg.id = ARM64_CORE_REG(regs.regs[2]);
kvm_ioctl(vcpufd, KVM_SET_ONE_REG, &reg);
reg.id = ARM64_CORE_REG(regs.regs[3]);
kvm_ioctl(vcpufd, KVM_SET_ONE_REG, &reg);
#endif
/* set start address */
data = elf_entry;
reg.id = ARM64_CORE_REG(regs.pc);
kvm_ioctl(vcpufd, KVM_SET_ONE_REG, &reg);
if (gic_fd > 0) {
int lines = 1;
uint32_t nr_irqs = lines * 32 + GIC_SPI_IRQ_BASE;
struct kvm_device_attr nr_irqs_attr = {
.group = KVM_DEV_ARM_VGIC_GRP_NR_IRQS,
.addr = (uint64_t)&nr_irqs,
};
struct kvm_device_attr vgic_init_attr = {
.group = KVM_DEV_ARM_VGIC_GRP_CTRL,
.attr = KVM_DEV_ARM_VGIC_CTRL_INIT,
};
kvm_ioctl(gic_fd, KVM_SET_DEVICE_ATTR, &nr_irqs_attr);
kvm_ioctl(gic_fd, KVM_SET_DEVICE_ATTR, &vgic_init_attr);
}
// only one core is able to enter startup code
// => wait for the predecessor core
while (*((volatile uint32_t*) (mboot + 0x120)) < cpuid)
pthread_yield();
*((volatile uint32_t*) (mboot + 0x130)) = cpuid;
}
void init_kvm_arch(void)
{
guest_mem = mmap(NULL, guest_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (guest_mem == MAP_FAILED)
err(1, "mmap failed");
const char* merge = getenv("HERMIT_MERGEABLE");
if (merge && (strcmp(merge, "0") != 0)) {
/*
* The KSM feature is intended for applications that generate
* many instances of the same data (e.g., virtualization systems
* such as KVM). It can consume a lot of processing power!
*/
madvise(guest_mem, guest_size, MADV_MERGEABLE);
if (verbose)
fprintf(stderr, "VM uses KSM feature \"mergeable\" to reduce the memory footprint.\n");
}
const char* hugepage = getenv("HERMIT_HUGEPAGE");
if (hugepage && (strcmp(hugepage, "0") != 0)) {
madvise(guest_mem, guest_size, MADV_HUGEPAGE);
if (verbose)
fprintf(stderr, "VM uses huge pages to improve performance.\n");
}
cap_read_only = kvm_ioctl(vmfd, KVM_CHECK_EXTENSION, KVM_CAP_READONLY_MEM) <= 0 ? false : true;
if (!cap_read_only)
err(1, "the support of KVM_CAP_READONLY_MEM is currently required");
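/*
 * Guest memory is registered as two slots: slot 0 covers only the first
 * page and is read-only (KVM_MEM_READONLY), so guest writes to that page
 * exit to the hypervisor; slot 1 maps the remaining memory read-write,
 * with KVM_MEM_LOG_DIRTY_PAGES when USE_DIRTY_LOG is defined.
 */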
struct kvm_userspace_memory_region kvm_region = {
.slot = 0,
.guest_phys_addr = 0,
.memory_size = PAGE_SIZE,
.userspace_addr = (uint64_t) guest_mem,
.flags = KVM_MEM_READONLY,
};
kvm_ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &kvm_region);
kvm_region = (struct kvm_userspace_memory_region) {
.slot = 1,
.guest_phys_addr = PAGE_SIZE,
.memory_size = guest_size - PAGE_SIZE,
.userspace_addr = (uint64_t) guest_mem + PAGE_SIZE,
#ifdef USE_DIRTY_LOG
.flags = KVM_MEM_LOG_DIRTY_PAGES,
#else
.flags = 0,
#endif
};
kvm_ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &kvm_region);
#if 0
/* Create interrupt controller GICv2 */
uint64_t cpu_if_addr = GICC_BASE;
uint64_t dist_addr = GICD_BASE;
struct kvm_device_attr cpu_if_attr = {
.group = KVM_DEV_ARM_VGIC_GRP_ADDR,
.attr = KVM_VGIC_V2_ADDR_TYPE_CPU,
.addr = (uint64_t)&cpu_if_addr,
};
struct kvm_create_device gic_device = {
.flags = 0,
.type = KVM_DEV_TYPE_ARM_VGIC_V2,
};
struct kvm_device_attr dist_attr = {
.group = KVM_DEV_ARM_VGIC_GRP_ADDR,
.attr = KVM_VGIC_V2_ADDR_TYPE_DIST,
.addr = (uint64_t)&dist_addr,
};
kvm_ioctl(vmfd, KVM_CREATE_DEVICE, &gic_device);
gic_fd = gic_device.fd;
kvm_ioctl(gic_fd, KVM_SET_DEVICE_ATTR, &cpu_if_attr);
kvm_ioctl(gic_fd, KVM_SET_DEVICE_ATTR, &dist_attr);
#else
/* Create interrupt controller GICv2 */
struct kvm_arm_device_addr gic_addr[] = {
[0] = {
.id = KVM_VGIC_V2_ADDR_TYPE_DIST |
(KVM_ARM_DEVICE_VGIC_V2 << KVM_ARM_DEVICE_ID_SHIFT),
.addr = GICD_BASE,
},
[1] = {
.id = KVM_VGIC_V2_ADDR_TYPE_CPU |
(KVM_ARM_DEVICE_VGIC_V2 << KVM_ARM_DEVICE_ID_SHIFT),
.addr = GICC_BASE,
}
};
kvm_ioctl(vmfd, KVM_CREATE_IRQCHIP, NULL);
kvm_ioctl(vmfd, KVM_ARM_SET_DEVICE_ADDR, &gic_addr[0]);
kvm_ioctl(vmfd, KVM_ARM_SET_DEVICE_ADDR, &gic_addr[1]);
#endif
//fprintf(stderr, "Create gicd at 0x%llx\n", GICD_BASE);
//fprintf(stderr, "Create gicc at 0x%llx\n", GICC_BASE);
cap_irqfd = ioctl(vmfd, KVM_CHECK_EXTENSION, KVM_CAP_IRQFD) <= 0 ? false : true;
if (!cap_irqfd)
err(1, "the support of KVM_CAP_IRQFD is currently required");
}
int load_kernel(uint8_t* mem, char* path)
{
Elf64_Ehdr hdr;
Elf64_Phdr *phdr = NULL;
size_t buflen;
size_t pstart = 0;
int fd, ret;
fd = open(path, O_RDONLY);
if (fd == -1)
{
perror("Unable to open file");
return -1;
}
ret = pread_in_full(fd, &hdr, sizeof(hdr), 0);
if (ret < 0)
goto out;
// check if the program is a HermitCore file
if (hdr.e_ident[EI_MAG0] != ELFMAG0
|| hdr.e_ident[EI_MAG1] != ELFMAG1
|| hdr.e_ident[EI_MAG2] != ELFMAG2
|| hdr.e_ident[EI_MAG3] != ELFMAG3
|| hdr.e_ident[EI_CLASS] != ELFCLASS64
|| hdr.e_ident[EI_OSABI] != HERMIT_ELFOSABI
|| hdr.e_type != ET_EXEC || hdr.e_machine != EM_AARCH64) {
fprintf(stderr, "Invalid HermitCore file!\n");
ret = -1;
goto out;
}
elf_entry = hdr.e_entry;
buflen = hdr.e_phentsize * hdr.e_phnum;
phdr = malloc(buflen);
if (!phdr) {
fprintf(stderr, "Not enough memory\n");
ret = -1;
goto out;
}
ret = pread_in_full(fd, phdr, buflen, hdr.e_phoff);
if (ret < 0)
goto out;
/*
* Load all segments with type "LOAD" from the file at offset
* p_offset, and copy them into memory.
*/
for (Elf64_Half ph_i = 0; ph_i < hdr.e_phnum; ph_i++)
{
uint64_t paddr = phdr[ph_i].p_paddr;
size_t offset = phdr[ph_i].p_offset;
size_t filesz = phdr[ph_i].p_filesz;
size_t memsz = phdr[ph_i].p_memsz;
if (phdr[ph_i].p_type != PT_LOAD)
continue;
//fprintf(stderr, "Kernel location 0x%zx, file size 0x%zx, memory size 0x%zx\n", paddr, filesz, memsz);
ret = pread_in_full(fd, mem+paddr-GUEST_OFFSET, filesz, offset);
if (ret < 0)
goto out;
if (!klog)
klog = mem+paddr+0x1000-GUEST_OFFSET;
if (!mboot)
mboot = mem+paddr-GUEST_OFFSET;
//fprintf(stderr, "mboot at %p, klog at %p\n", mboot, klog);
if (!pstart) {
pstart = paddr;
// initialize kernel
*((uint64_t*) (mem+paddr-GUEST_OFFSET + 0x100)) = paddr; // physical start address
*((uint64_t*) (mem+paddr-GUEST_OFFSET + 0x108)) = guest_size - PAGE_SIZE; // physical limit
*((uint32_t*) (mem+paddr-GUEST_OFFSET + 0x110)) = get_cpufreq();
*((uint32_t*) (mem+paddr-GUEST_OFFSET + 0x128)) = ncores; // number of used cpus
*((uint32_t*) (mem+paddr-GUEST_OFFSET + 0x130)) = 0; // cpuid
*((uint32_t*) (mem+paddr-GUEST_OFFSET + 0x148)) = 1; // announce uhyve
char* str = getenv("HERMIT_IP");
if (str) {
uint32_t ip[4];
sscanf(str, "%u.%u.%u.%u", ip+0, ip+1, ip+2, ip+3);
*((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB0)) = (uint8_t) ip[0];
*((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB1)) = (uint8_t) ip[1];
*((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB2)) = (uint8_t) ip[2];
*((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB3)) = (uint8_t) ip[3];
}
str = getenv("HERMIT_GATEWAY");
if (str) {
uint32_t ip[4];
sscanf(str, "%u.%u.%u.%u", ip+0, ip+1, ip+2, ip+3);
*((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB4)) = (uint8_t) ip[0];
*((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB5)) = (uint8_t) ip[1];
*((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB6)) = (uint8_t) ip[2];
*((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB7)) = (uint8_t) ip[3];
}
str = getenv("HERMIT_MASK");
if (str) {
uint32_t ip[4];
sscanf(str, "%u.%u.%u.%u", ip+0, ip+1, ip+2, ip+3);
*((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB8)) = (uint8_t) ip[0];
*((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB9)) = (uint8_t) ip[1];
*((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xBA)) = (uint8_t) ip[2];
*((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xBB)) = (uint8_t) ip[3];
}
*((uint64_t*) (mem+paddr-GUEST_OFFSET + 0xbc)) = (uint64_t) guest_mem;
if (verbose)
*((uint32_t*) (mem+paddr-GUEST_OFFSET + 0x174)) = (uint32_t) UHYVE_UART_PORT;
}
*((uint64_t*) (mem+pstart-GUEST_OFFSET + 0x158)) = paddr + memsz - pstart; // total kernel size
}
ret = 0;
out:
if (phdr)
free(phdr);
close(fd);
return ret;
}
#endif
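load_kernel() above patches a block of boot parameters directly into the loaded kernel image. Summarized from the code, the offsets relative to the kernel's physical load address are as follows; the macro names are made up for readability, only the offsets and meanings come from the source:

/* boot-info offsets patched by load_kernel(), relative to paddr */
#define BOOT_IP          0xB0	/* HERMIT_IP, four bytes */
#define BOOT_GATEWAY     0xB4	/* HERMIT_GATEWAY, four bytes */
#define BOOT_MASK        0xB8	/* HERMIT_MASK, four bytes */
#define BOOT_HOST_MEM    0xBC	/* host address of guest_mem, uint64_t */
#define BOOT_PHYS_START  0x100	/* physical start address, uint64_t */
#define BOOT_PHYS_LIMIT  0x108	/* physical memory limit, uint64_t */
#define BOOT_CPU_FREQ    0x110	/* get_cpufreq() result, uint32_t */
#define BOOT_BOOT_GATE   0x120	/* polled by init_cpu_state() for core ordering */
#define BOOT_NCORES      0x128	/* number of used cpus, uint32_t */
#define BOOT_CPUID       0x130	/* id of the booting core, uint32_t */
#define BOOT_UHYVE       0x148	/* set to 1 to announce uhyve, uint32_t */
#define BOOT_KERNEL_SIZE 0x158	/* total kernel size, uint64_t */
#define BOOT_UART_PORT   0x174	/* UHYVE_UART_PORT when verbose, uint32_t */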

tools/uhyve-gdb-aarch64.c

@@ -1,72 +0,0 @@
/*
* This file was adapted from the solo5/ukvm code base, initial copyright block
* follows:
*/
/*
* Copyright (c) 2015-2017 Contributors as noted in the AUTHORS file
*
* This file is part of ukvm, a unikernel monitor.
*
* Permission to use, copy, modify, and/or distribute this software
* for any purpose with or without fee is hereby granted, provided
* that the above copyright notice and this permission notice appear
* in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
* WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
* AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
* NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/*
* Based on binutils-gdb/gdb/stubs/i386-stub.c, which is:
* Not copyrighted.
*/
#ifdef __aarch64__
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <err.h>
#include <inttypes.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <assert.h>
#include <stdbool.h>
#include <ctype.h>
#include <linux/kvm.h>
#include "uhyve.h"
#include "uhyve-gdb.h"
#include "queue.h"
void uhyve_gdb_handle_exception(int vcpufd, int sigval)
{
}
void uhyve_gdb_handle_term(void)
{
}
int uhyve_gdb_init(int vcpufd)
{
return -1;
}
#endif

tools/uhyve-gdb-aarch64.h

@@ -1,40 +0,0 @@
/*
* This file was adapted from the solo5/ukvm code base, initial copyright block
* follows:
*/
/*
* Copyright (c) 2015-2017 Contributors as noted in the AUTHORS file
*
* This file is part of ukvm, a unikernel monitor.
*
* Permission to use, copy, modify, and/or distribute this software
* for any purpose with or without fee is hereby granted, provided
* that the above copyright notice and this permission notice appear
* in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
* WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
* AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
* NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef UHYVE_GDB_AARCH64_H
#define UHYVE_GDB_AARCH64_H
#include <stdint.h>
#include <inttypes.h>
struct uhyve_gdb_regs {
uint64_t regs[31];
uint64_t lr;
uint64_t pc;
uint64_t pstate;
uint64_t sp;
};
#endif /* UHYVE_GDB_AARCH64_H */

tools/uhyve-gdb-x86_64.c

@@ -1,993 +0,0 @@
/*
* This file was adapted from the solo5/ukvm code base, initial copyright block
* follows:
*/
/*
* Copyright (c) 2015-2017 Contributors as noted in the AUTHORS file
*
* This file is part of ukvm, a unikernel monitor.
*
* Permission to use, copy, modify, and/or distribute this software
* for any purpose with or without fee is hereby granted, provided
* that the above copyright notice and this permission notice appear
* in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
* WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
* AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
* NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/*
* Based on binutils-gdb/gdb/stubs/i386-stub.c, which is:
* Not copyrighted.
*/
#ifdef __x86_64__
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <err.h>
#include <inttypes.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <assert.h>
#include <stdbool.h>
#include <ctype.h>
#include <linux/kvm.h>
#include "uhyve.h"
#include "uhyve-gdb.h"
#include "queue.h"
struct breakpoint_t {
gdb_breakpoint_type type;
uint64_t addr;
size_t len;
uint32_t refcount;
uint8_t saved_insn; /* for software breakpoints */
SLIST_ENTRY(breakpoint_t) entries;
};
SLIST_HEAD(breakpoints_head, breakpoint_t);
static struct breakpoints_head sw_breakpoints;
static struct breakpoints_head hw_breakpoints;
/* The Intel SDM specifies that the DR7 has space for 4 breakpoints. */
#define MAX_HW_BREAKPOINTS 4
static uint32_t nr_hw_breakpoints = 0;
/* Stepping is disabled by default. */
static bool stepping = false;
/* This is the trap instruction used for software breakpoints. */
static const uint8_t int3 = 0xcc;
static int socket_fd = 0;
static int portno = 1234; /* Default port number */
static const char hexchars[] = "0123456789abcdef";
#define BUFMAX 4096
static char in_buffer[BUFMAX];
static unsigned char registers[BUFMAX];
/* uhyve variables */
extern size_t guest_size;
extern uint8_t *guest_mem;
void *uhyve_checked_gpa_p(uint64_t gpa, size_t sz, uint8_t * chk_guest_mem,
size_t chk_guest_size, const char *file, int line);
/* The actual error code is ignored by GDB, so any number will do. */
#define GDB_ERROR_MSG "E01"
static int hex(unsigned char ch)
{
if ((ch >= 'a') && (ch <= 'f'))
return (ch - 'a' + 10);
if ((ch >= '0') && (ch <= '9'))
return (ch - '0');
if ((ch >= 'A') && (ch <= 'F'))
return (ch - 'A' + 10);
return -1;
}
/*
* Converts the (count) bytes of memory pointed to by mem into a hex string in
* buf. Returns a pointer to the last char put in buf (null).
*/
static char *mem2hex(const unsigned char *mem, char *buf, size_t count)
{
size_t i;
unsigned char ch;
for (i = 0; i < count; i++) {
ch = *mem++;
*buf++ = hexchars[ch >> 4];
*buf++ = hexchars[ch % 16];
}
*buf = 0;
return buf;
}
/*
* Converts the hex string in buf into binary in mem.
* Returns a pointer to the character AFTER the last byte written.
*/
static unsigned char *hex2mem(const char *buf, unsigned char *mem, size_t count)
{
size_t i;
unsigned char ch;
assert(strlen(buf) >= (2 * count));
for (i = 0; i < count; i++) {
ch = hex(*buf++) << 4;
ch = ch + hex(*buf++);
*mem++ = ch;
}
return mem;
}
static int wait_for_connect(void)
{
int listen_socket_fd;
struct sockaddr_in server_addr, client_addr;
struct protoent *protoent;
struct in_addr ip_addr;
socklen_t len;
int opt;
listen_socket_fd = socket(AF_INET, SOCK_STREAM, 0);
if (listen_socket_fd == -1) {
err(1, "Could not create socket");
return -1;
}
opt = 1;
if (setsockopt(listen_socket_fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) == -1)
err(1, "setsockopt(SO_REUSEADDR) failed");
server_addr.sin_family = AF_INET;
server_addr.sin_addr.s_addr = htonl(INADDR_ANY);
server_addr.sin_port = htons(portno);
if (bind(listen_socket_fd, (struct sockaddr *)&server_addr,
sizeof(server_addr)) == -1) {
err(1, "bind failed");
return -1;
}
if (listen(listen_socket_fd, 0) == -1) {
err(1, "listen failed");
return -1;
}
warnx("Waiting for a debugger. Connect to it like this:");
warnx("\tgdb --ex=\"target remote localhost:%d\" UNIKERNEL", portno);
len = sizeof(client_addr);
socket_fd =
accept(listen_socket_fd, (struct sockaddr *)&client_addr, &len);
if (socket_fd == -1) {
err(1, "accept failed");
return -1;
}
close(listen_socket_fd);
protoent = getprotobyname("tcp");
if (!protoent) {
err(1, "getprotobyname (\"tcp\") failed");
return -1;
}
opt = 1;
if (setsockopt(socket_fd, protoent->p_proto, TCP_NODELAY, &opt,
sizeof(opt)) == -1)
err(1, "setsockopt(TCP_NODELAY) failed");
ip_addr.s_addr = client_addr.sin_addr.s_addr;
warnx("Connection from debugger at %s", inet_ntoa(ip_addr));
return 0;
}
static inline int send_char(char ch)
{
/* TCP is already buffering, so no need to buffer here as well. */
return send(socket_fd, &ch, 1, 0);
}
static char recv_char(void)
{
unsigned char ch;
int ret;
ret = recv(socket_fd, &ch, 1, 0);
if (ret < 0) {
return -1;
} else if (ret == 0) {
/* The peer has performed an orderly shutdown (from "man recv"). */
warnx("GDB: Connection closed from client");
close(socket_fd);
socket_fd = -1;
return -1;
} else {
assert(ret == 1);
}
/* All GDB remote packets are encoded in ASCII. */
assert(isascii(ch));
return (char)ch;
}
/*
* Scan for the sequence $<data>#<checksum>
* Returns a null terminated string.
*/
static char *recv_packet(void)
{
char *buffer = &in_buffer[0];
unsigned char checksum;
unsigned char xmitcsum;
char ch;
int count;
while (1) {
/* wait around for the start character, ignore all other characters */
do {
ch = recv_char();
if (ch == -1)
return NULL;
}
while (ch != '$');
retry:
checksum = 0;
xmitcsum = -1;
count = 0;
/* now, read until a # or end of buffer is found */
while (count < BUFMAX - 1) {
ch = recv_char();
if (ch == -1)
return NULL;
if (ch == '$')
goto retry;
if (ch == '#')
break;
checksum = checksum + ch;
buffer[count] = ch;
count = count + 1;
}
/* Let's make this a C string. */
buffer[count] = '\0';
if (ch == '#') {
ch = recv_char();
if (ch == -1)
return NULL;
xmitcsum = hex(ch) << 4;
ch = recv_char();
if (ch == -1)
return NULL;
xmitcsum += hex(ch);
if (checksum != xmitcsum) {
warnx("Failed checksum from GDB. "
"My count = 0x%x, sent=0x%x. buf=%s",
checksum, xmitcsum, buffer);
if (send_char('-') == -1)
/* Unsuccessful reply to a failed checksum */
err(1,
"GDB: Could not send an ACK to the debugger.");
} else {
if (send_char('+') == -1)
/* Unsuccessful reply to a successful transfer */
err(1,
"GDB: Could not send an ACK to the debugger.");
/* if a sequence char is present, reply the sequence ID */
if (buffer[2] == ':') {
send_char(buffer[0]);
send_char(buffer[1]);
return &buffer[3];
}
return &buffer[0];
}
}
}
}
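/*
 * Example framing (illustrative, not from the original source): for the
 * payload "OK" the checksum is ('O' + 'K') % 256 = 0x9a, so the packet on
 * the wire is "$OK#9a". recv_packet() above replies '+' for a good
 * checksum and '-' to ask the debugger to resend.
 */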
/*
* Send packet of the form $<packet info>#<checksum> without waiting for an ACK
* from the debugger. Only send_response() and send_packet() call this directly.
*/
static void send_packet_no_ack(char *buffer)
{
unsigned char checksum;
int count;
char ch;
/*
* We ignore all send_char errors as we either: (1) care about sending our
* packet and we will keep sending it until we get a good ACK from the
* debugger, or (2) not care and just send it as a best-effort notification
* when dying.
*/
send_char('$');
checksum = 0;
count = 0;
ch = buffer[count];
while (ch) {
send_char(ch);
checksum += ch;
count += 1;
ch = buffer[count];
}
send_char('#');
send_char(hexchars[checksum >> 4]);
send_char(hexchars[checksum % 16]);
}
/*
* Send a packet and wait for a successful ACK of '+' from the debugger.
* An ACK of '-' means that we have to resend.
*/
static void send_packet(char *buffer)
{
char ch;
for (;;) {
send_packet_no_ack(buffer);
ch = recv_char();
if (ch == -1)
return;
if (ch == '+')
break;
}
}
#define send_error_msg() do { send_packet(GDB_ERROR_MSG); } while (0)
#define send_not_supported_msg() do { send_packet(""); } while (0)
#define send_okay_msg() do { send_packet("OK"); } while (0)
/*
* This is a response to 'c' and 's'. In other words, the VM was
* running and it stopped for some reason. This message is to tell the
* debugger that we stopped (and why). The argument code can take these
* and some other values:
* - 'S AA' received signal AA
* - 'W AA' exited with return code AA
* - 'X AA' exited with signal AA
* https://sourceware.org/gdb/onlinedocs/gdb/Stop-Reply-Packets.html
*/
static void send_response(char code, int sigval, bool wait_for_ack)
{
char obuf[BUFMAX];
snprintf(obuf, sizeof(obuf), "%c%02x", code, sigval);
if (wait_for_ack)
send_packet(obuf);
else
send_packet_no_ack(obuf);
}
static void gdb_handle_exception(int vcpufd, int sigval)
{
char *packet;
char obuf[BUFMAX];
/* Notify the debugger of our last signal */
send_response('S', sigval, true);
for (;;) {
uint64_t addr = 0, result;
gdb_breakpoint_type type;
size_t len;
int command, ret;
packet = recv_packet();
if (packet == NULL)
/* Without a packet telling us what to do next there is
* really nothing we can do to recover. So, dying. */
errx(1,
"GDB: Exiting as we could not receive the next command from "
"the debugger.");
/*
* From the GDB manual:
* "At a minimum, a stub is required to support the g and G
* commands for register access, and the m and M commands
* for memory access. Stubs that only control single-threaded
* targets can implement run control with the c (continue),
* and s (step) commands."
*/
command = packet[0];
switch (command) {
case 's':
{
/* Step */
if (sscanf(packet, "s%" PRIx64, &addr) == 1) {
/* not supported, but that's OK as GDB will retry with the
* slower version of this: update all registers. */
send_not_supported_msg();
break; /* Wait for another command. */
}
if (uhyve_gdb_enable_ss(vcpufd) == -1) {
send_error_msg();
break; /* Wait for another command. */
}
return; /* Continue with program */
}
case 'c':
{
/* Continue (and disable stepping for the next instruction) */
if (sscanf(packet, "c%" PRIx64, &addr) == 1) {
/* not supported, but that's OK as GDB will retry with the
* slower version of this: update all registers. */
send_not_supported_msg();
break; /* Wait for another command. */
}
if (uhyve_gdb_disable_ss(vcpufd) == -1) {
send_error_msg();
break; /* Wait for another command. */
}
return; /* Continue with program */
}
case 'm':
{
/* Read memory content */
if (sscanf(packet, "m%" PRIx64 ",%zx", &addr, &len) != 2) {
send_error_msg();
break;
}
/* translate addr into guest phys first. it is
* needed if the address falls into the non directly mapped
* part of the virtual address space (ex: heap/stack) */
uint64_t phys_addr;
if (uhyve_gdb_guest_virt_to_phys(vcpufd, addr, &phys_addr)) {
send_error_msg();
} else {
mem2hex(guest_mem + phys_addr, obuf, len);
send_packet(obuf);
}
break; /* Wait for another command. */
}
case 'M':
{
/* Write memory content */
uint64_t phys_addr;
assert(strlen(packet) <= sizeof(obuf));
if (sscanf(packet, "M%" PRIx64 ",%zx:%s", &addr, &len, obuf) != 3) {
send_error_msg();
break;
}
/* translate to guest physical address first */
if (uhyve_gdb_guest_virt_to_phys(vcpufd, addr, &phys_addr)) {
send_error_msg();
} else {
hex2mem(obuf, guest_mem + phys_addr,
len);
send_okay_msg();
}
break; /* Wait for another command. */
}
case 'g':
{
/* Read general registers */
len = BUFMAX;
if (uhyve_gdb_read_registers(vcpufd, registers, &len) == -1) {
send_error_msg();
} else {
mem2hex(registers, obuf, len);
send_packet(obuf);
}
break; /* Wait for another command. */
}
case 'G':
{
/* Write general registers */
len = BUFMAX;
/* Call read_registers just to get len (not very efficient). */
if (uhyve_gdb_read_registers(vcpufd, registers, &len) == -1) {
send_error_msg();
break;
}
/* Packet looks like 'Gxxxxx', so we have to skip the first char */
hex2mem(packet + 1, registers, len);
if (uhyve_gdb_write_registers(vcpufd, registers, len) == -1) {
send_error_msg();
break;
}
send_okay_msg();
break; /* Wait for another command. */
}
case '?':
{
/* Return last signal */
send_response('S', sigval, true);
break; /* Wait for another command. */
}
case 'Z':
/* Insert a breakpoint */
case 'z':
{
/* Remove a breakpoint */
packet++;
if (sscanf(packet, "%" PRIx32 ",%" PRIx64 ",%zx",
&type, &addr, &len) != 3) {
send_error_msg();
break;
}
uint64_t phys_addr;
if (uhyve_gdb_guest_virt_to_phys(vcpufd, addr, &phys_addr)) {
send_error_msg();
} else {
if (command == 'Z')
ret = uhyve_gdb_add_breakpoint(vcpufd, type, phys_addr, len);
else
ret = uhyve_gdb_remove_breakpoint(vcpufd, type, phys_addr, len);
if (ret == -1)
send_error_msg();
else
send_okay_msg();
}
break;
}
case 'k':
{
warnx("Debugger asked us to quit");
send_okay_msg();
break;
}
case 'D':
{
warnx("Debugger detached");
send_okay_msg();
return;
}
default:
send_not_supported_msg();
break;
}
}
}
void uhyve_gdb_handle_exception(int vcpufd, int sigval)
{
gdb_handle_exception(vcpufd, sigval);
}
static void gdb_stub_start(int vcpufd)
{
wait_for_connect();
gdb_handle_exception(vcpufd, GDB_SIGNAL_FIRST);
}
int uhyve_gdb_init(int vcpufd)
{
/*
* GDB clients can change memory, and software breakpoints work by
* replacing instructions with int3's.
*/
if (mprotect(guest_mem, guest_size, PROT_READ | PROT_WRITE | PROT_EXEC) == -1)
err(1, "GDB: Cannot remove guest memory protection");
/* Notify the debugger that we are dying. */
atexit(uhyve_gdb_handle_term);
gdb_stub_start(vcpufd);
return 0;
}
void uhyve_gdb_handle_term(void)
{
/* TODO: this is a graceful shutdown forcing the return value to zero;
* is there a way to pass an error code when things go wrong? */
send_response('W', 0, true);
}
static int kvm_arch_insert_sw_breakpoint(struct breakpoint_t *bp)
{
uint8_t *insn = bp->addr + guest_mem;
bp->saved_insn = *insn;
/*
* We just modify the first byte even if the instruction is multi-byte.
* The debugger keeps track of the length of the instruction. The
* consequence of this is that we don't have to set all other bytes as
* NOP's.
*/
*insn = int3;
return 0;
}
static int kvm_arch_remove_sw_breakpoint(struct breakpoint_t *bp)
{
uint8_t *insn = bp->addr + guest_mem;
assert(*insn == int3);
*insn = bp->saved_insn;
return 0;
}
static int uhyve_gdb_update_guest_debug(int vcpufd)
{
struct kvm_guest_debug dbg = { 0 };
struct breakpoint_t *bp;
const uint8_t type_code[] = {
/* Break on instruction execution only. */
[GDB_BREAKPOINT_HW] = 0x0,
/* Break on data writes only. */
[GDB_WATCHPOINT_WRITE] = 0x1,
/* Break on data reads only. */
[GDB_WATCHPOINT_READ] = 0x2,
/* Break on data reads or writes but not instruction fetches. */
[GDB_WATCHPOINT_ACCESS] = 0x3
};
const uint8_t len_code[] = {
/*
* 00 1-byte length.
* 01 2-byte length.
* 10 8-byte length.
* 11 4-byte length.
*/
[1] = 0x0,[2] = 0x1,[4] = 0x3,[8] = 0x2
};
int n = 0;
if (stepping)
dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
if (!SLIST_EMPTY(&sw_breakpoints))
dbg.control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
if (!SLIST_EMPTY(&hw_breakpoints)) {
dbg.control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
/* Enable global breakpointing (across all threads) on the control
* debug register. */
dbg.arch.debugreg[7] = 1 << 9;
dbg.arch.debugreg[7] |= 1 << 10;
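/* DR7 bit 9 is GE (global exact breakpoint enable); bit 10 is reserved and
 * must be set to one. */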
SLIST_FOREACH(bp, &hw_breakpoints, entries) {
assert(bp->type != GDB_BREAKPOINT_SW);
dbg.arch.debugreg[n] = bp->addr;
/* global breakpointing */
dbg.arch.debugreg[7] |= (2 << (n * 2));
/* read/write fields */
dbg.arch.debugreg[7] |=
(type_code[bp->type] << (16 + n * 4));
/* Length fields */
dbg.arch.debugreg[7] |=
((uint32_t) len_code[bp->len] << (18 + n * 4));
n++;
}
}
kvm_ioctl(vcpufd, KVM_SET_GUEST_DEBUG, &dbg);
return 0;
}
static struct breakpoint_t *bp_list_find(gdb_breakpoint_type type,
uint64_t addr, size_t len)
{
struct breakpoint_t *bp;
switch (type) {
case GDB_BREAKPOINT_SW:
SLIST_FOREACH(bp, &sw_breakpoints, entries) {
if (bp->addr == addr && bp->len == len)
return bp;
}
break;
case GDB_BREAKPOINT_HW:
case GDB_WATCHPOINT_WRITE:
case GDB_WATCHPOINT_READ:
case GDB_WATCHPOINT_ACCESS:
/* We only support hardware watchpoints. */
SLIST_FOREACH(bp, &hw_breakpoints, entries) {
if (bp->addr == addr && bp->len == len)
return bp;
}
break;
default:
assert(0);
}
return NULL;
}
/*
* Adds a new breakpoint to the list of breakpoints. Returns the found or
* created breakpoint. Returns NULL in case of failure or if we reached the max
* number of allowed hardware breakpoints (4).
*/
static struct breakpoint_t *bp_list_insert(gdb_breakpoint_type type,
uint64_t addr, size_t len)
{
struct breakpoint_t *bp;
bp = bp_list_find(type, addr, len);
if (bp) {
bp->refcount++;
return bp;
}
bp = malloc(sizeof(struct breakpoint_t));
if (bp == NULL)
return NULL;
bp->addr = addr;
bp->type = type;
bp->len = len;
bp->refcount = 1;
switch (type) {
case GDB_BREAKPOINT_SW:
SLIST_INSERT_HEAD(&sw_breakpoints, bp, entries);
break;
case GDB_BREAKPOINT_HW:
case GDB_WATCHPOINT_WRITE:
case GDB_WATCHPOINT_READ:
case GDB_WATCHPOINT_ACCESS:
/* We only support hardware watchpoints. */
if (nr_hw_breakpoints == MAX_HW_BREAKPOINTS) {
free(bp); /* do not leak the freshly allocated breakpoint */
return NULL;
}
nr_hw_breakpoints++;
SLIST_INSERT_HEAD(&hw_breakpoints, bp, entries);
break;
default:
assert(0);
}
return bp;
}
/*
* Removes a breakpoint from the list of breakpoints.
* Returns -1 if the breakpoint is not in the list.
*/
static int bp_list_remove(gdb_breakpoint_type type, uint64_t addr, size_t len)
{
struct breakpoint_t *bp = NULL;
bp = bp_list_find(type, addr, len);
if (!bp)
return -1;
bp->refcount--;
if (bp->refcount > 0)
return 0;
switch (type) {
case GDB_BREAKPOINT_SW:
SLIST_REMOVE(&sw_breakpoints, bp, breakpoint_t, entries);
break;
case GDB_BREAKPOINT_HW:
case GDB_WATCHPOINT_WRITE:
case GDB_WATCHPOINT_READ:
case GDB_WATCHPOINT_ACCESS:
/* We only support hardware watchpoints. */
SLIST_REMOVE(&hw_breakpoints, bp, breakpoint_t, entries);
nr_hw_breakpoints--;
break;
default:
assert(0);
}
free(bp);
return 0;
}
int uhyve_gdb_read_registers(int vcpufd, uint8_t * registers, size_t * len)
{
struct kvm_regs kregs;
struct kvm_sregs sregs;
struct uhyve_gdb_regs *gregs = (struct uhyve_gdb_regs *)registers;
kvm_ioctl(vcpufd, KVM_GET_REGS, &kregs);
kvm_ioctl(vcpufd, KVM_GET_SREGS, &sregs);
if (*len < sizeof(struct uhyve_gdb_regs))
return -1;
*len = sizeof(struct uhyve_gdb_regs);
gregs->rax = kregs.rax;
gregs->rbx = kregs.rbx;
gregs->rcx = kregs.rcx;
gregs->rdx = kregs.rdx;
gregs->rsi = kregs.rsi;
gregs->rdi = kregs.rdi;
gregs->rbp = kregs.rbp;
gregs->rsp = kregs.rsp;
gregs->r8 = kregs.r8;
gregs->r9 = kregs.r9;
gregs->r10 = kregs.r10;
gregs->r11 = kregs.r11;
gregs->r12 = kregs.r12;
gregs->r13 = kregs.r13;
gregs->r14 = kregs.r14;
gregs->r15 = kregs.r15;
gregs->rip = kregs.rip;
gregs->eflags = kregs.rflags;
gregs->cs = sregs.cs.selector;
gregs->ss = sregs.ss.selector;
gregs->ds = sregs.ds.selector;
gregs->es = sregs.es.selector;
gregs->fs = sregs.fs.selector;
gregs->gs = sregs.gs.selector;
return 0;
}
int uhyve_gdb_write_registers(int vcpufd, uint8_t * registers, size_t len)
{
struct kvm_regs kregs;
struct kvm_sregs sregs;
struct uhyve_gdb_regs *gregs = (struct uhyve_gdb_regs *)registers;
/* Let's read all registers just in case we miss filling one of them. */
kvm_ioctl(vcpufd, KVM_GET_REGS, &kregs);
kvm_ioctl(vcpufd, KVM_GET_SREGS, &sregs);
if (len < sizeof(struct uhyve_gdb_regs))
return -1;
kregs.rax = gregs->rax;
kregs.rbx = gregs->rbx;
kregs.rcx = gregs->rcx;
kregs.rdx = gregs->rdx;
kregs.rsi = gregs->rsi;
kregs.rdi = gregs->rdi;
kregs.rbp = gregs->rbp;
kregs.rsp = gregs->rsp;
kregs.r8 = gregs->r8;
kregs.r9 = gregs->r9;
kregs.r10 = gregs->r10;
kregs.r11 = gregs->r11;
kregs.r12 = gregs->r12;
kregs.r13 = gregs->r13;
kregs.r14 = gregs->r14;
kregs.r15 = gregs->r15;
kregs.rip = gregs->rip;
kregs.rflags = gregs->eflags;
/* XXX: not sure if just setting .selector is enough. */
sregs.cs.selector = gregs->cs;
sregs.ss.selector = gregs->ss;
sregs.ds.selector = gregs->ds;
sregs.es.selector = gregs->es;
sregs.fs.selector = gregs->fs;
sregs.gs.selector = gregs->gs;
kvm_ioctl(vcpufd, KVM_SET_REGS, &kregs);
kvm_ioctl(vcpufd, KVM_SET_SREGS, &sregs);
return 0;
}
int uhyve_gdb_add_breakpoint(int vcpufd, gdb_breakpoint_type type,
uint64_t addr, size_t len)
{
struct breakpoint_t *bp;
assert(type < GDB_BREAKPOINT_MAX);
if (bp_list_find(type, addr, len))
return 0;
bp = bp_list_insert(type, addr, len);
if (bp == NULL)
return -1;
if (type == GDB_BREAKPOINT_SW)
kvm_arch_insert_sw_breakpoint(bp);
if (uhyve_gdb_update_guest_debug(vcpufd) == -1)
return -1;
return 0;
}
int uhyve_gdb_remove_breakpoint(int vcpufd, gdb_breakpoint_type type,
uint64_t addr, size_t len)
{
struct breakpoint_t *bp;
assert(type < GDB_BREAKPOINT_MAX);
if (type == GDB_BREAKPOINT_SW) {
bp = bp_list_find(type, addr, len);
if (bp)
kvm_arch_remove_sw_breakpoint(bp);
}
if (bp_list_remove(type, addr, len) == -1)
return -1;
if (uhyve_gdb_update_guest_debug(vcpufd) == -1)
return -1;
return 0;
}
int uhyve_gdb_enable_ss(int vcpufd)
{
stepping = true;
if (uhyve_gdb_update_guest_debug(vcpufd) == -1)
return -1;
return 0;
}
int uhyve_gdb_disable_ss(int vcpufd)
{
stepping = false;
if (uhyve_gdb_update_guest_debug(vcpufd) == -1)
return -1;
return 0;
}
/* Convert a guest virtual address into the corresponding physical address */
int uhyve_gdb_guest_virt_to_phys(int vcpufd, const uint64_t virt, uint64_t * phys)
{
struct kvm_translation kt;
kt.linear_address = virt;
kvm_ioctl(vcpufd, KVM_TRANSLATE, &kt);
/* the translation can fail, e.g. for an unmapped guest address */
if (!kt.valid)
return -1;
*phys = kt.physical_address;
return 0;
}
#endif

View File

@ -1,77 +0,0 @@
/*
* This file was adapted from the solo5/ukvm code base, initial copyright block
* follows:
*/
/*
* Copyright (c) 2015-2017 Contributors as noted in the AUTHORS file
*
* This file is part of ukvm, a unikernel monitor.
*
* Permission to use, copy, modify, and/or distribute this software
* for any purpose with or without fee is hereby granted, provided
* that the above copyright notice and this permission notice appear
* in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
* WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
* AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
* NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef UHYVE_GDB_X86_64_H
#define UHYVE_GDB_X86_64_H
#include <stdint.h>
#include <inttypes.h>
/*
* X86_64
* XXX: Can't find any gdb include file with the list of registers per
* architecture (something like ia64_regs.h). The closest I can get is a
* list of the registers from gdb (debugging an ordinary x86_64 binary):
*
* (gdb) info registers
* rax 0x0 0
* rbx 0x0 0
* rcx 0x0 0
* ...
* fs 0x0 0
* gs 0x0 0
* (gdb)
*/
struct uhyve_gdb_regs {
uint64_t rax;
uint64_t rbx;
uint64_t rcx;
uint64_t rdx;
uint64_t rsi;
uint64_t rdi;
uint64_t rbp;
uint64_t rsp;
uint64_t r8;
uint64_t r9;
uint64_t r10;
uint64_t r11;
uint64_t r12;
uint64_t r13;
uint64_t r14;
uint64_t r15;
uint64_t rip;
uint32_t eflags;
uint32_t cs;
uint32_t ss;
uint32_t ds;
uint32_t es;
uint32_t fs;
uint32_t gs;
uint8_t st[8][10];
};
#endif /* UHYVE_GDB_X86_64_H */

View File

@ -1,76 +0,0 @@
/*
* This file was adapted from the solo5/ukvm code base, initial copyright block
* follows:
*/
/*
* Copyright (c) 2015-2017 Contributors as noted in the AUTHORS file
*
* This file is part of ukvm, a unikernel monitor.
*
* Permission to use, copy, modify, and/or distribute this software
* for any purpose with or without fee is hereby granted, provided
* that the above copyright notice and this permission notice appear
* in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
* WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
* AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
* NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef UHYVE_GDB_H
#define UHYVE_GDB_H
#include <stdint.h>
#include <inttypes.h>
/* GDB breakpoint/watchpoint types */
typedef enum _gdb_breakpoint_type {
/* Do not change these. The values have to match on the GDB client
* side. */
GDB_BREAKPOINT_SW = 0,
GDB_BREAKPOINT_HW,
GDB_WATCHPOINT_WRITE,
GDB_WATCHPOINT_READ,
GDB_WATCHPOINT_ACCESS,
GDB_BREAKPOINT_MAX
} gdb_breakpoint_type;
#define GDB_SIGNAL_FIRST 0
#define GDB_SIGNAL_QUIT 3
#define GDB_SIGNAL_KILL 9
#define GDB_SIGNAL_TRAP 5
#define GDB_SIGNAL_SEGV 11
#define GDB_SIGNAL_TERM 15
#define GDB_SIGNAL_IO 23
#define GDB_SIGNAL_DEFAULT 144
/* prototypes */
int uhyve_gdb_enable_ss(int vcpufd);
int uhyve_gdb_disable_ss(int vcpufd);
int uhyve_gdb_read_registers(int vcpufd, uint8_t *reg, size_t *len);
int uhyve_gdb_write_registers(int vcpufd, uint8_t *reg, size_t len);
int uhyve_gdb_add_breakpoint(int vcpufd, gdb_breakpoint_type type,
uint64_t addr, size_t len);
int uhyve_gdb_remove_breakpoint(int vcpufd, gdb_breakpoint_type type,
uint64_t addr, size_t len);
int uhyve_gdb_guest_virt_to_phys(int vcpufd, const uint64_t virt,
uint64_t *phys);
/* interface with uhyve.c */
void uhyve_gdb_handle_exception(int vcpufd, int sigval);
void uhyve_gdb_handle_term(void);
int uhyve_gdb_init(int vcpufd);
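/* Typical call sequence (a sketch inferred from uhyve.c): the first vCPU
 * thread calls uhyve_gdb_init() once before entering its run loop, and
 * uhyve_gdb_handle_exception() is invoked on every debug-related VM exit. */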
#ifdef __x86_64__
#include "uhyve-gdb-x86_64.h"
#else
#include "uhyve-gdb-aarch64.h"
#endif
#endif /* UHYVE_GDB_H */

View File

@ -1,873 +0,0 @@
/*
* Copyright (c) 2018, Simon Pickartz, RWTH Aachen University
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#define _GNU_SOURCE
#include <stdbool.h>
#include <stdlib.h>
#include <arpa/inet.h>
#include <infiniband/verbs.h>
#include <errno.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "uhyve-migration.h"
#include "uhyve.h"
#ifdef __RDMA_MIGRATION__
#define IB_USE_ODP (0)
#define IB_CQ_ENTRIES (1)
#define IB_MAX_INLINE_DATA (0)
#define IB_MAX_DEST_RD_ATOMIC (1)
#define IB_MIN_RNR_TIMER (1)
#define IB_MAX_SEND_WR (8192) // TODO: should be com_hndl.dev_attr_ex.orig_attr.max_qp_wr;
// the fixed value works around an issue with mlx5 adapters
#define IB_MAX_RECV_WR (1)
#define IB_MAX_SEND_SGE (1)
#define IB_MAX_RECV_SGE (1)
typedef enum ib_wr_ids {
IB_WR_NO_ID = 0,
IB_WR_WRITE_LAST_PAGE_ID,
IB_WR_RECV_LAST_PAGE_ID,
IB_WR_BASE_ID
} ib_wr_ids_t;
uint64_t cur_wr_id = IB_WR_BASE_ID;
typedef struct qp_info {
uint32_t qpn;
uint16_t lid;
uint16_t psn;
uint32_t *keys;
uint64_t addr;
} qp_info_t;
typedef struct com_hndl {
struct ibv_context *ctx; /* device context */
struct ibv_device_attr_ex dev_attr_ex; /* extended device attributes */
struct ibv_port_attr port_attr; /* port attributes */
struct ibv_pd *pd; /* protection domain */
struct ibv_mr **mrs; /* memory regions */
struct ibv_cq *cq; /* completion queue */
struct ibv_qp *qp; /* queue pair */
struct ibv_comp_channel *comp_chan; /* comp. event channel */
qp_info_t loc_qp_info;
qp_info_t rem_qp_info;
uint8_t used_port; /* port of the IB device */
uint8_t *buf; /* the guest memory (with potential gaps!) */
size_t mr_cnt; /* number of memory regions */
} com_hndl_t;
static com_hndl_t com_hndl;
static struct ibv_send_wr *send_list = NULL;
static struct ibv_send_wr *send_list_last = NULL;
static size_t send_list_length = 0;
/**
* \brief Prints info of a send_wr
*
* \param id the ID of the send_wr
*/
static inline
void print_send_wr_info(uint64_t id)
{
struct ibv_send_wr *search_wr = send_list;
/* find send_wr with id */
while(search_wr) {
if (search_wr->wr_id == id) {
fprintf(stderr, "[INFO] WR_ID: %llu; LADDR: 0x%llx; RADDR: 0x%llx; SIZE: %llu\n",
search_wr->wr_id,
search_wr->sg_list->addr,
search_wr->wr.rdma.remote_addr,
search_wr->sg_list->length);
break;
}
search_wr = search_wr->next;
}
if (search_wr == NULL) {
fprintf(stderr, "[ERROR] Could not find send_wr with ID %llu\n", id);
}
}
/**
* \brief Initializes the IB communication structures
*
* \param mem_chunk_cnt the number of guest memory chunks
* \param mem_chunks the guest memory chunks to be registered with the QP
*
* This function sets up the IB communication channel. It registers the guest
* memory chunks with a new protection domain. Upon return, there is a QP in
* the INIT state, ready to be connected to the remote side.
*/
static void
init_com_hndl(size_t mem_chunk_cnt, mem_chunk_t *mem_chunks)
{
/* initialize com_hndl */
memset(&com_hndl, 0, sizeof(com_hndl));
/* the guest physical memory is the communication buffer */
com_hndl.buf = guest_mem;
com_hndl.mr_cnt = mem_chunk_cnt;
struct ibv_device **device_list = NULL;
int num_devices = 0;
bool active_port_found = false;
/* determine first available device */
if ((device_list = ibv_get_device_list(&num_devices)) == NULL) {
fprintf(stderr,
"[ERROR] Could not determine available IB devices "
"- %d (%s). Abort!\n",
errno,
strerror(errno));
exit(EXIT_FAILURE);
}
/* find device with active port */
size_t cur_dev = 0;
for (cur_dev=0; cur_dev<num_devices; ++cur_dev){
/* open the device context */
if ((com_hndl.ctx = ibv_open_device(device_list[cur_dev])) == NULL) {
fprintf(stderr,
"[ERROR] Could not open the device context "
"- %d (%s). Abort!\n",
errno,
strerror(errno));
exit(EXIT_FAILURE);
}
/* query extended device capabilities (e.g., to check for ODP support) */
struct ibv_query_device_ex_input device_ex_input;
if (ibv_query_device_ex(com_hndl.ctx, &device_ex_input, &com_hndl.dev_attr_ex) < 0) {
fprintf(stderr,
"[ERROR] Could not query extended device attributes "
"- %d (%s). Abort!\n",
errno,
strerror(errno));
exit(EXIT_FAILURE);
}
/* determine port count via normal device query (necessary for mlx_5) */
if (ibv_query_device(com_hndl.ctx, &com_hndl.dev_attr_ex.orig_attr) < 0) {
fprintf(stderr,
"[ERROR] Could not query normal device attributes "
"- %d (%s). Abort!\n",
errno,
strerror(errno));
exit(EXIT_FAILURE);
}
/* check all ports */
size_t num_ports = com_hndl.dev_attr_ex.orig_attr.phys_port_cnt;
/* IB ports are numbered starting at 1 */
for (size_t cur_port=1; cur_port<=num_ports; ++cur_port) {
/* query current port */
if (ibv_query_port(com_hndl.ctx, cur_port, &com_hndl.port_attr) < 0){
fprintf(stderr,
"[ERROR] Could not query port %u "
"- %d (%s). Abort!\n",
cur_port,
errno,
strerror(errno));
exit(EXIT_FAILURE);
}
if (com_hndl.port_attr.state == IBV_PORT_ACTIVE) {
active_port_found = true;
com_hndl.used_port = cur_port;
break;
}
}
/* close this device if no active port was found */
if (!active_port_found) {
if (ibv_close_device(com_hndl.ctx) < 0) {
fprintf(stderr,
"[ERROR] Could not close the device context "
"- %d (%s). Abort!\n",
errno,
strerror(errno));
exit(EXIT_FAILURE);
}
} else {
break;
}
}
if (!active_port_found) {
fprintf(stderr, "[ERROR] No active port found. Abort!\n");
exit(EXIT_FAILURE);
}
fprintf(stderr, "[INFO] Using device '%s' and port %u\n",
ibv_get_device_name(device_list[cur_dev]),
com_hndl.used_port);
/* allocate protection domain */
if ((com_hndl.pd = ibv_alloc_pd(com_hndl.ctx)) == NULL) {
fprintf(stderr,
"[ERROR] Could not allocate protection domain "
"- %d (%s). Abort!\n",
errno,
strerror(errno));
exit(EXIT_FAILURE);
}
/* register guest memory chunks with the protection domain */
int i = 0;
com_hndl.mrs = (struct ibv_mr**)malloc(sizeof(struct ibv_mr*)*com_hndl.mr_cnt);
int access_flags = (IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE);
if ((IB_USE_ODP) &&
(com_hndl.dev_attr_ex.odp_caps.general_caps & IBV_ODP_SUPPORT) &&
(com_hndl.dev_attr_ex.odp_caps.per_transport_caps.rc_odp_caps & IBV_ODP_SUPPORT_WRITE)) {
access_flags |= IBV_ACCESS_ON_DEMAND;
}
for (i=0; i<com_hndl.mr_cnt; ++i) {
if ((com_hndl.mrs[i] = ibv_reg_mr(com_hndl.pd,
mem_chunks[i].ptr,
mem_chunks[i].size,
access_flags)) == NULL) {
fprintf(stderr,
"[ERROR] Could not register the memory region #%d (ptr: %llx; size: %llu) "
"- %d (%s). Abort!\n",
i,
mem_chunks[i].ptr,
mem_chunks[i].size,
errno,
strerror(errno));
exit(EXIT_FAILURE);
}
fprintf(stderr, "[INFO] com_hndl.mrs[%d]->addr = 0x%llx; com_hndl->mrs[%d].length = %llu\n",
i,
com_hndl.mrs[i]->addr,
i,
com_hndl.mrs[i]->length);
}
/* create completion event channel */
if ((com_hndl.comp_chan =
ibv_create_comp_channel(com_hndl.ctx)) == NULL) {
fprintf(stderr,
"[ERROR] Could not create the completion channel "
"- %d (%s). Abort!\n",
errno,
strerror(errno));
exit(EXIT_FAILURE);
}
/* create the completion queue */
if ((com_hndl.cq = ibv_create_cq(com_hndl.ctx,
IB_CQ_ENTRIES,
NULL,
com_hndl.comp_chan,
0)) == NULL) {
fprintf(stderr,
"[ERROR] Could not create the completion queue "
"- %d (%s). Abort!\n",
errno,
strerror(errno));
exit(EXIT_FAILURE);
}
/* create send and recv queue pair and initialize it */
struct ibv_qp_init_attr init_attr = {
.send_cq = com_hndl.cq,
.recv_cq = com_hndl.cq,
.cap = {
.max_send_wr = IB_MAX_SEND_WR,
.max_recv_wr = IB_MAX_RECV_WR,
.max_send_sge = IB_MAX_SEND_SGE,
.max_recv_sge = IB_MAX_RECV_SGE,
.max_inline_data = IB_MAX_INLINE_DATA
},
.qp_type = IBV_QPT_RC,
.sq_sig_all = 0 /* we do not want a CQE for each WR */
};
if ((com_hndl.qp = ibv_create_qp(com_hndl.pd, &init_attr)) == NULL) {
fprintf(stderr,
"[ERROR] Could not create the queue pair "
"- %d (%s). Abort!\n",
errno,
strerror(errno));
exit(EXIT_FAILURE);
}
struct ibv_qp_attr attr = {
.qp_state = IBV_QPS_INIT,
.pkey_index = 0,
.port_num = com_hndl.used_port,
.qp_access_flags = (IBV_ACCESS_REMOTE_WRITE)
};
if (ibv_modify_qp(com_hndl.qp,
&attr,
IBV_QP_STATE |
IBV_QP_PKEY_INDEX |
IBV_QP_PORT |
IBV_QP_ACCESS_FLAGS) < 0) {
fprintf(stderr,
"[ERROR] Could not set QP into init state "
"- %d (%s). Abort!\n",
errno,
strerror(errno));
exit(EXIT_FAILURE);
}
/* fill in local qp_info */
com_hndl.loc_qp_info.qpn = com_hndl.qp->qp_num;
com_hndl.loc_qp_info.psn = lrand48() & 0xffffff;
com_hndl.loc_qp_info.addr = (uint64_t)com_hndl.buf;
com_hndl.loc_qp_info.lid = com_hndl.port_attr.lid;
com_hndl.loc_qp_info.keys = (uint32_t*)malloc(sizeof(uint32_t)*com_hndl.mr_cnt);
for (i=0; i<com_hndl.mr_cnt; ++i) {
com_hndl.loc_qp_info.keys[i] = com_hndl.mrs[i]->rkey;
}
}
/**
* \brief Frees IB related resources
*/
static void
destroy_com_hndl(void)
{
if (ibv_destroy_qp(com_hndl.qp) < 0) {
fprintf(stderr,
"[ERROR] Could not destroy the queue pair "
"- %d (%s). Abort!\n",
errno,
strerror(errno));
exit(EXIT_FAILURE);
}
if (ibv_destroy_cq(com_hndl.cq) < 0) {
fprintf(stderr,
"[ERROR] Could not deallocate the protection domain "
"- %d (%s). Abort!\n",
errno,
strerror(errno));
exit(EXIT_FAILURE);
}
if (ibv_destroy_comp_channel(com_hndl.comp_chan) < 0) {
fprintf(stderr,
"[ERROR] Could not destroy the completion channel "
"- %d (%s). Abort!\n",
errno,
strerror(errno));
exit(EXIT_FAILURE);
}
int i = 0;
for (i=0; i<com_hndl.mr_cnt; ++i) {
if (ibv_dereg_mr(com_hndl.mrs[i]) < 0) {
fprintf(stderr,
"[ERROR] Could not deregister MR #%d "
"- %d (%s). Abort!\n",
i,
errno,
strerror(errno));
exit(EXIT_FAILURE);
}
}
if (ibv_dealloc_pd(com_hndl.pd) < 0) {
fprintf(stderr,
"[ERROR] Could not deallocate the protection domain "
"- %d (%s). Abort!\n",
errno,
strerror(errno));
exit(EXIT_FAILURE);
}
if (ibv_close_device(com_hndl.ctx) < 0) {
fprintf(stderr,
"[ERROR] Could not close the device context "
"- %d (%s). Abort!\n",
errno,
strerror(errno));
exit(EXIT_FAILURE);
}
/* free dynamic data structures */
free(com_hndl.loc_qp_info.keys);
free(com_hndl.rem_qp_info.keys);
free(com_hndl.mrs);
com_hndl.loc_qp_info.keys = NULL;
com_hndl.rem_qp_info.keys = NULL;
com_hndl.mrs = NULL;
}
/**
* \brief Connects the QP created within init_com_hndl
*
*
* This function performs the actual connection setup between the two QPs.
*/
static void
con_com_buf(void) {
/* transition to ready-to-receive state */
struct ibv_qp_attr qp_attr = {
.qp_state = IBV_QPS_RTR,
.path_mtu = IBV_MTU_2048,
.dest_qp_num = com_hndl.rem_qp_info.qpn,
.rq_psn = com_hndl.rem_qp_info.psn,
.max_dest_rd_atomic = IB_MAX_DEST_RD_ATOMIC,
.min_rnr_timer = IB_MIN_RNR_TIMER,
.ah_attr = {
.is_global = 0,
.sl = 0,
.src_path_bits = 0,
.dlid = com_hndl.rem_qp_info.lid,
.port_num = com_hndl.used_port,
}
};
if (ibv_modify_qp(com_hndl.qp,
&qp_attr,
IBV_QP_STATE |
IBV_QP_PATH_MTU |
IBV_QP_DEST_QPN |
IBV_QP_RQ_PSN |
IBV_QP_MAX_DEST_RD_ATOMIC |
IBV_QP_MIN_RNR_TIMER |
IBV_QP_AV)) {
fprintf(stderr,
"[ERROR] Could not put QP into RTR state"
"- %d (%s). Abort!\n",
errno,
strerror(errno));
exit(errno);
}
/* transition to ready-to-send state */
qp_attr.qp_state = IBV_QPS_RTS;
qp_attr.timeout = 14;
qp_attr.retry_cnt = 7;
qp_attr.rnr_retry = 7; /* infinite retries on RNR NACK */
qp_attr.sq_psn = com_hndl.loc_qp_info.psn;
qp_attr.max_rd_atomic = 1;
if (ibv_modify_qp(com_hndl.qp, &qp_attr,
IBV_QP_STATE |
IBV_QP_TIMEOUT |
IBV_QP_RETRY_CNT |
IBV_QP_RNR_RETRY |
IBV_QP_SQ_PSN |
IBV_QP_MAX_QP_RD_ATOMIC)) {
fprintf(stderr,
"[ERROR] Could not put QP into RTS state"
"- %d (%s). Abort!\n",
errno,
strerror(errno));
exit(errno);
}
}
/**
* \brief Exchanges the local QP information with the remote side
*
* \param server true on the migration destination, i.e., receive before send
*/
static void
exchange_qp_info(bool server)
{
size_t keys_size = sizeof(uint32_t)*com_hndl.mr_cnt;
int res = 0;
if (server) {
/* general QP info */
res = recv_data(&com_hndl.rem_qp_info, sizeof(qp_info_t));
res = send_data(&com_hndl.loc_qp_info, sizeof(qp_info_t));
/* remote keys */
com_hndl.rem_qp_info.keys = (uint32_t*)malloc(keys_size);
res = recv_data(com_hndl.rem_qp_info.keys, keys_size);
res = send_data(com_hndl.loc_qp_info.keys, keys_size);
} else {
/* general QP info */
res = send_data(&com_hndl.loc_qp_info, sizeof(qp_info_t));
res = recv_data(&com_hndl.rem_qp_info, sizeof(qp_info_t));
/* remote keys */
com_hndl.rem_qp_info.keys = (uint32_t*)malloc(keys_size);
res = send_data(com_hndl.loc_qp_info.keys, keys_size);
res = recv_data(com_hndl.rem_qp_info.keys, keys_size);
}
fprintf(stderr, "[INFO] loc_qp_info (QPN: %lu; LID: %lu; PSN: %lu; ADDR: 0x%x ",
com_hndl.loc_qp_info.qpn,
com_hndl.loc_qp_info.lid,
com_hndl.loc_qp_info.psn,
com_hndl.loc_qp_info.addr);
int i = 0;
for (i=0; i<com_hndl.mr_cnt; ++i) {
fprintf(stderr, "KEY[%d]: %lu; ", i, com_hndl.loc_qp_info.keys[i]);
}
printf("\b\b)\n");
fprintf(stderr, "[INFO] rem_qp_info (QPN: %lu; LID: %lu; PSN: %lu; ADDR: 0x%x ",
com_hndl.rem_qp_info.qpn,
com_hndl.rem_qp_info.lid,
com_hndl.rem_qp_info.psn,
com_hndl.rem_qp_info.addr);
for (i=0; i<com_hndl.mr_cnt; ++i) {
fprintf(stderr, "KEY[%d]: %lu; ", i, com_hndl.rem_qp_info.keys[i]);
}
printf("\b\b)\n");
}
/**
* \brief Prepares an 'ibv_send_wr'
*
* This function prepares an 'ibv_send_wr' structure that is prepared for the
* transmission of a single memory page using the IBV_WR_RDMA_WRITE verb.
*/
static inline struct ibv_send_wr *
prepare_send_list_elem(void)
{
/* create work request */
struct ibv_send_wr *send_wr = (struct ibv_send_wr*)calloc(1, sizeof(struct ibv_send_wr));
struct ibv_sge *sge = (struct ibv_sge*)calloc(1, sizeof(struct ibv_sge));
/* basic work request configuration */
send_wr->next = NULL;
send_wr->sg_list = sge;
send_wr->num_sge = 1;
send_wr->wr_id = ++cur_wr_id;
send_wr->opcode = IBV_WR_RDMA_WRITE;
return send_wr;
}
/**
* \brief Creates an 'ibv_send_wr' and appends it to the send_list
*
* \param addr the page table entry of the memory page
* \param addr_size the size of the page table entry
* \param page the buffer to be sent in this WR
* \param page_size the size of the buffer
*
* This function creates an 'ibv_send_wr' structure and appends this to the
* global send_list. It sets the source/destination information and sets the
* IBV_SEND_SIGNALED flag as appropriate.
*/
static void
create_send_list_entry (void *addr, size_t addr_size, void *page, size_t page_size)
{
/* create work request */
struct ibv_send_wr *send_wr = prepare_send_list_elem();
/* configure source buffer */
int i = 0;
for (i=0; i<com_hndl.mr_cnt; ++i) {
if (((uint64_t)page >= (uint64_t)com_hndl.mrs[i]->addr) &&
((uint64_t)page < ((uint64_t)com_hndl.mrs[i]->addr + (uint64_t)com_hndl.mrs[i]->length))) {
send_wr->sg_list->addr = (uintptr_t)page;
send_wr->sg_list->length = page_size;
send_wr->sg_list->lkey = com_hndl.mrs[i]->lkey;
send_wr->wr.rdma.rkey = com_hndl.rem_qp_info.keys[i];
break;
}
}
/* did we find the correct memory region? */
if (i == com_hndl.mr_cnt) {
fprintf(stderr, "[ERROR] Could not find a valid MR for address 0x%llx!\n", page);
return;
}
/* configure destination buffer */
if (addr) {
send_wr->wr.rdma.remote_addr = com_hndl.rem_qp_info.addr + determine_dest_offset(*(size_t*)addr);
} else {
send_wr->wr.rdma.remote_addr = com_hndl.rem_qp_info.addr;
}
/* append the work request to the send list */
if (send_list == NULL) {
send_list = send_list_last = send_wr;
} else {
send_list_last->next = send_wr;
send_list_last = send_list_last->next;
}
/* we have to request a CQE every max_qp_wr WRs to avoid send queue overflows */
if ((++send_list_length%com_hndl.dev_attr_ex.orig_attr.max_qp_wr) == 0) {
send_list_last->send_flags = IBV_SEND_SIGNALED;
}
}
/**
* \brief Prepares a send_list containing all memory defined by com_hndl.mrs
*
* This function creates as many send_wr items as required to cover all
* com_hndl.mrs in accordance with the maximum message size that can be
* transmitted per send_wr (com_hndl.port_attr.max_msg_sz).
*/
static inline
void enqueue_all_mrs(void)
{
uint64_t max_msg_sz = com_hndl.port_attr.max_msg_sz;
int i = 0;
/* send all MRs */
for (i=0; i<com_hndl.mr_cnt; ++i) {
uint64_t cur_mr_length = com_hndl.mrs[i]->length;
/* split the MR if it exceeds the max_msg_sz */
size_t cur_chunk = 0, max_chunks = cur_mr_length/max_msg_sz;
for (; cur_chunk < max_chunks; ++cur_chunk) {
size_t cur_offset = cur_chunk*max_msg_sz;
size_t cur_glob_offset = cur_offset + (uint64_t)com_hndl.mrs[i]->addr - (uint64_t)guest_mem;
create_send_list_entry((void*)&cur_glob_offset, 0, (void*)((uint64_t)com_hndl.mrs[i]->addr+cur_offset), max_msg_sz);
}
/* do we have a remainder? */
uint64_t remainder = cur_mr_length%max_msg_sz;
if (remainder) {
size_t cur_offset = cur_mr_length-remainder;
size_t cur_glob_offset = cur_offset + (uint64_t)com_hndl.mrs[i]->addr - (uint64_t)guest_mem;
create_send_list_entry((void*)&cur_glob_offset, 0, (void*)((uint64_t)com_hndl.mrs[i]->addr+cur_offset), remainder);
}
}
}
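/* Worked example: with max_msg_sz = 1 GiB, a 2.5 GiB MR is enqueued as two
 * 1 GiB send_wrs plus a 0.5 GiB remainder send_wr. */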
/**
* \brief Sends the guest memory to the destination
*
* \param mode MIG_MODE_COMPLETE_DUMP sends the complete memory and
* MIG_MODE_INCREMENTAL_DUMP only the mapped guest pages
* \param final_dump true for the last dump of a migration (the channel is closed afterwards)
* \param mem_chunk_cnt the number of guest memory chunks
* \param mem_chunks the guest memory chunks to be transferred
*/
void send_guest_mem(mig_mode_t mode, bool final_dump, size_t mem_chunk_cnt, mem_chunk_t *mem_chunks)
{
int res = 0, i = 0;
static bool ib_initialized = false;
/* prepare IB channel */
if (!ib_initialized) {
init_com_hndl(mem_chunk_cnt, mem_chunks);
exchange_qp_info(false);
con_com_buf();
ib_initialized = true;
}
/* determine migration mode */
switch (mode) {
case MIG_MODE_COMPLETE_DUMP:
enqueue_all_mrs();
break;
case MIG_MODE_INCREMENTAL_DUMP:
/* iterate guest page tables */
determine_dirty_pages(create_send_list_entry);
break;
default:
fprintf(stderr, "[ERROR] Unknown migration mode. Abort!\n");
exit(EXIT_FAILURE);
}
/* create a dummy WR if there is nothing to send */
if (send_list_length == 0)
create_send_list_entry(NULL, 0, NULL, 0);
/* we have to wait for the last WR before informing dest */
if ((mode == MIG_MODE_COMPLETE_DUMP) || final_dump) {
send_list_last->wr_id = IB_WR_WRITE_LAST_PAGE_ID;
send_list_last->opcode = IBV_WR_RDMA_WRITE_WITH_IMM;
send_list_last->send_flags = IBV_SEND_SIGNALED | IBV_SEND_SOLICITED;
send_list_last->imm_data = htonl(0x1);
} else {
send_list_last->wr_id = IB_WR_WRITE_LAST_PAGE_ID;
send_list_last->send_flags = IBV_SEND_SIGNALED;
}
printf("DEBUG: Send list length %d\n", send_list_length);
/* we have to call ibv_post_send() as long as 'send_list' contains elements */
struct ibv_wc wc;
struct ibv_send_wr *remaining_send_wr = NULL;
do {
/* send data */
remaining_send_wr = NULL;
if (ibv_post_send(com_hndl.qp, send_list, &remaining_send_wr) && (errno != ENOMEM)) {
fprintf(stderr,
"[ERROR] Could not post send"
"- %d (%s). Abort!\n",
errno,
strerror(errno));
exit(EXIT_FAILURE);
}
/* wait for send WRs if CQ is full */
do {
if ((res = ibv_poll_cq(com_hndl.cq, 1, &wc)) < 0) {
fprintf(stderr,
"[ERROR] Could not poll on CQ"
"- %d (%s). Abort!\n",
errno,
strerror(errno));
exit(EXIT_FAILURE);
}
} while (res < 1);
if (wc.status != IBV_WC_SUCCESS) {
fprintf(stderr,
"[ERROR] WR failed status %s (%d) for wr_id %llu\n",
ibv_wc_status_str(wc.status),
wc.status,
(unsigned long long)wc.wr_id);
print_send_wr_info(wc.wr_id);
}
send_list = remaining_send_wr;
} while (remaining_send_wr);
/* ensure that we receive the CQE for the last page */
if (wc.wr_id != IB_WR_WRITE_LAST_PAGE_ID) {
fprintf(stderr,
"[ERROR] WR failed status %s (%d) for wr_id %d\n",
ibv_wc_status_str(wc.status),
wc.status,
(int)wc.wr_id);
}
/* cleanup send_list */
struct ibv_send_wr *cur_send_wr = send_list;
struct ibv_send_wr *tmp_send_wr = NULL;
while (cur_send_wr != NULL) {
free(cur_send_wr->sg_list);
tmp_send_wr = cur_send_wr;
cur_send_wr = cur_send_wr->next;
free(tmp_send_wr);
}
send_list_length = 0;
/* do not close the channel in a pre-dump */
if (!final_dump)
return;
/* free IB-related resources */
destroy_com_hndl();
ib_initialized = false;
fprintf(stderr, "Guest memory sent!\n");
}
/**
* \brief Receives the guest memory from the source
*
* The receiver participates in the IB connection setup and waits for the
* 'solicited' event sent with the last WR issued by the sender.
*/
void recv_guest_mem(size_t mem_chunk_cnt, mem_chunk_t *mem_chunks)
{
int res = 0;
/* prepare IB channel */
init_com_hndl(mem_chunk_cnt, mem_chunks);
exchange_qp_info(true);
con_com_buf();
/* request notification on the event channel */
if (ibv_req_notify_cq(com_hndl.cq, 1) < 0) {
fprintf(stderr,
"[ERROR] Could request notify for completion queue "
"- %d (%s). Abort!\n",
errno,
strerror(errno));
exit(EXIT_FAILURE);
}
/* post recv matching IBV_RDMA_WRITE_WITH_IMM */
struct ibv_cq *ev_cq;
void *ev_ctx;
struct ibv_sge sg;
struct ibv_recv_wr recv_wr;
struct ibv_recv_wr *bad_wr;
uint32_t recv_buf = 0;
memset(&sg, 0, sizeof(sg));
sg.addr = (uintptr_t)&recv_buf;
sg.length = sizeof(recv_buf);
sg.lkey = com_hndl.mrs[0]->lkey;
memset(&recv_wr, 0, sizeof(recv_wr));
recv_wr.wr_id = 0;
recv_wr.sg_list = &sg;
recv_wr.num_sge = 1;
if (ibv_post_recv(com_hndl.qp, &recv_wr, &bad_wr) < 0) {
fprintf(stderr,
"[ERROR] Could post recv - %d (%s). Abort!\n",
errno,
strerror(errno));
exit(EXIT_FAILURE);
}
/* wait for requested event */
if (ibv_get_cq_event(com_hndl.comp_chan, &ev_cq, &ev_ctx) < 0) {
fprintf(stderr,
"[ERROR] Could get event from completion channel "
"- %d (%s). Abort!\n",
errno,
strerror(errno));
exit(EXIT_FAILURE);
}
/* acknowledge the event */
ibv_ack_cq_events(com_hndl.cq, 1);
/* free IB-related resources */
destroy_com_hndl();
fprintf(stderr, "Guest memory received!\n");
}
#endif /* __RDMA_MIGRATION__ */

View File

@ -1,277 +0,0 @@
/*
* Copyright (c) 2018, Simon Pickartz, RWTH Aachen University
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef __x86_64__
#define _GNU_SOURCE
#include <arpa/inet.h>
#include <errno.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "uhyve-migration.h"
#include "uhyve.h"
static struct sockaddr_in mig_server;
static int com_sock = 0;
static int listen_sock = 0;
static mig_type_t mig_type = MIG_TYPE_COLD;
/**
* \brief Returns the configured migration type
*/
mig_type_t
get_migration_type(void)
{
return mig_type;
}
/**
* \brief Sets the migration type
*
* \param mig_type_str A string defining the migration type
*/
void
set_migration_type(const char *mig_type_str)
{
if (mig_type_str == NULL)
return;
int i;
bool found_type = false;
for (i=0; i<sizeof(mig_type_conv)/sizeof(mig_type_conv[0]); ++i) {
if (!strcmp (mig_type_str, mig_type_conv[i].str)) {
mig_type = mig_type_conv[i].mig_type;
found_type = true;
}
}
/* we do not know this migration type */
if (!found_type) {
fprintf(stderr, "ERROR: Migration type '%s' not supported. Fallback to 'cold'\n", mig_type_str);
}
return;
}
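/* e.g. set_migration_type("live") selects MIG_TYPE_LIVE; unknown strings keep
 * the 'cold' default (see mig_type_conv in uhyve-migration.h) */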
/**
* \brief Closes a socket
*
* \param sock the socket to be closed
*/
static inline void
close_sock(int sock)
{
if (close(sock) < 0) {
fprintf(stderr,
"ERROR: Could not close the communication socket "
"- %d (%s). Abort!\n",
errno,
strerror(errno));
exit(EXIT_FAILURE);
}
}
/**
* \brief Set the destination node for a migration
*
* \param ip_str a string containing the IPv4 addr of the destination
* \param port the migration port
*/
void set_migration_target(const char *ip_str, int port)
{
/* determine server address */
memset(&mig_server, 0, sizeof(mig_server));
mig_server.sin_family = AF_INET;
mig_server.sin_port = htons(port);
int res = inet_pton(AF_INET, ip_str, &mig_server.sin_addr);
if (res == 0) {
fprintf(stderr, "'%s' is not a valid server address\n", ip_str);
} else if (res < 0) {
fprintf(stderr, "An error occured while retrieving the migration server address\n");
perror("inet_pton");
}
}
/**
* \brief Connects to a migration target via TCP/IP
*/
void connect_to_server(void)
{
int res = 0;
char buf[INET_ADDRSTRLEN];
if (inet_ntop(AF_INET, (const void*)&mig_server.sin_addr, buf, INET_ADDRSTRLEN) == NULL) {
perror("inet_ntop");
exit(EXIT_FAILURE);
}
if((com_sock = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
perror("socket");
exit(EXIT_FAILURE);
}
fprintf(stderr, "Trying to connect to migration server: %s\n", buf);
if (connect(com_sock, (struct sockaddr *)&mig_server, sizeof(mig_server)) < 0) {
perror("connect");
exit(EXIT_FAILURE);
}
fprintf(stderr, "Successfully connected to: %s\n", buf);
}
/**
* \brief Waits for a migration source to connect via TCP/IP
*
* \param listen_portno the port of the migration socket
*/
void wait_for_client(uint16_t listen_portno)
{
socklen_t client_addr_len = 0;
struct sockaddr_in serv_addr;
struct sockaddr_in client_addr;
/* open migration socket */
fprintf(stderr, "Waiting for incoming migration request ...\n");
listen_sock = socket(AF_INET, SOCK_STREAM, 0);
memset(&serv_addr, 0, sizeof(serv_addr));
serv_addr.sin_family = AF_INET;
serv_addr.sin_addr.s_addr = htonl(INADDR_ANY);
serv_addr.sin_port = htons(listen_portno);
if (bind(listen_sock, (struct sockaddr*)&serv_addr, sizeof(serv_addr)) < 0) {
perror("bind");
exit(EXIT_FAILURE);
}
if (listen(listen_sock, 10) < 0) {
perror("listen");
exit(EXIT_FAILURE);
}
client_addr_len = sizeof(struct sockaddr_in);
if ((com_sock = accept(listen_sock, (struct sockaddr*)&client_addr, &client_addr_len)) < 0) {
perror("accept");
exit(EXIT_FAILURE);
}
char buf[INET_ADDRSTRLEN];
if (inet_ntop(AF_INET, (const void*)&client_addr.sin_addr, buf, INET_ADDRSTRLEN) == NULL) {
perror("inet_ntop");
exit(EXIT_FAILURE);
}
fprintf(stderr, "Incomming migration from: %s\n", buf);
}
/**
* \brief Receives data from the migration socket
*
* \param buffer the destination buffer
* \param length the buffer size
*/
int recv_data(void *buffer, size_t length)
{
size_t bytes_received = 0;
while(bytes_received < length) {
ssize_t res = recv(
com_sock,
(void*)((uint64_t)buffer+bytes_received),
length-bytes_received,
0);
/* bail out on errors or an orderly shutdown by the peer */
if (res <= 0)
return -1;
bytes_received += res;
}
return bytes_received;
}
/**
* \brief Sends data via the migration socket
*
* \param buffer the source buffer
* \param length the buffer size
*/
int send_data(void *buffer, size_t length)
{
size_t bytes_sent = 0;
while(bytes_sent < length) {
ssize_t res = send(
com_sock,
(void*)((uint64_t)buffer+bytes_sent),
length-bytes_sent,
0);
/* bail out on errors instead of corrupting the byte count */
if (res < 0)
return -1;
bytes_sent += res;
}
return bytes_sent;
}
/**
* \brief Closes the TCP connection
*/
void close_migration_channel(void)
{
if (listen_sock) {
close_sock(listen_sock);
}
close_sock(com_sock);
}
#ifndef __RDMA_MIGRATION__
void send_guest_mem(mig_mode_t mode, bool final_dump, size_t mem_chunk_cnt, mem_chunk_t *mem_chunks)
{
/* determine migration mode */
switch (mode) {
case MIG_MODE_INCREMENTAL_DUMP:
fprintf(stderr, "ERROR: Incremental dumps currently not supported via TCP/IP. Fallback to complete dump!\n");
case MIG_MODE_COMPLETE_DUMP:
send_data(guest_mem, guest_size);
break;
default:
fprintf(stderr, "ERROR: Unknown migration mode. Abort!\n");
exit(EXIT_FAILURE);
}
fprintf(stderr, "Guest memory sent!\n");
}
void recv_guest_mem(size_t mem_chunk_cnt, mem_chunk_t *mem_chunks)
{
recv_data(guest_mem, guest_size);
fprintf(stderr, "Guest memory received!\n");
}
#endif /* __RDMA_MIGRATION__ */
#else
/* dummy implementation for aarch64 */
void set_migration_target(const char *ip_str, int port)
{
}
void set_migration_type(const char *mig_type_str)
{
}
#endif

View File

@ -1,91 +0,0 @@
#ifndef __UHYVE_MIGRATION_H__
/*
* Copyright (c) 2018, Simon Pickartz, RWTH Aachen University
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
* @author Simon Pickartz
* @file tools/uhyve-migration.h
* @brief Migration-related functions
*/
#define __UHYVE_MIGRATION_H__
#include <stdbool.h>
extern size_t guest_size;
extern uint8_t* guest_mem;
#define MIGRATION_PORT 1337
typedef enum {
MIG_MODE_COMPLETE_DUMP = 1,
MIG_MODE_INCREMENTAL_DUMP,
} mig_mode_t;
typedef enum {
MIG_TYPE_COLD = 0,
MIG_TYPE_LIVE,
} mig_type_t;
const static struct {
mig_type_t mig_type;
const char *str;
} mig_type_conv [] = {
{MIG_TYPE_COLD, "cold"},
{MIG_TYPE_LIVE, "live"},
};
typedef struct _mem_chunk {
size_t size;
uint8_t *ptr;
} mem_chunk_t;
typedef struct _migration_metadata {
uint32_t ncores;
size_t guest_size;
uint32_t no_checkpoint;
uint64_t elf_entry;
bool full_checkpoint;
} migration_metadata_t;
void set_migration_type(const char *mig_type_str);
mig_type_t get_migration_type(void);
void wait_for_client(uint16_t listen_portno);
void set_migration_target(const char *ip_str, int port);
void connect_to_server(void);
void close_migration_channel(void);
int recv_data(void *buffer, size_t length);
int send_data(void *buffer, size_t length);
void send_guest_mem(mig_mode_t mode, bool final_dump, size_t mem_chunk_cnt, mem_chunk_t *mem_chunks);
void recv_guest_mem(size_t mem_chunk_cnt, mem_chunk_t *mem_chunks);
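/*
 * Typical call sequence (a sketch inferred from the prototypes above):
 *   source:      set_migration_target(ip_str, MIGRATION_PORT);
 *                connect_to_server();
 *                send_guest_mem(MIG_MODE_COMPLETE_DUMP, true, cnt, chunks);
 *                close_migration_channel();
 *   destination: wait_for_client(MIGRATION_PORT);
 *                recv_guest_mem(cnt, chunks);
 *                close_migration_channel();
 */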
#endif /* __UHYVE_MIGRATION_H__ */

View File

@ -1,189 +0,0 @@
/* Copyright (c) 2015, IBM
* Author(s): Dan Williams <djwillia@us.ibm.com>
* Ricardo Koller <kollerr@us.ibm.com>
* Copyright (c) 2017, RWTH Aachen University
* Author(s): Tim van de Kamp <tim.van.de.kamp@rwth-aachen.de>
*
* Permission to use, copy, modify, and/or distribute this software
* for any purpose with or without fee is hereby granted, provided
* that the above copyright notice and this permission notice appear
* in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
* WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
* AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
* NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/* We used several existing projects as guides
* kvmtest.c: http://lwn.net/Articles/658512/
* lkvm: http://github.com/clearlinux/kvmtool
*/
/*
* 15.1.2017: extend original version (https://github.com/Solo5/solo5)
* for HermitCore
*/
#include "uhyve-net.h"
#include <ctype.h>
/* TODO: create an array or similar structure to support more than one netif */
static uhyve_netinfo_t netinfo;
//-------------------------------------- ATTACH LINUX TAP -----------------------------------------//
int attach_linux_tap(const char *dev)
{
struct ifreq ifr;
int fd, err;
// @<number> indicates a pre-existing open fd onto the correct device.
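// e.g. dev == "@7" attaches to the already-open file descriptor 7.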
if (dev[0] == '@') {
fd = atoi(&dev[1]);
if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1)
return -1;
return fd;
}
fd = open("/dev/net/tun", O_RDWR | O_NONBLOCK);
// Initialize interface request for TAP interface
memset(&ifr, 0x00, sizeof(ifr));
ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
/* leave room for the terminating NUL byte */
if (strlen(dev) >= IFNAMSIZ) {
errno = EINVAL;
return -1;
}
strncpy(ifr.ifr_name, dev, IFNAMSIZ);
// Try to create OR attach to an existing device; the Linux API has no way
// to differentiate between the two. A tap device can be created beforehand
// with these commands:
//
// sudo ip tuntap add <devname> mode tap user <user>
// sudo ip addr add 10.0.5.1/24 broadcast 10.0.5.255 dev <devname>
// sudo ip link set dev <devname> up
//
if (ioctl(fd, TUNSETIFF, (void *)&ifr) < 0) {
err = errno;
close(fd);
errno = err;
return -1;
}
// If we got back a different device than the one requested, e.g. because
// the caller mistakenly passed in '%d' (yes, that is really part of the
// Linux API), then fail.
if (strncmp(ifr.ifr_name, dev, IFNAMSIZ) != 0) {
close(fd);
errno = ENODEV;
return -1;
}
// Attempt a zero-sized write to the device. If the device was freshly created
// (as opposed to attached to an existing one) this will fail with EIO. Ignore
// any other error return since that may indicate the device is up.
//
// If this check produces a false positive, the caller's later writes to fd will
// fail with EIO, which is not great, but at least we tried.
char buf[1] = { 0 };
if (write(fd, buf, 0) == -1 && errno == EIO) {
close(fd);
errno = ENODEV;
return -1;
}
return fd;
}
//---------------------------------- GET MAC ----------------------------------------------//
char* uhyve_get_mac(void)
{
return netinfo.mac_str;
}
//---------------------------------- SET MAC ----------------------------------------------//
int uhyve_set_mac(void)
{
int mac_is_set = 0;
uint8_t guest_mac[6];
char* str = getenv("HERMIT_NETIF_MAC");
if (str)
{
const char *macptr = str;
const char *v_macptr = macptr;
// check that str is a valid MAC address
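// e.g. "aa:bb:cc:dd:ee:ff": i counts the 12 hex digits, s the 5 colons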
int i = 0;
int s = 0;
while(*v_macptr) {
if(isxdigit(*v_macptr)) {
i++;
} else if (*v_macptr == ':') {
if (i / 2 - 1 != s++)
break;
} else {
s = -1;
}
v_macptr++;
}
if (i != 12 || s != 5) {
warnx("Malformed mac address: %s\n", macptr);
} else {
snprintf(netinfo.mac_str, sizeof(netinfo.mac_str), "%s", macptr);
mac_is_set = 1;
}
}
if (!mac_is_set) {
int rfd = open("/dev/urandom", O_RDONLY);
if(rfd == -1)
err(1, "Could not open /dev/urandom\n");
int ret;
ret = read(rfd, guest_mac, sizeof(guest_mac));
// compare the number of bytes read with the size of guest_mac
assert(ret == sizeof(guest_mac));
close(rfd);
guest_mac[0] &= 0xfe; // clear the multicast bit and set the locally administered
guest_mac[0] |= 0x02; // bit: such addresses cannot conflict with public devices
// save the MAC address in the netinfo
snprintf(netinfo.mac_str, sizeof(netinfo.mac_str),
"%02x:%02x:%02x:%02x:%02x:%02x",
guest_mac[0], guest_mac[1], guest_mac[2],
guest_mac[3], guest_mac[4], guest_mac[5]);
}
return 0;
}
//-------------------------------------- SETUP NETWORK ---------------------------------------------//
int uhyve_net_init(const char *netif)
{
if (netif == NULL) {
err(1, "ERROR: no netif defined\n");
return -1;
}
// attaching netif
netfd = attach_linux_tap(netif);
if (netfd < 0) {
err(1, "Could not attach interface: %s\n", netif);
exit(1);
}
uhyve_set_mac();
return netfd;
}

View File

@ -1,60 +0,0 @@
#ifndef __UHYVE_NET_H__
#define __UHYVE_NET_H__
#include <linux/kvm.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <errno.h>
#include <unistd.h>
#include <sys/select.h>
#include <sys/stat.h>
/* network interface */
#include <sys/socket.h>
#include <linux/if.h>
#include <linux/if_tun.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <err.h>
extern int netfd;
// UHYVE_PORT_NETINFO
typedef struct {
/* OUT */
char mac_str[18];
} __attribute__((packed)) uhyve_netinfo_t;
// UHYVE_PORT_NETWRITE
typedef struct {
/* IN */
const void* data;
size_t len;
/* OUT */
int ret;
} __attribute__((packed)) uhyve_netwrite_t;
// UHYVE_PORT_NETREAD
typedef struct {
/* IN */
void* data;
/* IN / OUT */
size_t len;
/* OUT */
int ret;
} __attribute__((packed)) uhyve_netread_t;
// UHYVE_PORT_NETSTAT
typedef struct {
/* IN */
int status;
} __attribute__((packed)) uhyve_netstat_t;
int uhyve_net_init(const char *hermit_netif);
char* uhyve_get_mac(void);
#endif

View File

@ -1,55 +0,0 @@
/* Copyright (c) 2017, RWTH Aachen University
* Author(s): Daniel Krebs <github@daniel-krebs.net>
*
* Permission to use, copy, modify, and/or distribute this software
* for any purpose with or without fee is hereby granted, provided
* that the above copyright notice and this permission notice appear
* in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
* WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
* AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
* NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef UHYVE_SYSCALLS_H
#define UHYVE_SYSCALLS_H
#include <unistd.h>
#include <stddef.h>
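/* Argument layouts of the uhyve I/O hypercalls. The structures are packed so
 * that guest and host agree on the exact byte layout. */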
typedef struct {
int fd;
const char* buf;
size_t len;
} __attribute__((packed)) uhyve_write_t;
typedef struct {
const char* name;
int flags;
int mode;
int ret;
} __attribute__((packed)) uhyve_open_t;
typedef struct {
int fd;
int ret;
} __attribute__((packed)) uhyve_close_t;
typedef struct {
int fd;
char* buf;
size_t len;
ssize_t ret;
} __attribute__((packed)) uhyve_read_t;
typedef struct {
int fd;
off_t offset;
int whence;
} __attribute__((packed)) uhyve_lseek_t;
#endif // UHYVE_SYSCALLS_H

File diff suppressed because it is too large Load Diff

View File

@ -1,98 +0,0 @@
#ifndef __UHYVE_CPU_H__
#define __UHYVE_CPU_H__
#ifndef _BITUL
#ifdef __ASSEMBLY__
#define _AC(X,Y) X
#define _AT(T,X) X
#else
#define __AC(X,Y) (X##Y)
#define _AC(X,Y) __AC(X,Y)
#define _AT(T,X) ((T)(X))
#endif
#define _BITUL(x) (_AC(1,UL) << (x))
#define _BITULL(x) (_AC(1,ULL) << (x))
#endif
/*
* EFLAGS bits
*/
#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */
/*
* Basic CPU control in CR0
*/
#define X86_CR0_PE_BIT 0 /* Protection Enable */
#define X86_CR0_PE _BITUL(X86_CR0_PE_BIT)
#define X86_CR0_PG_BIT 31 /* Paging */
#define X86_CR0_PG _BITUL(X86_CR0_PG_BIT)
/*
* Intel CPU features in CR4
*/
#define X86_CR4_PAE_BIT 5 /* enable physical address extensions */
#define X86_CR4_PAE _BITUL(X86_CR4_PAE_BIT)
/*
* Intel long mode page directory/table entries
*/
#define X86_PDPT_P_BIT 0 /* Present */
#define X86_PDPT_P _BITUL(X86_PDPT_P_BIT)
#define X86_PDPT_RW_BIT 1 /* Writable */
#define X86_PDPT_RW _BITUL(X86_PDPT_RW_BIT)
#define X86_PDPT_PS_BIT 7 /* Page size */
#define X86_PDPT_PS _BITUL(X86_PDPT_PS_BIT)
/*
* GDT and KVM segment manipulation
*/
#define GDT_DESC_OFFSET(n) ((n) * 0x8)
#define GDT_GET_BASE(x) ( \
(((x) & 0xFF00000000000000) >> 32) | \
(((x) & 0x000000FF00000000) >> 16) | \
(((x) & 0x00000000FFFF0000) >> 16))
#define GDT_GET_LIMIT(x) (__u32)( \
(((x) & 0x000F000000000000) >> 32) | \
(((x) & 0x000000000000FFFF)))
/* Constructor for a conventional segment GDT (or LDT) entry */
/* This is a macro so it can be used in initializers */
#define GDT_ENTRY(flags, base, limit) \
((((base) & _AC(0xff000000, ULL)) << (56-24)) | \
(((flags) & _AC(0x0000f0ff, ULL)) << 40) | \
(((limit) & _AC(0x000f0000, ULL)) << (48-16)) | \
(((base) & _AC(0x00ffffff, ULL)) << 16) | \
(((limit) & _AC(0x0000ffff, ULL))))
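/* Example (hypothetical values): GDT_ENTRY(0xa09b, 0, 0xfffff) encodes a flat
 * 64-bit code segment: present, DPL 0, code type 0xb, with the L and G bits
 * set. */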
#define GDT_GET_G(x) (__u8)(((x) & 0x0080000000000000) >> 55)
#define GDT_GET_DB(x) (__u8)(((x) & 0x0040000000000000) >> 54)
#define GDT_GET_L(x) (__u8)(((x) & 0x0020000000000000) >> 53)
#define GDT_GET_AVL(x) (__u8)(((x) & 0x0010000000000000) >> 52)
#define GDT_GET_P(x) (__u8)(((x) & 0x0000800000000000) >> 47)
#define GDT_GET_DPL(x) (__u8)(((x) & 0x0000600000000000) >> 45)
#define GDT_GET_S(x) (__u8)(((x) & 0x0000100000000000) >> 44)
#define GDT_GET_TYPE(x)(__u8)(((x) & 0x00000F0000000000) >> 40)
#define GDT_TO_KVM_SEGMENT(seg, gdt_table, sel) \
do { \
__u64 gdt_ent = gdt_table[sel]; \
seg.base = GDT_GET_BASE(gdt_ent); \
seg.limit = GDT_GET_LIMIT(gdt_ent); \
seg.selector = sel * 8; \
seg.type = GDT_GET_TYPE(gdt_ent); \
seg.present = GDT_GET_P(gdt_ent); \
seg.dpl = GDT_GET_DPL(gdt_ent); \
seg.db = GDT_GET_DB(gdt_ent); \
seg.s = GDT_GET_S(gdt_ent); \
seg.l = GDT_GET_L(gdt_ent); \
seg.g = GDT_GET_G(gdt_ent); \
seg.avl = GDT_GET_AVL(gdt_ent); \
} while (0)
#endif

View File

@ -1,813 +0,0 @@
/* Copyright (c) 2015, IBM
* Author(s): Dan Williams <djwillia@us.ibm.com>
* Ricardo Koller <kollerr@us.ibm.com>
* Copyright (c) 2017, RWTH Aachen University
* Author(s): Stefan Lankes <slankes@eonerc.rwth-aachen.de>
*
* Permission to use, copy, modify, and/or distribute this software
* for any purpose with or without fee is hereby granted, provided
* that the above copyright notice and this permission notice appear
* in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
* WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
* AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
* NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/* We used several existing projects as guides
* kvmtest.c: http://lwn.net/Articles/658512/
* Solo5: https://github.com/Solo5/solo5
*/
/*
* 15.1.2017: extend original version (https://github.com/Solo5/solo5)
* for HermitCore
* 25.2.2017: add SMP support to enable more than one core
* 24.4.2017: add checkpoint/restore support,
* remove memory limit
*/
#define _GNU_SOURCE
#include <arpa/inet.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include <errno.h>
#include <fcntl.h>
#include <sched.h>
#include <signal.h>
#include <limits.h>
#include <pthread.h>
#include <semaphore.h>
#include <elf.h>
#include <err.h>
#include <poll.h>
#include <sys/wait.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/eventfd.h>
#include <linux/const.h>
#include <linux/kvm.h>
#include "uhyve.h"
#include "uhyve-syscalls.h"
#include "uhyve-migration.h"
#include "uhyve-net.h"
#include "uhyve-gdb.h"
#include "proxy.h"
static bool restart = false;
static bool migration = false;
static pthread_t net_thread;
static int* vcpu_fds = NULL;
static pthread_mutex_t kvm_lock = PTHREAD_MUTEX_INITIALIZER;
extern bool verbose;
static char* guest_path = NULL;
static bool uhyve_gdb_enabled = false;
size_t guest_size = 0x20000000ULL;
bool full_checkpoint = false;
pthread_barrier_t barrier;
pthread_barrier_t migration_barrier;
pthread_t* vcpu_threads = NULL;
uint8_t* klog = NULL;
uint8_t* guest_mem = NULL;
uint32_t no_checkpoint = 0;
uint32_t ncores = 1;
uint64_t elf_entry;
int kvm = -1, vmfd = -1, netfd = -1, efd = -1;
uint8_t* mboot = NULL;
__thread struct kvm_run *run = NULL;
__thread int vcpufd = -1;
__thread uint32_t cpuid = 0;
static sem_t net_sem;
int uhyve_argc = -1;
int uhyve_envc = -1;
char **uhyve_argv = NULL;
extern char **environ;
char **uhyve_envp = NULL;
vcpu_state_t *vcpu_thread_states = NULL;
static sigset_t signal_mask;
typedef struct {
int argc;
int argsz[MAX_ARGC_ENVC];
int envc;
int envsz[MAX_ARGC_ENVC];
} __attribute__ ((packed)) uhyve_cmdsize_t;
typedef struct {
char **argv;
char **envp;
} __attribute__ ((packed)) uhyve_cmdval_t;
static uint64_t memparse(const char *ptr)
{
// local pointer to end of parsed string
char *endptr;
// parse number
uint64_t size = strtoull(ptr, &endptr, 0);
// parse size extension, intentional fall-through
switch (*endptr) {
case 'E':
case 'e':
size <<= 10;
case 'P':
case 'p':
size <<= 10;
case 'T':
case 't':
size <<= 10;
case 'G':
case 'g':
size <<= 10;
case 'M':
case 'm':
size <<= 10;
case 'K':
case 'k':
size <<= 10;
endptr++;
default:
break;
}
return size;
}
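// Illustrative examples (added for clarity, values assumed):
//   memparse("1234") == 1234
//   memparse("512K") == 512ULL << 10
//   memparse("2G")   == 2ULL << 30   (the 'G' case falls through 'M' and 'K')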
// Close the file descriptor if it is not already closed
static void close_fd(int* fd)
{
if (*fd != -1) {
close(*fd);
*fd = -1;
}
}
static void uhyve_exit(void* arg)
{
//print_registers();
if (pthread_mutex_trylock(&kvm_lock))
{
close_fd(&vcpufd);
return;
}
// only the main thread will execute this
if (vcpu_threads) {
for(uint32_t i=0; i<ncores; i++) {
if (pthread_self() == vcpu_threads[i])
continue;
pthread_kill(vcpu_threads[i], SIGTERM);
}
if (netfd > 0)
pthread_kill(net_thread, SIGTERM);
}
close_fd(&vcpufd);
}
static void uhyve_atexit(void)
{
uhyve_exit(NULL);
if (vcpu_threads) {
for(uint32_t i = 0; i < ncores; i++) {
if (pthread_self() == vcpu_threads[i])
continue;
pthread_join(vcpu_threads[i], NULL);
}
free(vcpu_threads);
}
if (vcpu_fds)
free(vcpu_fds);
// clean up and close KVM
close_fd(&vmfd);
close_fd(&kvm);
}
static void* wait_for_packet(void* arg)
{
int ret;
struct pollfd fds = { .fd = netfd,
.events = POLLIN,
.revents = 0};
while(1)
{
fds.revents = 0;
ret = poll(&fds, 1, -1); // negative timeout => wait indefinitely
if (ret < 0 && errno == EINTR)
continue;
if (ret < 0)
perror("poll()");
else if (ret) {
uint64_t event_counter = 1;
write(efd, &event_counter, sizeof(event_counter));
sem_wait(&net_sem);
}
}
return NULL;
}
static inline void check_network(void)
{
// should we start the network thread?
if ((efd < 0) && (getenv("HERMIT_NETIF"))) {
struct kvm_irqfd irqfd = {};
efd = eventfd(0, 0);
irqfd.fd = efd;
irqfd.gsi = UHYVE_IRQ;
kvm_ioctl(vmfd, KVM_IRQFD, &irqfd);
sem_init(&net_sem, 0, 0);
if (pthread_create(&net_thread, NULL, wait_for_packet, NULL))
err(1, "unable to create thread");
}
}
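// Note on the eventfd/irqfd pairing above: registering efd via KVM_IRQFD
// tells KVM to inject guest interrupt line UHYVE_IRQ whenever efd is
// signalled by a write. wait_for_packet() therefore announces an incoming
// frame simply by writing to efd; no explicit interrupt-injection ioctl is
// needed.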
static int vcpu_loop(void)
{
int ret;
pthread_barrier_wait(&barrier);
if (restart) {
vcpu_state_t cpu_state = read_cpu_state();
restore_cpu_state(cpu_state);
} else if (vcpu_thread_states) {
restore_cpu_state(vcpu_thread_states[cpuid]);
} else {
init_cpu_state(elf_entry);
}
if (cpuid == 0) {
if (restart) {
no_checkpoint++;
} else if (migration) {
free(vcpu_thread_states);
vcpu_thread_states = NULL;
}
}
/* init uhyve gdb support */
if (uhyve_gdb_enabled) {
if (cpuid == 0)
uhyve_gdb_init(vcpufd);
pthread_barrier_wait(&barrier);
}
while (1) {
ret = ioctl(vcpufd, KVM_RUN, NULL);
if(ret == -1) {
switch(errno) {
case EINTR:
continue;
case EFAULT: {
struct kvm_regs regs;
kvm_ioctl(vcpufd, KVM_GET_REGS, &regs);
#ifdef __x86_64__
err(1, "KVM: host/guest translation fault: rip=0x%llx", regs.rip);
#else
err(1, "KVM: host/guest translation fault: elr_el1=0x%llx", regs.elr_el1);
#endif
}
default:
err(1, "KVM: ioctl KVM_RUN in vcpu_loop for cpuid %d failed", cpuid);
break;
}
}
uint64_t port = 0;
unsigned raddr = 0;
/* handle requests */
switch (run->exit_reason) {
case KVM_EXIT_HLT:
fprintf(stderr, "Guest has halted the CPU, this is considered as a normal exit.\n");
if (uhyve_gdb_enabled)
uhyve_gdb_handle_term();
return 0;
case KVM_EXIT_MMIO:
port = run->mmio.phys_addr;
if (run->mmio.is_write)
memcpy(&raddr, run->mmio.data, sizeof(raddr) /*run->mmio.len*/);
//printf("KVM: handled KVM_EXIT_MMIO at 0x%lx (data %u)\n", port, raddr);
case KVM_EXIT_IO:
if (!port) {
port = run->io.port;
raddr = *((unsigned*)((size_t)run+run->io.data_offset));
}
//printf("port 0x%x\n", run->io.port);
switch (port) {
case UHYVE_UART_PORT:
if (verbose)
putc((unsigned char) raddr, stderr);
break;
case UHYVE_PORT_WRITE: {
uhyve_write_t* uhyve_write = (uhyve_write_t*) (guest_mem+raddr);
uhyve_write->len = write(uhyve_write->fd, guest_mem+(size_t)uhyve_write->buf, uhyve_write->len);
break;
}
case UHYVE_PORT_READ: {
uhyve_read_t* uhyve_read = (uhyve_read_t*) (guest_mem+raddr);
uhyve_read->ret = read(uhyve_read->fd, guest_mem+(size_t)uhyve_read->buf, uhyve_read->len);
break;
}
case UHYVE_PORT_EXIT: {
if (cpuid)
pthread_exit((int*)(guest_mem+raddr));
else
exit(*(int*)(guest_mem+raddr));
break;
}
case UHYVE_PORT_OPEN: {
uhyve_open_t* uhyve_open = (uhyve_open_t*) (guest_mem+raddr);
char rpath[PATH_MAX];
// forbid opening the KVM device
if (realpath((const char*)guest_mem+(size_t)uhyve_open->name, rpath) == NULL)
uhyve_open->ret = -1;
else if (strcmp(rpath, "/dev/kvm") == 0)
uhyve_open->ret = -1;
else
uhyve_open->ret = open((const char*)guest_mem+(size_t)uhyve_open->name, uhyve_open->flags, uhyve_open->mode);
break;
}
case UHYVE_PORT_CLOSE: {
uhyve_close_t* uhyve_close = (uhyve_close_t*) (guest_mem+raddr);
if (uhyve_close->fd > 2)
uhyve_close->ret = close(uhyve_close->fd);
else
uhyve_close->ret = 0;
break;
}
case UHYVE_PORT_NETINFO: {
uhyve_netinfo_t* uhyve_netinfo = (uhyve_netinfo_t*)(guest_mem+raddr);
memcpy(uhyve_netinfo->mac_str, uhyve_get_mac(), 18);
// the guest configured the ethernet device => start the network thread
check_network();
break;
}
case UHYVE_PORT_NETWRITE: {
uhyve_netwrite_t* uhyve_netwrite = (uhyve_netwrite_t*)(guest_mem + raddr);
uhyve_netwrite->ret = 0;
ret = write(netfd, guest_mem + (size_t)uhyve_netwrite->data, uhyve_netwrite->len);
if (ret >= 0) {
uhyve_netwrite->ret = 0;
uhyve_netwrite->len = ret;
} else {
uhyve_netwrite->ret = -1;
}
break;
}
case UHYVE_PORT_NETREAD: {
uhyve_netread_t* uhyve_netread = (uhyve_netread_t*)(guest_mem + raddr);
ret = read(netfd, guest_mem + (size_t)uhyve_netread->data, uhyve_netread->len);
if (ret > 0) {
uhyve_netread->len = ret;
uhyve_netread->ret = 0;
} else {
uhyve_netread->ret = -1;
sem_post(&net_sem);
}
break;
}
case UHYVE_PORT_NETSTAT: {
uhyve_netstat_t* uhyve_netstat = (uhyve_netstat_t*)(guest_mem + raddr);
char* str = getenv("HERMIT_NETIF");
if (str)
uhyve_netstat->status = 1;
else
uhyve_netstat->status = 0;
break;
}
case UHYVE_PORT_LSEEK: {
uhyve_lseek_t* uhyve_lseek = (uhyve_lseek_t*) (guest_mem+raddr);
uhyve_lseek->offset = lseek(uhyve_lseek->fd, uhyve_lseek->offset, uhyve_lseek->whence);
break;
}
case UHYVE_PORT_CMDSIZE: {
int i;
uhyve_cmdsize_t *val = (uhyve_cmdsize_t *) (guest_mem+raddr);
val->argc = uhyve_argc;
for(i=0; i<uhyve_argc; i++)
val->argsz[i] = strlen(uhyve_argv[i]) + 1;
val->envc = uhyve_envc;
for(i=0; i<uhyve_envc; i++)
val->envsz[i] = strlen(uhyve_envp[i]) + 1;
break;
}
case UHYVE_PORT_CMDVAL: {
int i;
char **argv_ptr, **env_ptr;
uhyve_cmdval_t *val = (uhyve_cmdval_t *) (guest_mem+raddr);
/* argv */
argv_ptr = (char **)(guest_mem + (size_t)val->argv);
for(i=0; i<uhyve_argc; i++)
strcpy(guest_mem + (size_t)argv_ptr[i], uhyve_argv[i]);
/* env */
env_ptr = (char **)(guest_mem + (size_t)val->envp);
for(i=0; i<uhyve_envc; i++)
strcpy(guest_mem + (size_t)env_ptr[i], uhyve_envp[i]);
break;
}
default:
err(1, "KVM: unhandled KVM_EXIT_IO / KVM_EXIT_MMIO at port 0x%lx\n", port);
break;
}
break;
case KVM_EXIT_FAIL_ENTRY:
if (uhyve_gdb_enabled)
uhyve_gdb_handle_exception(vcpufd, GDB_SIGNAL_SEGV);
err(1, "KVM: entry failure: hw_entry_failure_reason=0x%llx\n",
run->fail_entry.hardware_entry_failure_reason);
break;
case KVM_EXIT_INTERNAL_ERROR:
if (uhyve_gdb_enabled)
uhyve_gdb_handle_exception(vcpufd, GDB_SIGNAL_SEGV);
err(1, "KVM: internal error exit: suberror = 0x%x\n", run->internal.suberror);
break;
case KVM_EXIT_SHUTDOWN:
fprintf(stderr, "KVM: receive shutdown command\n");
case KVM_EXIT_DEBUG:
if (uhyve_gdb_enabled) {
uhyve_gdb_handle_exception(vcpufd, GDB_SIGNAL_TRAP);
break;
} else print_registers();
exit(EXIT_FAILURE);
default:
fprintf(stderr, "KVM: unhandled exit: exit_reason = 0x%x\n", run->exit_reason);
exit(EXIT_FAILURE);
}
}
close(vcpufd);
vcpufd = -1;
return 0;
}
static int vcpu_init(void)
{
vcpu_fds[cpuid] = vcpufd = kvm_ioctl(vmfd, KVM_CREATE_VCPU, cpuid);
/* Map the shared kvm_run structure and following data. */
size_t mmap_size = (size_t) kvm_ioctl(kvm, KVM_GET_VCPU_MMAP_SIZE, NULL);
if (mmap_size < sizeof(*run))
err(1, "KVM: invalid VCPU_MMAP_SIZE: %zd", mmap_size);
run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpufd, 0);
if (run == MAP_FAILED)
err(1, "KVM: VCPU mmap failed");
return 0;
}
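// The mmap'ed kvm_run structure is the shared communication area between
// KVM and user space: after each KVM_RUN ioctl returns, it holds the exit
// reason and the associated data (e.g. the I/O port and payload decoded in
// vcpu_loop() above).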
static void sigusr_handler(int signum)
{
pthread_barrier_wait(&barrier);
write_cpu_state();
pthread_barrier_wait(&barrier);
}
static void vcpu_thread_mig_handler(int signum)
{
/* memory should be allocated at this point */
assert(vcpu_thread_states != NULL);
/* ensure consistency among VCPUs */
pthread_barrier_wait(&barrier);
/* save state */
vcpu_thread_states[cpuid] = save_cpu_state();
/* synchronize with migration thread */
pthread_barrier_wait(&migration_barrier);
/* wait to be killed */
pthread_barrier_wait(&migration_barrier);
}
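// Migration flow as far as visible here: uhyve_loop() starts a dedicated
// thread running migration_handler(), which coordinates with the vCPU
// threads via SIGTHRMIG and migration_barrier. The handler above saves the
// per-vCPU state and then blocks twice, once to hand the state to the
// migration thread and once to wait for teardown.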
static void* uhyve_thread(void* arg)
{
size_t ret;
struct sigaction sa;
pthread_cleanup_push(uhyve_exit, NULL);
cpuid = (size_t) arg;
/* install signal handler for checkpoint */
memset(&sa, 0x00, sizeof(sa));
sa.sa_handler = &sigusr_handler;
sigaction(SIGTHRCHKP, &sa, NULL);
/* install signal handler for migration */
memset(&sa, 0x00, sizeof(sa));
sa.sa_handler = &vcpu_thread_mig_handler;
sigaction(SIGTHRMIG, &sa, NULL);
// create new cpu
vcpu_init();
pthread_barrier_wait(&barrier);
// run cpu loop until thread gets killed
ret = vcpu_loop();
pthread_cleanup_pop(1);
return (void*) ret;
}
void sigterm_handler(int signum)
{
pthread_exit(0);
}
int uhyve_init(char *path)
{
FILE *f = NULL;
guest_path = path;
signal(SIGTERM, sigterm_handler);
// register routine to close the VM
atexit(uhyve_atexit);
const char *start_mig_server = getenv("HERMIT_MIGRATION_SERVER");
/*
* Three startups
* a) incoming migration
* b) load existing checkpoint
* c) normal run
*/
if (start_mig_server) {
migration = true;
migration_metadata_t metadata;
wait_for_incomming_migration(&metadata, MIGRATION_PORT);
ncores = metadata.ncores;
guest_size = metadata.guest_size;
elf_entry = metadata.elf_entry;
full_checkpoint = metadata.full_checkpoint;
} else if ((f = fopen("checkpoint/chk_config.txt", "r")) != NULL) {
int tmp = 0;
restart = true;
fscanf(f, "number of cores: %u\n", &ncores);
fscanf(f, "memory size: 0x%zx\n", &guest_size);
fscanf(f, "checkpoint number: %u\n", &no_checkpoint);
fscanf(f, "entry point: 0x%zx", &elf_entry);
fscanf(f, "full checkpoint: %d", &tmp);
full_checkpoint = tmp ? true : false;
if (verbose)
fprintf(stderr,
"Restart from checkpoint %u "
"(ncores %d, mem size 0x%zx)\n",
no_checkpoint, ncores, guest_size);
fclose(f);
} else {
const char* hermit_memory = getenv("HERMIT_MEM");
if (hermit_memory)
guest_size = memparse(hermit_memory);
const char* hermit_cpus = getenv("HERMIT_CPUS");
if (hermit_cpus)
ncores = (uint32_t) atoi(hermit_cpus);
const char* full_chk = getenv("HERMIT_FULLCHECKPOINT");
if (full_chk && (strcmp(full_chk, "0") != 0))
full_checkpoint = true;
}
vcpu_threads = (pthread_t*) calloc(ncores, sizeof(pthread_t));
if (!vcpu_threads)
err(1, "Not enough memory");
vcpu_fds = (int*) calloc(ncores, sizeof(int));
if (!vcpu_fds)
err(1, "Not enough memory");
kvm = open("/dev/kvm", O_RDWR | O_CLOEXEC);
if (kvm < 0)
err(1, "Could not open: /dev/kvm");
/* Make sure we have the stable version of the API */
int kvm_api_version = kvm_ioctl(kvm, KVM_GET_API_VERSION, NULL);
if (kvm_api_version != 12)
err(1, "KVM: API version is %d, uhyve requires version 12", kvm_api_version);
/* Create the virtual machine */
vmfd = kvm_ioctl(kvm, KVM_CREATE_VM, 0);
#ifdef __x86_64__
init_kvm_arch();
if (restart) {
if (load_checkpoint(guest_mem, path) != 0)
exit(EXIT_FAILURE);
} else if (start_mig_server) {
load_migration_data(guest_mem);
close_migration_channel();
} else {
if (load_kernel(guest_mem, path) != 0)
exit(EXIT_FAILURE);
}
#endif
pthread_barrier_init(&barrier, NULL, ncores);
pthread_barrier_init(&migration_barrier, NULL, ncores+1);
cpuid = 0;
// create first CPU, it will be the boot processor by default
int ret = vcpu_init();
const char* netif_str = getenv("HERMIT_NETIF");
if (netif_str)
{
// TODO: strncmp for different network interfaces
// for example tun/tap device or uhyvetap device
netfd = uhyve_net_init(netif_str);
if (netfd < 0)
err(1, "unable to initialized network");
}
return ret;
}
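/* Illustrative invocation (the values below are made-up examples):
 *
 *   HERMIT_MEM=2G HERMIT_CPUS=4 HERMIT_NETIF=tap100 proxy path/to/app
 *
 * HERMIT_MEM is parsed by memparse(), HERMIT_CPUS sets ncores, and
 * HERMIT_NETIF enables the network backend via uhyve_net_init().
 */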
int uhyve_loop(int argc, char **argv)
{
const char* hermit_check = getenv("HERMIT_CHECKPOINT");
const char* hermit_mig_support = getenv("HERMIT_MIGRATION_SUPPORT");
const char* hermit_mig_type = getenv("HERMIT_MIGRATION_TYPE");
const char* hermit_debug = getenv("HERMIT_DEBUG");
int ts = 0, i = 0;
if (hermit_debug && (atoi(hermit_debug) != 0))
uhyve_gdb_enabled = true;
/* argv[0] is 'proxy', do not count it */
uhyve_argc = argc-1;
uhyve_argv = &argv[1];
uhyve_envp = environ;
while(uhyve_envp[i] != NULL)
i++;
uhyve_envc = i;
if (uhyve_argc > MAX_ARGC_ENVC) {
fprintf(stderr, "uhyve downsiize envc from %d to %d\n", uhyve_argc, MAX_ARGC_ENVC);
uhyve_argc = MAX_ARGC_ENVC;
}
if (uhyve_envc > MAX_ARGC_ENVC-1) {
fprintf(stderr, "uhyve downsiize envc from %d to %d\n", uhyve_envc, MAX_ARGC_ENVC-1);
uhyve_envc = MAX_ARGC_ENVC-1;
}
if (uhyve_argc > MAX_ARGC_ENVC || uhyve_envc > MAX_ARGC_ENVC) {
fprintf(stderr, "uhyve cannot forward more than %d command line "
"arguments or environment variables, please consider increasing "
"the MAX_ARGC_ENVP cmake argument\n", MAX_ARGC_ENVC);
return -1;
}
if (hermit_check)
ts = atoi(hermit_check);
if (hermit_mig_support) {
set_migration_target(hermit_mig_support, MIGRATION_PORT);
set_migration_type(hermit_mig_type);
/* block SIGUSR1 in main thread */
sigemptyset (&signal_mask);
sigaddset (&signal_mask, SIGUSR1);
pthread_sigmask (SIG_BLOCK, &signal_mask, NULL);
/* start migration thread; handles SIGUSR1 */
pthread_t sig_thr_id;
pthread_create (&sig_thr_id, NULL, migration_handler, (void *)&signal_mask);
/* install signal handler for migration */
struct sigaction sa;
memset(&sa, 0x00, sizeof(sa));
sa.sa_handler = &vcpu_thread_mig_handler;
sigaction(SIGTHRMIG, &sa, NULL);
}
// First CPU is special because it will boot the system. Other CPUs will
// be booted linearly after the first one.
vcpu_threads[0] = pthread_self();
// start threads to create VCPUs
for(size_t i = 1; i < ncores; i++)
pthread_create(&vcpu_threads[i], NULL, uhyve_thread, (void*) i);
pthread_barrier_wait(&barrier);
#ifdef __aarch64__
init_kvm_arch();
if (restart) {
if (load_checkpoint(guest_mem, guest_path) != 0)
exit(EXIT_FAILURE);
} else {
if (load_kernel(guest_mem, guest_path) != 0)
exit(EXIT_FAILURE);
}
#endif
*((uint32_t*) (mboot+0x24)) = ncores;
if (ts > 0)
{
struct sigaction sa;
struct itimerval timer;
/* Install timer_handler as the signal handler for SIGALRM. */
memset(&sa, 0x00, sizeof(sa));
sa.sa_handler = &timer_handler;
sigaction(SIGALRM, &sa, NULL);
/* Configure the timer to expire after "ts" sec... */
timer.it_value.tv_sec = ts;
timer.it_value.tv_usec = 0;
/* ... and every "ts" sec after that. */
timer.it_interval.tv_sec = ts;
timer.it_interval.tv_usec = 0;
/* Start the timer. ITIMER_REAL counts down in real (wall-clock) time. */
setitimer(ITIMER_REAL, &timer, NULL);
}
// Run first CPU
return vcpu_loop();
}

View File

@ -1,109 +0,0 @@
/*
* Copyright (c) 2018, Stefan Lankes, RWTH Aachen University
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __UHYVE_H__
#define __UHYVE_H__
#include <err.h>
#include <linux/kvm.h>
#define UHYVE_PORT_WRITE 0x400
#define UHYVE_PORT_OPEN 0x440
#define UHYVE_PORT_CLOSE 0x480
#define UHYVE_PORT_READ 0x500
#define UHYVE_PORT_EXIT 0x540
#define UHYVE_PORT_LSEEK 0x580
// Network ports
#define UHYVE_PORT_NETINFO 0x600
#define UHYVE_PORT_NETWRITE 0x640
#define UHYVE_PORT_NETREAD 0x680
#define UHYVE_PORT_NETSTAT 0x700
/* Ports and data structures for uhyve command line arguments and envp
* forwarding */
#define UHYVE_PORT_CMDSIZE 0x740
#define UHYVE_PORT_CMDVAL 0x780
#define UHYVE_UART_PORT 0x800
#define UHYVE_IRQ 11
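/* The UHYVE_PORT_* constants above implement a simple hypercall protocol:
 * the guest writes the guest-physical address of an argument structure to
 * one of these I/O ports, KVM exits with KVM_EXIT_IO (or KVM_EXIT_MMIO on
 * aarch64), and the host resolves the address via guest_mem and services
 * the request in vcpu_loop().
 */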
#define SIGTHRCHKP (SIGRTMIN+0)
#define SIGTHRMIG (SIGRTMIN+1)
#define kvm_ioctl(fd, cmd, arg) ({ \
const int ret = ioctl(fd, cmd, arg); \
if(ret == -1) \
err(1, "KVM: ioctl " #cmd " failed"); \
ret; \
})
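/* Illustrative use (mirrors uhyve.c): a failing ioctl aborts via err(), so
 * call sites need no explicit error handling:
 *
 *   vmfd = kvm_ioctl(kvm, KVM_CREATE_VM, 0);
 */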
#ifdef __x86_64__
#define MAX_MSR_ENTRIES 25
struct msr_data {
struct kvm_msrs info;
struct kvm_msr_entry entries[MAX_MSR_ENTRIES];
};
typedef struct _vcpu_state {
struct msr_data msr_data;
struct kvm_regs regs;
struct kvm_sregs sregs;
struct kvm_fpu fpu;
struct kvm_lapic_state lapic;
struct kvm_xsave xsave;
struct kvm_xcrs xcrs;
struct kvm_vcpu_events events;
struct kvm_mp_state mp_state;
} vcpu_state_t;
#else
typedef struct _vcpu_state {
int dummy;
} vcpu_state_t;
#endif
typedef struct _migration_metadata migration_metadata_t;
void print_registers(void);
void timer_handler(int signum);
void *migration_handler(void *arg);
void restore_cpu_state(vcpu_state_t cpu_state);
vcpu_state_t read_cpu_state(void);
vcpu_state_t save_cpu_state(void);
void write_cpu_state(void);
void init_cpu_state(uint64_t elf_entry);
int load_kernel(uint8_t* mem, char* path);
int load_checkpoint(uint8_t* mem, char* path);
int load_migration_data(uint8_t* mem);
void wait_for_incomming_migration(migration_metadata_t *metadata, uint16_t listen_portno);
void init_kvm_arch(void);
size_t determine_dest_offset(size_t src_addr);
void determine_dirty_pages(void (*save_page_handler)(void*, size_t, void*, size_t));
#endif

View File

@ -1,175 +0,0 @@
/*
* Copyright (c) 2017, Stefan Lankes, RWTH Aachen University
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <errno.h>
#include <limits.h>
#include "proxy.h"
#ifdef __x86_64__
inline static void __cpuid(uint32_t code, uint32_t* a, uint32_t* b, uint32_t* c, uint32_t* d)
{
__asm volatile ("cpuid" : "=a"(*a), "=b"(*b), "=c"(*c), "=d"(*d) : "0"(code), "2"(*c));
}
// Try to determine the frequency from the CPU brand.
// Code is derived from the manual "Intel Processor
// Identification and the CPUID Instruction".
static uint32_t get_frequency_from_brand(void)
{
char cpu_brand[4*3*sizeof(uint32_t)+1] = {[0 ... 4*3*sizeof(uint32_t)] = 0};
uint32_t* bint = (uint32_t*) cpu_brand;
uint32_t index, multiplier = 0;
uint32_t cpu_freq = 0;
uint32_t extended;
__cpuid(0x80000000, &extended, bint+1, bint+2, bint+3);
if (extended < 0x80000004)
return 0;
__cpuid(0x80000002, bint+0, bint+1, bint+2, bint+3);
__cpuid(0x80000003, bint+4, bint+5, bint+6, bint+7);
__cpuid(0x80000004, bint+8, bint+9, bint+10, bint+11);
for(index=0; index<sizeof(cpu_brand)-2; index++)
{
if ((cpu_brand[index+1] == 'H') && (cpu_brand[index+2] == 'z'))
{
if (cpu_brand[index] == 'M')
multiplier = 1;
else if (cpu_brand[index] == 'G')
multiplier = 1000;
else if (cpu_brand[index] == 'T')
multiplier = 1000000;
}
if (multiplier > 0) {
uint32_t freq;
// Compute frequency (in MHz) from brand string
if (cpu_brand[index-3] == '.') { // If format is "x.xx"
freq = (uint32_t)(cpu_brand[index-4] - '0') * multiplier;
freq += (uint32_t)(cpu_brand[index-2] - '0') * (multiplier / 10);
freq += (uint32_t)(cpu_brand[index-1] - '0') * (multiplier / 100);
} else { // If format is xxxx
freq = (uint32_t)(cpu_brand[index-4] - '0') * 1000;
freq += (uint32_t)(cpu_brand[index-3] - '0') * 100;
freq += (uint32_t)(cpu_brand[index-2] - '0') * 10;
freq += (uint32_t)(cpu_brand[index-1] - '0');
freq *= multiplier;
}
return freq;
}
}
return 0;
}
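// Worked example (a hypothetical brand string): for "... CPU @ 3.40GHz",
// the loop finds 'G' before "Hz" (multiplier = 1000) and the '.' three
// characters back, so freq = 3*1000 + 4*(1000/10) + 0*(1000/100) = 3400 MHz.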
#endif
uint32_t get_cpufreq(void)
{
char line[128];
uint32_t freq = 0;
char* match;
#ifdef __x86_64__
freq = get_frequency_from_brand();
if (freq > 0)
return freq;
#endif
// TODO: fallback solution; on some systems cpuinfo_max_freq reports the
// turbo frequency => wrong value
FILE* fp = fopen("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", "r");
if (fp != NULL) {
if (fgets(line, sizeof(line), fp) != NULL) {
// cpuinfo_max_freq is in kHz
freq = (uint32_t) atoi(line) / 1000;
}
fclose(fp);
} else if( (fp = fopen("/proc/cpuinfo", "r")) ) {
// Fall back to /proc/cpuinfo; on most systems this only returns the
// current frequency, which may change over time.
// Currently only needed when running inside a VM.
// read until we find the line indicating cpu frequency
while(fgets(line, sizeof(line), fp) != NULL) {
match = strstr(line, "cpu MHz");
if(match != NULL) {
// advance pointer to beginning of number
while( ((*match < '0') || (*match > '9')) && (*match != '\0') )
match++;
freq = (uint32_t) atoi(match);
break;
}
}
fclose(fp);
}
return freq;
}
ssize_t pread_in_full(int fd, void *buf, size_t count, off_t offset)
{
ssize_t total = 0;
char *p = buf;
if (count > SSIZE_MAX) {
errno = E2BIG;
return -1;
}
while (count > 0) {
ssize_t nr;
nr = pread(fd, p, count, offset);
if (nr == 0)
return total;
else if (nr == -1 && errno == EINTR)
continue;
else if (nr == -1)
return -1;
count -= nr;
total += nr;
p += nr;
offset += nr;
}
return total;
}
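/* Illustrative use (hypothetical descriptor and buffer): read an ELF header
 * from the start of a kernel image, transparently retrying on EINTR and on
 * short reads:
 *
 *   Elf64_Ehdr hdr;
 *   if (pread_in_full(fd, &hdr, sizeof(hdr), 0) != sizeof(hdr))
 *       err(1, "unable to read ELF header");
 */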