mirror of
https://github.com/hermitcore/libhermit.git
synced 2025-03-09 00:00:03 +01:00
move proxy and hypervisor to a new submodule
This commit is contained in:
parent
69ed9f5354
commit
d62fb9f762
24 changed files with 8 additions and 7567 deletions
3
.gitmodules
vendored
3
.gitmodules
vendored
|
@ -6,3 +6,6 @@
|
|||
path = usr/libomp
|
||||
url = https://github.com/hermitcore/libomp_oss.git
|
||||
branch = hermit
|
||||
[submodule "caves"]
|
||||
path = caves
|
||||
url = https://github.com/hermitcore/hermit-caves.git
|
||||
|
|
|
@ -127,6 +127,9 @@ install(DIRECTORY include/hermit
|
|||
FILES_MATCHING
|
||||
PATTERN *.h)
|
||||
|
||||
install(FILES tools/init.sh
|
||||
DESTINATION tools)
|
||||
|
||||
# provide custom target to only install libhermit without its runtimes which is
|
||||
# needed during the compilation of the cross toolchain
|
||||
add_custom_target(hermit-bootstrap-install
|
||||
|
@ -150,7 +153,7 @@ add_custom_target(hermit
|
|||
# be relocated for installation
|
||||
|
||||
## HermitCore's own tools such as Qemu/KVM proxy
|
||||
build_external(tools ${HERMIT_ROOT}/tools "")
|
||||
build_external(caves ${HERMIT_ROOT}/caves "")
|
||||
|
||||
if("${TARGET_ARCH}" STREQUAL "x86_64-hermit")
|
||||
|
||||
|
|
1
caves
Submodule
1
caves
Submodule
|
@ -0,0 +1 @@
|
|||
Subproject commit 155b31e13779b8d2446781b779bfa6a6ae46748c
|
|
@ -1,53 +0,0 @@
|
|||
cmake_minimum_required(VERSION 3.7)
|
||||
project(hermit_tools)
|
||||
|
||||
include(CheckIncludeFiles)
|
||||
include(../cmake/HermitCore-Paths.cmake)
|
||||
|
||||
option(ENABLE_RDMA_MIGRATION "Migration support via RDMA" OFF)
|
||||
|
||||
|
||||
add_compile_options(-std=c99)
|
||||
|
||||
list(APPEND LIBS "-pthread")
|
||||
set(SRC proxy.c
|
||||
utils.c
|
||||
uhyve.c
|
||||
uhyve-net.c
|
||||
uhyve-migration.c
|
||||
uhyve-x86_64.c
|
||||
uhyve-aarch64.c
|
||||
uhyve-gdb-x86_64.c
|
||||
uhyve-gdb-aarch64.c
|
||||
)
|
||||
|
||||
### Optional migration via RDMA
|
||||
if(ENABLE_RDMA_MIGRATION)
|
||||
add_definitions(-D__RDMA_MIGRATION__)
|
||||
list(APPEND LIBS "-libverbs")
|
||||
set(SRC ${SRC} uhyve-migration-rdma.c)
|
||||
else()
|
||||
remove_definitions(-D__RDMA_MIGRATION__)
|
||||
endif()
|
||||
|
||||
check_include_files(asm/msr-index.h HAVE_MSR_INDEX_H)
|
||||
|
||||
if(HAVE_MSR_INDEX_H)
|
||||
add_definitions(-DHAVE_MSR_INDEX_H=1)
|
||||
endif()
|
||||
|
||||
add_executable(proxy ${SRC})
|
||||
|
||||
target_compile_options(proxy PUBLIC ${LIBS})
|
||||
target_compile_options(proxy PUBLIC -DMAX_ARGC_ENVC=${MAX_ARGC_ENVC})
|
||||
target_link_libraries(proxy ${LIBS})
|
||||
|
||||
install(TARGETS proxy
|
||||
DESTINATION bin)
|
||||
|
||||
install(FILES init.sh
|
||||
DESTINATION tools)
|
||||
|
||||
# Show include files in IDE
|
||||
file(GLOB_RECURSE TOOLS_INCLUDES "*.h")
|
||||
add_custom_target(tools_includes_ide SOURCES ${TOOLS_INCLUDES})
|
1064
tools/proxy.c
1064
tools/proxy.c
File diff suppressed because it is too large
Load diff
|
@ -1,55 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2017, Stefan Lankes, RWTH Aachen University
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef __PROXY_H__
|
||||
#define __PROXY_H__
|
||||
|
||||
#ifndef _GNU_SOURCE
|
||||
#define _GNU_SOURCE
|
||||
#endif
|
||||
|
||||
#include <unistd.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#define HERMIT_ELFOSABI 0x42
|
||||
|
||||
#define __HERMIT_exit 0
|
||||
#define __HERMIT_write 1
|
||||
#define __HERMIT_open 2
|
||||
#define __HERMIT_close 3
|
||||
#define __HERMIT_read 4
|
||||
#define __HERMIT_lseek 5
|
||||
|
||||
int uhyve_init(char *path);
|
||||
int uhyve_loop(int argc, char **argv);
|
||||
|
||||
// define some helper functions
|
||||
uint32_t get_cpufreq(void);
|
||||
ssize_t pread_in_full(int fd, void *buf, size_t count, off_t offset);
|
||||
|
||||
#endif
|
638
tools/queue.h
638
tools/queue.h
|
@ -1,638 +0,0 @@
|
|||
/* $NetBSD: queue.h,v 1.68 2014/11/19 08:10:01 uebayasi Exp $ */
|
||||
|
||||
/*
|
||||
* Copyright (c) 1991, 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* @(#)queue.h 8.5 (Berkeley) 8/20/94
|
||||
*/
|
||||
|
||||
#ifndef _UHYVE_QUEUE_H_
|
||||
#define _UHYVE_QUEUE_H_
|
||||
|
||||
/*
|
||||
* This file defines five types of data structures: singly-linked lists,
|
||||
* lists, simple queues, tail queues, and circular queues.
|
||||
*
|
||||
* A singly-linked list is headed by a single forward pointer. The
|
||||
* elements are singly linked for minimum space and pointer manipulation
|
||||
* overhead at the expense of O(n) removal for arbitrary elements. New
|
||||
* elements can be added to the list after an existing element or at the
|
||||
* head of the list. Elements being removed from the head of the list
|
||||
* should use the explicit macro for this purpose for optimum
|
||||
* efficiency. A singly-linked list may only be traversed in the forward
|
||||
* direction. Singly-linked lists are ideal for applications with large
|
||||
* datasets and few or no removals or for implementing a LIFO queue.
|
||||
*
|
||||
* A list is headed by a single forward pointer (or an array of forward
|
||||
* pointers for a hash table header). The elements are doubly linked
|
||||
* so that an arbitrary element can be removed without a need to
|
||||
* traverse the list. New elements can be added to the list before
|
||||
* or after an existing element or at the head of the list. A list
|
||||
* may only be traversed in the forward direction.
|
||||
*
|
||||
* A simple queue is headed by a pair of pointers, one the head of the
|
||||
* list and the other to the tail of the list. The elements are singly
|
||||
* linked to save space, so elements can only be removed from the
|
||||
* head of the list. New elements can be added to the list after
|
||||
* an existing element, at the head of the list, or at the end of the
|
||||
* list. A simple queue may only be traversed in the forward direction.
|
||||
*
|
||||
* A tail queue is headed by a pair of pointers, one to the head of the
|
||||
* list and the other to the tail of the list. The elements are doubly
|
||||
* linked so that an arbitrary element can be removed without a need to
|
||||
* traverse the list. New elements can be added to the list before or
|
||||
* after an existing element, at the head of the list, or at the end of
|
||||
* the list. A tail queue may be traversed in either direction.
|
||||
*
|
||||
* A circle queue is headed by a pair of pointers, one to the head of the
|
||||
* list and the other to the tail of the list. The elements are doubly
|
||||
* linked so that an arbitrary element can be removed without a need to
|
||||
* traverse the list. New elements can be added to the list before or after
|
||||
* an existing element, at the head of the list, or at the end of the list.
|
||||
* A circle queue may be traversed in either direction, but has a more
|
||||
* complex end of list detection.
|
||||
*
|
||||
* For details on the use of these macros, see the queue(3) manual page.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Singly-linked List definitions.
|
||||
*/
|
||||
#define SLIST_HEAD(name, type) \
|
||||
struct name { \
|
||||
struct type *slh_first; /* first element */ \
|
||||
}
|
||||
|
||||
#define SLIST_HEAD_INITIALIZER(head) \
|
||||
{ NULL }
|
||||
|
||||
#define SLIST_ENTRY(type) \
|
||||
struct { \
|
||||
struct type *sle_next; /* next element */ \
|
||||
}
|
||||
|
||||
/*
|
||||
* Singly-linked List access methods.
|
||||
*/
|
||||
#define SLIST_FIRST(head) ((head)->slh_first)
|
||||
#define SLIST_END(head) NULL
|
||||
#define SLIST_EMPTY(head) ((head)->slh_first == NULL)
|
||||
#define SLIST_NEXT(elm, field) ((elm)->field.sle_next)
|
||||
|
||||
#define SLIST_FOREACH(var, head, field) \
|
||||
for((var) = (head)->slh_first; \
|
||||
(var) != SLIST_END(head); \
|
||||
(var) = (var)->field.sle_next)
|
||||
|
||||
#define SLIST_FOREACH_SAFE(var, head, field, tvar) \
|
||||
for ((var) = SLIST_FIRST((head)); \
|
||||
(var) != SLIST_END(head) && \
|
||||
((tvar) = SLIST_NEXT((var), field), 1); \
|
||||
(var) = (tvar))
|
||||
|
||||
/*
|
||||
* Singly-linked List functions.
|
||||
*/
|
||||
#define SLIST_INIT(head) do { \
|
||||
(head)->slh_first = SLIST_END(head); \
|
||||
} while (/*CONSTCOND*/0)
|
||||
|
||||
#define SLIST_INSERT_AFTER(slistelm, elm, field) do { \
|
||||
(elm)->field.sle_next = (slistelm)->field.sle_next; \
|
||||
(slistelm)->field.sle_next = (elm); \
|
||||
} while (/*CONSTCOND*/0)
|
||||
|
||||
#define SLIST_INSERT_HEAD(head, elm, field) do { \
|
||||
(elm)->field.sle_next = (head)->slh_first; \
|
||||
(head)->slh_first = (elm); \
|
||||
} while (/*CONSTCOND*/0)
|
||||
|
||||
#define SLIST_REMOVE_AFTER(slistelm, field) do { \
|
||||
(slistelm)->field.sle_next = \
|
||||
SLIST_NEXT(SLIST_NEXT((slistelm), field), field); \
|
||||
} while (/*CONSTCOND*/0)
|
||||
|
||||
#define SLIST_REMOVE_HEAD(head, field) do { \
|
||||
(head)->slh_first = (head)->slh_first->field.sle_next; \
|
||||
} while (/*CONSTCOND*/0)
|
||||
|
||||
#define SLIST_REMOVE(head, elm, type, field) do { \
|
||||
if ((head)->slh_first == (elm)) { \
|
||||
SLIST_REMOVE_HEAD((head), field); \
|
||||
} \
|
||||
else { \
|
||||
struct type *curelm = (head)->slh_first; \
|
||||
while(curelm->field.sle_next != (elm)) \
|
||||
curelm = curelm->field.sle_next; \
|
||||
curelm->field.sle_next = \
|
||||
curelm->field.sle_next->field.sle_next; \
|
||||
} \
|
||||
} while (/*CONSTCOND*/0)
|
||||
|
||||
|
||||
/*
|
||||
* List definitions.
|
||||
*/
|
||||
#define LIST_HEAD(name, type) \
|
||||
struct name { \
|
||||
struct type *lh_first; /* first element */ \
|
||||
}
|
||||
|
||||
#define LIST_HEAD_INITIALIZER(head) \
|
||||
{ NULL }
|
||||
|
||||
#define LIST_ENTRY(type) \
|
||||
struct { \
|
||||
struct type *le_next; /* next element */ \
|
||||
struct type **le_prev; /* address of previous next element */ \
|
||||
}
|
||||
|
||||
/*
|
||||
* List access methods.
|
||||
*/
|
||||
#define LIST_FIRST(head) ((head)->lh_first)
|
||||
#define LIST_END(head) NULL
|
||||
#define LIST_EMPTY(head) ((head)->lh_first == LIST_END(head))
|
||||
#define LIST_NEXT(elm, field) ((elm)->field.le_next)
|
||||
|
||||
#define LIST_FOREACH(var, head, field) \
|
||||
for ((var) = ((head)->lh_first); \
|
||||
(var) != LIST_END(head); \
|
||||
(var) = ((var)->field.le_next))
|
||||
|
||||
#define LIST_FOREACH_SAFE(var, head, field, tvar) \
|
||||
for ((var) = LIST_FIRST((head)); \
|
||||
(var) != LIST_END(head) && \
|
||||
((tvar) = LIST_NEXT((var), field), 1); \
|
||||
(var) = (tvar))
|
||||
|
||||
#define LIST_MOVE(head1, head2) do { \
|
||||
LIST_INIT((head2)); \
|
||||
if (!LIST_EMPTY((head1))) { \
|
||||
(head2)->lh_first = (head1)->lh_first; \
|
||||
LIST_INIT((head1)); \
|
||||
} \
|
||||
} while (/*CONSTCOND*/0)
|
||||
|
||||
/*
|
||||
* List functions.
|
||||
*/
|
||||
#if defined(QUEUEDEBUG)
|
||||
#define QUEUEDEBUG_LIST_INSERT_HEAD(head, elm, field) \
|
||||
if ((head)->lh_first && \
|
||||
(head)->lh_first->field.le_prev != &(head)->lh_first) \
|
||||
QUEUEDEBUG_ABORT("LIST_INSERT_HEAD %p %s:%d", (head), \
|
||||
__FILE__, __LINE__);
|
||||
#define QUEUEDEBUG_LIST_OP(elm, field) \
|
||||
if ((elm)->field.le_next && \
|
||||
(elm)->field.le_next->field.le_prev != \
|
||||
&(elm)->field.le_next) \
|
||||
QUEUEDEBUG_ABORT("LIST_* forw %p %s:%d", (elm), \
|
||||
__FILE__, __LINE__); \
|
||||
if (*(elm)->field.le_prev != (elm)) \
|
||||
QUEUEDEBUG_ABORT("LIST_* back %p %s:%d", (elm), \
|
||||
__FILE__, __LINE__);
|
||||
#define QUEUEDEBUG_LIST_POSTREMOVE(elm, field) \
|
||||
(elm)->field.le_next = (void *)1L; \
|
||||
(elm)->field.le_prev = (void *)1L;
|
||||
#else
|
||||
#define QUEUEDEBUG_LIST_INSERT_HEAD(head, elm, field)
|
||||
#define QUEUEDEBUG_LIST_OP(elm, field)
|
||||
#define QUEUEDEBUG_LIST_POSTREMOVE(elm, field)
|
||||
#endif
|
||||
|
||||
#define LIST_INIT(head) do { \
|
||||
(head)->lh_first = LIST_END(head); \
|
||||
} while (/*CONSTCOND*/0)
|
||||
|
||||
#define LIST_INSERT_AFTER(listelm, elm, field) do { \
|
||||
QUEUEDEBUG_LIST_OP((listelm), field) \
|
||||
if (((elm)->field.le_next = (listelm)->field.le_next) != \
|
||||
LIST_END(head)) \
|
||||
(listelm)->field.le_next->field.le_prev = \
|
||||
&(elm)->field.le_next; \
|
||||
(listelm)->field.le_next = (elm); \
|
||||
(elm)->field.le_prev = &(listelm)->field.le_next; \
|
||||
} while (/*CONSTCOND*/0)
|
||||
|
||||
#define LIST_INSERT_BEFORE(listelm, elm, field) do { \
|
||||
QUEUEDEBUG_LIST_OP((listelm), field) \
|
||||
(elm)->field.le_prev = (listelm)->field.le_prev; \
|
||||
(elm)->field.le_next = (listelm); \
|
||||
*(listelm)->field.le_prev = (elm); \
|
||||
(listelm)->field.le_prev = &(elm)->field.le_next; \
|
||||
} while (/*CONSTCOND*/0)
|
||||
|
||||
#define LIST_INSERT_HEAD(head, elm, field) do { \
|
||||
QUEUEDEBUG_LIST_INSERT_HEAD((head), (elm), field) \
|
||||
if (((elm)->field.le_next = (head)->lh_first) != LIST_END(head))\
|
||||
(head)->lh_first->field.le_prev = &(elm)->field.le_next;\
|
||||
(head)->lh_first = (elm); \
|
||||
(elm)->field.le_prev = &(head)->lh_first; \
|
||||
} while (/*CONSTCOND*/0)
|
||||
|
||||
#define LIST_REMOVE(elm, field) do { \
|
||||
QUEUEDEBUG_LIST_OP((elm), field) \
|
||||
if ((elm)->field.le_next != NULL) \
|
||||
(elm)->field.le_next->field.le_prev = \
|
||||
(elm)->field.le_prev; \
|
||||
*(elm)->field.le_prev = (elm)->field.le_next; \
|
||||
QUEUEDEBUG_LIST_POSTREMOVE((elm), field) \
|
||||
} while (/*CONSTCOND*/0)
|
||||
|
||||
#define LIST_REPLACE(elm, elm2, field) do { \
|
||||
if (((elm2)->field.le_next = (elm)->field.le_next) != NULL) \
|
||||
(elm2)->field.le_next->field.le_prev = \
|
||||
&(elm2)->field.le_next; \
|
||||
(elm2)->field.le_prev = (elm)->field.le_prev; \
|
||||
*(elm2)->field.le_prev = (elm2); \
|
||||
QUEUEDEBUG_LIST_POSTREMOVE((elm), field) \
|
||||
} while (/*CONSTCOND*/0)
|
||||
|
||||
/*
|
||||
* Simple queue definitions.
|
||||
*/
|
||||
#define SIMPLEQ_HEAD(name, type) \
|
||||
struct name { \
|
||||
struct type *sqh_first; /* first element */ \
|
||||
struct type **sqh_last; /* addr of last next element */ \
|
||||
}
|
||||
|
||||
#define SIMPLEQ_HEAD_INITIALIZER(head) \
|
||||
{ NULL, &(head).sqh_first }
|
||||
|
||||
#define SIMPLEQ_ENTRY(type) \
|
||||
struct { \
|
||||
struct type *sqe_next; /* next element */ \
|
||||
}
|
||||
|
||||
/*
|
||||
* Simple queue access methods.
|
||||
*/
|
||||
#define SIMPLEQ_FIRST(head) ((head)->sqh_first)
|
||||
#define SIMPLEQ_END(head) NULL
|
||||
#define SIMPLEQ_EMPTY(head) ((head)->sqh_first == SIMPLEQ_END(head))
|
||||
#define SIMPLEQ_NEXT(elm, field) ((elm)->field.sqe_next)
|
||||
|
||||
#define SIMPLEQ_FOREACH(var, head, field) \
|
||||
for ((var) = ((head)->sqh_first); \
|
||||
(var) != SIMPLEQ_END(head); \
|
||||
(var) = ((var)->field.sqe_next))
|
||||
|
||||
#define SIMPLEQ_FOREACH_SAFE(var, head, field, next) \
|
||||
for ((var) = ((head)->sqh_first); \
|
||||
(var) != SIMPLEQ_END(head) && \
|
||||
((next = ((var)->field.sqe_next)), 1); \
|
||||
(var) = (next))
|
||||
|
||||
/*
|
||||
* Simple queue functions.
|
||||
*/
|
||||
#define SIMPLEQ_INIT(head) do { \
|
||||
(head)->sqh_first = NULL; \
|
||||
(head)->sqh_last = &(head)->sqh_first; \
|
||||
} while (/*CONSTCOND*/0)
|
||||
|
||||
#define SIMPLEQ_INSERT_HEAD(head, elm, field) do { \
|
||||
if (((elm)->field.sqe_next = (head)->sqh_first) == NULL) \
|
||||
(head)->sqh_last = &(elm)->field.sqe_next; \
|
||||
(head)->sqh_first = (elm); \
|
||||
} while (/*CONSTCOND*/0)
|
||||
|
||||
#define SIMPLEQ_INSERT_TAIL(head, elm, field) do { \
|
||||
(elm)->field.sqe_next = NULL; \
|
||||
*(head)->sqh_last = (elm); \
|
||||
(head)->sqh_last = &(elm)->field.sqe_next; \
|
||||
} while (/*CONSTCOND*/0)
|
||||
|
||||
#define SIMPLEQ_INSERT_AFTER(head, listelm, elm, field) do { \
|
||||
if (((elm)->field.sqe_next = (listelm)->field.sqe_next) == NULL)\
|
||||
(head)->sqh_last = &(elm)->field.sqe_next; \
|
||||
(listelm)->field.sqe_next = (elm); \
|
||||
} while (/*CONSTCOND*/0)
|
||||
|
||||
#define SIMPLEQ_REMOVE_HEAD(head, field) do { \
|
||||
if (((head)->sqh_first = (head)->sqh_first->field.sqe_next) == NULL) \
|
||||
(head)->sqh_last = &(head)->sqh_first; \
|
||||
} while (/*CONSTCOND*/0)
|
||||
|
||||
#define SIMPLEQ_REMOVE_AFTER(head, elm, field) do { \
|
||||
if (((elm)->field.sqe_next = (elm)->field.sqe_next->field.sqe_next) \
|
||||
== NULL) \
|
||||
(head)->sqh_last = &(elm)->field.sqe_next; \
|
||||
} while (/*CONSTCOND*/0)
|
||||
|
||||
#define SIMPLEQ_REMOVE(head, elm, type, field) do { \
|
||||
if ((head)->sqh_first == (elm)) { \
|
||||
SIMPLEQ_REMOVE_HEAD((head), field); \
|
||||
} else { \
|
||||
struct type *curelm = (head)->sqh_first; \
|
||||
while (curelm->field.sqe_next != (elm)) \
|
||||
curelm = curelm->field.sqe_next; \
|
||||
if ((curelm->field.sqe_next = \
|
||||
curelm->field.sqe_next->field.sqe_next) == NULL) \
|
||||
(head)->sqh_last = &(curelm)->field.sqe_next; \
|
||||
} \
|
||||
} while (/*CONSTCOND*/0)
|
||||
|
||||
#define SIMPLEQ_CONCAT(head1, head2) do { \
|
||||
if (!SIMPLEQ_EMPTY((head2))) { \
|
||||
*(head1)->sqh_last = (head2)->sqh_first; \
|
||||
(head1)->sqh_last = (head2)->sqh_last; \
|
||||
SIMPLEQ_INIT((head2)); \
|
||||
} \
|
||||
} while (/*CONSTCOND*/0)
|
||||
|
||||
#define SIMPLEQ_LAST(head, type, field) \
|
||||
(SIMPLEQ_EMPTY((head)) ? \
|
||||
NULL : \
|
||||
((struct type *)(void *) \
|
||||
((char *)((head)->sqh_last) - offsetof(struct type, field))))
|
||||
|
||||
/*
|
||||
* Tail queue definitions.
|
||||
*/
|
||||
#define _TAILQ_HEAD(name, type, qual) \
|
||||
struct name { \
|
||||
qual type *tqh_first; /* first element */ \
|
||||
qual type *qual *tqh_last; /* addr of last next element */ \
|
||||
}
|
||||
#define TAILQ_HEAD(name, type) _TAILQ_HEAD(name, struct type,)
|
||||
|
||||
#define TAILQ_HEAD_INITIALIZER(head) \
|
||||
{ TAILQ_END(head), &(head).tqh_first }
|
||||
|
||||
#define _TAILQ_ENTRY(type, qual) \
|
||||
struct { \
|
||||
qual type *tqe_next; /* next element */ \
|
||||
qual type *qual *tqe_prev; /* address of previous next element */\
|
||||
}
|
||||
#define TAILQ_ENTRY(type) _TAILQ_ENTRY(struct type,)
|
||||
|
||||
/*
|
||||
* Tail queue access methods.
|
||||
*/
|
||||
#define TAILQ_FIRST(head) ((head)->tqh_first)
|
||||
#define TAILQ_END(head) (NULL)
|
||||
#define TAILQ_NEXT(elm, field) ((elm)->field.tqe_next)
|
||||
#define TAILQ_LAST(head, headname) \
|
||||
(*(((struct headname *)((head)->tqh_last))->tqh_last))
|
||||
#define TAILQ_PREV(elm, headname, field) \
|
||||
(*(((struct headname *)((elm)->field.tqe_prev))->tqh_last))
|
||||
#define TAILQ_EMPTY(head) (TAILQ_FIRST(head) == TAILQ_END(head))
|
||||
|
||||
|
||||
#define TAILQ_FOREACH(var, head, field) \
|
||||
for ((var) = ((head)->tqh_first); \
|
||||
(var) != TAILQ_END(head); \
|
||||
(var) = ((var)->field.tqe_next))
|
||||
|
||||
#define TAILQ_FOREACH_SAFE(var, head, field, next) \
|
||||
for ((var) = ((head)->tqh_first); \
|
||||
(var) != TAILQ_END(head) && \
|
||||
((next) = TAILQ_NEXT(var, field), 1); (var) = (next))
|
||||
|
||||
#define TAILQ_FOREACH_REVERSE(var, head, headname, field) \
|
||||
for ((var) = (*(((struct headname *)((head)->tqh_last))->tqh_last));\
|
||||
(var) != TAILQ_END(head); \
|
||||
(var) = (*(((struct headname *)((var)->field.tqe_prev))->tqh_last)))
|
||||
|
||||
#define TAILQ_FOREACH_REVERSE_SAFE(var, head, headname, field, prev) \
|
||||
for ((var) = TAILQ_LAST((head), headname); \
|
||||
(var) != TAILQ_END(head) && \
|
||||
((prev) = TAILQ_PREV((var), headname, field), 1); (var) = (prev))
|
||||
|
||||
/*
|
||||
* Tail queue functions.
|
||||
*/
|
||||
#if defined(QUEUEDEBUG)
|
||||
#define QUEUEDEBUG_TAILQ_INSERT_HEAD(head, elm, field) \
|
||||
if ((head)->tqh_first && \
|
||||
(head)->tqh_first->field.tqe_prev != &(head)->tqh_first) \
|
||||
QUEUEDEBUG_ABORT("TAILQ_INSERT_HEAD %p %s:%d", (head), \
|
||||
__FILE__, __LINE__);
|
||||
#define QUEUEDEBUG_TAILQ_INSERT_TAIL(head, elm, field) \
|
||||
if (*(head)->tqh_last != NULL) \
|
||||
QUEUEDEBUG_ABORT("TAILQ_INSERT_TAIL %p %s:%d", (head), \
|
||||
__FILE__, __LINE__);
|
||||
#define QUEUEDEBUG_TAILQ_OP(elm, field) \
|
||||
if ((elm)->field.tqe_next && \
|
||||
(elm)->field.tqe_next->field.tqe_prev != \
|
||||
&(elm)->field.tqe_next) \
|
||||
QUEUEDEBUG_ABORT("TAILQ_* forw %p %s:%d", (elm), \
|
||||
__FILE__, __LINE__); \
|
||||
if (*(elm)->field.tqe_prev != (elm)) \
|
||||
QUEUEDEBUG_ABORT("TAILQ_* back %p %s:%d", (elm), \
|
||||
__FILE__, __LINE__);
|
||||
#define QUEUEDEBUG_TAILQ_PREREMOVE(head, elm, field) \
|
||||
if ((elm)->field.tqe_next == NULL && \
|
||||
(head)->tqh_last != &(elm)->field.tqe_next) \
|
||||
QUEUEDEBUG_ABORT("TAILQ_PREREMOVE head %p elm %p %s:%d",\
|
||||
(head), (elm), __FILE__, __LINE__);
|
||||
#define QUEUEDEBUG_TAILQ_POSTREMOVE(elm, field) \
|
||||
(elm)->field.tqe_next = (void *)1L; \
|
||||
(elm)->field.tqe_prev = (void *)1L;
|
||||
#else
|
||||
#define QUEUEDEBUG_TAILQ_INSERT_HEAD(head, elm, field)
|
||||
#define QUEUEDEBUG_TAILQ_INSERT_TAIL(head, elm, field)
|
||||
#define QUEUEDEBUG_TAILQ_OP(elm, field)
|
||||
#define QUEUEDEBUG_TAILQ_PREREMOVE(head, elm, field)
|
||||
#define QUEUEDEBUG_TAILQ_POSTREMOVE(elm, field)
|
||||
#endif
|
||||
|
||||
#define TAILQ_INIT(head) do { \
|
||||
(head)->tqh_first = TAILQ_END(head); \
|
||||
(head)->tqh_last = &(head)->tqh_first; \
|
||||
} while (/*CONSTCOND*/0)
|
||||
|
||||
#define TAILQ_INSERT_HEAD(head, elm, field) do { \
|
||||
QUEUEDEBUG_TAILQ_INSERT_HEAD((head), (elm), field) \
|
||||
if (((elm)->field.tqe_next = (head)->tqh_first) != TAILQ_END(head))\
|
||||
(head)->tqh_first->field.tqe_prev = \
|
||||
&(elm)->field.tqe_next; \
|
||||
else \
|
||||
(head)->tqh_last = &(elm)->field.tqe_next; \
|
||||
(head)->tqh_first = (elm); \
|
||||
(elm)->field.tqe_prev = &(head)->tqh_first; \
|
||||
} while (/*CONSTCOND*/0)
|
||||
|
||||
#define TAILQ_INSERT_TAIL(head, elm, field) do { \
|
||||
QUEUEDEBUG_TAILQ_INSERT_TAIL((head), (elm), field) \
|
||||
(elm)->field.tqe_next = TAILQ_END(head); \
|
||||
(elm)->field.tqe_prev = (head)->tqh_last; \
|
||||
*(head)->tqh_last = (elm); \
|
||||
(head)->tqh_last = &(elm)->field.tqe_next; \
|
||||
} while (/*CONSTCOND*/0)
|
||||
|
||||
#define TAILQ_INSERT_AFTER(head, listelm, elm, field) do { \
|
||||
QUEUEDEBUG_TAILQ_OP((listelm), field) \
|
||||
if (((elm)->field.tqe_next = (listelm)->field.tqe_next) != \
|
||||
TAILQ_END(head)) \
|
||||
(elm)->field.tqe_next->field.tqe_prev = \
|
||||
&(elm)->field.tqe_next; \
|
||||
else \
|
||||
(head)->tqh_last = &(elm)->field.tqe_next; \
|
||||
(listelm)->field.tqe_next = (elm); \
|
||||
(elm)->field.tqe_prev = &(listelm)->field.tqe_next; \
|
||||
} while (/*CONSTCOND*/0)
|
||||
|
||||
#define TAILQ_INSERT_BEFORE(listelm, elm, field) do { \
|
||||
QUEUEDEBUG_TAILQ_OP((listelm), field) \
|
||||
(elm)->field.tqe_prev = (listelm)->field.tqe_prev; \
|
||||
(elm)->field.tqe_next = (listelm); \
|
||||
*(listelm)->field.tqe_prev = (elm); \
|
||||
(listelm)->field.tqe_prev = &(elm)->field.tqe_next; \
|
||||
} while (/*CONSTCOND*/0)
|
||||
|
||||
#define TAILQ_REMOVE(head, elm, field) do { \
|
||||
QUEUEDEBUG_TAILQ_PREREMOVE((head), (elm), field) \
|
||||
QUEUEDEBUG_TAILQ_OP((elm), field) \
|
||||
if (((elm)->field.tqe_next) != TAILQ_END(head)) \
|
||||
(elm)->field.tqe_next->field.tqe_prev = \
|
||||
(elm)->field.tqe_prev; \
|
||||
else \
|
||||
(head)->tqh_last = (elm)->field.tqe_prev; \
|
||||
*(elm)->field.tqe_prev = (elm)->field.tqe_next; \
|
||||
QUEUEDEBUG_TAILQ_POSTREMOVE((elm), field); \
|
||||
} while (/*CONSTCOND*/0)
|
||||
|
||||
#define TAILQ_REPLACE(head, elm, elm2, field) do { \
|
||||
if (((elm2)->field.tqe_next = (elm)->field.tqe_next) != \
|
||||
TAILQ_END(head)) \
|
||||
(elm2)->field.tqe_next->field.tqe_prev = \
|
||||
&(elm2)->field.tqe_next; \
|
||||
else \
|
||||
(head)->tqh_last = &(elm2)->field.tqe_next; \
|
||||
(elm2)->field.tqe_prev = (elm)->field.tqe_prev; \
|
||||
*(elm2)->field.tqe_prev = (elm2); \
|
||||
QUEUEDEBUG_TAILQ_POSTREMOVE((elm), field); \
|
||||
} while (/*CONSTCOND*/0)
|
||||
|
||||
#define TAILQ_CONCAT(head1, head2, field) do { \
|
||||
if (!TAILQ_EMPTY(head2)) { \
|
||||
*(head1)->tqh_last = (head2)->tqh_first; \
|
||||
(head2)->tqh_first->field.tqe_prev = (head1)->tqh_last; \
|
||||
(head1)->tqh_last = (head2)->tqh_last; \
|
||||
TAILQ_INIT((head2)); \
|
||||
} \
|
||||
} while (/*CONSTCOND*/0)
|
||||
|
||||
/*
|
||||
* Singly-linked Tail queue declarations.
|
||||
*/
|
||||
#define STAILQ_HEAD(name, type) \
|
||||
struct name { \
|
||||
struct type *stqh_first; /* first element */ \
|
||||
struct type **stqh_last; /* addr of last next element */ \
|
||||
}
|
||||
|
||||
#define STAILQ_HEAD_INITIALIZER(head) \
|
||||
{ NULL, &(head).stqh_first }
|
||||
|
||||
#define STAILQ_ENTRY(type) \
|
||||
struct { \
|
||||
struct type *stqe_next; /* next element */ \
|
||||
}
|
||||
|
||||
/*
|
||||
* Singly-linked Tail queue access methods.
|
||||
*/
|
||||
#define STAILQ_FIRST(head) ((head)->stqh_first)
|
||||
#define STAILQ_END(head) NULL
|
||||
#define STAILQ_NEXT(elm, field) ((elm)->field.stqe_next)
|
||||
#define STAILQ_EMPTY(head) (STAILQ_FIRST(head) == STAILQ_END(head))
|
||||
|
||||
/*
|
||||
* Singly-linked Tail queue functions.
|
||||
*/
|
||||
#define STAILQ_INIT(head) do { \
|
||||
(head)->stqh_first = NULL; \
|
||||
(head)->stqh_last = &(head)->stqh_first; \
|
||||
} while (/*CONSTCOND*/0)
|
||||
|
||||
#define STAILQ_INSERT_HEAD(head, elm, field) do { \
|
||||
if (((elm)->field.stqe_next = (head)->stqh_first) == NULL) \
|
||||
(head)->stqh_last = &(elm)->field.stqe_next; \
|
||||
(head)->stqh_first = (elm); \
|
||||
} while (/*CONSTCOND*/0)
|
||||
|
||||
#define STAILQ_INSERT_TAIL(head, elm, field) do { \
|
||||
(elm)->field.stqe_next = NULL; \
|
||||
*(head)->stqh_last = (elm); \
|
||||
(head)->stqh_last = &(elm)->field.stqe_next; \
|
||||
} while (/*CONSTCOND*/0)
|
||||
|
||||
#define STAILQ_INSERT_AFTER(head, listelm, elm, field) do { \
|
||||
if (((elm)->field.stqe_next = (listelm)->field.stqe_next) == NULL)\
|
||||
(head)->stqh_last = &(elm)->field.stqe_next; \
|
||||
(listelm)->field.stqe_next = (elm); \
|
||||
} while (/*CONSTCOND*/0)
|
||||
|
||||
#define STAILQ_REMOVE_HEAD(head, field) do { \
|
||||
if (((head)->stqh_first = (head)->stqh_first->field.stqe_next) == NULL) \
|
||||
(head)->stqh_last = &(head)->stqh_first; \
|
||||
} while (/*CONSTCOND*/0)
|
||||
|
||||
#define STAILQ_REMOVE(head, elm, type, field) do { \
|
||||
if ((head)->stqh_first == (elm)) { \
|
||||
STAILQ_REMOVE_HEAD((head), field); \
|
||||
} else { \
|
||||
struct type *curelm = (head)->stqh_first; \
|
||||
while (curelm->field.stqe_next != (elm)) \
|
||||
curelm = curelm->field.stqe_next; \
|
||||
if ((curelm->field.stqe_next = \
|
||||
curelm->field.stqe_next->field.stqe_next) == NULL) \
|
||||
(head)->stqh_last = &(curelm)->field.stqe_next; \
|
||||
} \
|
||||
} while (/*CONSTCOND*/0)
|
||||
|
||||
#define STAILQ_FOREACH(var, head, field) \
|
||||
for ((var) = ((head)->stqh_first); \
|
||||
(var); \
|
||||
(var) = ((var)->field.stqe_next))
|
||||
|
||||
#define STAILQ_FOREACH_SAFE(var, head, field, tvar) \
|
||||
for ((var) = STAILQ_FIRST((head)); \
|
||||
(var) && ((tvar) = STAILQ_NEXT((var), field), 1); \
|
||||
(var) = (tvar))
|
||||
|
||||
#define STAILQ_CONCAT(head1, head2) do { \
|
||||
if (!STAILQ_EMPTY((head2))) { \
|
||||
*(head1)->stqh_last = (head2)->stqh_first; \
|
||||
(head1)->stqh_last = (head2)->stqh_last; \
|
||||
STAILQ_INIT((head2)); \
|
||||
} \
|
||||
} while (/*CONSTCOND*/0)
|
||||
|
||||
#define STAILQ_LAST(head, type, field) \
|
||||
(STAILQ_EMPTY((head)) ? \
|
||||
NULL : \
|
||||
((struct type *)(void *) \
|
||||
((char *)((head)->stqh_last) - offsetof(struct type, field))))
|
||||
|
||||
#endif /* !_UHYVE_QUEUE_H_ */
|
|
@ -1,503 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2018, Stefan Lankes, RWTH Aachen University
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef __aarch64__
|
||||
|
||||
#define _GNU_SOURCE
|
||||
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdbool.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <sched.h>
|
||||
#include <signal.h>
|
||||
#include <limits.h>
|
||||
#include <pthread.h>
|
||||
#include <semaphore.h>
|
||||
#include <elf.h>
|
||||
#include <err.h>
|
||||
#include <poll.h>
|
||||
#include <sys/wait.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/eventfd.h>
|
||||
#include <linux/const.h>
|
||||
#include <linux/kvm.h>
|
||||
|
||||
#include "uhyve.h"
|
||||
#include "proxy.h"
|
||||
|
||||
/* Offset subtracted from ELF physical addresses when indexing guest memory */
#define GUEST_OFFSET		0x0

/* Size of the page that is mapped read-only at guest physical address 0 */
#define PAGE_SIZE		0x1000

/*
 * Guest physical layout of the virtual GICv2: the distributor (GICD) is
 * placed first, directly followed by the CPU interface (GICC).
 */
#define GIC_SPI_IRQ_BASE	32
#define GICD_SIZE		0x10000ULL
#define GICC_SIZE		0x20000ULL
#define GIC_SIZE		(GICD_SIZE + GICC_SIZE)
#define GICD_BASE		(1ULL << 39)
#define GICC_BASE		(GICD_BASE + GICD_SIZE)

/* Address range occupied by the interrupt controller */
#define KVM_GAP_START		GICD_BASE
#define KVM_GAP_SIZE		(GIC_SIZE)

/* Fallback for tool chains whose headers do not provide offsetof */
#ifndef offsetof
#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
#endif

/* Register id of the 64 bit core register `x` (a member of struct kvm_regs) */
#define ARM64_CORE_REG(x)	(KVM_REG_ARM64 | KVM_REG_SIZE_U64 |\
				 KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))

/* Arguments for ARM64_SYS_REG() that select the MPIDR system register */
#define ARM_CPU_ID		3, 0, 0, 0
#define ARM_CPU_ID_MPIDR	5
|
||||
|
||||
/* File-local state, filled in by init_kvm_arch() */
static bool cap_irqfd = false;		/* host offers KVM_CAP_IRQFD */
static bool cap_read_only = false;	/* host offers KVM_CAP_READONLY_MEM */
static int gic_fd = -1;			/* fd of the vGIC device, -1 if none was created */

/* State shared with the rest of uhyve (defined in other translation units) */
extern bool verbose;
extern uint32_t ncores;
extern uint64_t elf_entry;
extern size_t guest_size;
extern uint8_t* guest_mem;
extern uint8_t* klog;
extern uint8_t* mboot;
extern int kvm, vmfd, netfd, efd;

/* Per-thread state, one instance for every vCPU thread */
extern __thread struct kvm_run *run;
extern __thread int vcpufd;
extern __thread uint32_t cpuid;
|
||||
|
||||
void print_registers(void)
|
||||
{
|
||||
struct kvm_one_reg reg;
|
||||
uint64_t data;
|
||||
|
||||
fprintf(stderr, "\n Dump state of CPU %d\n\n", cpuid);
|
||||
fprintf(stderr, " Registers\n");
|
||||
fprintf(stderr, " =========\n");
|
||||
|
||||
reg.addr = (uint64_t)&data;
|
||||
reg.id = ARM64_CORE_REG(regs.pc);
|
||||
kvm_ioctl(vcpufd, KVM_GET_ONE_REG, ®);
|
||||
fprintf(stderr, " PC: 0x%016lx\n", data);
|
||||
|
||||
reg.id = ARM64_CORE_REG(regs.pstate);
|
||||
kvm_ioctl(vcpufd, KVM_GET_ONE_REG, ®);
|
||||
fprintf(stderr, " PSTATE: 0x%016lx\n", data);
|
||||
|
||||
reg.id = ARM64_CORE_REG(sp_el1);
|
||||
kvm_ioctl(vcpufd, KVM_GET_ONE_REG, ®);
|
||||
fprintf(stderr, " SP_EL1: 0x%016lx\n", data);
|
||||
|
||||
reg.id = ARM64_CORE_REG(regs.regs[30]);
|
||||
kvm_ioctl(vcpufd, KVM_GET_ONE_REG, ®);
|
||||
fprintf(stderr, " LR: 0x%016lx\n", data);
|
||||
|
||||
reg.id = ARM64_SYS_REG(ARM_CPU_ID, ARM_CPU_ID_MPIDR);
|
||||
kvm_ioctl(vcpufd, KVM_GET_ONE_REG, ®);
|
||||
fprintf(stderr, " MPIDR: 0x%016lx\n", data);
|
||||
|
||||
for(int i=0; i<=29; i+=2)
|
||||
{
|
||||
reg.id = ARM64_CORE_REG(regs.regs[i]);
|
||||
kvm_ioctl(vcpufd, KVM_GET_ONE_REG, ®);
|
||||
fprintf(stderr, " X%d:\t 0x%016lx\t", i, data);
|
||||
|
||||
reg.id = ARM64_CORE_REG(regs.regs[i+1]);
|
||||
kvm_ioctl(vcpufd, KVM_GET_ONE_REG, ®);
|
||||
fprintf(stderr, " X%d:\t0x%016lx\n", i+1, data);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
 * Not implemented for aarch64: prints a diagnostic and terminates the
 * process with exit status 1. Never returns.
 */
vcpu_state_t read_cpu_state(void)
{
	/* errx() rather than err(): no system call failed, errno is unrelated */
	errx(1, "Migration is currently not supported!");
}
|
||||
|
||||
/*
 * Migration is not implemented for aarch64: prints a diagnostic and
 * terminates the process with exit status 1. Never returns; `arg` is unused.
 */
void* migration_handler(void* arg)
{
	/* errx() rather than err(): no system call failed, errno is unrelated */
	errx(1, "Migration is currently not supported!");
}
|
||||
|
||||
/*
 * Checkpointing is not implemented for aarch64: prints a diagnostic and
 * terminates the process with exit status 1. Never returns; `signum` is unused.
 */
void timer_handler(int signum)
{
	/* errx() rather than err(): no system call failed, errno is unrelated */
	errx(1, "Checkpointing is currently not supported!");
}
|
||||
|
||||
/*
 * Checkpointing is not implemented for aarch64: prints a diagnostic and
 * terminates the process with exit status 1. Never returns; `state` is unused.
 */
void restore_cpu_state(vcpu_state_t state)
{
	/* errx() rather than err(): no system call failed, errno is unrelated */
	errx(1, "Checkpointing is currently not supported!");
}
|
||||
|
||||
/*
 * Checkpointing is not implemented for aarch64: prints a diagnostic and
 * terminates the process with exit status 1. Never returns.
 */
vcpu_state_t save_cpu_state(void)
{
	/* errx() rather than err(): no system call failed, errno is unrelated */
	errx(1, "Checkpointing is currently not supported!");
}
|
||||
|
||||
|
||||
/*
 * Checkpointing is not implemented for aarch64: prints a diagnostic and
 * terminates the process with exit status 1. Never returns.
 */
void write_cpu_state(void)
{
	/* errx() rather than err(): no system call failed, errno is unrelated */
	errx(1, "Checkpointing is currently not supported!");
}
|
||||
|
||||
/*
 * Checkpointing is not implemented for aarch64: prints a diagnostic and
 * terminates the process with exit status 1. Never returns; both arguments
 * are unused.
 */
int load_checkpoint(uint8_t* mem, char* path)
{
	/* errx() rather than err(): no system call failed, errno is unrelated */
	errx(1, "Checkpointing is currently not supported!");
}
|
||||
|
||||
/*
 * Migration is not implemented for aarch64: prints a diagnostic and
 * terminates the process with exit status 1. Never returns; `mem` is unused.
 */
int load_migration_data(uint8_t* mem)
{
	/*
	 * errx() rather than err(): no system call failed, errno is unrelated.
	 * The message names migration, which is what this entry point serves.
	 */
	errx(1, "Migration is currently not supported!");
}
|
||||
|
||||
/*
 * Migration is not implemented for aarch64: prints a diagnostic and
 * terminates the process with exit status 1. Never returns; both arguments
 * are unused.
 */
void wait_for_incomming_migration(migration_metadata_t *metadata, uint16_t listen_portno)
{
	/*
	 * errx() rather than err(): no system call failed, errno is unrelated.
	 * The message names migration, which is what this entry point serves.
	 */
	errx(1, "Migration is currently not supported!");
}
|
||||
|
||||
void init_cpu_state(uint64_t elf_entry)
|
||||
{
|
||||
struct kvm_vcpu_init vcpu_init = {
|
||||
.features = 0,
|
||||
};
|
||||
struct kvm_vcpu_init preferred_init;
|
||||
|
||||
if (!ioctl(vmfd, KVM_ARM_PREFERRED_TARGET, &preferred_init)) {
|
||||
if ((preferred_init.target == KVM_ARM_TARGET_CORTEX_A57) ||
|
||||
(preferred_init.target == KVM_ARM_TARGET_CORTEX_A53)) {
|
||||
vcpu_init.target = preferred_init.target;
|
||||
} else {
|
||||
vcpu_init.target = KVM_ARM_TARGET_GENERIC_V8;
|
||||
}
|
||||
} else {
|
||||
vcpu_init.target = KVM_ARM_TARGET_GENERIC_V8;
|
||||
}
|
||||
|
||||
kvm_ioctl(vcpufd, KVM_ARM_VCPU_INIT, &vcpu_init);
|
||||
|
||||
// be sure that the multiprocessor is runable
|
||||
struct kvm_mp_state mp_state = { KVM_MP_STATE_RUNNABLE };
|
||||
kvm_ioctl(vcpufd, KVM_SET_MP_STATE, &mp_state);
|
||||
|
||||
struct kvm_one_reg reg;
|
||||
uint64_t data;
|
||||
|
||||
/* pstate = all interrupts masked */
|
||||
data = PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | PSR_MODE_EL1h;
|
||||
reg.id = ARM64_CORE_REG(regs.pstate);
|
||||
reg.addr = (uint64_t)&data;
|
||||
kvm_ioctl(vcpufd, KVM_SET_ONE_REG, ®);
|
||||
|
||||
#if 0
|
||||
/* x0...x3 = 0 */
|
||||
data = 0;
|
||||
reg.id = ARM64_CORE_REG(regs.regs[0]);
|
||||
kvm_ioctl(vcpufd, KVM_SET_ONE_REG, ®);
|
||||
|
||||
reg.id = ARM64_CORE_REG(regs.regs[1]);
|
||||
kvm_ioctl(vcpufd, KVM_SET_ONE_REG, ®);
|
||||
|
||||
reg.id = ARM64_CORE_REG(regs.regs[2]);
|
||||
kvm_ioctl(vcpufd, KVM_SET_ONE_REG, ®);
|
||||
|
||||
reg.id = ARM64_CORE_REG(regs.regs[3]);
|
||||
kvm_ioctl(vcpufd, KVM_SET_ONE_REG, ®);
|
||||
#endif
|
||||
|
||||
/* set start address */
|
||||
data = elf_entry;
|
||||
reg.id = ARM64_CORE_REG(regs.pc);
|
||||
kvm_ioctl(vcpufd, KVM_SET_ONE_REG, ®);
|
||||
|
||||
if (gic_fd > 0) {
|
||||
int lines = 1;
|
||||
uint32_t nr_irqs = lines * 32 + GIC_SPI_IRQ_BASE;
|
||||
struct kvm_device_attr nr_irqs_attr = {
|
||||
.group = KVM_DEV_ARM_VGIC_GRP_NR_IRQS,
|
||||
.addr = (uint64_t)&nr_irqs,
|
||||
};
|
||||
struct kvm_device_attr vgic_init_attr = {
|
||||
.group = KVM_DEV_ARM_VGIC_GRP_CTRL,
|
||||
.attr = KVM_DEV_ARM_VGIC_CTRL_INIT,
|
||||
};
|
||||
|
||||
kvm_ioctl(gic_fd, KVM_SET_DEVICE_ATTR, &nr_irqs_attr);
|
||||
kvm_ioctl(gic_fd, KVM_SET_DEVICE_ATTR, &vgic_init_attr);
|
||||
}
|
||||
|
||||
// only one core is able to enter startup code
|
||||
// => the wait for the predecessor core
|
||||
while (*((volatile uint32_t*) (mboot + 0x120)) < cpuid)
|
||||
pthread_yield();
|
||||
*((volatile uint32_t*) (mboot + 0x130)) = cpuid;
|
||||
}
|
||||
|
||||
void init_kvm_arch(void)
|
||||
{
|
||||
guest_mem = mmap(NULL, guest_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||
if (guest_mem == MAP_FAILED)
|
||||
err(1, "mmap failed");
|
||||
|
||||
const char* merge = getenv("HERMIT_MERGEABLE");
|
||||
if (merge && (strcmp(merge, "0") != 0)) {
|
||||
/*
|
||||
* The KSM feature is intended for applications that generate
|
||||
* many instances of the same data (e.g., virtualization systems
|
||||
* such as KVM). It can consume a lot of processing power!
|
||||
*/
|
||||
madvise(guest_mem, guest_size, MADV_MERGEABLE);
|
||||
if (verbose)
|
||||
fprintf(stderr, "VM uses KSN feature \"mergeable\" to reduce the memory footprint.\n");
|
||||
}
|
||||
|
||||
const char* hugepage = getenv("HERMIT_HUGEPAGE");
|
||||
if (merge && (strcmp(merge, "0") != 0)) {
|
||||
madvise(guest_mem, guest_size, MADV_HUGEPAGE);
|
||||
if (verbose)
|
||||
fprintf(stderr, "VM uses huge pages to improve the performance.\n");
|
||||
}
|
||||
|
||||
cap_read_only = kvm_ioctl(vmfd, KVM_CHECK_EXTENSION, KVM_CAP_READONLY_MEM) <= 0 ? false : true;
|
||||
if (!cap_read_only)
|
||||
err(1, "the support of KVM_CAP_READONLY_MEM is curently required");
|
||||
|
||||
struct kvm_userspace_memory_region kvm_region = {
|
||||
.slot = 0,
|
||||
.guest_phys_addr = 0,
|
||||
.memory_size = PAGE_SIZE,
|
||||
.userspace_addr = (uint64_t) guest_mem,
|
||||
.flags = KVM_MEM_READONLY,
|
||||
};
|
||||
kvm_ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &kvm_region);
|
||||
|
||||
kvm_region = (struct kvm_userspace_memory_region) {
|
||||
.slot = 1,
|
||||
.guest_phys_addr = PAGE_SIZE,
|
||||
.memory_size = guest_size - PAGE_SIZE,
|
||||
.userspace_addr = (uint64_t) guest_mem + PAGE_SIZE,
|
||||
#ifdef USE_DIRTY_LOG
|
||||
.flags = KVM_MEM_LOG_DIRTY_PAGES,
|
||||
#else
|
||||
.flags = 0,
|
||||
#endif
|
||||
};
|
||||
kvm_ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &kvm_region);
|
||||
|
||||
#if 0
|
||||
/* Create interrupt controller GICv2 */
|
||||
uint64_t cpu_if_addr = GICC_BASE;
|
||||
uint64_t dist_addr = GICD_BASE;
|
||||
struct kvm_device_attr cpu_if_attr = {
|
||||
.group = KVM_DEV_ARM_VGIC_GRP_ADDR,
|
||||
.attr = KVM_VGIC_V2_ADDR_TYPE_CPU,
|
||||
.addr = (uint64_t)&cpu_if_addr,
|
||||
};
|
||||
struct kvm_create_device gic_device = {
|
||||
.flags = 0,
|
||||
.type = KVM_DEV_TYPE_ARM_VGIC_V2,
|
||||
};
|
||||
struct kvm_device_attr dist_attr = {
|
||||
.group = KVM_DEV_ARM_VGIC_GRP_ADDR,
|
||||
.attr = KVM_VGIC_V2_ADDR_TYPE_DIST,
|
||||
.addr = (uint64_t)&dist_addr,
|
||||
};
|
||||
kvm_ioctl(vmfd, KVM_CREATE_DEVICE, &gic_device);
|
||||
|
||||
gic_fd = gic_device.fd;
|
||||
kvm_ioctl(gic_fd, KVM_SET_DEVICE_ATTR, &cpu_if_attr);
|
||||
kvm_ioctl(gic_fd, KVM_SET_DEVICE_ATTR, &dist_attr);
|
||||
#else
|
||||
/* Create interrupt controller GICv2 */
|
||||
struct kvm_arm_device_addr gic_addr[] = {
|
||||
[0] = {
|
||||
.id = KVM_VGIC_V2_ADDR_TYPE_DIST |
|
||||
(KVM_ARM_DEVICE_VGIC_V2 << KVM_ARM_DEVICE_ID_SHIFT),
|
||||
.addr = GICD_BASE,
|
||||
},
|
||||
[1] = {
|
||||
.id = KVM_VGIC_V2_ADDR_TYPE_CPU |
|
||||
(KVM_ARM_DEVICE_VGIC_V2 << KVM_ARM_DEVICE_ID_SHIFT),
|
||||
.addr = GICC_BASE,
|
||||
}
|
||||
};
|
||||
|
||||
kvm_ioctl(vmfd, KVM_CREATE_IRQCHIP, NULL);
|
||||
kvm_ioctl(vmfd, KVM_ARM_SET_DEVICE_ADDR, &gic_addr[0]);
|
||||
kvm_ioctl(vmfd, KVM_ARM_SET_DEVICE_ADDR, &gic_addr[1]);
|
||||
#endif
|
||||
|
||||
//fprintf(stderr, "Create gicd at 0x%llx\n", GICD_BASE);
|
||||
//fprintf(stderr, "Create gicc at 0x%llx\n", GICC_BASE);
|
||||
|
||||
cap_irqfd = ioctl(vmfd, KVM_CHECK_EXTENSION, KVM_CAP_IRQFD) <= 0 ? false : true;
|
||||
if (!cap_irqfd)
|
||||
err(1, "the support of KVM_CAP_IRQFD is curently required");
|
||||
}
|
||||
|
||||
int load_kernel(uint8_t* mem, char* path)
|
||||
{
|
||||
Elf64_Ehdr hdr;
|
||||
Elf64_Phdr *phdr = NULL;
|
||||
size_t buflen;
|
||||
size_t pstart = 0;
|
||||
int fd, ret;
|
||||
|
||||
fd = open(path, O_RDONLY);
|
||||
if (fd == -1)
|
||||
{
|
||||
perror("Unable to open file");
|
||||
return -1;
|
||||
}
|
||||
|
||||
ret = pread_in_full(fd, &hdr, sizeof(hdr), 0);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
// check if the program is a HermitCore file
|
||||
if (hdr.e_ident[EI_MAG0] != ELFMAG0
|
||||
|| hdr.e_ident[EI_MAG1] != ELFMAG1
|
||||
|| hdr.e_ident[EI_MAG2] != ELFMAG2
|
||||
|| hdr.e_ident[EI_MAG3] != ELFMAG3
|
||||
|| hdr.e_ident[EI_CLASS] != ELFCLASS64
|
||||
|| hdr.e_ident[EI_OSABI] != HERMIT_ELFOSABI
|
||||
|| hdr.e_type != ET_EXEC || hdr.e_machine != EM_AARCH64) {
|
||||
fprintf(stderr, "Invalid HermitCore file!\n");
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
elf_entry = hdr.e_entry;
|
||||
|
||||
buflen = hdr.e_phentsize * hdr.e_phnum;
|
||||
phdr = malloc(buflen);
|
||||
if (!phdr) {
|
||||
fprintf(stderr, "Not enough memory\n");
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = pread_in_full(fd, phdr, buflen, hdr.e_phoff);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* Load all segments with type "LOAD" from the file at offset
|
||||
* p_offset, and copy that into in memory.
|
||||
*/
|
||||
for (Elf64_Half ph_i = 0; ph_i < hdr.e_phnum; ph_i++)
|
||||
{
|
||||
uint64_t paddr = phdr[ph_i].p_paddr;
|
||||
size_t offset = phdr[ph_i].p_offset;
|
||||
size_t filesz = phdr[ph_i].p_filesz;
|
||||
size_t memsz = phdr[ph_i].p_memsz;
|
||||
|
||||
if (phdr[ph_i].p_type != PT_LOAD)
|
||||
continue;
|
||||
|
||||
//fprintf(stderr, "Kernel location 0x%zx, file size 0x%zx, memory size 0x%zx\n", paddr, filesz, memsz);
|
||||
|
||||
ret = pread_in_full(fd, mem+paddr-GUEST_OFFSET, filesz, offset);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
if (!klog)
|
||||
klog = mem+paddr+0x1000-GUEST_OFFSET;
|
||||
if (!mboot)
|
||||
mboot = mem+paddr-GUEST_OFFSET;
|
||||
//fprintf(stderr, "mboot at %p, klog at %p\n", mboot, klog);
|
||||
|
||||
if (!pstart) {
|
||||
pstart = paddr;
|
||||
|
||||
// initialize kernel
|
||||
*((uint64_t*) (mem+paddr-GUEST_OFFSET + 0x100)) = paddr; // physical start address
|
||||
*((uint64_t*) (mem+paddr-GUEST_OFFSET + 0x108)) = guest_size - PAGE_SIZE; // physical limit
|
||||
*((uint32_t*) (mem+paddr-GUEST_OFFSET + 0x110)) = get_cpufreq();
|
||||
*((uint32_t*) (mem+paddr-GUEST_OFFSET + 0x128)) = ncores; // number of used cpus
|
||||
*((uint32_t*) (mem+paddr-GUEST_OFFSET + 0x130)) = 0; // cpuid
|
||||
*((uint32_t*) (mem+paddr-GUEST_OFFSET + 0x148)) = 1; // announce uhyve
|
||||
|
||||
|
||||
char* str = getenv("HERMIT_IP");
|
||||
if (str) {
|
||||
uint32_t ip[4];
|
||||
|
||||
sscanf(str, "%u.%u.%u.%u", ip+0, ip+1, ip+2, ip+3);
|
||||
*((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB0)) = (uint8_t) ip[0];
|
||||
*((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB1)) = (uint8_t) ip[1];
|
||||
*((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB2)) = (uint8_t) ip[2];
|
||||
*((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB3)) = (uint8_t) ip[3];
|
||||
}
|
||||
|
||||
str = getenv("HERMIT_GATEWAY");
|
||||
if (str) {
|
||||
uint32_t ip[4];
|
||||
|
||||
sscanf(str, "%u.%u.%u.%u", ip+0, ip+1, ip+2, ip+3);
|
||||
*((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB4)) = (uint8_t) ip[0];
|
||||
*((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB5)) = (uint8_t) ip[1];
|
||||
*((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB6)) = (uint8_t) ip[2];
|
||||
*((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB7)) = (uint8_t) ip[3];
|
||||
}
|
||||
str = getenv("HERMIT_MASK");
|
||||
if (str) {
|
||||
uint32_t ip[4];
|
||||
|
||||
sscanf(str, "%u.%u.%u.%u", ip+0, ip+1, ip+2, ip+3);
|
||||
*((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB8)) = (uint8_t) ip[0];
|
||||
*((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB9)) = (uint8_t) ip[1];
|
||||
*((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xBA)) = (uint8_t) ip[2];
|
||||
*((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xBB)) = (uint8_t) ip[3];
|
||||
}
|
||||
|
||||
*((uint64_t*) (mem+paddr-GUEST_OFFSET + 0xbc)) = (uint64_t) guest_mem;
|
||||
if (verbose)
|
||||
*((uint32_t*) (mem+paddr-GUEST_OFFSET + 0x174)) = (uint32_t) UHYVE_UART_PORT;
|
||||
}
|
||||
*((uint64_t*) (mem+pstart-GUEST_OFFSET + 0x158)) = paddr + memsz - pstart; // total kernel size
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
|
||||
out:
|
||||
if (phdr)
|
||||
free(phdr);
|
||||
|
||||
close(fd);
|
||||
|
||||
return ret;
|
||||
}
|
||||
#endif
|
|
@ -1,72 +0,0 @@
|
|||
/*
|
||||
* This file was adapted from the solo5/ukvm code base, initial copyright block
|
||||
* follows:
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2015-2017 Contributors as noted in the AUTHORS file
|
||||
*
|
||||
* This file is part of ukvm, a unikernel monitor.
|
||||
*
|
||||
* Permission to use, copy, modify, and/or distribute this software
|
||||
* for any purpose with or without fee is hereby granted, provided
|
||||
* that the above copyright notice and this permission notice appear
|
||||
* in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
|
||||
* WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
|
||||
* AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
|
||||
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
||||
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Based on binutils-gdb/gdb/stubs/i386-stub.c, which is:
|
||||
* Not copyrighted.
|
||||
*/
|
||||
|
||||
#ifdef __aarch64__
|
||||
#define _GNU_SOURCE
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/mman.h>
|
||||
#include <err.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
#include <sys/socket.h>
|
||||
#include <netinet/in.h>
|
||||
#include <netinet/tcp.h>
|
||||
#include <arpa/inet.h>
|
||||
#include <netdb.h>
|
||||
#include <assert.h>
|
||||
#include <stdbool.h>
|
||||
#include <ctype.h>
|
||||
#include <linux/kvm.h>
|
||||
|
||||
#include "uhyve.h"
|
||||
#include "uhyve-gdb.h"
|
||||
#include "queue.h"
|
||||
|
||||
/*
 * GDB support is not implemented for aarch64; this hook does nothing.
 */
void uhyve_gdb_handle_exception(int vcpufd, int sigval)
{
	/* intentionally empty */
	(void) vcpufd;
	(void) sigval;
}
|
||||
|
||||
/*
 * GDB support is not implemented for aarch64; this hook does nothing.
 */
void uhyve_gdb_handle_term(void)
{
	/* intentionally empty */
}
|
||||
|
||||
/*
 * GDB support is not implemented for aarch64: always returns -1 and leaves
 * the vCPU untouched.
 */
int uhyve_gdb_init(int vcpufd)
{
	const int not_supported = -1;

	(void) vcpufd;
	return not_supported;
}
|
||||
|
||||
#endif
|
|
@ -1,40 +0,0 @@
|
|||
/*
|
||||
* This file was adapted from the solo5/ukvm code base, initial copyright block
|
||||
* follows:
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2015-2017 Contributors as noted in the AUTHORS file
|
||||
*
|
||||
* This file is part of ukvm, a unikernel monitor.
|
||||
*
|
||||
* Permission to use, copy, modify, and/or distribute this software
|
||||
* for any purpose with or without fee is hereby granted, provided
|
||||
* that the above copyright notice and this permission notice appear
|
||||
* in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
|
||||
* WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
|
||||
* AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
|
||||
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
||||
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef UHYVE_GDB_AARCH64_H
|
||||
#define UHYVE_GDB_AARCH64_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
/*
 * Register set of an aarch64 vCPU as handled by the GDB stub.
 * The member order defines the layout of the structure and must not change.
 */
struct uhyve_gdb_regs {
	uint64_t regs[31];	/* general purpose registers */
	uint64_t lr;		/* link register */
	uint64_t pc;		/* program counter */
	uint64_t pstate;	/* processor state */
	uint64_t sp;		/* stack pointer */
};
|
||||
|
||||
#endif /* UHYVE_GDB_AARCH64_H */
|
|
@ -1,993 +0,0 @@
|
|||
/*
|
||||
* This file was adapted from the solo5/ukvm code base, initial copyright block
|
||||
* follows:
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2015-2017 Contributors as noted in the AUTHORS file
|
||||
*
|
||||
* This file is part of ukvm, a unikernel monitor.
|
||||
*
|
||||
* Permission to use, copy, modify, and/or distribute this software
|
||||
* for any purpose with or without fee is hereby granted, provided
|
||||
* that the above copyright notice and this permission notice appear
|
||||
* in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
|
||||
* WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
|
||||
* AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
|
||||
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
||||
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Based on binutils-gdb/gdb/stubs/i386-stub.c, which is:
|
||||
* Not copyrighted.
|
||||
*/
|
||||
|
||||
#ifdef __x86_64__
|
||||
#define _GNU_SOURCE
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/mman.h>
|
||||
#include <err.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
#include <sys/socket.h>
|
||||
#include <netinet/in.h>
|
||||
#include <netinet/tcp.h>
|
||||
#include <arpa/inet.h>
|
||||
#include <netdb.h>
|
||||
#include <assert.h>
|
||||
#include <stdbool.h>
|
||||
#include <ctype.h>
|
||||
#include <linux/kvm.h>
|
||||
|
||||
#include "uhyve.h"
|
||||
#include "uhyve-gdb.h"
|
||||
#include "queue.h"
|
||||
|
||||
struct breakpoint_t {
|
||||
gdb_breakpoint_type type;
|
||||
uint64_t addr;
|
||||
size_t len;
|
||||
uint32_t refcount;
|
||||
uint8_t saved_insn; /* for software breakpoints */
|
||||
|
||||
SLIST_ENTRY(breakpoint_t) entries;
|
||||
};
|
||||
|
||||
SLIST_HEAD(breakpoints_head, breakpoint_t);
|
||||
static struct breakpoints_head sw_breakpoints;
|
||||
static struct breakpoints_head hw_breakpoints;
|
||||
|
||||
/* The Intel SDM specifies that the DR7 has space for 4 breakpoints. */
|
||||
#define MAX_HW_BREAKPOINTS 4
|
||||
static uint32_t nr_hw_breakpoints = 0;
|
||||
|
||||
/* Stepping is disabled by default. */
|
||||
static bool stepping = false;
|
||||
/* This is the trap instruction used for software breakpoints. */
|
||||
static const uint8_t int3 = 0xcc;
|
||||
|
||||
static int socket_fd = 0;
|
||||
static int portno = 1234; /* Default port number */
|
||||
static const char hexchars[] = "0123456789abcdef";
|
||||
|
||||
#define BUFMAX 4096
|
||||
static char in_buffer[BUFMAX];
|
||||
static unsigned char registers[BUFMAX];
|
||||
|
||||
/* uhyve variables */
|
||||
extern size_t guest_size;
|
||||
extern uint8_t *guest_mem;
|
||||
|
||||
void *uhyve_checked_gpa_p(uint64_t gpa, size_t sz, uint8_t * chk_guest_mem,
|
||||
size_t chk_guest_size, const char *file, int line);
|
||||
|
||||
/* The actual error code is ignored by GDB, so any number will do. */
|
||||
#define GDB_ERROR_MSG "E01"
|
||||
|
||||
/*
 * Value of the hexadecimal digit `ch` (0..15, upper or lower case), or -1 if
 * `ch` is not a hexadecimal digit.
 */
static int hex(unsigned char ch)
{
	int value = -1;

	if (ch >= '0' && ch <= '9')
		value = ch - '0';
	else if (ch >= 'a' && ch <= 'f')
		value = ch - 'a' + 10;
	else if (ch >= 'A' && ch <= 'F')
		value = ch - 'A' + 10;

	return value;
}
|
||||
|
||||
/*
|
||||
* Converts the (count) bytes of memory pointed to by mem into an hex string in
|
||||
* buf. Returns a pointer to the last char put in buf (null).
|
||||
*/
|
||||
static char *mem2hex(const unsigned char *mem, char *buf, size_t count)
|
||||
{
|
||||
size_t i;
|
||||
unsigned char ch;
|
||||
|
||||
for (i = 0; i < count; i++) {
|
||||
ch = *mem++;
|
||||
*buf++ = hexchars[ch >> 4];
|
||||
*buf++ = hexchars[ch % 16];
|
||||
}
|
||||
*buf = 0;
|
||||
return buf;
|
||||
}
|
||||
|
||||
/*
 * Decode 2 * count hex characters from `buf` into `count` bytes at `mem`.
 * Asserts that `buf` is long enough. Returns a pointer to the position
 * behind the last byte written.
 */
static unsigned char *hex2mem(const char *buf, unsigned char *mem, size_t count)
{
	assert(strlen(buf) >= (2 * count));

	for (unsigned char *end = mem + count; mem != end; mem++) {
		int high = hex(*buf++);
		int low = hex(*buf++);

		*mem = (unsigned char) ((high << 4) + low);
	}

	return mem;
}
|
||||
|
||||
static int wait_for_connect(void)
|
||||
{
|
||||
int listen_socket_fd;
|
||||
struct sockaddr_in server_addr, client_addr;
|
||||
struct protoent *protoent;
|
||||
struct in_addr ip_addr;
|
||||
socklen_t len;
|
||||
int opt;
|
||||
|
||||
listen_socket_fd = socket(AF_INET, SOCK_STREAM, 0);
|
||||
if (listen_socket_fd == -1) {
|
||||
err(1, "Could not create socket");
|
||||
return -1;
|
||||
}
|
||||
|
||||
opt = 1;
|
||||
if (setsockopt(listen_socket_fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) == -1)
|
||||
err(1, "setsockopt(SO_REUSEADDR) failed");
|
||||
|
||||
server_addr.sin_family = AF_INET;
|
||||
server_addr.sin_addr.s_addr = htonl(INADDR_ANY);
|
||||
server_addr.sin_port = htons(portno);
|
||||
|
||||
if (bind(listen_socket_fd, (struct sockaddr *)&server_addr,
|
||||
sizeof(server_addr)) == -1) {
|
||||
err(1, "bind failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (listen(listen_socket_fd, 0) == -1) {
|
||||
err(1, "listen failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
warnx("Waiting for a debugger. Connect to it like this:");
|
||||
warnx("\tgdb --ex=\"target remote localhost:%d\" UNIKERNEL", portno);
|
||||
|
||||
len = sizeof(client_addr);
|
||||
socket_fd =
|
||||
accept(listen_socket_fd, (struct sockaddr *)&client_addr, &len);
|
||||
if (socket_fd == -1) {
|
||||
err(1, "accept failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
close(listen_socket_fd);
|
||||
|
||||
protoent = getprotobyname("tcp");
|
||||
if (!protoent) {
|
||||
err(1, "getprotobyname (\"tcp\") failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
opt = 1;
|
||||
if (setsockopt(socket_fd, protoent->p_proto, TCP_NODELAY, &opt,
|
||||
sizeof(opt)) == -1)
|
||||
err(1, "setsockopt(TCP_NODELAY) failed");
|
||||
|
||||
ip_addr.s_addr = client_addr.sin_addr.s_addr;
|
||||
warnx("Connection from debugger at %s", inet_ntoa(ip_addr));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int send_char(char ch)
|
||||
{
|
||||
/* TCP is already buffering, so no need to buffer here as well. */
|
||||
return send(socket_fd, &ch, 1, 0);
|
||||
}
|
||||
|
||||
static char recv_char(void)
|
||||
{
|
||||
unsigned char ch;
|
||||
int ret;
|
||||
|
||||
ret = recv(socket_fd, &ch, 1, 0);
|
||||
if (ret < 0) {
|
||||
return -1;
|
||||
} else if (ret == 0) {
|
||||
/* The peer has performed an orderly shutdown (from "man recv"). */
|
||||
warnx("GDB: Connection closed from client");
|
||||
close(socket_fd);
|
||||
socket_fd = -1;
|
||||
return -1;
|
||||
} else {
|
||||
assert(ret == 1);
|
||||
}
|
||||
|
||||
/* All GDB remote packets are encoded in ASCII. */
|
||||
assert(isascii(ch));
|
||||
|
||||
return (char)ch;
|
||||
}
|
||||
|
||||
/*
|
||||
* Scan for the sequence $<data>#<checksum>
|
||||
* Returns a null terminated string.
|
||||
*/
|
||||
static char *recv_packet(void)
|
||||
{
|
||||
char *buffer = &in_buffer[0];
|
||||
unsigned char checksum;
|
||||
unsigned char xmitcsum;
|
||||
char ch;
|
||||
int count;
|
||||
|
||||
while (1) {
|
||||
/* wait around for the start character, ignore all other characters */
|
||||
do {
|
||||
ch = recv_char();
|
||||
if (ch == -1)
|
||||
return NULL;
|
||||
}
|
||||
while (ch != '$');
|
||||
|
||||
retry:
|
||||
checksum = 0;
|
||||
xmitcsum = -1;
|
||||
count = 0;
|
||||
|
||||
/* now, read until a # or end of buffer is found */
|
||||
while (count < BUFMAX - 1) {
|
||||
ch = recv_char();
|
||||
if (ch == -1)
|
||||
return NULL;
|
||||
if (ch == '$')
|
||||
goto retry;
|
||||
if (ch == '#')
|
||||
break;
|
||||
checksum = checksum + ch;
|
||||
buffer[count] = ch;
|
||||
count = count + 1;
|
||||
}
|
||||
/* Let's make this a C string. */
|
||||
buffer[count] = '\0';
|
||||
|
||||
if (ch == '#') {
|
||||
ch = recv_char();
|
||||
if (ch == -1)
|
||||
return NULL;
|
||||
xmitcsum = hex(ch) << 4;
|
||||
ch = recv_char();
|
||||
if (ch == -1)
|
||||
return NULL;
|
||||
xmitcsum += hex(ch);
|
||||
|
||||
if (checksum != xmitcsum) {
|
||||
warnx("Failed checksum from GDB. "
|
||||
"My count = 0x%x, sent=0x%x. buf=%s",
|
||||
checksum, xmitcsum, buffer);
|
||||
if (send_char('-') == -1)
|
||||
/* Unsuccessful reply to a failed checksum */
|
||||
err(1,
|
||||
"GDB: Could not send an ACK to the debugger.");
|
||||
} else {
|
||||
if (send_char('+') == -1)
|
||||
/* Unsuccessful reply to a successful transfer */
|
||||
err(1,
|
||||
"GDB: Could not send an ACK to the debugger.");
|
||||
|
||||
/* if a sequence char is present, reply the sequence ID */
|
||||
if (buffer[2] == ':') {
|
||||
send_char(buffer[0]);
|
||||
send_char(buffer[1]);
|
||||
|
||||
return &buffer[3];
|
||||
}
|
||||
|
||||
return &buffer[0];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Send packet of the form $<packet info>#<checksum> without waiting for an ACK
|
||||
* from the debugger. Only send_response
|
||||
*/
|
||||
static void send_packet_no_ack(char *buffer)
|
||||
{
|
||||
unsigned char checksum;
|
||||
int count;
|
||||
char ch;
|
||||
|
||||
/*
|
||||
* We ignore all send_char errors as we either: (1) care about sending our
|
||||
* packet and we will keep sending it until we get a good ACK from the
|
||||
* debugger, or (2) not care and just send it as a best-effort notification
|
||||
* when dying.
|
||||
*/
|
||||
|
||||
send_char('$');
|
||||
checksum = 0;
|
||||
count = 0;
|
||||
|
||||
ch = buffer[count];
|
||||
while (ch) {
|
||||
send_char(ch);
|
||||
checksum += ch;
|
||||
count += 1;
|
||||
ch = buffer[count];
|
||||
}
|
||||
|
||||
send_char('#');
|
||||
send_char(hexchars[checksum >> 4]);
|
||||
send_char(hexchars[checksum % 16]);
|
||||
}
|
||||
|
||||
/*
 * Send the packet `buffer` and repeat it until the debugger acknowledges it
 * with '+'. Any other reply (for example '-') triggers a retransmission.
 * Gives up silently if the reply cannot be received.
 */
static void send_packet(char *buffer)
{
	char reply;

	do {
		send_packet_no_ack(buffer);
		reply = recv_char();
		if (reply == -1)
			return;
	} while (reply != '+');
}
|
||||
|
||||
/* Canned replies, each sent with send_packet() and therefore waiting for an ACK */

/* the request failed */
#define send_error_msg()		do { send_packet(GDB_ERROR_MSG); } while (0)

/* the request is not supported (empty packet) */
#define send_not_supported_msg()	do { send_packet(""); } while (0)

/* the request succeeded */
#define send_okay_msg()			do { send_packet("OK"); } while (0)
|
||||
|
||||
/*
|
||||
* This is a response to 'c' and 's'. In other words, the VM was
|
||||
* running and it stopped for some reason. This message is to tell the
|
||||
* debugger that whe stopped (and why). The argument code can take these
|
||||
* and some other values:
|
||||
* - 'S AA' received signal AA
|
||||
* - 'W AA' exited with return code AA
|
||||
* - 'X AA' exited with signal AA
|
||||
* https://sourceware.org/gdb/onlinedocs/gdb/Stop-Reply-Packets.html
|
||||
*/
|
||||
static void send_response(char code, int sigval, bool wait_for_ack)
|
||||
{
|
||||
char obuf[BUFMAX];
|
||||
snprintf(obuf, sizeof(obuf), "%c%02x", code, sigval);
|
||||
if (wait_for_ack)
|
||||
send_packet(obuf);
|
||||
else
|
||||
send_packet_no_ack(obuf);
|
||||
}
|
||||
|
||||
static void gdb_handle_exception(int vcpufd, int sigval)
|
||||
{
|
||||
char *packet;
|
||||
char obuf[BUFMAX];
|
||||
|
||||
/* Notify the debugger of our last signal */
|
||||
send_response('S', sigval, true);
|
||||
|
||||
for (;;) {
|
||||
uint64_t addr = 0, result;
|
||||
gdb_breakpoint_type type;
|
||||
size_t len;
|
||||
int command, ret;
|
||||
|
||||
packet = recv_packet();
|
||||
if (packet == NULL)
|
||||
/* Without a packet with instructions with what to do next there is
|
||||
* really nothing we can do to recover. So, dying. */
|
||||
errx(1,
|
||||
"GDB: Exiting as we could not receive the next command from "
|
||||
"the debugger.");
|
||||
|
||||
/*
|
||||
* From the GDB manual:
|
||||
* "At a minimum, a stub is required to support the ‘g’ and ‘G’
|
||||
* commands for register access, and the ‘m’ and ‘M’ commands
|
||||
* for memory access. Stubs that only control single-threaded
|
||||
* targets can implement run control with the ‘c’ (continue),
|
||||
* and ‘s’ (step) commands."
|
||||
*/
|
||||
command = packet[0];
|
||||
switch (command) {
|
||||
case 's':
|
||||
{
|
||||
/* Step */
|
||||
if (sscanf(packet, "s%" PRIx64, &addr) == 1) {
|
||||
/* not supported, but that's OK as GDB will retry with the
|
||||
* slower version of this: update all registers. */
|
||||
send_not_supported_msg();
|
||||
break; /* Wait for another command. */
|
||||
}
|
||||
if (uhyve_gdb_enable_ss(vcpufd) == -1) {
|
||||
send_error_msg();
|
||||
break; /* Wait for another command. */
|
||||
}
|
||||
return; /* Continue with program */
|
||||
}
|
||||
|
||||
case 'c':
|
||||
{
|
||||
/* Continue (and disable stepping for the next instruction) */
|
||||
if (sscanf(packet, "c%" PRIx64, &addr) == 1) {
|
||||
/* not supported, but that's OK as GDB will retry with the
|
||||
* slower version of this: update all registers. */
|
||||
send_not_supported_msg();
|
||||
break; /* Wait for another command. */
|
||||
}
|
||||
if (uhyve_gdb_disable_ss(vcpufd) == -1) {
|
||||
send_error_msg();
|
||||
break; /* Wait for another command. */
|
||||
}
|
||||
return; /* Continue with program */
|
||||
}
|
||||
|
||||
case 'm':
|
||||
{
|
||||
/* Read memory content */
|
||||
if (sscanf(packet, "m%" PRIx64 ",%zx", &addr, &len) != 2) {
|
||||
send_error_msg();
|
||||
break;
|
||||
}
|
||||
/* translate addr into guest phys first. it is
|
||||
* needed if the address falls into the non directly mapped
|
||||
* part of the virtual address space (ex: heap/stack) */
|
||||
uint64_t phys_addr;
|
||||
|
||||
if (uhyve_gdb_guest_virt_to_phys(vcpufd, addr, &phys_addr)) {
|
||||
send_error_msg();
|
||||
} else {
|
||||
mem2hex(guest_mem + phys_addr, obuf, len);
|
||||
send_packet(obuf);
|
||||
}
|
||||
break; /* Wait for another command. */
|
||||
}
|
||||
|
||||
case 'M':
|
||||
{
|
||||
/* Write memory content */
|
||||
uint64_t phys_addr;
|
||||
|
||||
assert(strlen(packet) <= sizeof(obuf));
|
||||
if (sscanf(packet, "M%" PRIx64 ",%zx:%s", &addr, &len, obuf) != 3) {
|
||||
send_error_msg();
|
||||
break;
|
||||
}
|
||||
|
||||
/* translate to guest physical address first */
|
||||
if (uhyve_gdb_guest_virt_to_phys(vcpufd, addr, &phys_addr)) {
|
||||
send_error_msg();
|
||||
} else {
|
||||
hex2mem(obuf, guest_mem + phys_addr,
|
||||
len);
|
||||
send_okay_msg();
|
||||
}
|
||||
break; /* Wait for another command. */
|
||||
}
|
||||
|
||||
case 'g':
|
||||
{
|
||||
/* Read general registers */
|
||||
len = BUFMAX;
|
||||
if (uhyve_gdb_read_registers(vcpufd, registers, &len) == -1) {
|
||||
send_error_msg();
|
||||
} else {
|
||||
mem2hex(registers, obuf, len);
|
||||
send_packet(obuf);
|
||||
}
|
||||
break; /* Wait for another command. */
|
||||
}
|
||||
|
||||
case 'G':
|
||||
{
|
||||
/* Write general registers */
|
||||
len = BUFMAX;
|
||||
/* Call read_registers just to get len (not very efficient). */
|
||||
if (uhyve_gdb_read_registers(vcpufd, registers, &len) == -1) {
|
||||
send_error_msg();
|
||||
break;
|
||||
}
|
||||
/* Packet looks like 'Gxxxxx', so we have to skip the first char */
|
||||
hex2mem(packet + 1, registers, len);
|
||||
if (uhyve_gdb_write_registers(vcpufd, registers, len) == -1) {
|
||||
send_error_msg();
|
||||
break;
|
||||
}
|
||||
send_okay_msg();
|
||||
break; /* Wait for another command. */
|
||||
}
|
||||
|
||||
case '?':
|
||||
{
|
||||
/* Return last signal */
|
||||
send_response('S', sigval, true);
|
||||
break; /* Wait for another command. */
|
||||
}
|
||||
|
||||
case 'Z':
|
||||
/* Insert a breakpoint */
|
||||
case 'z':
|
||||
{
|
||||
/* Remove a breakpoint */
|
||||
packet++;
|
||||
if (sscanf(packet, "%" PRIx32 ",%" PRIx64 ",%zx",
|
||||
&type, &addr, &len) != 3) {
|
||||
send_error_msg();
|
||||
break;
|
||||
}
|
||||
uint64_t phys_addr;
|
||||
if (uhyve_gdb_guest_virt_to_phys(vcpufd, addr, &phys_addr)) {
|
||||
send_error_msg();
|
||||
} else {
|
||||
if (command == 'Z')
|
||||
ret = uhyve_gdb_add_breakpoint(vcpufd, type, phys_addr, len);
|
||||
else
|
||||
ret = uhyve_gdb_remove_breakpoint(vcpufd, type, phys_addr, len);
|
||||
|
||||
if (ret == -1)
|
||||
send_error_msg();
|
||||
else
|
||||
send_okay_msg();
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case 'k':
|
||||
{
|
||||
warnx("Debugger asked us to quit");
|
||||
send_okay_msg();
|
||||
break;
|
||||
}
|
||||
|
||||
case 'D':
|
||||
{
|
||||
warnx("Debugger detached");
|
||||
send_okay_msg();
|
||||
return;
|
||||
}
|
||||
|
||||
default:
|
||||
send_not_supported_msg();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Public entry point: forwards to the file-local command loop, which
 * reports `sigval` to the debugger and serves its requests.
 */
void uhyve_gdb_handle_exception(int vcpufd, int sigval)
{
	gdb_handle_exception(vcpufd, sigval);
}
|
||||
|
||||
static void gdb_stub_start(int vcpufd)
|
||||
{
|
||||
wait_for_connect();
|
||||
gdb_handle_exception(vcpufd, GDB_SIGNAL_FIRST);
|
||||
}
|
||||
|
||||
int uhyve_gdb_init(int vcpufd)
|
||||
{
|
||||
/*
|
||||
* GDB clients can change memory, and software breakpoints work by
|
||||
* replacing instructions with int3's.
|
||||
*/
|
||||
if (mprotect(guest_mem, guest_size, PROT_READ | PROT_WRITE | PROT_EXEC) == -1)
|
||||
err(1, "GDB: Cannot remove guest memory protection");
|
||||
|
||||
/* Notify the debugger that we are dying. */
|
||||
atexit(uhyve_gdb_handle_term);
|
||||
|
||||
gdb_stub_start(vcpufd);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Tells the debugger that the process exited ('W' stop reply) and waits
 * for the acknowledgement.
 */
void uhyve_gdb_handle_term(void)
{
	/* TODO: the exit code reported here is always zero (graceful
	 * shutdown); is there any way to pass an error code when things
	 * go wrong? */
	const int exit_code = 0;

	send_response('W', exit_code, true);
}
|
||||
|
||||
static int kvm_arch_insert_sw_breakpoint(struct breakpoint_t *bp)
|
||||
{
|
||||
uint8_t *insn = bp->addr + guest_mem;
|
||||
bp->saved_insn = *insn;
|
||||
/*
|
||||
* We just modify the first byte even if the instruction is multi-byte.
|
||||
* The debugger keeps track of the length of the instruction. The
|
||||
* consequence of this is that we don't have to set all other bytes as
|
||||
* NOP's.
|
||||
*/
|
||||
*insn = int3;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_arch_remove_sw_breakpoint(struct breakpoint_t *bp)
|
||||
{
|
||||
uint8_t *insn = bp->addr + guest_mem;
|
||||
assert(*insn == int3);
|
||||
*insn = bp->saved_insn;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int uhyve_gdb_update_guest_debug(int vcpufd)
|
||||
{
|
||||
struct kvm_guest_debug dbg = { 0 };
|
||||
struct breakpoint_t *bp;
|
||||
const uint8_t type_code[] = {
|
||||
/* Break on instruction execution only. */
|
||||
[GDB_BREAKPOINT_HW] = 0x0,
|
||||
/* Break on data writes only. */
|
||||
[GDB_WATCHPOINT_WRITE] = 0x1,
|
||||
/* Break on data reads only. */
|
||||
[GDB_WATCHPOINT_READ] = 0x2,
|
||||
/* Break on data reads or writes but not instruction fetches. */
|
||||
[GDB_WATCHPOINT_ACCESS] = 0x3
|
||||
};
|
||||
const uint8_t len_code[] = {
|
||||
/*
|
||||
* 00 — 1-byte length.
|
||||
* 01 — 2-byte length.
|
||||
* 10 — 8-byte length.
|
||||
* 11 — 4-byte length.
|
||||
*/
|
||||
[1] = 0x0,[2] = 0x1,[4] = 0x3,[8] = 0x2
|
||||
};
|
||||
int n = 0;
|
||||
|
||||
if (stepping)
|
||||
dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
|
||||
|
||||
if (!SLIST_EMPTY(&sw_breakpoints))
|
||||
dbg.control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
|
||||
|
||||
if (!SLIST_EMPTY(&hw_breakpoints)) {
|
||||
dbg.control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
|
||||
|
||||
/* Enable global breakpointing (across all threads) on the control
|
||||
* debug register. */
|
||||
dbg.arch.debugreg[7] = 1 << 9;
|
||||
dbg.arch.debugreg[7] |= 1 << 10;
|
||||
SLIST_FOREACH(bp, &hw_breakpoints, entries) {
|
||||
assert(bp->type != GDB_BREAKPOINT_SW);
|
||||
dbg.arch.debugreg[n] = bp->addr;
|
||||
/* global breakpointing */
|
||||
dbg.arch.debugreg[7] |= (2 << (n * 2));
|
||||
/* read/write fields */
|
||||
dbg.arch.debugreg[7] |=
|
||||
(type_code[bp->type] << (16 + n * 4));
|
||||
/* Length fields */
|
||||
dbg.arch.debugreg[7] |=
|
||||
((uint32_t) len_code[bp->len] << (18 + n * 4));
|
||||
n++;
|
||||
}
|
||||
}
|
||||
|
||||
kvm_ioctl(vcpufd, KVM_SET_GUEST_DEBUG, &dbg);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Looks up the breakpoint with the given address and length. Software
 * breakpoints are searched in sw_breakpoints, every other valid type in
 * hw_breakpoints. Returns the entry or NULL if there is none. An unknown
 * type trips an assertion.
 */
static struct breakpoint_t *bp_list_find(gdb_breakpoint_type type,
					 uint64_t addr, size_t len)
{
	struct breakpoint_t *cur;

	if (type == GDB_BREAKPOINT_SW) {
		SLIST_FOREACH(cur, &sw_breakpoints, entries) {
			if (cur->addr == addr && cur->len == len)
				return cur;
		}
	} else if (type == GDB_BREAKPOINT_HW
		   || type == GDB_WATCHPOINT_WRITE
		   || type == GDB_WATCHPOINT_READ
		   || type == GDB_WATCHPOINT_ACCESS) {
		/* We only support hardware watchpoints. */
		SLIST_FOREACH(cur, &hw_breakpoints, entries) {
			if (cur->addr == addr && cur->len == len)
				return cur;
		}
	} else {
		assert(0);
	}

	return NULL;
}
|
||||
|
||||
/*
|
||||
* Adds a new breakpoint to the list of breakpoints. Returns the found or
|
||||
* created breakpoint. Returns NULL in case of failure or if we reached the max
|
||||
* number of allowed hardware breakpoints (4).
|
||||
*/
|
||||
static struct breakpoint_t *bp_list_insert(gdb_breakpoint_type type,
|
||||
uint64_t addr, size_t len)
|
||||
{
|
||||
struct breakpoint_t *bp;
|
||||
|
||||
bp = bp_list_find(type, addr, len);
|
||||
if (bp) {
|
||||
bp->refcount++;
|
||||
return bp;
|
||||
}
|
||||
|
||||
bp = malloc(sizeof(struct breakpoint_t));
|
||||
if (bp == NULL)
|
||||
return NULL;
|
||||
|
||||
bp->addr = addr;
|
||||
bp->type = type;
|
||||
bp->len = len;
|
||||
bp->refcount = 1;
|
||||
|
||||
switch (type) {
|
||||
case GDB_BREAKPOINT_SW:
|
||||
SLIST_INSERT_HEAD(&sw_breakpoints, bp, entries);
|
||||
break;
|
||||
|
||||
case GDB_BREAKPOINT_HW:
|
||||
case GDB_WATCHPOINT_WRITE:
|
||||
case GDB_WATCHPOINT_READ:
|
||||
case GDB_WATCHPOINT_ACCESS:
|
||||
/* We only support hardware watchpoints. */
|
||||
if (nr_hw_breakpoints == MAX_HW_BREAKPOINTS)
|
||||
return NULL;
|
||||
nr_hw_breakpoints++;
|
||||
SLIST_INSERT_HEAD(&hw_breakpoints, bp, entries);
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
|
||||
return bp;
|
||||
}
|
||||
|
||||
/*
|
||||
* Removes a breakpoint from the list of breakpoints.
|
||||
* Returns -1 if the breakpoint is not in the list.
|
||||
*/
|
||||
static int bp_list_remove(gdb_breakpoint_type type, uint64_t addr, size_t len)
|
||||
{
|
||||
struct breakpoint_t *bp = NULL;
|
||||
|
||||
bp = bp_list_find(type, addr, len);
|
||||
if (!bp)
|
||||
return -1;
|
||||
|
||||
bp->refcount--;
|
||||
if (bp->refcount > 0)
|
||||
return 0;
|
||||
|
||||
switch (type) {
|
||||
case GDB_BREAKPOINT_SW:
|
||||
SLIST_REMOVE(&sw_breakpoints, bp, breakpoint_t, entries);
|
||||
break;
|
||||
|
||||
case GDB_BREAKPOINT_HW:
|
||||
case GDB_WATCHPOINT_WRITE:
|
||||
case GDB_WATCHPOINT_READ:
|
||||
case GDB_WATCHPOINT_ACCESS:
|
||||
/* We only support hardware watchpoints. */
|
||||
SLIST_REMOVE(&hw_breakpoints, bp, breakpoint_t, entries);
|
||||
nr_hw_breakpoints--;
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
|
||||
free(bp);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int uhyve_gdb_read_registers(int vcpufd, uint8_t * registers, size_t * len)
|
||||
{
|
||||
struct kvm_regs kregs;
|
||||
struct kvm_sregs sregs;
|
||||
struct uhyve_gdb_regs *gregs = (struct uhyve_gdb_regs *)registers;
|
||||
int ret;
|
||||
|
||||
kvm_ioctl(vcpufd, KVM_GET_REGS, &kregs);
|
||||
kvm_ioctl(vcpufd, KVM_GET_SREGS, &sregs);
|
||||
|
||||
if (*len < sizeof(struct uhyve_gdb_regs))
|
||||
return -1;
|
||||
|
||||
*len = sizeof(struct uhyve_gdb_regs);
|
||||
|
||||
gregs->rax = kregs.rax;
|
||||
gregs->rbx = kregs.rbx;
|
||||
gregs->rcx = kregs.rcx;
|
||||
gregs->rdx = kregs.rdx;
|
||||
|
||||
gregs->rsi = kregs.rsi;
|
||||
gregs->rdi = kregs.rdi;
|
||||
gregs->rbp = kregs.rbp;
|
||||
gregs->rsp = kregs.rsp;
|
||||
|
||||
gregs->r8 = kregs.r8;
|
||||
gregs->r9 = kregs.r9;
|
||||
gregs->r10 = kregs.r10;
|
||||
gregs->r11 = kregs.r11;
|
||||
|
||||
gregs->rip = kregs.rip;
|
||||
gregs->eflags = kregs.rflags;
|
||||
|
||||
gregs->cs = sregs.cs.selector;
|
||||
gregs->ss = sregs.ss.selector;
|
||||
gregs->ds = sregs.ds.selector;
|
||||
gregs->es = sregs.es.selector;
|
||||
gregs->fs = sregs.fs.selector;
|
||||
gregs->gs = sregs.gs.selector;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int uhyve_gdb_write_registers(int vcpufd, uint8_t * registers, size_t len)
|
||||
{
|
||||
struct kvm_regs kregs;
|
||||
struct kvm_sregs sregs;
|
||||
struct uhyve_gdb_regs *gregs = (struct uhyve_gdb_regs *)registers;
|
||||
int ret;
|
||||
|
||||
/* Let's read all registers just in case we miss filling one of them. */
|
||||
kvm_ioctl(vcpufd, KVM_GET_REGS, &kregs);
|
||||
kvm_ioctl(vcpufd, KVM_GET_SREGS, &sregs);
|
||||
|
||||
if (len < sizeof(struct uhyve_gdb_regs))
|
||||
return -1;
|
||||
|
||||
kregs.rax = gregs->rax;
|
||||
kregs.rbx = gregs->rbx;
|
||||
kregs.rcx = gregs->rcx;
|
||||
kregs.rdx = gregs->rdx;
|
||||
|
||||
kregs.rsi = gregs->rsi;
|
||||
kregs.rdi = gregs->rdi;
|
||||
kregs.rbp = gregs->rbp;
|
||||
kregs.rsp = gregs->rsp;
|
||||
|
||||
kregs.r8 = gregs->r8;
|
||||
kregs.r9 = gregs->r9;
|
||||
kregs.r10 = gregs->r10;
|
||||
kregs.r11 = gregs->r11;
|
||||
|
||||
kregs.rip = gregs->rip;
|
||||
kregs.rflags = gregs->eflags;
|
||||
|
||||
/* XXX: not sure if just setting .selector is enough. */
|
||||
sregs.cs.selector = gregs->cs;
|
||||
sregs.ss.selector = gregs->ss;
|
||||
sregs.ds.selector = gregs->ds;
|
||||
sregs.es.selector = gregs->es;
|
||||
sregs.fs.selector = gregs->fs;
|
||||
sregs.gs.selector = gregs->gs;
|
||||
|
||||
kvm_ioctl(vcpufd, KVM_SET_REGS, &kregs);
|
||||
kvm_ioctl(vcpufd, KVM_SET_SREGS, &sregs);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Installs a breakpoint or watchpoint of the given type at `addr`.
 *
 * Requesting an already installed breakpoint is a no-op. Returns 0 on
 * success and -1 if the request is invalid, the entry cannot be allocated
 * or the hardware breakpoint limit is reached.
 */
int uhyve_gdb_add_breakpoint(int vcpufd, gdb_breakpoint_type type,
			     uint64_t addr, size_t len)
{
	struct breakpoint_t *bp;

	assert(type < GDB_BREAKPOINT_MAX);

	if (type == GDB_BREAKPOINT_SW) {
		/* The breakpoint byte gets written into guest memory, so the
		 * address has to lie inside of it. */
		if (addr >= guest_size)
			return -1;
	} else if (len != 1 && len != 2 && len != 4 && len != 8) {
		/* The length of a hardware breakpoint is later used to look up
		 * its debug register encoding; only these sizes have one. */
		return -1;
	}

	if (bp_list_find(type, addr, len))
		return 0;

	bp = bp_list_insert(type, addr, len);
	if (bp == NULL)
		return -1;

	if (type == GDB_BREAKPOINT_SW)
		kvm_arch_insert_sw_breakpoint(bp);

	if (uhyve_gdb_update_guest_debug(vcpufd) == -1)
		return -1;

	return 0;
}
|
||||
|
||||
/*
 * Removes a breakpoint or watchpoint. For software breakpoints the
 * original instruction byte is restored first. Returns 0 on success and
 * -1 if no matching breakpoint is installed.
 */
int uhyve_gdb_remove_breakpoint(int vcpufd, gdb_breakpoint_type type,
				uint64_t addr, size_t len)
{
	assert(type < GDB_BREAKPOINT_MAX);

	if (type == GDB_BREAKPOINT_SW) {
		struct breakpoint_t *sw_bp = bp_list_find(type, addr, len);

		if (sw_bp != NULL)
			kvm_arch_remove_sw_breakpoint(sw_bp);
	}

	if (bp_list_remove(type, addr, len) == -1)
		return -1;

	return (uhyve_gdb_update_guest_debug(vcpufd) == -1) ? -1 : 0;
}
|
||||
|
||||
int uhyve_gdb_enable_ss(int vcpufd)
|
||||
{
|
||||
stepping = true;
|
||||
|
||||
if (uhyve_gdb_update_guest_debug(vcpufd) == -1)
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int uhyve_gdb_disable_ss(int vcpufd)
|
||||
{
|
||||
stepping = false;
|
||||
|
||||
if (uhyve_gdb_update_guest_debug(vcpufd) == -1)
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Convert a guest virtual address into the correspondign physical address */
|
||||
int uhyve_gdb_guest_virt_to_phys(int vcpufd, const uint64_t virt, uint64_t * phys)
|
||||
{
|
||||
struct kvm_translation kt;
|
||||
|
||||
kt.linear_address = virt;
|
||||
kvm_ioctl(vcpufd, KVM_TRANSLATE, &kt);
|
||||
|
||||
*phys = kt.physical_address;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
|
@ -1,77 +0,0 @@
|
|||
/*
|
||||
* This file was adapted from the solo5/ukvm code base, initial copyright block
|
||||
* follows:
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2015-2017 Contributors as noted in the AUTHORS file
|
||||
*
|
||||
* This file is part of ukvm, a unikernel monitor.
|
||||
*
|
||||
* Permission to use, copy, modify, and/or distribute this software
|
||||
* for any purpose with or without fee is hereby granted, provided
|
||||
* that the above copyright notice and this permission notice appear
|
||||
* in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
|
||||
* WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
|
||||
* AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
|
||||
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
||||
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef UHYVE_GDB_X86_64_H
|
||||
#define UHYVE_GDB_X86_64_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
/*
|
||||
* X86_64
|
||||
* XXX: Can't find any gdb include file with the list of registers per
|
||||
* architecture (something like ia64_regs.h). The closest I can get is a
|
||||
* list of the registers from gdb (debugging an ordinary x86_64 binary):
|
||||
*
|
||||
* (gdb) info registers
|
||||
* rax 0x0 0
|
||||
* rbx 0x0 0
|
||||
* rcx 0x0 0
|
||||
* ...
|
||||
* fs 0x0 0
|
||||
* gs 0x0 0
|
||||
* (gdb)
|
||||
*/
|
||||
|
||||
/*
 * Register block exchanged with the debugger by the 'g' and 'G' packets.
 * The member order defines the layout and must not be changed.
 */
struct uhyve_gdb_regs {
	/* 64-bit general purpose registers and the instruction pointer */
	uint64_t rax, rbx, rcx, rdx;
	uint64_t rsi, rdi, rbp, rsp;
	uint64_t r8, r9, r10, r11;
	uint64_t r12, r13, r14, r15;
	uint64_t rip;

	/* flags and segment selectors, 32 bits each */
	uint32_t eflags;
	uint32_t cs, ss, ds, es, fs, gs;

	/* eight 10-byte slots, named after the st registers */
	uint8_t st[8][10];
};
|
||||
|
||||
#endif /* UHYVE_GDB_X86_64_H */
|
|
@ -1,76 +0,0 @@
|
|||
/*
|
||||
* This file was adapted from the solo5/ukvm code base, initial copyright block
|
||||
* follows:
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2015-2017 Contributors as noted in the AUTHORS file
|
||||
*
|
||||
* This file is part of ukvm, a unikernel monitor.
|
||||
*
|
||||
* Permission to use, copy, modify, and/or distribute this software
|
||||
* for any purpose with or without fee is hereby granted, provided
|
||||
* that the above copyright notice and this permission notice appear
|
||||
* in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
|
||||
* WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
|
||||
* AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
|
||||
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
||||
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef UHYVE_GDB_H
|
||||
#define UHYVE_GDB_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
/*
 * GDB breakpoint/watchpoint types.
 * Do not change these. The values have to match on the GDB client side.
 */
typedef enum _gdb_breakpoint_type {
	GDB_BREAKPOINT_SW	= 0,
	GDB_BREAKPOINT_HW	= 1,
	GDB_WATCHPOINT_WRITE	= 2,
	GDB_WATCHPOINT_READ	= 3,
	GDB_WATCHPOINT_ACCESS	= 4,
	GDB_BREAKPOINT_MAX	= 5	/* number of types, not a type itself */
} gdb_breakpoint_type;

/* Signal numbers reported to the debugger in stop replies. */
#define GDB_SIGNAL_FIRST	0
#define GDB_SIGNAL_QUIT		3
#define GDB_SIGNAL_TRAP		5
#define GDB_SIGNAL_KILL		9
#define GDB_SIGNAL_SEGV		11
#define GDB_SIGNAL_TERM		15
#define GDB_SIGNAL_IO		23
#define GDB_SIGNAL_DEFAULT	144

/* architecture specific part of the stub */

/* switch single-stepping on/off */
int uhyve_gdb_enable_ss(int vcpufd);
int uhyve_gdb_disable_ss(int vcpufd);

/* transfer the register block from/to the vCPU */
int uhyve_gdb_read_registers(int vcpufd, uint8_t *reg, size_t *len);
int uhyve_gdb_write_registers(int vcpufd, uint8_t *reg, size_t len);

/* install/remove a breakpoint or watchpoint */
int uhyve_gdb_add_breakpoint(int vcpufd, gdb_breakpoint_type type,
			     uint64_t addr, size_t len);
int uhyve_gdb_remove_breakpoint(int vcpufd, gdb_breakpoint_type type,
				uint64_t addr, size_t len);

/* guest virtual to guest physical address translation */
int uhyve_gdb_guest_virt_to_phys(int vcpufd, const uint64_t virt,
				 uint64_t *phys);

/* interface with uhyve.c */
void uhyve_gdb_handle_exception(int vcpufd, int sigval);
void uhyve_gdb_handle_term(void);
int uhyve_gdb_init(int vcpufd);
|
||||
|
||||
#ifdef __x86_64__
|
||||
#include "uhyve-gdb-x86_64.h"
|
||||
#else
|
||||
#include "uhyve-gdb-aarch64.h"
|
||||
#endif
|
||||
|
||||
#endif /* UHYVE_GDB_H */
|
|
@ -1,873 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2018, Simon Pickartz, RWTH Aachen University
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#define _GNU_SOURCE
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdlib.h>
|
||||
#include <arpa/inet.h>
|
||||
#include <infiniband/verbs.h>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
|
||||
|
||||
#include "uhyve-migration.h"
|
||||
#include "uhyve.h"
|
||||
|
||||
|
||||
#ifdef __RDMA_MIGRATION__
|
||||
#define IB_USE_ODP (0)
|
||||
|
||||
#define IB_CQ_ENTRIES (1)
|
||||
#define IB_MAX_INLINE_DATA (0)
|
||||
#define IB_MAX_DEST_RD_ATOMIC (1)
|
||||
#define IB_MIN_RNR_TIMER (1)
|
||||
#define IB_MAX_SEND_WR (8192) // TODO: should be
|
||||
// com_hndl.dev_attr_ex.orig_attr.max_qp_wr
|
||||
// fix for mlx_5 adapter
|
||||
#define IB_MAX_RECV_WR (1)
|
||||
#define IB_MAX_SEND_SGE (1)
|
||||
#define IB_MAX_RECV_SGE (1)
|
||||
|
||||
typedef enum ib_wr_ids {
|
||||
IB_WR_NO_ID = 0,
|
||||
IB_WR_WRITE_LAST_PAGE_ID,
|
||||
IB_WR_RECV_LAST_PAGE_ID,
|
||||
IB_WR_BASE_ID
|
||||
} ib_wr_ids_t;
|
||||
|
||||
uint64_t cur_wr_id = IB_WR_BASE_ID;
|
||||
|
||||
typedef struct qp_info {
|
||||
uint32_t qpn;
|
||||
uint16_t lid;
|
||||
uint16_t psn;
|
||||
uint32_t *keys;
|
||||
uint64_t addr;
|
||||
} qp_info_t;
|
||||
|
||||
typedef struct com_hndl {
|
||||
struct ibv_context *ctx; /* device context */
|
||||
struct ibv_device_attr_ex dev_attr_ex; /* extended device attributes */
|
||||
struct ibv_port_attr port_attr; /* port attributes */
|
||||
struct ibv_pd *pd; /* protection domain */
|
||||
struct ibv_mr **mrs; /* memory regions */
|
||||
struct ibv_cq *cq; /* completion queue */
|
||||
struct ibv_qp *qp; /* queue pair */
|
||||
struct ibv_comp_channel *comp_chan; /* comp. event channel */
|
||||
qp_info_t loc_qp_info;
|
||||
qp_info_t rem_qp_info;
|
||||
uint8_t used_port; /* port of the IB device */
|
||||
uint8_t *buf; /* the guest memory (with potential gaps!) */
|
||||
size_t mr_cnt; /* number of memory regions */
|
||||
} com_hndl_t;
|
||||
|
||||
|
||||
static com_hndl_t com_hndl;
|
||||
static struct ibv_send_wr *send_list = NULL;
|
||||
static struct ibv_send_wr *send_list_last = NULL;
|
||||
static size_t send_list_length = 0;
|
||||
|
||||
/**
|
||||
* \brief Prints info of a send_wr
|
||||
*
|
||||
* \param id the ID of the send_wr
|
||||
*/
|
||||
static inline
|
||||
void print_send_wr_info(uint64_t id)
|
||||
{
|
||||
struct ibv_send_wr *search_wr = send_list;
|
||||
|
||||
/* find send_wr with id */
|
||||
while(search_wr) {
|
||||
if (search_wr->wr_id == id) {
|
||||
fprintf(stderr, "[INFO] WR_ID: %llu; LADDR: 0x%llx; RADDR: 0x%llx; SIZE: %llu\n",
|
||||
search_wr->wr_id,
|
||||
search_wr->sg_list->addr,
|
||||
search_wr->wr.rdma.remote_addr,
|
||||
search_wr->sg_list->length);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
search_wr = search_wr->next;
|
||||
}
|
||||
|
||||
if (search_wr == NULL) {
|
||||
fprintf(stderr, "[ERROR] Could not find send_wr with ID %llu\n", id);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* \brief Initializes the IB communication structures
|
||||
*
|
||||
* \param com_hndl the structure containing all communication relevant infos
|
||||
* \param buf the buffer that should be registrered with the QP
|
||||
*
|
||||
* This function sets up the IB communication channel. It registers the 'buf'
|
||||
* with a new protection domain. On its termination there is a QP in the INIT
|
||||
* state ready to be connected with the remote side.
|
||||
*/
|
||||
static void
|
||||
init_com_hndl(size_t mem_chunk_cnt, mem_chunk_t *mem_chunks)
|
||||
{
|
||||
/* initialize com_hndl */
|
||||
memset(&com_hndl, 0, sizeof(com_hndl));
|
||||
|
||||
/* the guest physical memory is the communication buffer */
|
||||
com_hndl.buf = guest_mem;
|
||||
com_hndl.mr_cnt = mem_chunk_cnt;
|
||||
|
||||
struct ibv_device **device_list = NULL;
|
||||
int num_devices = 0;
|
||||
bool active_port_found = false;
|
||||
|
||||
/* determine first available device */
|
||||
if ((device_list = ibv_get_device_list(&num_devices)) == NULL) {
|
||||
fprintf(stderr,
|
||||
"[ERROR] Could not determine available IB devices "
|
||||
"- %d (%s). Abort!\n",
|
||||
errno,
|
||||
strerror(errno));
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
/* find device with active port */
|
||||
size_t cur_dev = 0;
|
||||
for (cur_dev=0; cur_dev<num_devices; ++cur_dev){
|
||||
/* open the device context */
|
||||
if ((com_hndl.ctx = ibv_open_device(device_list[cur_dev])) == NULL) {
|
||||
fprintf(stderr,
|
||||
"[ERROR] Could not open the device context "
|
||||
"- %d (%s). Abort!\n",
|
||||
errno,
|
||||
strerror(errno));
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
/* query extended device capabilities (e.g., to check for ODP support */
|
||||
struct ibv_query_device_ex_input device_ex_input;
|
||||
if (ibv_query_device_ex(com_hndl.ctx, &device_ex_input, &com_hndl.dev_attr_ex) < 0) {
|
||||
fprintf(stderr,
|
||||
"[ERROR] Could not query extended device attributes "
|
||||
"- %d (%s). Abort!\n",
|
||||
errno,
|
||||
strerror(errno));
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
/* determine port count via normal device query (necessary for mlx_5) */
|
||||
if (ibv_query_device(com_hndl.ctx, &com_hndl.dev_attr_ex.orig_attr) < 0) {
|
||||
fprintf(stderr,
|
||||
"[ERROR] Could not query normal device attributes "
|
||||
"- %d (%s). Abort!\n",
|
||||
errno,
|
||||
strerror(errno));
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
|
||||
/* check all ports */
|
||||
size_t num_ports = com_hndl.dev_attr_ex.orig_attr.phys_port_cnt;
|
||||
for (size_t cur_port=0; cur_port<=num_ports; ++cur_port) {
|
||||
/* query current port */
|
||||
if (ibv_query_port(com_hndl.ctx, cur_port, &com_hndl.port_attr) < 0){
|
||||
fprintf(stderr,
|
||||
"[ERROR] Could not query port %u "
|
||||
"- %d (%s). Abort!\n",
|
||||
cur_port,
|
||||
errno,
|
||||
strerror(errno));
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
if (com_hndl.port_attr.state == IBV_PORT_ACTIVE) {
|
||||
active_port_found = 1;
|
||||
com_hndl.used_port = cur_port;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* close this device if no active port was found */
|
||||
if (!active_port_found) {
|
||||
if (ibv_close_device(com_hndl.ctx) < 0) {
|
||||
fprintf(stderr,
|
||||
"[ERROR] Could not close the device context "
|
||||
"- %d (%s). Abort!\n",
|
||||
errno,
|
||||
strerror(errno));
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!active_port_found) {
|
||||
fprintf(stderr, "[ERROR] No active port found. Abort!\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
fprintf(stderr, "[INFO] Using device '%s' and port %u\n",
|
||||
ibv_get_device_name(device_list[cur_dev]),
|
||||
com_hndl.used_port);
|
||||
/* allocate protection domain */
|
||||
if ((com_hndl.pd = ibv_alloc_pd(com_hndl.ctx)) == NULL) {
|
||||
fprintf(stderr,
|
||||
"[ERROR] Could not allocate protection domain "
|
||||
"- %d (%s). Abort!\n",
|
||||
errno,
|
||||
strerror(errno));
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
/* register guest memory chunks with the protection domain */
|
||||
int i = 0;
|
||||
com_hndl.mrs = (struct ibv_mr**)malloc(sizeof(struct ibv_mr*)*com_hndl.mr_cnt);
|
||||
|
||||
int access_flags = (IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE);
|
||||
if ((IB_USE_ODP) &&
|
||||
(com_hndl.dev_attr_ex.odp_caps.general_caps & IBV_ODP_SUPPORT) &&
|
||||
(com_hndl.dev_attr_ex.odp_caps.per_transport_caps.rc_odp_caps & IBV_ODP_SUPPORT_WRITE)) {
|
||||
access_flags |= IBV_ACCESS_ON_DEMAND;
|
||||
}
|
||||
|
||||
for (i=0; i<com_hndl.mr_cnt; ++i) {
|
||||
if ((com_hndl.mrs[i] = ibv_reg_mr(com_hndl.pd,
|
||||
mem_chunks[i].ptr,
|
||||
mem_chunks[i].size,
|
||||
access_flags)) == NULL) {
|
||||
fprintf(stderr,
|
||||
"[ERROR] Could not register the memory region #%d (ptr: %llx; size: %llu) "
|
||||
"- %d (%s). Abort!\n",
|
||||
i,
|
||||
mem_chunks[i].ptr,
|
||||
mem_chunks[i].size,
|
||||
errno,
|
||||
strerror(errno));
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
fprintf(stderr, "[INFO] com_hndl.mrs[%d]->addr = 0x%llx; com_hndl->mrs[%d].length = %llu\n",
|
||||
i,
|
||||
com_hndl.mrs[i]->addr,
|
||||
i,
|
||||
com_hndl.mrs[i]->length);
|
||||
}
|
||||
|
||||
/* create completion event channel */
|
||||
if ((com_hndl.comp_chan =
|
||||
ibv_create_comp_channel(com_hndl.ctx)) == NULL) {
|
||||
fprintf(stderr,
|
||||
"[ERROR] Could not create the completion channel "
|
||||
"- %d (%s). Abort!\n",
|
||||
errno,
|
||||
strerror(errno));
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
/* create the completion queue */
|
||||
if ((com_hndl.cq = ibv_create_cq(com_hndl.ctx,
|
||||
IB_CQ_ENTRIES,
|
||||
NULL,
|
||||
com_hndl.comp_chan,
|
||||
0)) == NULL) {
|
||||
fprintf(stderr,
|
||||
"[ERROR] Could not create the completion queue "
|
||||
"- %d (%s). Abort!\n",
|
||||
errno,
|
||||
strerror(errno));
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
/* create send and recv queue pair and initialize it */
|
||||
struct ibv_qp_init_attr init_attr = {
|
||||
.send_cq = com_hndl.cq,
|
||||
.recv_cq = com_hndl.cq,
|
||||
.cap = {
|
||||
.max_send_wr = IB_MAX_SEND_WR,
|
||||
.max_recv_wr = IB_MAX_RECV_WR,
|
||||
.max_send_sge = IB_MAX_SEND_SGE,
|
||||
.max_recv_sge = IB_MAX_RECV_SGE,
|
||||
.max_inline_data = IB_MAX_INLINE_DATA
|
||||
},
|
||||
.qp_type = IBV_QPT_RC,
|
||||
.sq_sig_all = 0 /* we do not want a CQE for each WR */
|
||||
};
|
||||
if ((com_hndl.qp = ibv_create_qp(com_hndl.pd, &init_attr)) == NULL) {
|
||||
fprintf(stderr,
|
||||
"[ERROR] Could not create the queue pair "
|
||||
"- %d (%s). Abort!\n",
|
||||
errno,
|
||||
strerror(errno));
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
struct ibv_qp_attr attr = {
|
||||
.qp_state = IBV_QPS_INIT,
|
||||
.pkey_index = 0,
|
||||
.port_num = com_hndl.used_port,
|
||||
.qp_access_flags = (IBV_ACCESS_REMOTE_WRITE)
|
||||
};
|
||||
if (ibv_modify_qp(com_hndl.qp,
|
||||
&attr,
|
||||
IBV_QP_STATE |
|
||||
IBV_QP_PKEY_INDEX |
|
||||
IBV_QP_PORT |
|
||||
IBV_QP_ACCESS_FLAGS) < 0) {
|
||||
fprintf(stderr,
|
||||
"[ERROR] Could not set QP into init state "
|
||||
"- %d (%s). Abort!\n",
|
||||
errno,
|
||||
strerror(errno));
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
/* fill in local qp_info */
|
||||
com_hndl.loc_qp_info.qpn = com_hndl.qp->qp_num;
|
||||
com_hndl.loc_qp_info.psn = lrand48() & 0xffffff;
|
||||
com_hndl.loc_qp_info.addr = (uint64_t)com_hndl.buf;
|
||||
com_hndl.loc_qp_info.lid = com_hndl.port_attr.lid;
|
||||
|
||||
com_hndl.loc_qp_info.keys = (uint32_t*)malloc(sizeof(uint32_t)*com_hndl.mr_cnt);
|
||||
for (i=0; i<com_hndl.mr_cnt; ++i) {
|
||||
com_hndl.loc_qp_info.keys[i] = com_hndl.mrs[i]->rkey;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Frees IB related resources
|
||||
*
|
||||
* \param com_hndl the structure containing all communication relevant infos
|
||||
*/
|
||||
static void
|
||||
destroy_com_hndl(void)
|
||||
{
|
||||
if (ibv_destroy_qp(com_hndl.qp) < 0) {
|
||||
fprintf(stderr,
|
||||
"[ERROR] Could not destroy the queue pair "
|
||||
"- %d (%s). Abort!\n",
|
||||
errno,
|
||||
strerror(errno));
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
if (ibv_destroy_cq(com_hndl.cq) < 0) {
|
||||
fprintf(stderr,
|
||||
"[ERROR] Could not deallocate the protection domain "
|
||||
"- %d (%s). Abort!\n",
|
||||
errno,
|
||||
strerror(errno));
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
if (ibv_destroy_comp_channel(com_hndl.comp_chan) < 0) {
|
||||
fprintf(stderr,
|
||||
"[ERROR] Could not destroy the completion channel "
|
||||
"- %d (%s). Abort!\n",
|
||||
errno,
|
||||
strerror(errno));
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
int i = 0;
|
||||
for (i=0; i<com_hndl.mr_cnt; ++i) {
|
||||
if (ibv_dereg_mr(com_hndl.mrs[i]) < 0) {
|
||||
fprintf(stderr,
|
||||
"[ERROR] Could not deregister MR #%d "
|
||||
"- %d (%s). Abort!\n",
|
||||
i,
|
||||
errno,
|
||||
strerror(errno));
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (ibv_dealloc_pd(com_hndl.pd) < 0) {
|
||||
fprintf(stderr,
|
||||
"[ERROR] Could not deallocate the protection domain "
|
||||
"- %d (%s). Abort!\n",
|
||||
errno,
|
||||
strerror(errno));
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
if (ibv_close_device(com_hndl.ctx) < 0) {
|
||||
fprintf(stderr,
|
||||
"[ERROR] Could not close the device context "
|
||||
"- %d (%s). Abort!\n",
|
||||
errno,
|
||||
strerror(errno));
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
/* free dynamic data structures */
|
||||
free(com_hndl.loc_qp_info.keys);
|
||||
free(com_hndl.rem_qp_info.keys);
|
||||
free(com_hndl.mrs);
|
||||
|
||||
com_hndl.loc_qp_info.keys = NULL;
|
||||
com_hndl.rem_qp_info.keys = NULL;
|
||||
com_hndl.mrs = NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Connects the QP created within init_com_hndl
|
||||
*
|
||||
* \param com_hndl the structure containing all communication relevant infos
|
||||
*
|
||||
* This function performs the actual connection setup between the two QPs.
|
||||
*/
|
||||
static void
|
||||
con_com_buf(void) {
|
||||
/* transistion to ready-to-receive state */
|
||||
struct ibv_qp_attr qp_attr = {
|
||||
.qp_state = IBV_QPS_RTR,
|
||||
.path_mtu = IBV_MTU_2048,
|
||||
.dest_qp_num = com_hndl.rem_qp_info.qpn,
|
||||
.rq_psn = com_hndl.rem_qp_info.psn,
|
||||
.max_dest_rd_atomic = IB_MAX_DEST_RD_ATOMIC,
|
||||
.min_rnr_timer = IB_MIN_RNR_TIMER,
|
||||
.ah_attr = {
|
||||
.is_global = 0,
|
||||
.sl = 0,
|
||||
.src_path_bits = 0,
|
||||
.dlid = com_hndl.rem_qp_info.lid,
|
||||
.port_num = com_hndl.used_port,
|
||||
}
|
||||
};
|
||||
if (ibv_modify_qp(com_hndl.qp,
|
||||
&qp_attr,
|
||||
IBV_QP_STATE |
|
||||
IBV_QP_PATH_MTU |
|
||||
IBV_QP_DEST_QPN |
|
||||
IBV_QP_RQ_PSN |
|
||||
IBV_QP_MAX_DEST_RD_ATOMIC |
|
||||
IBV_QP_MIN_RNR_TIMER |
|
||||
IBV_QP_AV)) {
|
||||
fprintf(stderr,
|
||||
"[ERROR] Could not put QP into RTR state"
|
||||
"- %d (%s). Abort!\n",
|
||||
errno,
|
||||
strerror(errno));
|
||||
exit(errno);
|
||||
}
|
||||
|
||||
/* transistion to ready-to-send state */
|
||||
qp_attr.qp_state = IBV_QPS_RTS;
|
||||
qp_attr.timeout = 14;
|
||||
qp_attr.retry_cnt = 7;
|
||||
qp_attr.rnr_retry = 7; /* infinite retrys on RNR NACK */
|
||||
qp_attr.sq_psn = com_hndl.loc_qp_info.psn;
|
||||
qp_attr.max_rd_atomic = 1;
|
||||
if (ibv_modify_qp(com_hndl.qp, &qp_attr,
|
||||
IBV_QP_STATE |
|
||||
IBV_QP_TIMEOUT |
|
||||
IBV_QP_RETRY_CNT |
|
||||
IBV_QP_RNR_RETRY |
|
||||
IBV_QP_SQ_PSN |
|
||||
IBV_QP_MAX_QP_RD_ATOMIC)) {
|
||||
fprintf(stderr,
|
||||
"[ERROR] Could not put QP into RTS state"
|
||||
"- %d (%s). Abort!\n",
|
||||
errno,
|
||||
strerror(errno));
|
||||
exit(errno);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Set the destination node for a migration
|
||||
*
|
||||
* \param ip_str a string containing the IPv4 addr of the destination
|
||||
* \param port the migration port
|
||||
*/
|
||||
static void
|
||||
exchange_qp_info(bool server)
|
||||
{
|
||||
size_t keys_size = sizeof(uint32_t)*com_hndl.mr_cnt;
|
||||
|
||||
int res = 0;
|
||||
if (server) {
|
||||
/* general QP info */
|
||||
res = recv_data(&com_hndl.rem_qp_info, sizeof(qp_info_t));
|
||||
res = send_data(&com_hndl.loc_qp_info, sizeof(qp_info_t));
|
||||
|
||||
/* remote keys */
|
||||
com_hndl.rem_qp_info.keys = (uint32_t*)malloc(keys_size);
|
||||
res = recv_data(com_hndl.rem_qp_info.keys, keys_size);
|
||||
res = send_data(com_hndl.loc_qp_info.keys, keys_size);
|
||||
} else {
|
||||
/* general QP info */
|
||||
res = send_data(&com_hndl.loc_qp_info, sizeof(qp_info_t));
|
||||
res = recv_data(&com_hndl.rem_qp_info, sizeof(qp_info_t));
|
||||
|
||||
/* remote keys */
|
||||
com_hndl.rem_qp_info.keys = (uint32_t*)malloc(keys_size);
|
||||
res = send_data(com_hndl.loc_qp_info.keys, keys_size);
|
||||
res = recv_data(com_hndl.rem_qp_info.keys, keys_size);
|
||||
}
|
||||
|
||||
fprintf(stderr, "[INFO] loc_qp_info (QPN: %lu; LID: %lu; PSN: %lu; ADDR: 0x%x ",
|
||||
com_hndl.loc_qp_info.qpn,
|
||||
com_hndl.loc_qp_info.lid,
|
||||
com_hndl.loc_qp_info.psn,
|
||||
com_hndl.loc_qp_info.addr);
|
||||
int i = 0;
|
||||
for (i=0; i<com_hndl.mr_cnt; ++i) {
|
||||
fprintf(stderr, "KEY[%d]: %lu; ", i, com_hndl.loc_qp_info.keys[i]);
|
||||
}
|
||||
printf("\b\b)\n");
|
||||
|
||||
fprintf(stderr, "[INFO] rem_qp_info (QPN: %lu; LID: %lu; PSN: %lu; ADDR: 0x%x ",
|
||||
com_hndl.rem_qp_info.qpn,
|
||||
com_hndl.rem_qp_info.lid,
|
||||
com_hndl.rem_qp_info.psn,
|
||||
com_hndl.rem_qp_info.addr);
|
||||
for (i=0; i<com_hndl.mr_cnt; ++i) {
|
||||
fprintf(stderr, "KEY[%d]: %lu; ", i, com_hndl.rem_qp_info.keys[i]);
|
||||
}
|
||||
printf("\b\b)\n");
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Prepares the an 'ibv_send_wr'
|
||||
*
|
||||
* This function prepares an 'ibv_send_wr' structure that is prepared for the
|
||||
* transmission of a single memory page using the IBV_WR_RDMA_WRITE verb.
|
||||
*/
|
||||
static inline struct ibv_send_wr *
|
||||
prepare_send_list_elem(void)
|
||||
{
|
||||
/* create work request */
|
||||
struct ibv_send_wr *send_wr = (struct ibv_send_wr*)calloc(1, sizeof(struct ibv_send_wr));
|
||||
struct ibv_sge *sge = (struct ibv_sge*)calloc(1, sizeof(struct ibv_sge));
|
||||
|
||||
/* basic work request configuration */
|
||||
send_wr->next = NULL;
|
||||
send_wr->sg_list = sge;
|
||||
send_wr->num_sge = 1;
|
||||
send_wr->wr_id = ++cur_wr_id;
|
||||
send_wr->opcode = IBV_WR_RDMA_WRITE;
|
||||
|
||||
return send_wr;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Creates an 'ibv_send_wr' and appends it to the send_list
|
||||
*
|
||||
* \param addr the page table entry of the memory page
|
||||
* \param addr_size the size of the page table entry
|
||||
* \param page the buffer to be send in this WR
|
||||
* \param page_size the size of the buffer
|
||||
*
|
||||
* This function creates an 'ibv_send_wr' structure and appends this to the
|
||||
* global send_list. It sets the source/destination information and sets the
|
||||
* IBV_SEND_SIGNALED flag as appropriate.
|
||||
*/
|
||||
static void
|
||||
create_send_list_entry (void *addr, size_t addr_size, void *page, size_t page_size)
|
||||
{
|
||||
/* create work request */
|
||||
struct ibv_send_wr *send_wr = prepare_send_list_elem();
|
||||
|
||||
/* configure source buffer */
|
||||
int i = 0;
|
||||
for (i=0; i<com_hndl.mr_cnt; ++i) {
|
||||
if (((uint64_t)page >= (uint64_t)com_hndl.mrs[i]->addr) &&
|
||||
((uint64_t)page < ((uint64_t)com_hndl.mrs[i]->addr + (uint64_t)com_hndl.mrs[i]->length))) {
|
||||
send_wr->sg_list->addr = (uintptr_t)page;
|
||||
send_wr->sg_list->length = page_size;
|
||||
send_wr->sg_list->lkey = com_hndl.mrs[i]->lkey;
|
||||
|
||||
send_wr->wr.rdma.rkey = com_hndl.rem_qp_info.keys[i];
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* did we find the correct memory region? */
|
||||
if (i == com_hndl.mr_cnt) {
|
||||
fprintf(stderr, "[ERROR] Could not find a valid MR for address 0x%llx!\n", page);
|
||||
return;
|
||||
}
|
||||
|
||||
/* configure destination buffer */
|
||||
if (addr) {
|
||||
send_wr->wr.rdma.remote_addr = com_hndl.rem_qp_info.addr + determine_dest_offset(*(size_t*)addr);
|
||||
} else {
|
||||
send_wr->wr.rdma.remote_addr = com_hndl.rem_qp_info.addr;
|
||||
}
|
||||
|
||||
/* apped work request to send list */
|
||||
if (send_list == NULL) {
|
||||
send_list = send_list_last = send_wr;
|
||||
} else {
|
||||
send_list_last->next = send_wr;
|
||||
send_list_last = send_list_last->next;
|
||||
}
|
||||
/* we have to request a CQE if max_send_wr is reached to avoid overflows */
|
||||
if ((++send_list_length%com_hndl.dev_attr_ex.orig_attr.max_qp_wr) == 0) {
|
||||
send_list_last->send_flags = IBV_SEND_SIGNALED;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* \brief Prepares a send_list containing all memory defined by com_hndl.mrs
|
||||
*
|
||||
* This function creates as many send_wr items as required to cover all
|
||||
* com_hndl.mrs in accordance with the maximum message size that can be
|
||||
* transmitted per send_sr (com_hndl.port_attr.max_msg_sz).
|
||||
*/
|
||||
static inline
|
||||
void enqueue_all_mrs(void)
|
||||
{
|
||||
uint64_t max_msg_sz = com_hndl.port_attr.max_msg_sz;
|
||||
int i = 0;
|
||||
|
||||
/* send all MRs */
|
||||
for (i=0; i<com_hndl.mr_cnt; ++i) {
|
||||
uint64_t cur_mr_length = com_hndl.mrs[i]->length;
|
||||
|
||||
/* split the MR if it exceed the max_msg_sz */
|
||||
size_t cur_chunk = 0, max_chunks = cur_mr_length/max_msg_sz;
|
||||
for (cur_chunk; cur_chunk < max_chunks; ++cur_chunk) {
|
||||
size_t cur_offset = cur_chunk*max_msg_sz;
|
||||
size_t cur_glob_offset = cur_offset + (uint64_t)com_hndl.mrs[i]->addr - (uint64_t)guest_mem;
|
||||
create_send_list_entry((void*)&cur_glob_offset, 0, (void*)((uint64_t)com_hndl.mrs[i]->addr+cur_offset), max_msg_sz);
|
||||
}
|
||||
|
||||
/* do we have a remainder? */
|
||||
uint64_t remainder = cur_mr_length%max_msg_sz;
|
||||
if (remainder) {
|
||||
size_t cur_offset = cur_mr_length-remainder;
|
||||
size_t cur_glob_offset = cur_offset + (uint64_t)com_hndl.mrs[i]->addr - (uint64_t)guest_mem;
|
||||
create_send_list_entry((void*)&cur_glob_offset, 0, (void*)((uint64_t)com_hndl.mrs[i]->addr+cur_offset), remainder);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* \brief Sends the guest memory to the destination
|
||||
*
|
||||
* \param mode MIG_MODE_COMPLETE_DUMP sends the complete memory and
|
||||
* MIG_MODE_INCREMENTAL_DUMP only the mapped guest pages
|
||||
*/
|
||||
void send_guest_mem(mig_mode_t mode, bool final_dump, size_t mem_chunk_cnt, mem_chunk_t *mem_chunks)
|
||||
{
|
||||
int res = 0, i = 0;
|
||||
static bool ib_initialized = false;
|
||||
|
||||
/* prepare IB channel */
|
||||
if (!ib_initialized) {
|
||||
init_com_hndl(mem_chunk_cnt, mem_chunks);
|
||||
exchange_qp_info(false);
|
||||
con_com_buf();
|
||||
|
||||
ib_initialized = true;
|
||||
}
|
||||
|
||||
/* determine migration mode */
|
||||
switch (mode) {
|
||||
case MIG_MODE_COMPLETE_DUMP:
|
||||
enqueue_all_mrs();
|
||||
break;
|
||||
case MIG_MODE_INCREMENTAL_DUMP:
|
||||
/* iterate guest page tables */
|
||||
determine_dirty_pages(create_send_list_entry);
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "[ERROR] Unknown migration mode. Abort!\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
/* create a dumy WR request if there is nothing to send */
|
||||
if (send_list_length == 0)
|
||||
create_send_list_entry(NULL, 0, NULL, 0);
|
||||
|
||||
/* we have to wait for the last WR before informing dest */
|
||||
if ((mode == MIG_MODE_COMPLETE_DUMP) || final_dump) {
|
||||
send_list_last->wr_id = IB_WR_WRITE_LAST_PAGE_ID;
|
||||
send_list_last->opcode = IBV_WR_RDMA_WRITE_WITH_IMM;
|
||||
send_list_last->send_flags = IBV_SEND_SIGNALED | IBV_SEND_SOLICITED;
|
||||
send_list_last->imm_data = htonl(0x1);
|
||||
} else {
|
||||
send_list_last->wr_id = IB_WR_WRITE_LAST_PAGE_ID;
|
||||
send_list_last->send_flags = IBV_SEND_SIGNALED;
|
||||
}
|
||||
|
||||
printf("DEBUG: Send list length %d\n", send_list_length);
|
||||
|
||||
/* we have to call ibv_post_send() as long as 'send_list' contains elements */
|
||||
struct ibv_wc wc;
|
||||
struct ibv_send_wr *remaining_send_wr = NULL;
|
||||
do {
|
||||
/* send data */
|
||||
remaining_send_wr = NULL;
|
||||
if (ibv_post_send(com_hndl.qp, send_list, &remaining_send_wr) && (errno != ENOMEM)) {
|
||||
fprintf(stderr,
|
||||
"[ERROR] Could not post send"
|
||||
"- %d (%s). Abort!\n",
|
||||
errno,
|
||||
strerror(errno));
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
/* wait for send WRs if CQ is full */
|
||||
do {
|
||||
if ((res = ibv_poll_cq(com_hndl.cq, 1, &wc)) < 0) {
|
||||
fprintf(stderr,
|
||||
"[ERROR] Could not poll on CQ"
|
||||
"- %d (%s). Abort!\n",
|
||||
errno,
|
||||
strerror(errno));
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
} while (res < 1);
|
||||
if (wc.status != IBV_WC_SUCCESS) {
|
||||
fprintf(stderr,
|
||||
"[ERROR] WR failed status %s (%d) for wr_id %llu\n",
|
||||
ibv_wc_status_str(wc.status),
|
||||
wc.status,
|
||||
wc.wr_id);
|
||||
|
||||
print_send_wr_info(wc.wr_id);
|
||||
}
|
||||
send_list = remaining_send_wr;
|
||||
} while (remaining_send_wr);
|
||||
|
||||
|
||||
/* ensure that we receive the CQE for the last page */
|
||||
if (wc.wr_id != IB_WR_WRITE_LAST_PAGE_ID) {
|
||||
fprintf(stderr,
|
||||
"[ERROR] WR failed status %s (%d) for wr_id %d\n",
|
||||
ibv_wc_status_str(wc.status),
|
||||
wc.status,
|
||||
(int)wc.wr_id);
|
||||
}
|
||||
|
||||
/* cleanup send_list */
|
||||
struct ibv_send_wr *cur_send_wr = send_list;
|
||||
struct ibv_send_wr *tmp_send_wr = NULL;
|
||||
while (cur_send_wr != NULL) {
|
||||
free(cur_send_wr->sg_list);
|
||||
tmp_send_wr = cur_send_wr;
|
||||
cur_send_wr = cur_send_wr->next;
|
||||
free(tmp_send_wr);
|
||||
}
|
||||
send_list_length = 0;
|
||||
|
||||
/* do not close the channel in a pre-dump */
|
||||
if (!final_dump)
|
||||
return;
|
||||
|
||||
/* free IB-related resources */
|
||||
destroy_com_hndl();
|
||||
ib_initialized = false;
|
||||
|
||||
fprintf(stderr, "Guest memory sent!\n");
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* \brief Receives the guest memory from the source
|
||||
*
|
||||
* The receive participates in the IB connection setup and waits for the
|
||||
* 'solicited' event sent with the last WR issued by the sender.
|
||||
*/
|
||||
void recv_guest_mem(size_t mem_chunk_cnt, mem_chunk_t *mem_chunks)
|
||||
{
|
||||
int res = 0;
|
||||
|
||||
/* prepare IB channel */
|
||||
init_com_hndl(mem_chunk_cnt, mem_chunks);
|
||||
exchange_qp_info(true);
|
||||
con_com_buf();
|
||||
|
||||
/* request notification on the event channel */
|
||||
if (ibv_req_notify_cq(com_hndl.cq, 1) < 0) {
|
||||
fprintf(stderr,
|
||||
"[ERROR] Could request notify for completion queue "
|
||||
"- %d (%s). Abort!\n",
|
||||
errno,
|
||||
strerror(errno));
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
/* post recv matching IBV_RDMA_WRITE_WITH_IMM */
|
||||
struct ibv_cq *ev_cq;
|
||||
void *ev_ctx;
|
||||
struct ibv_sge sg;
|
||||
struct ibv_recv_wr recv_wr;
|
||||
struct ibv_recv_wr *bad_wr;
|
||||
uint32_t recv_buf = 0;
|
||||
|
||||
memset(&sg, 0, sizeof(sg));
|
||||
sg.addr = (uintptr_t)&recv_buf;
|
||||
sg.length = sizeof(recv_buf);
|
||||
sg.lkey = com_hndl.mrs[0]->lkey;
|
||||
|
||||
memset(&recv_wr, 0, sizeof(recv_wr));
|
||||
recv_wr.wr_id = 0;
|
||||
recv_wr.sg_list = &sg;
|
||||
recv_wr.num_sge = 1;
|
||||
|
||||
if (ibv_post_recv(com_hndl.qp, &recv_wr, &bad_wr) < 0) {
|
||||
fprintf(stderr,
|
||||
"[ERROR] Could post recv - %d (%s). Abort!\n",
|
||||
errno,
|
||||
strerror(errno));
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
/* wait for requested event */
|
||||
if (ibv_get_cq_event(com_hndl.comp_chan, &ev_cq, &ev_ctx) < 0) {
|
||||
fprintf(stderr,
|
||||
"[ERROR] Could get event from completion channel "
|
||||
"- %d (%s). Abort!\n",
|
||||
errno,
|
||||
strerror(errno));
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
/* acknowledge the event */
|
||||
ibv_ack_cq_events(com_hndl.cq, 1);
|
||||
|
||||
/* free IB-related resources */
|
||||
destroy_com_hndl();
|
||||
|
||||
fprintf(stderr, "Guest memory received!\n");
|
||||
}
|
||||
#endif /* __RDMA_MIGRATION__ */
|
|
@ -1,277 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2018, Simon Pickartz, RWTH Aachen University
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef __x86_64__
|
||||
#define _GNU_SOURCE
|
||||
|
||||
#include <arpa/inet.h>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "uhyve-migration.h"
|
||||
#include "uhyve.h"
|
||||
|
||||
static struct sockaddr_in mig_server;
|
||||
static int com_sock = 0;
|
||||
static int listen_sock = 0;
|
||||
|
||||
static mig_type_t mig_type = MIG_TYPE_COLD;
|
||||
|
||||
/**
|
||||
* \brief Returns the configured migration type
|
||||
*/
|
||||
mig_type_t
|
||||
get_migration_type(void)
|
||||
{
|
||||
return mig_type;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Sets the migration type
|
||||
*
|
||||
* \param mig_type_str A string defining the migration type
|
||||
*/
|
||||
void
|
||||
set_migration_type(const char *mig_type_str)
|
||||
{
|
||||
if (mig_type_str == NULL)
|
||||
return;
|
||||
|
||||
int i;
|
||||
bool found_type = false;
|
||||
for (i=0; i<sizeof(mig_type_conv)/sizeof(mig_type_conv[0]); ++i) {
|
||||
if (!strcmp (mig_type_str, mig_type_conv[i].str)) {
|
||||
mig_type = mig_type_conv[i].mig_type;
|
||||
found_type = true;
|
||||
}
|
||||
}
|
||||
|
||||
/* we do not know this migration type */
|
||||
if (!found_type) {
|
||||
fprintf(stderr, "ERROR: Migration type '%s' not supported. Fallback to 'cold'\n", mig_type_str);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/**
 * \brief Closes a socket
 *
 * \param sock the socket to be closed
 *
 * A failing close() is reported on stderr and terminates the process.
 */
static inline void
close_sock(int sock)
{
	const int rc = close(sock);

	/* nothing more to do if the descriptor was closed properly */
	if (rc >= 0)
		return;

	fprintf(stderr,
		"ERROR: Could not close the communication socket "
		"- %d (%s). Abort!\n",
		errno,
		strerror(errno));
	exit(EXIT_FAILURE);
}
|
||||
|
||||
/**
|
||||
* \brief Set the destination node for a migration
|
||||
*
|
||||
* \param ip_str a string containing the IPv4 addr of the destination
|
||||
* \param port the migration port
|
||||
*/
|
||||
void set_migration_target(const char *ip_str, int port)
|
||||
{
|
||||
/* determine server address */
|
||||
memset(&mig_server, '0', sizeof(mig_server));
|
||||
mig_server.sin_family = AF_INET;
|
||||
mig_server.sin_port = htons(port);
|
||||
|
||||
int res = inet_pton(AF_INET, ip_str, &mig_server.sin_addr);
|
||||
if (res == 0) {
|
||||
fprintf(stderr, "'%s' is not a valid server address\n", ip_str);
|
||||
} else if (res < 0) {
|
||||
fprintf(stderr, "An error occured while retrieving the migration server address\n");
|
||||
perror("inet_pton");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Connects to a migration target via TCP/IP
|
||||
*/
|
||||
void connect_to_server(void)
|
||||
{
|
||||
int res = 0;
|
||||
char buf[INET_ADDRSTRLEN];
|
||||
if (inet_ntop(AF_INET, (const void*)&mig_server.sin_addr, buf, INET_ADDRSTRLEN) == NULL) {
|
||||
perror("inet_ntop");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
if((com_sock = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
|
||||
perror("socket");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
fprintf(stderr, "Trying to connect to migration server: %s\n", buf);
|
||||
if (connect(com_sock, (struct sockaddr *)&mig_server, sizeof(mig_server)) < 0) {
|
||||
perror("connect");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
fprintf(stderr, "Successfully connected to: %s\n", buf);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* \brief Waits for a migration source to connect via TCP/IP
|
||||
*
|
||||
* \param listen_portno the port of the migration socket
|
||||
*/
|
||||
void wait_for_client(uint16_t listen_portno)
|
||||
{
|
||||
int client_addr_len = 0, res = 0;
|
||||
struct sockaddr_in serv_addr;
|
||||
struct sockaddr_in client_addr;
|
||||
|
||||
/* open migration socket */
|
||||
fprintf(stderr, "Waiting for incomming migration request ...\n");
|
||||
listen_sock = socket(AF_INET, SOCK_STREAM, 0);
|
||||
memset(&serv_addr, '0', sizeof(serv_addr));
|
||||
|
||||
serv_addr.sin_family = AF_INET;
|
||||
serv_addr.sin_addr.s_addr = htonl(INADDR_ANY);
|
||||
serv_addr.sin_port = htons(listen_portno);
|
||||
|
||||
bind(listen_sock, (struct sockaddr*)&serv_addr, sizeof(serv_addr));
|
||||
|
||||
listen(listen_sock, 10);
|
||||
|
||||
client_addr_len = sizeof(struct sockaddr_in);
|
||||
if ((com_sock = accept(listen_sock, &client_addr, &client_addr_len)) < 0) {
|
||||
perror("accept");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
char buf[INET_ADDRSTRLEN];
|
||||
if (inet_ntop(AF_INET, (const void*)&client_addr.sin_addr, buf, INET_ADDRSTRLEN) == NULL) {
|
||||
perror("inet_ntop");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
fprintf(stderr, "Incomming migration from: %s\n", buf);
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Receives data from the migration socket
|
||||
*
|
||||
* \param buffer the destination buffer
|
||||
* \param length the buffer size
|
||||
*/
|
||||
int recv_data(void *buffer, size_t length)
|
||||
{
|
||||
size_t bytes_received = 0;
|
||||
while(bytes_received < length) {
|
||||
bytes_received += recv(
|
||||
com_sock,
|
||||
(void*)((uint64_t)buffer+bytes_received),
|
||||
length-bytes_received,
|
||||
0);
|
||||
}
|
||||
|
||||
return bytes_received;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Sends data via the migration socket
|
||||
*
|
||||
* \param buffer the source buffer
|
||||
* \param length the buffer size
|
||||
*/
|
||||
int send_data(void *buffer, size_t length)
|
||||
{
|
||||
size_t bytes_sent = 0;
|
||||
while(bytes_sent < length) {
|
||||
bytes_sent += send(
|
||||
com_sock,
|
||||
(void*)((uint64_t)buffer+bytes_sent),
|
||||
length-bytes_sent,
|
||||
0);
|
||||
}
|
||||
|
||||
return bytes_sent;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Closes the TCP connection
|
||||
*/
|
||||
void close_migration_channel(void)
|
||||
{
|
||||
if (listen_sock) {
|
||||
close_sock(listen_sock);
|
||||
}
|
||||
close_sock(com_sock);
|
||||
}
|
||||
|
||||
|
||||
#ifndef __RDMA_MIGRATION__
|
||||
/**
 * \brief Sends the guest memory to the destination via TCP/IP
 *
 * \param mode          the requested migration mode; incremental dumps are
 *                      not supported here and are sent as a complete dump
 * \param final_dump    not used by the TCP/IP implementation
 * \param mem_chunk_cnt not used by the TCP/IP implementation
 * \param mem_chunks    not used by the TCP/IP implementation
 *
 * Transfers 'guest_size' bytes starting at 'guest_mem' with send_data().
 * An unknown mode terminates the process.
 */
void send_guest_mem(mig_mode_t mode, bool final_dump, size_t mem_chunk_cnt, mem_chunk_t *mem_chunks)
{
	const bool incremental = (mode == MIG_MODE_INCREMENTAL_DUMP);

	/* reject everything that is neither a complete nor an incremental dump */
	if (!incremental && (mode != MIG_MODE_COMPLETE_DUMP)) {
		fprintf(stderr, "ERROR: Unknown migration mode. Abort!\n");
		exit(EXIT_FAILURE);
	}

	if (incremental)
		fprintf(stderr, "ERROR: Incremental dumps currently not supported via TCP/IP. Fallback to complete dump!\n");

	/* both supported modes end up transferring the whole guest memory */
	send_data(guest_mem, guest_size);

	fprintf(stderr, "Guest memory sent!\n");
}
|
||||
|
||||
/**
 * \brief Receives the guest memory from the source via TCP/IP
 *
 * \param mem_chunk_cnt not used by the TCP/IP implementation
 * \param mem_chunks    not used by the TCP/IP implementation
 *
 * Reads 'guest_size' bytes into 'guest_mem' with recv_data().
 */
void recv_guest_mem(size_t mem_chunk_cnt, mem_chunk_t *mem_chunks)
{
	/* the chunk description is only needed by the RDMA implementation */
	(void)mem_chunk_cnt;
	(void)mem_chunks;

	recv_data(guest_mem, guest_size);

	fprintf(stderr, "Guest memory received!\n");
}
|
||||
#endif /* __RDMA_MIGRATION__ */
|
||||
|
||||
#else
|
||||
|
||||
/* dummy implementation for aarch64 */
|
||||
|
||||
/**
 * \brief Dummy implementation: the arguments are ignored
 *
 * \param ip_str ignored
 * \param port   ignored
 */
void set_migration_target(const char *ip_str, int port)
{
	(void)ip_str;
	(void)port;
}
|
||||
|
||||
/**
 * \brief Dummy implementation: the argument is ignored
 *
 * \param mig_type_str ignored
 */
void set_migration_type(const char *mig_type_str)
{
	(void)mig_type_str;
}
|
||||
|
||||
#endif
|
||||
|
|
@ -1,91 +0,0 @@
|
|||
#ifndef __UHYVE_MIGRATION_H__
|
||||
/*
|
||||
* Copyright (c) 2018, Simon Pickartz, RWTH Aachen University
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Simon Pickartz
|
||||
* @file tools/uhyve-migration.h
|
||||
* @brief Migration-related functions
|
||||
*/
|
||||
|
||||
#define __UHYVE_MIGRATION_H__
|
||||
|
||||
#include <stdbool.h>
|
||||
|
||||
extern size_t guest_size;
|
||||
extern uint8_t* guest_mem;
|
||||
|
||||
#define MIGRATION_PORT 1337
|
||||
|
||||
/** \brief Selects how much guest memory a dump transfers */
typedef enum {
	MIG_MODE_COMPLETE_DUMP		= 1,	/* complete dump */
	MIG_MODE_INCREMENTAL_DUMP	= 2	/* incremental dump */
} mig_mode_t;
|
||||
|
||||
/** \brief The supported migration types */
typedef enum {
	MIG_TYPE_COLD	= 0,	/* selected by the string "cold" */
	MIG_TYPE_LIVE	= 1	/* selected by the string "live" */
} mig_type_t;
|
||||
|
||||
const static struct {
|
||||
mig_type_t mig_type;
|
||||
const char *str;
|
||||
} mig_type_conv [] = {
|
||||
{MIG_TYPE_COLD, "cold"},
|
||||
{MIG_TYPE_LIVE, "live"},
|
||||
};
|
||||
|
||||
/** \brief Describes one memory chunk by its size and start pointer */
struct _mem_chunk {
	size_t size;	/* size of the chunk */
	uint8_t *ptr;	/* start of the chunk */
};
typedef struct _mem_chunk mem_chunk_t;
|
||||
|
||||
/**
 * \brief Metadata record accompanying a migration
 *
 * NOTE(review): the member order and types define the memory layout of the
 * record and must not be changed if it is exchanged between hosts - confirm
 * against the code that sends and receives it.
 */
struct _migration_metadata {
	uint32_t ncores;
	size_t guest_size;
	uint32_t no_checkpoint;
	uint64_t elf_entry;
	bool full_checkpoint;
};
typedef struct _migration_metadata migration_metadata_t;
|
||||
|
||||
void set_migration_type(const char *mig_type_str);
|
||||
mig_type_t get_migration_type(void);
|
||||
|
||||
void wait_for_client(uint16_t listen_portno);
|
||||
void set_migration_target(const char *ip_str, int port);
|
||||
void connect_to_server(void);
|
||||
void close_migration_channel(void);
|
||||
|
||||
int recv_data(void *buffer, size_t length);
|
||||
int send_data(void *buffer, size_t length);
|
||||
|
||||
void send_guest_mem(mig_mode_t mode, bool final_dump, size_t mem_chunk_cnt, mem_chunk_t *mem_chunks);
|
||||
void recv_guest_mem(size_t mem_chunk_cnt, mem_chunk_t *mem_chunks);
|
||||
#endif /* __UHYVE_MIGRATION_H__ */
|
||||
|
||||
|
||||
|
|
@ -1,189 +0,0 @@
|
|||
/* Copyright (c) 2015, IBM
|
||||
* Author(s): Dan Williams <djwillia@us.ibm.com>
|
||||
* Ricardo Koller <kollerr@us.ibm.com>
|
||||
* Copyright (c) 2017, RWTH Aachen University
|
||||
* Author(s): Tim van de Kamp <tim.van.de.kamp@rwth-aachen.de>
|
||||
*
|
||||
* Permission to use, copy, modify, and/or distribute this software
|
||||
* for any purpose with or without fee is hereby granted, provided
|
||||
* that the above copyright notice and this permission notice appear
|
||||
* in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
|
||||
* WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
|
||||
* AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
|
||||
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
||||
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
/* We used several existing projects as guides
|
||||
* kvmtest.c: http://lwn.net/Articles/658512/
|
||||
* lkvm: http://github.com/clearlinux/kvmtool
|
||||
*/
|
||||
|
||||
/*
|
||||
* 15.1.2017: extend original version (https://github.com/Solo5/solo5)
|
||||
* for HermitCore
|
||||
*/
|
||||
|
||||
#include "uhyve-net.h"
|
||||
#include <ctype.h>
|
||||
|
||||
/* TODO: create an array or similar to support more than one netif */
|
||||
static uhyve_netinfo_t netinfo;
|
||||
|
||||
//-------------------------------------- ATTACH LINUX TAP -----------------------------------------//
|
||||
/*
 * Attach to a Linux TAP device and return a non-blocking file descriptor.
 *
 * dev is either "@<number>", naming an already open descriptor onto the
 * device, or the name of the TAP interface to create or attach to.
 *
 * Returns the descriptor on success, or -1 with errno set on failure
 * (EINVAL for a missing or over-long name, ENODEV if the kernel handed back
 * a different device or the device is not up).
 */
int attach_linux_tap(const char *dev)
{
	struct ifreq ifr;
	int fd, saved_errno;

	if (dev == NULL) {
		errno = EINVAL;
		return -1;
	}

	// @<number> indicates a pre-existing open fd onto the correct device.
	if (dev[0] == '@') {
		fd = atoi(&dev[1]);

		if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1)
			return -1;
		return fd;
	}

	// Validate the name before opening anything so that no descriptor can
	// leak on the error path. ifr_name holds IFNAMSIZ bytes including the
	// terminating NUL, hence the ">=".
	if (strlen(dev) >= IFNAMSIZ) {
		errno = EINVAL;
		return -1;
	}

	fd = open("/dev/net/tun", O_RDWR | O_NONBLOCK);
	if (fd == -1)
		return -1;

	// Initialize interface request for TAP interface
	memset(&ifr, 0x00, sizeof(ifr));
	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
	strncpy(ifr.ifr_name, dev, IFNAMSIZ - 1);

	// Try to create OR attach to an existing device. The Linux API has no way
	// to differentiate between the two.
	//
	// Create the tap device beforehand with these commands:
	//
	// sudo ip tuntap add <devname> mode tap user <user>
	// sudo ip addr add 10.0.5.1/24 broadcast 10.0.5.255
	// sudo ip link set dev <devname> up
	//
	if (ioctl(fd, TUNSETIFF, (void *)&ifr) < 0) {
		// close() may clobber errno; report the ioctl failure
		saved_errno = errno;
		close(fd);
		errno = saved_errno;
		return -1;
	}

	// If we got back a different device than the one requested, e.g. because
	// the caller mistakenly passed in '%d' (yes, that's really in the Linux API)
	// then fail
	if (strncmp(ifr.ifr_name, dev, IFNAMSIZ) != 0) {
		close(fd);
		errno = ENODEV;
		return -1;
	}

	// Attempt a zero-sized write to the device. If the device was freshly created
	// (as opposed to attached to an existing one) this will fail with EIO. Ignore
	// any other error return since that may indicate the device is up.
	//
	// If this check produces a false positive then the caller's later writes to fd
	// will fail with EIO, which is not great but at least we tried.
	char buf[1] = { 0 };
	if (write(fd, buf, 0) == -1 && errno == EIO) {
		close(fd);
		errno = ENODEV;
		return -1;
	}

	return fd;
}
|
||||
|
||||
//---------------------------------- GET MAC ----------------------------------------------//
|
||||
char* uhyve_get_mac(void)
|
||||
{
|
||||
return netinfo.mac_str;
|
||||
}
|
||||
|
||||
//---------------------------------- SET MAC ----------------------------------------------//
|
||||
|
||||
int uhyve_set_mac(void)
|
||||
{
|
||||
int mac_is_set = 0;
|
||||
uint8_t guest_mac[6];
|
||||
|
||||
char* str = getenv("HERMIT_NETIF_MAC");
|
||||
if (str)
|
||||
{
|
||||
const char *macptr = str;
|
||||
const char *v_macptr = macptr;
|
||||
// checking str is a valid MAC address
|
||||
int i = 0;
|
||||
int s = 0;
|
||||
while(*v_macptr) {
|
||||
if(isxdigit(*v_macptr)) {
|
||||
i++;
|
||||
} else if (*v_macptr == ':') {
|
||||
if (i / 2 - 1 != s++)
|
||||
break;
|
||||
} else {
|
||||
s = -1;
|
||||
}
|
||||
v_macptr++;
|
||||
}
|
||||
if (i != 12 || s != 5) {
|
||||
warnx("Malformed mac address: %s\n", macptr);
|
||||
} else {
|
||||
snprintf(netinfo.mac_str, sizeof(netinfo.mac_str), "%s", macptr);
|
||||
mac_is_set = 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (!mac_is_set) {
|
||||
int rfd = open("/dev/urandom", O_RDONLY);
|
||||
if(rfd == -1)
|
||||
err(1, "Could not open /dev/urandom\n");
|
||||
int ret;
|
||||
ret = read(rfd, guest_mac, sizeof(guest_mac));
|
||||
// compare the number of bytes read with the size of guest_mac
|
||||
assert(ret == sizeof(guest_mac));
|
||||
close(rfd);
|
||||
|
||||
guest_mac[0] &= 0xfe; // creats a random MAC-address in the locally administered
|
||||
guest_mac[0] |= 0x02; // address range which can be used without conflict with other public devices
|
||||
// save the MAC address in the netinfo
|
||||
snprintf(netinfo.mac_str, sizeof(netinfo.mac_str),
|
||||
"%02x:%02x:%02x:%02x:%02x:%02x",
|
||||
guest_mac[0], guest_mac[1], guest_mac[2],
|
||||
guest_mac[3], guest_mac[4], guest_mac[5]);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
//-------------------------------------- SETUP NETWORK ---------------------------------------------//
|
||||
int uhyve_net_init(const char *netif)
|
||||
{
|
||||
if (netif == NULL) {
|
||||
err(1, "ERROR: no netif defined\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// attaching netif
|
||||
netfd = attach_linux_tap(netif);
|
||||
if (netfd < 0) {
|
||||
err(1, "Could not attach interface: %s\n", netif);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
uhyve_set_mac();
|
||||
|
||||
return netfd;
|
||||
}
|
|
@ -1,60 +0,0 @@
|
|||
#ifndef __UHYVE_NET_H__
#define __UHYVE_NET_H__

#include <linux/kvm.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <errno.h>
#include <unistd.h>

#include <sys/select.h>
#include <sys/stat.h>

/* network interface */
#include <sys/socket.h>
#include <linux/if.h>
#include <linux/if_tun.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <err.h>

/* file descriptor of the attached TAP device */
extern int netfd;

/*
 * The structures below are exchanged with the guest through guest memory,
 * which is why they are packed. Do not reorder or resize their members.
 */

// UHYVE_PORT_NETINFO
typedef struct {
	/* OUT: "xx:xx:xx:xx:xx:xx" plus terminating NUL */
	char mac_str[18];
} __attribute__((packed)) uhyve_netinfo_t;

// UHYVE_PORT_NETWRITE
typedef struct {
	/* IN */
	const void* data;
	size_t len;
	/* OUT */
	int ret;
} __attribute__((packed)) uhyve_netwrite_t;

// UHYVE_PORT_NETREAD
typedef struct {
	/* IN */
	void* data;
	/* IN / OUT */
	size_t len;
	/* OUT */
	int ret;
} __attribute__((packed)) uhyve_netread_t;

// UHYVE_PORT_NETSTAT
typedef struct {
	/* IN */
	int status;
} __attribute__((packed)) uhyve_netstat_t;

/* attach to the TAP device and initialize the MAC address; returns the fd */
int uhyve_net_init(const char *hermit_netif);

/* return the MAC address string of the guest interface */
char* uhyve_get_mac(void);

/* choose the guest MAC address (HERMIT_NETIF_MAC or a random one) */
int uhyve_set_mac(void);

/* open a TAP device by name or by "@<fd>"; returns the fd or -1 */
int attach_linux_tap(const char *dev);

#endif /* __UHYVE_NET_H__ */
|
|
@ -1,55 +0,0 @@
|
|||
/* Copyright (c) 2017, RWTH Aachen University
|
||||
* Author(s): Daniel Krebs <github@daniel-krebs.net>
|
||||
*
|
||||
* Permission to use, copy, modify, and/or distribute this software
|
||||
* for any purpose with or without fee is hereby granted, provided
|
||||
* that the above copyright notice and this permission notice appear
|
||||
* in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
|
||||
* WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
|
||||
* AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
|
||||
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
||||
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
#ifndef UHYVE_SYSCALLS_H
|
||||
#define UHYVE_SYSCALLS_H
|
||||
|
||||
#include <unistd.h>
|
||||
#include <stddef.h>
|
||||
|
||||
typedef struct {
|
||||
int fd;
|
||||
const char* buf;
|
||||
size_t len;
|
||||
} __attribute__((packed)) uhyve_write_t;
|
||||
|
||||
typedef struct {
|
||||
const char* name;
|
||||
int flags;
|
||||
int mode;
|
||||
int ret;
|
||||
} __attribute__((packed)) uhyve_open_t;
|
||||
|
||||
typedef struct {
|
||||
int fd;
|
||||
int ret;
|
||||
} __attribute__((packed)) uhyve_close_t;
|
||||
|
||||
typedef struct {
|
||||
int fd;
|
||||
char* buf;
|
||||
size_t len;
|
||||
ssize_t ret;
|
||||
} __attribute__((packed)) uhyve_read_t;
|
||||
|
||||
typedef struct {
|
||||
int fd;
|
||||
off_t offset;
|
||||
int whence;
|
||||
} __attribute__((packed)) uhyve_lseek_t;
|
||||
|
||||
#endif // UHYVE_SYSCALLS_H
|
1255
tools/uhyve-x86_64.c
1255
tools/uhyve-x86_64.c
File diff suppressed because it is too large
Load diff
|
@ -1,98 +0,0 @@
|
|||
#ifndef __UHYVE_CPU_H__
|
||||
#define __UHYVE_CPU_H__
|
||||
|
||||
/*
 * Fallback definitions of the _AC/_AT/_BITUL/_BITULL helpers; they are only
 * provided here when _BITUL has not been defined already.
 */
#if !defined(_BITUL)

#ifdef __ASSEMBLY__
#define _AC(X,Y) X
#define _AT(T,X) X
#else
#define __AC(X,Y) (X##Y)
#define _AC(X,Y) __AC(X,Y)
#define _AT(T,X) ((T)(X))
#endif

#define _BITUL(x) (_AC(1,UL) << (x))
#define _BITULL(x) (_AC(1,ULL) << (x))

#endif /* !defined(_BITUL) */

/* EFLAGS bits */
#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */

/* Basic CPU control in CR0 */
#define X86_CR0_PE_BIT 0 /* Protection Enable */
#define X86_CR0_PE _BITUL(X86_CR0_PE_BIT)
#define X86_CR0_PG_BIT 31 /* Paging */
#define X86_CR0_PG _BITUL(X86_CR0_PG_BIT)

/* Intel CPU features in CR4 */
#define X86_CR4_PAE_BIT 5 /* enable physical address extensions */
#define X86_CR4_PAE _BITUL(X86_CR4_PAE_BIT)

/* Intel long mode page directory/table entries */
#define X86_PDPT_P_BIT 0 /* Present */
#define X86_PDPT_P _BITUL(X86_PDPT_P_BIT)
#define X86_PDPT_RW_BIT 1 /* Writable */
#define X86_PDPT_RW _BITUL(X86_PDPT_RW_BIT)
#define X86_PDPT_PS_BIT 7 /* Page size */
#define X86_PDPT_PS _BITUL(X86_PDPT_PS_BIT)

/* GDT and KVM segment manipulation */

/* byte offset of the n-th 8-byte descriptor */
#define GDT_DESC_OFFSET(n) ((n) * 0x8)
|
||||
|
||||
/* Extract the 32-bit base address scattered over a 64-bit GDT descriptor */
#define GDT_GET_BASE(x) ( \
	(((x) & 0xFF00000000000000) >> 32) | \
	(((x) & 0x000000FF00000000) >> 16) | \
	(((x) & 0x00000000FFFF0000) >> 16))

/* Extract the 20-bit segment limit of a 64-bit GDT descriptor */
#define GDT_GET_LIMIT(x) (__u32)( \
	(((x) & 0x000F000000000000) >> 32) | \
	(((x) & 0x000000000000FFFF)))

/* Constructor for a conventional segment GDT (or LDT) entry */
/* This is a macro so it can be used in initializers */
#define GDT_ENTRY(flags, base, limit) \
	((((base) & _AC(0xff000000, ULL)) << (56-24)) | \
	(((flags) & _AC(0x0000f0ff, ULL)) << 40) | \
	(((limit) & _AC(0x000f0000, ULL)) << (48-16)) | \
	(((base) & _AC(0x00ffffff, ULL)) << 16) | \
	(((limit) & _AC(0x0000ffff, ULL))))

/* Single-field accessors, named after the descriptor bits they return */
#define GDT_GET_G(x) (__u8)(((x) & 0x0080000000000000) >> 55)
#define GDT_GET_DB(x) (__u8)(((x) & 0x0040000000000000) >> 54)
#define GDT_GET_L(x) (__u8)(((x) & 0x0020000000000000) >> 53)
#define GDT_GET_AVL(x) (__u8)(((x) & 0x0010000000000000) >> 52)
#define GDT_GET_P(x) (__u8)(((x) & 0x0000800000000000) >> 47)
#define GDT_GET_DPL(x) (__u8)(((x) & 0x0000600000000000) >> 45)
#define GDT_GET_S(x) (__u8)(((x) & 0x0000100000000000) >> 44)
#define GDT_GET_TYPE(x) (__u8)(((x) & 0x00000F0000000000) >> 40)

/*
 * Fill the segment record seg from entry number sel of gdt_table.
 *
 * All macro arguments are parenthesized so that expressions such as
 * "idx + 1" may be passed as sel. Note that sel is evaluated twice, so it
 * must not have side effects.
 */
#define GDT_TO_KVM_SEGMENT(seg, gdt_table, sel) \
	do { \
		__u64 gdt_ent = (gdt_table)[(sel)]; \
		(seg).base = GDT_GET_BASE(gdt_ent); \
		(seg).limit = GDT_GET_LIMIT(gdt_ent); \
		(seg).selector = (sel) * 8; \
		(seg).type = GDT_GET_TYPE(gdt_ent); \
		(seg).present = GDT_GET_P(gdt_ent); \
		(seg).dpl = GDT_GET_DPL(gdt_ent); \
		(seg).db = GDT_GET_DB(gdt_ent); \
		(seg).s = GDT_GET_S(gdt_ent); \
		(seg).l = GDT_GET_L(gdt_ent); \
		(seg).g = GDT_GET_G(gdt_ent); \
		(seg).avl = GDT_GET_AVL(gdt_ent); \
	} while (0)
|
||||
|
||||
#endif
|
813
tools/uhyve.c
813
tools/uhyve.c
|
@ -1,813 +0,0 @@
|
|||
/* Copyright (c) 2015, IBM
|
||||
* Author(s): Dan Williams <djwillia@us.ibm.com>
|
||||
* Ricardo Koller <kollerr@us.ibm.com>
|
||||
* Copyright (c) 2017, RWTH Aachen University
|
||||
* Author(s): Stefan Lankes <slankes@eonerc.rwth-aachen.de>
|
||||
*
|
||||
* Permission to use, copy, modify, and/or distribute this software
|
||||
* for any purpose with or without fee is hereby granted, provided
|
||||
* that the above copyright notice and this permission notice appear
|
||||
* in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
|
||||
* WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
|
||||
* AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
|
||||
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
||||
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
/* We used several existing projects as guides
|
||||
* kvmtest.c: http://lwn.net/Articles/658512/
|
||||
* Solo5: https://github.com/Solo5/solo5
|
||||
*/
|
||||
|
||||
/*
|
||||
* 15.1.2017: extend original version (https://github.com/Solo5/solo5)
|
||||
* for HermitCore
|
||||
* 25.2.2017: add SMP support to enable more than one core
|
||||
* 24.4.2017: add checkpoint/restore support,
|
||||
* remove memory limit
|
||||
*/
|
||||
|
||||
#define _GNU_SOURCE
|
||||
|
||||
#include <arpa/inet.h>
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdbool.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <sched.h>
|
||||
#include <signal.h>
|
||||
#include <limits.h>
|
||||
#include <pthread.h>
|
||||
#include <semaphore.h>
|
||||
#include <elf.h>
|
||||
#include <err.h>
|
||||
#include <poll.h>
|
||||
#include <sys/wait.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/eventfd.h>
|
||||
#include <linux/const.h>
|
||||
#include <linux/kvm.h>
|
||||
|
||||
#include "uhyve.h"
|
||||
#include "uhyve-syscalls.h"
|
||||
#include "uhyve-migration.h"
|
||||
#include "uhyve-net.h"
|
||||
#include "uhyve-gdb.h"
|
||||
#include "proxy.h"
|
||||
|
||||
static bool restart = false;
|
||||
static bool migration = false;
|
||||
static pthread_t net_thread;
|
||||
static int* vcpu_fds = NULL;
|
||||
static pthread_mutex_t kvm_lock = PTHREAD_MUTEX_INITIALIZER;
|
||||
|
||||
extern bool verbose;
|
||||
|
||||
static char* guest_path = NULL;
|
||||
static bool uhyve_gdb_enabled = false;
|
||||
size_t guest_size = 0x20000000ULL;
|
||||
bool full_checkpoint = false;
|
||||
pthread_barrier_t barrier;
|
||||
pthread_barrier_t migration_barrier;
|
||||
pthread_t* vcpu_threads = NULL;
|
||||
uint8_t* klog = NULL;
|
||||
uint8_t* guest_mem = NULL;
|
||||
uint32_t no_checkpoint = 0;
|
||||
uint32_t ncores = 1;
|
||||
uint64_t elf_entry;
|
||||
int kvm = -1, vmfd = -1, netfd = -1, efd = -1;
|
||||
uint8_t* mboot = NULL;
|
||||
__thread struct kvm_run *run = NULL;
|
||||
__thread int vcpufd = -1;
|
||||
__thread uint32_t cpuid = 0;
|
||||
static sem_t net_sem;
|
||||
|
||||
int uhyve_argc = -1;
|
||||
int uhyve_envc = -1;
|
||||
char **uhyve_argv = NULL;
|
||||
extern char **environ;
|
||||
char **uhyve_envp = NULL;
|
||||
|
||||
vcpu_state_t *vcpu_thread_states = NULL;
|
||||
static sigset_t signal_mask;
|
||||
|
||||
typedef struct {
|
||||
int argc;
|
||||
int argsz[MAX_ARGC_ENVC];
|
||||
int envc;
|
||||
int envsz[MAX_ARGC_ENVC];
|
||||
} __attribute__ ((packed)) uhyve_cmdsize_t;
|
||||
|
||||
typedef struct {
|
||||
char **argv;
|
||||
char **envp;
|
||||
} __attribute__ ((packed)) uhyve_cmdval_t;
|
||||
|
||||
/*
 * Parse a memory size such as "512M", "0x1000" or "2g".
 *
 * The number is read with strtoull() (base auto-detected); an optional
 * one-letter suffix K, M, G, T, P or E (either case) scales it by the
 * corresponding power of 1024. Bits shifted beyond 64 bits are lost; any
 * other trailing character is ignored.
 */
static uint64_t memparse(const char *ptr)
{
	char *suffix;
	unsigned int shift;
	uint64_t value = strtoull(ptr, &suffix, 0);

	switch (*suffix) {
	case 'K': case 'k': shift = 10; break;
	case 'M': case 'm': shift = 20; break;
	case 'G': case 'g': shift = 30; break;
	case 'T': case 't': shift = 40; break;
	case 'P': case 'p': shift = 50; break;
	case 'E': case 'e': shift = 60; break;
	default:            shift = 0;  break;
	}

	return value << shift;
}
|
||||
|
||||
// Just close file descriptor if not already done
|
||||
/*
 * Close the descriptor *fd refers to and mark it as closed by storing -1.
 * A descriptor that is already -1 is left untouched.
 */
static void close_fd(int* fd)
{
	if (*fd == -1)
		return;

	close(*fd);
	*fd = -1;
}
|
||||
|
||||
static void uhyve_exit(void* arg)
|
||||
{
|
||||
//print_registers();
|
||||
|
||||
if (pthread_mutex_trylock(&kvm_lock))
|
||||
{
|
||||
close_fd(&vcpufd);
|
||||
return;
|
||||
}
|
||||
|
||||
// only the main thread will execute this
|
||||
if (vcpu_threads) {
|
||||
for(uint32_t i=0; i<ncores; i++) {
|
||||
if (pthread_self() == vcpu_threads[i])
|
||||
continue;
|
||||
|
||||
pthread_kill(vcpu_threads[i], SIGTERM);
|
||||
}
|
||||
|
||||
if (netfd > 0)
|
||||
pthread_kill(net_thread, SIGTERM);
|
||||
}
|
||||
|
||||
close_fd(&vcpufd);
|
||||
}
|
||||
|
||||
static void uhyve_atexit(void)
|
||||
{
|
||||
uhyve_exit(NULL);
|
||||
|
||||
if (vcpu_threads) {
|
||||
for(uint32_t i = 0; i < ncores; i++) {
|
||||
if (pthread_self() == vcpu_threads[i])
|
||||
continue;
|
||||
pthread_join(vcpu_threads[i], NULL);
|
||||
}
|
||||
|
||||
free(vcpu_threads);
|
||||
}
|
||||
|
||||
if (vcpu_fds)
|
||||
free(vcpu_fds);
|
||||
|
||||
// clean up and close KVM
|
||||
close_fd(&vmfd);
|
||||
close_fd(&kvm);
|
||||
}
|
||||
|
||||
static void* wait_for_packet(void* arg)
|
||||
{
|
||||
int ret;
|
||||
struct pollfd fds = { .fd = netfd,
|
||||
.events = POLLIN,
|
||||
.revents = 0};
|
||||
|
||||
while(1)
|
||||
{
|
||||
fds.revents = 0;
|
||||
|
||||
ret = poll(&fds, 1, -1000);
|
||||
|
||||
if (ret < 0 && errno == EINTR)
|
||||
continue;
|
||||
|
||||
if (ret < 0)
|
||||
perror("poll()");
|
||||
else if (ret) {
|
||||
uint64_t event_counter = 1;
|
||||
write(efd, &event_counter, sizeof(event_counter));
|
||||
sem_wait(&net_sem);
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline void check_network(void)
|
||||
{
|
||||
// should we start the network thread?
|
||||
if ((efd < 0) && (getenv("HERMIT_NETIF"))) {
|
||||
struct kvm_irqfd irqfd = {};
|
||||
|
||||
efd = eventfd(0, 0);
|
||||
irqfd.fd = efd;
|
||||
irqfd.gsi = UHYVE_IRQ;
|
||||
kvm_ioctl(vmfd, KVM_IRQFD, &irqfd);
|
||||
|
||||
sem_init(&net_sem, 0, 0);
|
||||
|
||||
if (pthread_create(&net_thread, NULL, wait_for_packet, NULL))
|
||||
err(1, "unable to create thread");
|
||||
}
|
||||
}
|
||||
|
||||
static int vcpu_loop(void)
|
||||
{
|
||||
int ret;
|
||||
|
||||
pthread_barrier_wait(&barrier);
|
||||
|
||||
if (restart) {
|
||||
vcpu_state_t cpu_state = read_cpu_state();
|
||||
restore_cpu_state(cpu_state);
|
||||
} else if (vcpu_thread_states) {
|
||||
restore_cpu_state(vcpu_thread_states[cpuid]);
|
||||
} else {
|
||||
init_cpu_state(elf_entry);
|
||||
}
|
||||
|
||||
if (cpuid == 0) {
|
||||
if (restart) {
|
||||
no_checkpoint++;
|
||||
} else if (migration) {
|
||||
free(vcpu_thread_states);
|
||||
vcpu_thread_states = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* init uhyve gdb support */
|
||||
if (uhyve_gdb_enabled) {
|
||||
if (cpuid == 0)
|
||||
uhyve_gdb_init(vcpufd);
|
||||
|
||||
pthread_barrier_wait(&barrier);
|
||||
}
|
||||
|
||||
while (1) {
|
||||
ret = ioctl(vcpufd, KVM_RUN, NULL);
|
||||
|
||||
if(ret == -1) {
|
||||
switch(errno) {
|
||||
case EINTR:
|
||||
continue;
|
||||
|
||||
case EFAULT: {
|
||||
struct kvm_regs regs;
|
||||
kvm_ioctl(vcpufd, KVM_GET_REGS, ®s);
|
||||
#ifdef __x86_64__
|
||||
err(1, "KVM: host/guest translation fault: rip=0x%llx", regs.rip);
|
||||
#else
|
||||
err(1, "KVM: host/guest translation fault: elr_el1=0x%llx", regs.elr_el1);
|
||||
#endif
|
||||
}
|
||||
|
||||
default:
|
||||
err(1, "KVM: ioctl KVM_RUN in vcpu_loop for cpuid %d failed", cpuid);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t port = 0;
|
||||
unsigned raddr = 0;
|
||||
|
||||
/* handle requests */
|
||||
switch (run->exit_reason) {
|
||||
case KVM_EXIT_HLT:
|
||||
fprintf(stderr, "Guest has halted the CPU, this is considered as a normal exit.\n");
|
||||
if (uhyve_gdb_enabled)
|
||||
uhyve_gdb_handle_term();
|
||||
return 0;
|
||||
|
||||
case KVM_EXIT_MMIO:
|
||||
port = run->mmio.phys_addr;
|
||||
if (run->mmio.is_write)
|
||||
memcpy(&raddr, run->mmio.data, sizeof(raddr) /*run->mmio.len*/);
|
||||
//printf("KVM: handled KVM_EXIT_MMIO at 0x%lx (data %u)\n", port, raddr);
|
||||
|
||||
case KVM_EXIT_IO:
|
||||
if (!port) {
|
||||
port = run->io.port;
|
||||
raddr = *((unsigned*)((size_t)run+run->io.data_offset));
|
||||
}
|
||||
|
||||
//printf("port 0x%x\n", run->io.port);
|
||||
switch (port) {
|
||||
case UHYVE_UART_PORT:
|
||||
if (verbose)
|
||||
putc((unsigned char) raddr, stderr);
|
||||
break;
|
||||
case UHYVE_PORT_WRITE: {
|
||||
uhyve_write_t* uhyve_write = (uhyve_write_t*) (guest_mem+raddr);
|
||||
|
||||
uhyve_write->len = write(uhyve_write->fd, guest_mem+(size_t)uhyve_write->buf, uhyve_write->len);
|
||||
break;
|
||||
}
|
||||
|
||||
case UHYVE_PORT_READ: {
|
||||
uhyve_read_t* uhyve_read = (uhyve_read_t*) (guest_mem+raddr);
|
||||
|
||||
uhyve_read->ret = read(uhyve_read->fd, guest_mem+(size_t)uhyve_read->buf, uhyve_read->len);
|
||||
break;
|
||||
}
|
||||
|
||||
case UHYVE_PORT_EXIT: {
|
||||
if (cpuid)
|
||||
pthread_exit((int*)(guest_mem+raddr));
|
||||
else
|
||||
exit(*(int*)(guest_mem+raddr));
|
||||
break;
|
||||
}
|
||||
|
||||
case UHYVE_PORT_OPEN: {
|
||||
uhyve_open_t* uhyve_open = (uhyve_open_t*) (guest_mem+raddr);
|
||||
char rpath[PATH_MAX];
|
||||
|
||||
// forbid to open the kvm device
|
||||
if (realpath((const char*)guest_mem+(size_t)uhyve_open->name, rpath) < 0)
|
||||
uhyve_open->ret = -1;
|
||||
else if (strcmp(rpath, "/dev/kvm") == 0)
|
||||
uhyve_open->ret = -1;
|
||||
else
|
||||
uhyve_open->ret = open((const char*)guest_mem+(size_t)uhyve_open->name, uhyve_open->flags, uhyve_open->mode);
|
||||
break;
|
||||
}
|
||||
|
||||
case UHYVE_PORT_CLOSE: {
|
||||
uhyve_close_t* uhyve_close = (uhyve_close_t*) (guest_mem+raddr);
|
||||
|
||||
if (uhyve_close->fd > 2)
|
||||
uhyve_close->ret = close(uhyve_close->fd);
|
||||
else
|
||||
uhyve_close->ret = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
case UHYVE_PORT_NETINFO: {
|
||||
uhyve_netinfo_t* uhyve_netinfo = (uhyve_netinfo_t*)(guest_mem+raddr);
|
||||
memcpy(uhyve_netinfo->mac_str, uhyve_get_mac(), 18);
|
||||
// guest configure the ethernet device => start network thread
|
||||
check_network();
|
||||
break;
|
||||
}
|
||||
|
||||
case UHYVE_PORT_NETWRITE: {
|
||||
uhyve_netwrite_t* uhyve_netwrite = (uhyve_netwrite_t*)(guest_mem + raddr);
|
||||
uhyve_netwrite->ret = 0;
|
||||
ret = write(netfd, guest_mem + (size_t)uhyve_netwrite->data, uhyve_netwrite->len);
|
||||
if (ret >= 0) {
|
||||
uhyve_netwrite->ret = 0;
|
||||
uhyve_netwrite->len = ret;
|
||||
} else {
|
||||
uhyve_netwrite->ret = -1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case UHYVE_PORT_NETREAD: {
|
||||
uhyve_netread_t* uhyve_netread = (uhyve_netread_t*)(guest_mem + raddr);
|
||||
ret = read(netfd, guest_mem + (size_t)uhyve_netread->data, uhyve_netread->len);
|
||||
if (ret > 0) {
|
||||
uhyve_netread->len = ret;
|
||||
uhyve_netread->ret = 0;
|
||||
} else {
|
||||
uhyve_netread->ret = -1;
|
||||
sem_post(&net_sem);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case UHYVE_PORT_NETSTAT: {
|
||||
uhyve_netstat_t* uhyve_netstat = (uhyve_netstat_t*)(guest_mem + raddr);
|
||||
char* str = getenv("HERMIT_NETIF");
|
||||
if (str)
|
||||
uhyve_netstat->status = 1;
|
||||
else
|
||||
uhyve_netstat->status = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
case UHYVE_PORT_LSEEK: {
|
||||
uhyve_lseek_t* uhyve_lseek = (uhyve_lseek_t*) (guest_mem+raddr);
|
||||
|
||||
uhyve_lseek->offset = lseek(uhyve_lseek->fd, uhyve_lseek->offset, uhyve_lseek->whence);
|
||||
break;
|
||||
}
|
||||
|
||||
case UHYVE_PORT_CMDSIZE: {
|
||||
int i;
|
||||
uhyve_cmdsize_t *val = (uhyve_cmdsize_t *) (guest_mem+raddr);
|
||||
|
||||
val->argc = uhyve_argc;
|
||||
for(i=0; i<uhyve_argc; i++)
|
||||
val->argsz[i] = strlen(uhyve_argv[i]) + 1;
|
||||
|
||||
val->envc = uhyve_envc;
|
||||
for(i=0; i<uhyve_envc; i++)
|
||||
val->envsz[i] = strlen(uhyve_envp[i]) + 1;
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case UHYVE_PORT_CMDVAL: {
|
||||
int i;
|
||||
char **argv_ptr, **env_ptr;
|
||||
uhyve_cmdval_t *val = (uhyve_cmdval_t *) (guest_mem+raddr);
|
||||
|
||||
/* argv */
|
||||
argv_ptr = (char **)(guest_mem + (size_t)val->argv);
|
||||
for(i=0; i<uhyve_argc; i++)
|
||||
strcpy(guest_mem + (size_t)argv_ptr[i], uhyve_argv[i]);
|
||||
|
||||
/* env */
|
||||
env_ptr = (char **)(guest_mem + (size_t)val->envp);
|
||||
for(i=0; i<uhyve_envc; i++)
|
||||
strcpy(guest_mem + (size_t)env_ptr[i], uhyve_envp[i]);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
err(1, "KVM: unhandled KVM_EXIT_IO / KVM_EXIT_MMIO at port 0x%lx\n", port);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case KVM_EXIT_FAIL_ENTRY:
|
||||
if (uhyve_gdb_enabled)
|
||||
uhyve_gdb_handle_exception(vcpufd, GDB_SIGNAL_SEGV);
|
||||
err(1, "KVM: entry failure: hw_entry_failure_reason=0x%llx\n",
|
||||
run->fail_entry.hardware_entry_failure_reason);
|
||||
break;
|
||||
|
||||
case KVM_EXIT_INTERNAL_ERROR:
|
||||
if (uhyve_gdb_enabled)
|
||||
uhyve_gdb_handle_exception(vcpufd, GDB_SIGNAL_SEGV);
|
||||
err(1, "KVM: internal error exit: suberror = 0x%x\n", run->internal.suberror);
|
||||
break;
|
||||
|
||||
case KVM_EXIT_SHUTDOWN:
|
||||
fprintf(stderr, "KVM: receive shutdown command\n");
|
||||
|
||||
case KVM_EXIT_DEBUG:
|
||||
if (uhyve_gdb_enabled) {
|
||||
uhyve_gdb_handle_exception(vcpufd, GDB_SIGNAL_TRAP);
|
||||
break;
|
||||
} else print_registers();
|
||||
exit(EXIT_FAILURE);
|
||||
|
||||
default:
|
||||
fprintf(stderr, "KVM: unhandled exit: exit_reason = 0x%x\n", run->exit_reason);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
close(vcpufd);
|
||||
vcpufd = -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int vcpu_init(void)
|
||||
{
|
||||
vcpu_fds[cpuid] = vcpufd = kvm_ioctl(vmfd, KVM_CREATE_VCPU, cpuid);
|
||||
|
||||
/* Map the shared kvm_run structure and following data. */
|
||||
size_t mmap_size = (size_t) kvm_ioctl(kvm, KVM_GET_VCPU_MMAP_SIZE, NULL);
|
||||
|
||||
if (mmap_size < sizeof(*run))
|
||||
err(1, "KVM: invalid VCPU_MMAP_SIZE: %zd", mmap_size);
|
||||
|
||||
run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpufd, 0);
|
||||
if (run == MAP_FAILED)
|
||||
err(1, "KVM: VCPU mmap failed");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void sigusr_handler(int signum)
|
||||
{
|
||||
pthread_barrier_wait(&barrier);
|
||||
write_cpu_state();
|
||||
|
||||
pthread_barrier_wait(&barrier);
|
||||
}
|
||||
|
||||
static void vcpu_thread_mig_handler(int signum)
|
||||
{
|
||||
/* memory should be allocated at this point */
|
||||
assert(vcpu_thread_states != NULL);
|
||||
|
||||
/* ensure consistency among VCPUs */
|
||||
pthread_barrier_wait(&barrier);
|
||||
|
||||
/* save state */
|
||||
vcpu_thread_states[cpuid] = save_cpu_state();
|
||||
|
||||
/* synchronize with migration thread */
|
||||
pthread_barrier_wait(&migration_barrier);
|
||||
|
||||
/* wait to be killed */
|
||||
pthread_barrier_wait(&migration_barrier);
|
||||
}
|
||||
|
||||
static void* uhyve_thread(void* arg)
|
||||
{
|
||||
size_t ret;
|
||||
struct sigaction sa;
|
||||
|
||||
pthread_cleanup_push(uhyve_exit, NULL);
|
||||
|
||||
cpuid = (size_t) arg;
|
||||
|
||||
/* install signal handler for checkpoint */
|
||||
memset(&sa, 0x00, sizeof(sa));
|
||||
sa.sa_handler = &sigusr_handler;
|
||||
sigaction(SIGTHRCHKP, &sa, NULL);
|
||||
|
||||
/* install signal handler for migration */
|
||||
memset(&sa, 0x00, sizeof(sa));
|
||||
sa.sa_handler = &vcpu_thread_mig_handler;
|
||||
sigaction(SIGTHRMIG, &sa, NULL);
|
||||
|
||||
// create new cpu
|
||||
vcpu_init();
|
||||
|
||||
pthread_barrier_wait(&barrier);
|
||||
|
||||
// run cpu loop until thread gets killed
|
||||
ret = vcpu_loop();
|
||||
|
||||
pthread_cleanup_pop(1);
|
||||
|
||||
return (void*) ret;
|
||||
}
|
||||
|
||||
/* SIGTERM handler: terminate the thread that received the signal. */
void sigterm_handler(int signum)
{
	(void) signum;

	pthread_exit(NULL);
}
|
||||
|
||||
int uhyve_init(char *path)
|
||||
{
|
||||
FILE *f = NULL;
|
||||
guest_path = path;
|
||||
|
||||
signal(SIGTERM, sigterm_handler);
|
||||
|
||||
// register routine to close the VM
|
||||
atexit(uhyve_atexit);
|
||||
|
||||
const char *start_mig_server = getenv("HERMIT_MIGRATION_SERVER");
|
||||
|
||||
/*
|
||||
* Three startups
|
||||
* a) incoming migration
|
||||
* b) load existing checkpoint
|
||||
* c) normal run
|
||||
*/
|
||||
if (start_mig_server) {
|
||||
migration = true;
|
||||
migration_metadata_t metadata;
|
||||
wait_for_incomming_migration(&metadata, MIGRATION_PORT);
|
||||
|
||||
ncores = metadata.ncores;
|
||||
guest_size = metadata.guest_size;
|
||||
elf_entry = metadata.elf_entry;
|
||||
full_checkpoint = metadata.full_checkpoint;
|
||||
} else if ((f = fopen("checkpoint/chk_config.txt", "r")) != NULL) {
|
||||
int tmp = 0;
|
||||
restart = true;
|
||||
|
||||
fscanf(f, "number of cores: %u\n", &ncores);
|
||||
fscanf(f, "memory size: 0x%zx\n", &guest_size);
|
||||
fscanf(f, "checkpoint number: %u\n", &no_checkpoint);
|
||||
fscanf(f, "entry point: 0x%zx", &elf_entry);
|
||||
fscanf(f, "full checkpoint: %d", &tmp);
|
||||
full_checkpoint = tmp ? true : false;
|
||||
|
||||
if (verbose)
|
||||
fprintf(stderr,
|
||||
"Restart from checkpoint %u "
|
||||
"(ncores %d, mem size 0x%zx)\n",
|
||||
no_checkpoint, ncores, guest_size);
|
||||
fclose(f);
|
||||
} else {
|
||||
const char* hermit_memory = getenv("HERMIT_MEM");
|
||||
if (hermit_memory)
|
||||
guest_size = memparse(hermit_memory);
|
||||
|
||||
const char* hermit_cpus = getenv("HERMIT_CPUS");
|
||||
if (hermit_cpus)
|
||||
ncores = (uint32_t) atoi(hermit_cpus);
|
||||
|
||||
const char* full_chk = getenv("HERMIT_FULLCHECKPOINT");
|
||||
if (full_chk && (strcmp(full_chk, "0") != 0))
|
||||
full_checkpoint = true;
|
||||
}
|
||||
|
||||
vcpu_threads = (pthread_t*) calloc(ncores, sizeof(pthread_t));
|
||||
if (!vcpu_threads)
|
||||
err(1, "Not enough memory");
|
||||
|
||||
vcpu_fds = (int*) calloc(ncores, sizeof(int));
|
||||
if (!vcpu_fds)
|
||||
err(1, "Not enough memory");
|
||||
|
||||
kvm = open("/dev/kvm", O_RDWR | O_CLOEXEC);
|
||||
if (kvm < 0)
|
||||
err(1, "Could not open: /dev/kvm");
|
||||
|
||||
/* Make sure we have the stable version of the API */
|
||||
int kvm_api_version = kvm_ioctl(kvm, KVM_GET_API_VERSION, NULL);
|
||||
if (kvm_api_version != 12)
|
||||
err(1, "KVM: API version is %d, uhyve requires version 12", kvm_api_version);
|
||||
|
||||
/* Create the virtual machine */
|
||||
vmfd = kvm_ioctl(kvm, KVM_CREATE_VM, 0);
|
||||
|
||||
#ifdef __x86_64__
|
||||
init_kvm_arch();
|
||||
if (restart) {
|
||||
if (load_checkpoint(guest_mem, path) != 0)
|
||||
exit(EXIT_FAILURE);
|
||||
} else if (start_mig_server) {
|
||||
load_migration_data(guest_mem);
|
||||
close_migration_channel();
|
||||
} else {
|
||||
if (load_kernel(guest_mem, path) != 0)
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
#endif
|
||||
|
||||
pthread_barrier_init(&barrier, NULL, ncores);
|
||||
pthread_barrier_init(&migration_barrier, NULL, ncores+1);
|
||||
cpuid = 0;
|
||||
|
||||
// create first CPU, it will be the boot processor by default
|
||||
int ret = vcpu_init();
|
||||
|
||||
const char* netif_str = getenv("HERMIT_NETIF");
|
||||
if (netif_str)
|
||||
{
|
||||
// TODO: strncmp for different network interfaces
|
||||
// for example tun/tap device or uhyvetap device
|
||||
netfd = uhyve_net_init(netif_str);
|
||||
if (netfd < 0)
|
||||
err(1, "unable to initialized network");
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int uhyve_loop(int argc, char **argv)
|
||||
{
|
||||
const char* hermit_check = getenv("HERMIT_CHECKPOINT");
|
||||
const char* hermit_mig_support = getenv("HERMIT_MIGRATION_SUPPORT");
|
||||
const char* hermit_mig_type = getenv("HERMIT_MIGRATION_TYPE");
|
||||
const char* hermit_debug = getenv("HERMIT_DEBUG");
|
||||
int ts = 0, i = 0;
|
||||
|
||||
if (hermit_debug && (atoi(hermit_debug) != 0))
|
||||
uhyve_gdb_enabled = true;
|
||||
|
||||
/* argv[0] is 'proxy', do not count it */
|
||||
uhyve_argc = argc-1;
|
||||
uhyve_argv = &argv[1];
|
||||
uhyve_envp = environ;
|
||||
while(uhyve_envp[i] != NULL)
|
||||
i++;
|
||||
uhyve_envc = i;
|
||||
|
||||
if (uhyve_argc > MAX_ARGC_ENVC) {
|
||||
fprintf(stderr, "uhyve downsiize envc from %d to %d\n", uhyve_argc, MAX_ARGC_ENVC);
|
||||
uhyve_argc = MAX_ARGC_ENVC;
|
||||
}
|
||||
|
||||
if (uhyve_envc > MAX_ARGC_ENVC-1) {
|
||||
fprintf(stderr, "uhyve downsiize envc from %d to %d\n", uhyve_envc, MAX_ARGC_ENVC-1);
|
||||
uhyve_envc = MAX_ARGC_ENVC-1;
|
||||
}
|
||||
|
||||
if (uhyve_argc > MAX_ARGC_ENVC || uhyve_envc > MAX_ARGC_ENVC) {
|
||||
fprintf(stderr, "uhyve cannot forward more than %d command line "
|
||||
"arguments or environment variables, please consider increasing "
|
||||
"the MAX_ARGC_ENVP cmake argument\n", MAX_ARGC_ENVC);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (hermit_check)
|
||||
ts = atoi(hermit_check);
|
||||
|
||||
if (hermit_mig_support) {
|
||||
set_migration_target(hermit_mig_support, MIGRATION_PORT);
|
||||
set_migration_type(hermit_mig_type);
|
||||
|
||||
/* block SIGUSR1 in main thread */
|
||||
sigemptyset (&signal_mask);
|
||||
sigaddset (&signal_mask, SIGUSR1);
|
||||
pthread_sigmask (SIG_BLOCK, &signal_mask, NULL);
|
||||
|
||||
/* start migration thread; handles SIGUSR1 */
|
||||
pthread_t sig_thr_id;
|
||||
pthread_create (&sig_thr_id, NULL, migration_handler, (void *)&signal_mask);
|
||||
|
||||
/* install signal handler for migration */
|
||||
struct sigaction sa;
|
||||
memset(&sa, 0x00, sizeof(sa));
|
||||
sa.sa_handler = &vcpu_thread_mig_handler;
|
||||
sigaction(SIGTHRMIG, &sa, NULL);
|
||||
}
|
||||
|
||||
|
||||
// First CPU is special because it will boot the system. Other CPUs will
|
||||
// be booted linearily after the first one.
|
||||
vcpu_threads[0] = pthread_self();
|
||||
|
||||
// start threads to create VCPUs
|
||||
for(size_t i = 1; i < ncores; i++)
|
||||
pthread_create(&vcpu_threads[i], NULL, uhyve_thread, (void*) i);
|
||||
|
||||
pthread_barrier_wait(&barrier);
|
||||
|
||||
#ifdef __aarch64__
|
||||
init_kvm_arch();
|
||||
if (restart) {
|
||||
if (load_checkpoint(guest_mem, guest_path) != 0)
|
||||
exit(EXIT_FAILURE);
|
||||
} else {
|
||||
if (load_kernel(guest_mem, guest_path) != 0)
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
#endif
|
||||
|
||||
*((uint32_t*) (mboot+0x24)) = ncores;
|
||||
|
||||
if (ts > 0)
|
||||
{
|
||||
struct sigaction sa;
|
||||
struct itimerval timer;
|
||||
|
||||
/* Install timer_handler as the signal handler for SIGVTALRM. */
|
||||
memset(&sa, 0x00, sizeof(sa));
|
||||
sa.sa_handler = &timer_handler;
|
||||
sigaction(SIGALRM, &sa, NULL);
|
||||
|
||||
/* Configure the timer to expire after "ts" sec... */
|
||||
timer.it_value.tv_sec = ts;
|
||||
timer.it_value.tv_usec = 0;
|
||||
/* ... and every "ts" sec after that. */
|
||||
timer.it_interval.tv_sec = ts;
|
||||
timer.it_interval.tv_usec = 0;
|
||||
/* Start a virtual timer. It counts down whenever this process is executing. */
|
||||
setitimer(ITIMER_REAL, &timer, NULL);
|
||||
}
|
||||
|
||||
// Run first CPU
|
||||
return vcpu_loop();
|
||||
}
|
109
tools/uhyve.h
109
tools/uhyve.h
|
@ -1,109 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2018, Stefan Lankes, RWTH Aachen University
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef __UHYVE_H__
#define __UHYVE_H__

/* Everything used below is included here so the header compiles on its own:
 * err() for kvm_ioctl, SIGRTMIN, size_t, fixed-width integers and ioctl(). */
#include <err.h>
#include <signal.h>
#include <stddef.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* I/O ports used for file-related requests */
#define UHYVE_PORT_WRITE	0x400
#define UHYVE_PORT_OPEN		0x440
#define UHYVE_PORT_CLOSE	0x480
#define UHYVE_PORT_READ		0x500
#define UHYVE_PORT_EXIT		0x540
#define UHYVE_PORT_LSEEK	0x580

// Networkports
#define UHYVE_PORT_NETINFO	0x600
#define UHYVE_PORT_NETWRITE	0x640
#define UHYVE_PORT_NETREAD	0x680
#define UHYVE_PORT_NETSTAT	0x700

/* Ports and data structures for uhyve command line arguments and envp
 * forwarding */
#define UHYVE_PORT_CMDSIZE	0x740
#define UHYVE_PORT_CMDVAL	0x780

#define UHYVE_UART_PORT		0x800

#define UHYVE_IRQ	11

/* Real-time signals sent to VCPU threads for checkpointing and migration */
#define SIGTHRCHKP	(SIGRTMIN+0)
#define SIGTHRMIG	(SIGRTMIN+1)

/*
 * ioctl() wrapper that terminates the process via err() when the call
 * returns -1; otherwise the expression evaluates to the ioctl result.
 * Relies on the GNU statement-expression extension.
 */
#define kvm_ioctl(fd, cmd, arg) ({ \
	const int ret = ioctl(fd, cmd, arg); \
	if(ret == -1) \
		err(1, "KVM: ioctl " #cmd " failed"); \
	ret; \
	})

#ifdef __x86_64__
#define MAX_MSR_ENTRIES	25

/* kvm_msrs header immediately followed by storage for its entries */
struct msr_data {
	struct kvm_msrs info;
	struct kvm_msr_entry entries[MAX_MSR_ENTRIES];
};

/* Complete register state of one VCPU as exchanged with KVM */
typedef struct _vcpu_state {
	struct msr_data msr_data;
	struct kvm_regs regs;
	struct kvm_sregs sregs;
	struct kvm_fpu fpu;
	struct kvm_lapic_state lapic;
	struct kvm_xsave xsave;
	struct kvm_xcrs xcrs;
	struct kvm_vcpu_events events;
	struct kvm_mp_state mp_state;
} vcpu_state_t;
#else
/* Placeholder on architectures other than x86_64 */
typedef struct _vcpu_state {
	int dummy;
} vcpu_state_t;
#endif

typedef struct _migration_metadata migration_metadata_t;

/* Debugging, signal handling and the migration thread entry point */
void print_registers(void);
void timer_handler(int signum);
void *migration_handler(void *arg);

/* VCPU state handling */
void restore_cpu_state(vcpu_state_t cpu_state);
vcpu_state_t read_cpu_state(void);
vcpu_state_t save_cpu_state(void);
void write_cpu_state(void);
void init_cpu_state(uint64_t elf_entry);

/* Guest image loading; the callers visible in uhyve.c treat a non-zero
 * result of load_kernel()/load_checkpoint() as failure */
int load_kernel(uint8_t* mem, char* path);
int load_checkpoint(uint8_t* mem, char* path);
int load_migration_data(uint8_t* mem);
void wait_for_incomming_migration(migration_metadata_t *metadata, uint16_t listen_portno);

/* Architecture-specific setup and guest memory helpers */
void init_kvm_arch(void);
size_t determine_dest_offset(size_t src_addr);
void determine_dirty_pages(void (*save_page_handler)(void*, size_t, void*, size_t));

#endif
|
175
tools/utils.c
175
tools/utils.c
|
@ -1,175 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2017, Stefan Lankes, RWTH Aachen University
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#define _GNU_SOURCE
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
#include <limits.h>
|
||||
|
||||
#include "proxy.h"
|
||||
|
||||
#ifdef __x86_64__
|
||||
/*
 * Execute the CPUID instruction for leaf `code`.
 *
 * The value stored in *c on entry is loaded into ECX before the instruction
 * runs; on return *a, *b, *c and *d hold EAX, EBX, ECX and EDX.
 */
inline static void __cpuid(uint32_t code, uint32_t* a, uint32_t* b, uint32_t* c, uint32_t* d)
{
	const uint32_t ecx_in = *c;
	uint32_t eax_out, ebx_out, ecx_out, edx_out;

	__asm volatile ("cpuid"
		: "=a"(eax_out), "=b"(ebx_out), "=c"(ecx_out), "=d"(edx_out)
		: "0"(code), "2"(ecx_in));

	*a = eax_out;
	*b = ebx_out;
	*c = ecx_out;
	*d = edx_out;
}
|
||||
|
||||
// Value (0-9) of an ASCII decimal digit, or -1 for any other character.
static int brand_digit(char ch)
{
	return (ch >= '0' && ch <= '9') ? (ch - '0') : -1;
}

// Extract the frequency in MHz from a NUL-padded brand string of `len` bytes.
// Looks for the first "MHz", "GHz" or "THz" and decodes the four characters
// in front of it, either as "x.xx" or as "xxxx". Returns 0 if no unit is
// found or if the characters in front of it are not a well-formed number.
static uint32_t parse_brand_frequency(const char* brand, size_t len)
{
	for (size_t index = 0; index + 2 < len; index++) {
		uint32_t multiplier = 0;

		if ((brand[index+1] != 'H') || (brand[index+2] != 'z'))
			continue;

		if (brand[index] == 'M')
			multiplier = 1;
		else if (brand[index] == 'G')
			multiplier = 1000;
		else if (brand[index] == 'T')
			multiplier = 1000000;

		if (multiplier == 0)
			continue;

		// The number occupies the four characters before the unit; with
		// fewer characters available there is nothing valid to decode.
		if (index < 4)
			return 0;

		const int d4 = brand_digit(brand[index-4]);
		const int d3 = brand_digit(brand[index-3]);
		const int d2 = brand_digit(brand[index-2]);
		const int d1 = brand_digit(brand[index-1]);

		if (brand[index-3] == '.') { // format is "x.xx"
			if ((d4 < 0) || (d2 < 0) || (d1 < 0))
				return 0;

			return (uint32_t) d4 * multiplier
				+ (uint32_t) d2 * (multiplier / 10)
				+ (uint32_t) d1 * (multiplier / 100);
		}

		// format is "xxxx"
		if ((d4 < 0) || (d3 < 0) || (d2 < 0) || (d1 < 0))
			return 0;

		return ((uint32_t) d4 * 1000 + (uint32_t) d3 * 100
			+ (uint32_t) d2 * 10 + (uint32_t) d1) * multiplier;
	}

	return 0;
}

// Try to determine the frequency from the CPU brand.
// Code is derived from the manual "Intel Processor
// Identification and the CPUID Instruction".
//
// Returns the frequency in MHz, or 0 if the brand string leaves
// (0x80000002..0x80000004) are not available or contain no usable frequency.
static uint32_t get_frequency_from_brand(void)
{
	// Registers are collected in a properly aligned uint32_t array and then
	// copied into the character buffer; the extra byte keeps it terminated.
	uint32_t regs[4*3] = {0};
	char cpu_brand[sizeof(regs)+1] = {0};
	uint32_t extended;
	uint32_t ebx = 0, ecx = 0, edx = 0;

	// EAX of leaf 0x80000000 is compared against the highest leaf needed
	__cpuid(0x80000000, &extended, &ebx, &ecx, &edx);
	if (extended < 0x80000004)
		return 0;

	__cpuid(0x80000002, regs+0, regs+1, regs+2, regs+3);
	__cpuid(0x80000003, regs+4, regs+5, regs+6, regs+7);
	__cpuid(0x80000004, regs+8, regs+9, regs+10, regs+11);

	memcpy(cpu_brand, regs, sizeof(regs));

	return parse_brand_frequency(cpu_brand, sizeof(cpu_brand));
}
|
||||
#endif
|
||||
|
||||
/*
 * Determine the CPU frequency in MHz.
 *
 * Sources are tried in this order:
 *   1. (x86_64 only) the frequency encoded in the CPU brand string,
 *   2. cpuinfo_max_freq of cpu0 in sysfs (reported in kHz),
 *   3. the first "cpu MHz" line of /proc/cpuinfo, used only if the sysfs
 *      file cannot be opened.
 *
 * Returns 0 if none of the sources yields a value.
 */
uint32_t get_cpufreq(void)
{
	char buf[128];
	uint32_t mhz = 0;
	FILE* stream;

#ifdef __x86_64__
	mhz = get_frequency_from_brand();
	if (mhz > 0)
		return mhz;
#endif

	// TODO: fallback solution, on some systems is cpuinfo_max_freq the turbo frequency
	// => wrong value
	stream = fopen("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", "r");
	if (stream != NULL) {
		// cpuinfo_max_freq is in kHz
		if (fgets(buf, sizeof(buf), stream) != NULL)
			mhz = (uint32_t) atoi(buf) / 1000;

		fclose(stream);
		return mhz;
	}

	// Resorting to /proc/cpuinfo, however on most systems this will only
	// return the current frequency that might change over time.
	// Currently only needed when running inside a VM
	stream = fopen("/proc/cpuinfo", "r");
	if (stream == NULL)
		return mhz;

	// scan line by line for the entry holding the cpu frequency
	while (fgets(buf, sizeof(buf), stream) != NULL) {
		char* cursor = strstr(buf, "cpu MHz");

		if (cursor == NULL)
			continue;

		// skip forward to the first digit (or the end of the line)
		while ((*cursor != '\0') && ((*cursor < '0') || (*cursor > '9')))
			cursor++;

		mhz = (uint32_t) atoi(cursor);
		break;
	}

	fclose(stream);

	return mhz;
}
|
||||
|
||||
/*
 * Read up to `count` bytes from `fd` at `offset` into `buf`, repeating
 * pread() until the request is satisfied or end of file is reached.
 *
 * Calls interrupted by a signal (EINTR) are retried. Returns the number of
 * bytes read, which is smaller than `count` only at end of file, or -1 with
 * errno set on error. Requests larger than SSIZE_MAX fail with E2BIG.
 */
ssize_t pread_in_full(int fd, void *buf, size_t count, off_t offset)
{
	char *cursor = buf;
	size_t remaining = count;
	ssize_t done = 0;

	if (count > SSIZE_MAX) {
		errno = E2BIG;
		return -1;
	}

	while (remaining > 0) {
		const ssize_t chunk = pread(fd, cursor, remaining, offset);

		if (chunk == -1) {
			if (errno == EINTR)
				continue;	/* interrupted: retry the same request */
			return -1;
		}

		if (chunk == 0)
			break;		/* end of file */

		cursor += chunk;
		offset += chunk;
		done += chunk;
		remaining -= chunk;
	}

	return done;
}
|
Loading…
Add table
Reference in a new issue