mirror of
https://github.com/hermitcore/libhermit.git
synced 2025-03-09 00:00:03 +01:00
move proxy and hypervisor to a new submodule
This commit is contained in:
parent
69ed9f5354
commit
d62fb9f762
24 changed files with 8 additions and 7567 deletions
3
.gitmodules
vendored
3
.gitmodules
vendored
|
@ -6,3 +6,6 @@
|
||||||
path = usr/libomp
|
path = usr/libomp
|
||||||
url = https://github.com/hermitcore/libomp_oss.git
|
url = https://github.com/hermitcore/libomp_oss.git
|
||||||
branch = hermit
|
branch = hermit
|
||||||
|
[submodule "caves"]
|
||||||
|
path = caves
|
||||||
|
url = https://github.com/hermitcore/hermit-caves.git
|
||||||
|
|
|
@ -127,6 +127,9 @@ install(DIRECTORY include/hermit
|
||||||
FILES_MATCHING
|
FILES_MATCHING
|
||||||
PATTERN *.h)
|
PATTERN *.h)
|
||||||
|
|
||||||
|
install(FILES tools/init.sh
|
||||||
|
DESTINATION tools)
|
||||||
|
|
||||||
# provide custom target to only install libhermit without its runtimes which is
|
# provide custom target to only install libhermit without its runtimes which is
|
||||||
# needed during the compilation of the cross toolchain
|
# needed during the compilation of the cross toolchain
|
||||||
add_custom_target(hermit-bootstrap-install
|
add_custom_target(hermit-bootstrap-install
|
||||||
|
@ -150,7 +153,7 @@ add_custom_target(hermit
|
||||||
# be relocated for installation
|
# be relocated for installation
|
||||||
|
|
||||||
## HermitCore's own tools such as Qemu/KVM proxy
|
## HermitCore's own tools such as Qemu/KVM proxy
|
||||||
build_external(tools ${HERMIT_ROOT}/tools "")
|
build_external(caves ${HERMIT_ROOT}/caves "")
|
||||||
|
|
||||||
if("${TARGET_ARCH}" STREQUAL "x86_64-hermit")
|
if("${TARGET_ARCH}" STREQUAL "x86_64-hermit")
|
||||||
|
|
||||||
|
|
1
caves
Submodule
1
caves
Submodule
|
@ -0,0 +1 @@
|
||||||
|
Subproject commit 155b31e13779b8d2446781b779bfa6a6ae46748c
|
|
@ -1,53 +0,0 @@
|
||||||
cmake_minimum_required(VERSION 3.7)
|
|
||||||
project(hermit_tools)
|
|
||||||
|
|
||||||
include(CheckIncludeFiles)
|
|
||||||
include(../cmake/HermitCore-Paths.cmake)
|
|
||||||
|
|
||||||
option(ENABLE_RDMA_MIGRATION "Migration support via RDMA" OFF)
|
|
||||||
|
|
||||||
|
|
||||||
add_compile_options(-std=c99)
|
|
||||||
|
|
||||||
list(APPEND LIBS "-pthread")
|
|
||||||
set(SRC proxy.c
|
|
||||||
utils.c
|
|
||||||
uhyve.c
|
|
||||||
uhyve-net.c
|
|
||||||
uhyve-migration.c
|
|
||||||
uhyve-x86_64.c
|
|
||||||
uhyve-aarch64.c
|
|
||||||
uhyve-gdb-x86_64.c
|
|
||||||
uhyve-gdb-aarch64.c
|
|
||||||
)
|
|
||||||
|
|
||||||
### Optional migration via RDMA
|
|
||||||
if(ENABLE_RDMA_MIGRATION)
|
|
||||||
add_definitions(-D__RDMA_MIGRATION__)
|
|
||||||
list(APPEND LIBS "-libverbs")
|
|
||||||
set(SRC ${SRC} uhyve-migration-rdma.c)
|
|
||||||
else()
|
|
||||||
remove_definitions(-D__RDMA_MIGRATION__)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
check_include_files(asm/msr-index.h HAVE_MSR_INDEX_H)
|
|
||||||
|
|
||||||
if(HAVE_MSR_INDEX_H)
|
|
||||||
add_definitions(-DHAVE_MSR_INDEX_H=1)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
add_executable(proxy ${SRC})
|
|
||||||
|
|
||||||
target_compile_options(proxy PUBLIC ${LIBS})
|
|
||||||
target_compile_options(proxy PUBLIC -DMAX_ARGC_ENVC=${MAX_ARGC_ENVC})
|
|
||||||
target_link_libraries(proxy ${LIBS})
|
|
||||||
|
|
||||||
install(TARGETS proxy
|
|
||||||
DESTINATION bin)
|
|
||||||
|
|
||||||
install(FILES init.sh
|
|
||||||
DESTINATION tools)
|
|
||||||
|
|
||||||
# Show include files in IDE
|
|
||||||
file(GLOB_RECURSE TOOLS_INCLUDES "*.h")
|
|
||||||
add_custom_target(tools_includes_ide SOURCES ${TOOLS_INCLUDES})
|
|
1064
tools/proxy.c
1064
tools/proxy.c
File diff suppressed because it is too large
Load diff
|
@ -1,55 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2017, Stefan Lankes, RWTH Aachen University
|
|
||||||
* All rights reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions are met:
|
|
||||||
* * Redistributions of source code must retain the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of the University nor the names of its contributors
|
|
||||||
* may be used to endorse or promote products derived from this
|
|
||||||
* software without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
||||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
||||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
||||||
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
|
||||||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
||||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
|
||||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef __PROXY_H__
|
|
||||||
#define __PROXY_H__
|
|
||||||
|
|
||||||
#ifndef _GNU_SOURCE
|
|
||||||
#define _GNU_SOURCE
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <unistd.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
|
|
||||||
#define HERMIT_ELFOSABI 0x42
|
|
||||||
|
|
||||||
#define __HERMIT_exit 0
|
|
||||||
#define __HERMIT_write 1
|
|
||||||
#define __HERMIT_open 2
|
|
||||||
#define __HERMIT_close 3
|
|
||||||
#define __HERMIT_read 4
|
|
||||||
#define __HERMIT_lseek 5
|
|
||||||
|
|
||||||
int uhyve_init(char *path);
|
|
||||||
int uhyve_loop(int argc, char **argv);
|
|
||||||
|
|
||||||
// define some helper functions
|
|
||||||
uint32_t get_cpufreq(void);
|
|
||||||
ssize_t pread_in_full(int fd, void *buf, size_t count, off_t offset);
|
|
||||||
|
|
||||||
#endif
|
|
638
tools/queue.h
638
tools/queue.h
|
@ -1,638 +0,0 @@
|
||||||
/* $NetBSD: queue.h,v 1.68 2014/11/19 08:10:01 uebayasi Exp $ */
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Copyright (c) 1991, 1993
|
|
||||||
* The Regents of the University of California. All rights reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions
|
|
||||||
* are met:
|
|
||||||
* 1. Redistributions of source code must retain the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer.
|
|
||||||
* 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* 3. Neither the name of the University nor the names of its contributors
|
|
||||||
* may be used to endorse or promote products derived from this software
|
|
||||||
* without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
||||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
||||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
||||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
||||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
||||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
||||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
||||||
* SUCH DAMAGE.
|
|
||||||
*
|
|
||||||
* @(#)queue.h 8.5 (Berkeley) 8/20/94
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef _UHYVE_QUEUE_H_
|
|
||||||
#define _UHYVE_QUEUE_H_
|
|
||||||
|
|
||||||
/*
|
|
||||||
* This file defines five types of data structures: singly-linked lists,
|
|
||||||
* lists, simple queues, tail queues, and circular queues.
|
|
||||||
*
|
|
||||||
* A singly-linked list is headed by a single forward pointer. The
|
|
||||||
* elements are singly linked for minimum space and pointer manipulation
|
|
||||||
* overhead at the expense of O(n) removal for arbitrary elements. New
|
|
||||||
* elements can be added to the list after an existing element or at the
|
|
||||||
* head of the list. Elements being removed from the head of the list
|
|
||||||
* should use the explicit macro for this purpose for optimum
|
|
||||||
* efficiency. A singly-linked list may only be traversed in the forward
|
|
||||||
* direction. Singly-linked lists are ideal for applications with large
|
|
||||||
* datasets and few or no removals or for implementing a LIFO queue.
|
|
||||||
*
|
|
||||||
* A list is headed by a single forward pointer (or an array of forward
|
|
||||||
* pointers for a hash table header). The elements are doubly linked
|
|
||||||
* so that an arbitrary element can be removed without a need to
|
|
||||||
* traverse the list. New elements can be added to the list before
|
|
||||||
* or after an existing element or at the head of the list. A list
|
|
||||||
* may only be traversed in the forward direction.
|
|
||||||
*
|
|
||||||
* A simple queue is headed by a pair of pointers, one the head of the
|
|
||||||
* list and the other to the tail of the list. The elements are singly
|
|
||||||
* linked to save space, so elements can only be removed from the
|
|
||||||
* head of the list. New elements can be added to the list after
|
|
||||||
* an existing element, at the head of the list, or at the end of the
|
|
||||||
* list. A simple queue may only be traversed in the forward direction.
|
|
||||||
*
|
|
||||||
* A tail queue is headed by a pair of pointers, one to the head of the
|
|
||||||
* list and the other to the tail of the list. The elements are doubly
|
|
||||||
* linked so that an arbitrary element can be removed without a need to
|
|
||||||
* traverse the list. New elements can be added to the list before or
|
|
||||||
* after an existing element, at the head of the list, or at the end of
|
|
||||||
* the list. A tail queue may be traversed in either direction.
|
|
||||||
*
|
|
||||||
* A circle queue is headed by a pair of pointers, one to the head of the
|
|
||||||
* list and the other to the tail of the list. The elements are doubly
|
|
||||||
* linked so that an arbitrary element can be removed without a need to
|
|
||||||
* traverse the list. New elements can be added to the list before or after
|
|
||||||
* an existing element, at the head of the list, or at the end of the list.
|
|
||||||
* A circle queue may be traversed in either direction, but has a more
|
|
||||||
* complex end of list detection.
|
|
||||||
*
|
|
||||||
* For details on the use of these macros, see the queue(3) manual page.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Singly-linked List definitions.
|
|
||||||
*/
|
|
||||||
#define SLIST_HEAD(name, type) \
|
|
||||||
struct name { \
|
|
||||||
struct type *slh_first; /* first element */ \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define SLIST_HEAD_INITIALIZER(head) \
|
|
||||||
{ NULL }
|
|
||||||
|
|
||||||
#define SLIST_ENTRY(type) \
|
|
||||||
struct { \
|
|
||||||
struct type *sle_next; /* next element */ \
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Singly-linked List access methods.
|
|
||||||
*/
|
|
||||||
#define SLIST_FIRST(head) ((head)->slh_first)
|
|
||||||
#define SLIST_END(head) NULL
|
|
||||||
#define SLIST_EMPTY(head) ((head)->slh_first == NULL)
|
|
||||||
#define SLIST_NEXT(elm, field) ((elm)->field.sle_next)
|
|
||||||
|
|
||||||
#define SLIST_FOREACH(var, head, field) \
|
|
||||||
for((var) = (head)->slh_first; \
|
|
||||||
(var) != SLIST_END(head); \
|
|
||||||
(var) = (var)->field.sle_next)
|
|
||||||
|
|
||||||
#define SLIST_FOREACH_SAFE(var, head, field, tvar) \
|
|
||||||
for ((var) = SLIST_FIRST((head)); \
|
|
||||||
(var) != SLIST_END(head) && \
|
|
||||||
((tvar) = SLIST_NEXT((var), field), 1); \
|
|
||||||
(var) = (tvar))
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Singly-linked List functions.
|
|
||||||
*/
|
|
||||||
#define SLIST_INIT(head) do { \
|
|
||||||
(head)->slh_first = SLIST_END(head); \
|
|
||||||
} while (/*CONSTCOND*/0)
|
|
||||||
|
|
||||||
#define SLIST_INSERT_AFTER(slistelm, elm, field) do { \
|
|
||||||
(elm)->field.sle_next = (slistelm)->field.sle_next; \
|
|
||||||
(slistelm)->field.sle_next = (elm); \
|
|
||||||
} while (/*CONSTCOND*/0)
|
|
||||||
|
|
||||||
#define SLIST_INSERT_HEAD(head, elm, field) do { \
|
|
||||||
(elm)->field.sle_next = (head)->slh_first; \
|
|
||||||
(head)->slh_first = (elm); \
|
|
||||||
} while (/*CONSTCOND*/0)
|
|
||||||
|
|
||||||
#define SLIST_REMOVE_AFTER(slistelm, field) do { \
|
|
||||||
(slistelm)->field.sle_next = \
|
|
||||||
SLIST_NEXT(SLIST_NEXT((slistelm), field), field); \
|
|
||||||
} while (/*CONSTCOND*/0)
|
|
||||||
|
|
||||||
#define SLIST_REMOVE_HEAD(head, field) do { \
|
|
||||||
(head)->slh_first = (head)->slh_first->field.sle_next; \
|
|
||||||
} while (/*CONSTCOND*/0)
|
|
||||||
|
|
||||||
#define SLIST_REMOVE(head, elm, type, field) do { \
|
|
||||||
if ((head)->slh_first == (elm)) { \
|
|
||||||
SLIST_REMOVE_HEAD((head), field); \
|
|
||||||
} \
|
|
||||||
else { \
|
|
||||||
struct type *curelm = (head)->slh_first; \
|
|
||||||
while(curelm->field.sle_next != (elm)) \
|
|
||||||
curelm = curelm->field.sle_next; \
|
|
||||||
curelm->field.sle_next = \
|
|
||||||
curelm->field.sle_next->field.sle_next; \
|
|
||||||
} \
|
|
||||||
} while (/*CONSTCOND*/0)
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* List definitions.
|
|
||||||
*/
|
|
||||||
#define LIST_HEAD(name, type) \
|
|
||||||
struct name { \
|
|
||||||
struct type *lh_first; /* first element */ \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define LIST_HEAD_INITIALIZER(head) \
|
|
||||||
{ NULL }
|
|
||||||
|
|
||||||
#define LIST_ENTRY(type) \
|
|
||||||
struct { \
|
|
||||||
struct type *le_next; /* next element */ \
|
|
||||||
struct type **le_prev; /* address of previous next element */ \
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* List access methods.
|
|
||||||
*/
|
|
||||||
#define LIST_FIRST(head) ((head)->lh_first)
|
|
||||||
#define LIST_END(head) NULL
|
|
||||||
#define LIST_EMPTY(head) ((head)->lh_first == LIST_END(head))
|
|
||||||
#define LIST_NEXT(elm, field) ((elm)->field.le_next)
|
|
||||||
|
|
||||||
#define LIST_FOREACH(var, head, field) \
|
|
||||||
for ((var) = ((head)->lh_first); \
|
|
||||||
(var) != LIST_END(head); \
|
|
||||||
(var) = ((var)->field.le_next))
|
|
||||||
|
|
||||||
#define LIST_FOREACH_SAFE(var, head, field, tvar) \
|
|
||||||
for ((var) = LIST_FIRST((head)); \
|
|
||||||
(var) != LIST_END(head) && \
|
|
||||||
((tvar) = LIST_NEXT((var), field), 1); \
|
|
||||||
(var) = (tvar))
|
|
||||||
|
|
||||||
#define LIST_MOVE(head1, head2) do { \
|
|
||||||
LIST_INIT((head2)); \
|
|
||||||
if (!LIST_EMPTY((head1))) { \
|
|
||||||
(head2)->lh_first = (head1)->lh_first; \
|
|
||||||
LIST_INIT((head1)); \
|
|
||||||
} \
|
|
||||||
} while (/*CONSTCOND*/0)
|
|
||||||
|
|
||||||
/*
|
|
||||||
* List functions.
|
|
||||||
*/
|
|
||||||
#if defined(QUEUEDEBUG)
|
|
||||||
#define QUEUEDEBUG_LIST_INSERT_HEAD(head, elm, field) \
|
|
||||||
if ((head)->lh_first && \
|
|
||||||
(head)->lh_first->field.le_prev != &(head)->lh_first) \
|
|
||||||
QUEUEDEBUG_ABORT("LIST_INSERT_HEAD %p %s:%d", (head), \
|
|
||||||
__FILE__, __LINE__);
|
|
||||||
#define QUEUEDEBUG_LIST_OP(elm, field) \
|
|
||||||
if ((elm)->field.le_next && \
|
|
||||||
(elm)->field.le_next->field.le_prev != \
|
|
||||||
&(elm)->field.le_next) \
|
|
||||||
QUEUEDEBUG_ABORT("LIST_* forw %p %s:%d", (elm), \
|
|
||||||
__FILE__, __LINE__); \
|
|
||||||
if (*(elm)->field.le_prev != (elm)) \
|
|
||||||
QUEUEDEBUG_ABORT("LIST_* back %p %s:%d", (elm), \
|
|
||||||
__FILE__, __LINE__);
|
|
||||||
#define QUEUEDEBUG_LIST_POSTREMOVE(elm, field) \
|
|
||||||
(elm)->field.le_next = (void *)1L; \
|
|
||||||
(elm)->field.le_prev = (void *)1L;
|
|
||||||
#else
|
|
||||||
#define QUEUEDEBUG_LIST_INSERT_HEAD(head, elm, field)
|
|
||||||
#define QUEUEDEBUG_LIST_OP(elm, field)
|
|
||||||
#define QUEUEDEBUG_LIST_POSTREMOVE(elm, field)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define LIST_INIT(head) do { \
|
|
||||||
(head)->lh_first = LIST_END(head); \
|
|
||||||
} while (/*CONSTCOND*/0)
|
|
||||||
|
|
||||||
#define LIST_INSERT_AFTER(listelm, elm, field) do { \
|
|
||||||
QUEUEDEBUG_LIST_OP((listelm), field) \
|
|
||||||
if (((elm)->field.le_next = (listelm)->field.le_next) != \
|
|
||||||
LIST_END(head)) \
|
|
||||||
(listelm)->field.le_next->field.le_prev = \
|
|
||||||
&(elm)->field.le_next; \
|
|
||||||
(listelm)->field.le_next = (elm); \
|
|
||||||
(elm)->field.le_prev = &(listelm)->field.le_next; \
|
|
||||||
} while (/*CONSTCOND*/0)
|
|
||||||
|
|
||||||
#define LIST_INSERT_BEFORE(listelm, elm, field) do { \
|
|
||||||
QUEUEDEBUG_LIST_OP((listelm), field) \
|
|
||||||
(elm)->field.le_prev = (listelm)->field.le_prev; \
|
|
||||||
(elm)->field.le_next = (listelm); \
|
|
||||||
*(listelm)->field.le_prev = (elm); \
|
|
||||||
(listelm)->field.le_prev = &(elm)->field.le_next; \
|
|
||||||
} while (/*CONSTCOND*/0)
|
|
||||||
|
|
||||||
#define LIST_INSERT_HEAD(head, elm, field) do { \
|
|
||||||
QUEUEDEBUG_LIST_INSERT_HEAD((head), (elm), field) \
|
|
||||||
if (((elm)->field.le_next = (head)->lh_first) != LIST_END(head))\
|
|
||||||
(head)->lh_first->field.le_prev = &(elm)->field.le_next;\
|
|
||||||
(head)->lh_first = (elm); \
|
|
||||||
(elm)->field.le_prev = &(head)->lh_first; \
|
|
||||||
} while (/*CONSTCOND*/0)
|
|
||||||
|
|
||||||
#define LIST_REMOVE(elm, field) do { \
|
|
||||||
QUEUEDEBUG_LIST_OP((elm), field) \
|
|
||||||
if ((elm)->field.le_next != NULL) \
|
|
||||||
(elm)->field.le_next->field.le_prev = \
|
|
||||||
(elm)->field.le_prev; \
|
|
||||||
*(elm)->field.le_prev = (elm)->field.le_next; \
|
|
||||||
QUEUEDEBUG_LIST_POSTREMOVE((elm), field) \
|
|
||||||
} while (/*CONSTCOND*/0)
|
|
||||||
|
|
||||||
#define LIST_REPLACE(elm, elm2, field) do { \
|
|
||||||
if (((elm2)->field.le_next = (elm)->field.le_next) != NULL) \
|
|
||||||
(elm2)->field.le_next->field.le_prev = \
|
|
||||||
&(elm2)->field.le_next; \
|
|
||||||
(elm2)->field.le_prev = (elm)->field.le_prev; \
|
|
||||||
*(elm2)->field.le_prev = (elm2); \
|
|
||||||
QUEUEDEBUG_LIST_POSTREMOVE((elm), field) \
|
|
||||||
} while (/*CONSTCOND*/0)
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Simple queue definitions.
|
|
||||||
*/
|
|
||||||
#define SIMPLEQ_HEAD(name, type) \
|
|
||||||
struct name { \
|
|
||||||
struct type *sqh_first; /* first element */ \
|
|
||||||
struct type **sqh_last; /* addr of last next element */ \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define SIMPLEQ_HEAD_INITIALIZER(head) \
|
|
||||||
{ NULL, &(head).sqh_first }
|
|
||||||
|
|
||||||
#define SIMPLEQ_ENTRY(type) \
|
|
||||||
struct { \
|
|
||||||
struct type *sqe_next; /* next element */ \
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Simple queue access methods.
|
|
||||||
*/
|
|
||||||
#define SIMPLEQ_FIRST(head) ((head)->sqh_first)
|
|
||||||
#define SIMPLEQ_END(head) NULL
|
|
||||||
#define SIMPLEQ_EMPTY(head) ((head)->sqh_first == SIMPLEQ_END(head))
|
|
||||||
#define SIMPLEQ_NEXT(elm, field) ((elm)->field.sqe_next)
|
|
||||||
|
|
||||||
#define SIMPLEQ_FOREACH(var, head, field) \
|
|
||||||
for ((var) = ((head)->sqh_first); \
|
|
||||||
(var) != SIMPLEQ_END(head); \
|
|
||||||
(var) = ((var)->field.sqe_next))
|
|
||||||
|
|
||||||
#define SIMPLEQ_FOREACH_SAFE(var, head, field, next) \
|
|
||||||
for ((var) = ((head)->sqh_first); \
|
|
||||||
(var) != SIMPLEQ_END(head) && \
|
|
||||||
((next = ((var)->field.sqe_next)), 1); \
|
|
||||||
(var) = (next))
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Simple queue functions.
|
|
||||||
*/
|
|
||||||
#define SIMPLEQ_INIT(head) do { \
|
|
||||||
(head)->sqh_first = NULL; \
|
|
||||||
(head)->sqh_last = &(head)->sqh_first; \
|
|
||||||
} while (/*CONSTCOND*/0)
|
|
||||||
|
|
||||||
#define SIMPLEQ_INSERT_HEAD(head, elm, field) do { \
|
|
||||||
if (((elm)->field.sqe_next = (head)->sqh_first) == NULL) \
|
|
||||||
(head)->sqh_last = &(elm)->field.sqe_next; \
|
|
||||||
(head)->sqh_first = (elm); \
|
|
||||||
} while (/*CONSTCOND*/0)
|
|
||||||
|
|
||||||
#define SIMPLEQ_INSERT_TAIL(head, elm, field) do { \
|
|
||||||
(elm)->field.sqe_next = NULL; \
|
|
||||||
*(head)->sqh_last = (elm); \
|
|
||||||
(head)->sqh_last = &(elm)->field.sqe_next; \
|
|
||||||
} while (/*CONSTCOND*/0)
|
|
||||||
|
|
||||||
#define SIMPLEQ_INSERT_AFTER(head, listelm, elm, field) do { \
|
|
||||||
if (((elm)->field.sqe_next = (listelm)->field.sqe_next) == NULL)\
|
|
||||||
(head)->sqh_last = &(elm)->field.sqe_next; \
|
|
||||||
(listelm)->field.sqe_next = (elm); \
|
|
||||||
} while (/*CONSTCOND*/0)
|
|
||||||
|
|
||||||
#define SIMPLEQ_REMOVE_HEAD(head, field) do { \
|
|
||||||
if (((head)->sqh_first = (head)->sqh_first->field.sqe_next) == NULL) \
|
|
||||||
(head)->sqh_last = &(head)->sqh_first; \
|
|
||||||
} while (/*CONSTCOND*/0)
|
|
||||||
|
|
||||||
#define SIMPLEQ_REMOVE_AFTER(head, elm, field) do { \
|
|
||||||
if (((elm)->field.sqe_next = (elm)->field.sqe_next->field.sqe_next) \
|
|
||||||
== NULL) \
|
|
||||||
(head)->sqh_last = &(elm)->field.sqe_next; \
|
|
||||||
} while (/*CONSTCOND*/0)
|
|
||||||
|
|
||||||
#define SIMPLEQ_REMOVE(head, elm, type, field) do { \
|
|
||||||
if ((head)->sqh_first == (elm)) { \
|
|
||||||
SIMPLEQ_REMOVE_HEAD((head), field); \
|
|
||||||
} else { \
|
|
||||||
struct type *curelm = (head)->sqh_first; \
|
|
||||||
while (curelm->field.sqe_next != (elm)) \
|
|
||||||
curelm = curelm->field.sqe_next; \
|
|
||||||
if ((curelm->field.sqe_next = \
|
|
||||||
curelm->field.sqe_next->field.sqe_next) == NULL) \
|
|
||||||
(head)->sqh_last = &(curelm)->field.sqe_next; \
|
|
||||||
} \
|
|
||||||
} while (/*CONSTCOND*/0)
|
|
||||||
|
|
||||||
#define SIMPLEQ_CONCAT(head1, head2) do { \
|
|
||||||
if (!SIMPLEQ_EMPTY((head2))) { \
|
|
||||||
*(head1)->sqh_last = (head2)->sqh_first; \
|
|
||||||
(head1)->sqh_last = (head2)->sqh_last; \
|
|
||||||
SIMPLEQ_INIT((head2)); \
|
|
||||||
} \
|
|
||||||
} while (/*CONSTCOND*/0)
|
|
||||||
|
|
||||||
#define SIMPLEQ_LAST(head, type, field) \
|
|
||||||
(SIMPLEQ_EMPTY((head)) ? \
|
|
||||||
NULL : \
|
|
||||||
((struct type *)(void *) \
|
|
||||||
((char *)((head)->sqh_last) - offsetof(struct type, field))))
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Tail queue definitions.
|
|
||||||
*/
|
|
||||||
#define _TAILQ_HEAD(name, type, qual) \
|
|
||||||
struct name { \
|
|
||||||
qual type *tqh_first; /* first element */ \
|
|
||||||
qual type *qual *tqh_last; /* addr of last next element */ \
|
|
||||||
}
|
|
||||||
#define TAILQ_HEAD(name, type) _TAILQ_HEAD(name, struct type,)
|
|
||||||
|
|
||||||
#define TAILQ_HEAD_INITIALIZER(head) \
|
|
||||||
{ TAILQ_END(head), &(head).tqh_first }
|
|
||||||
|
|
||||||
#define _TAILQ_ENTRY(type, qual) \
|
|
||||||
struct { \
|
|
||||||
qual type *tqe_next; /* next element */ \
|
|
||||||
qual type *qual *tqe_prev; /* address of previous next element */\
|
|
||||||
}
|
|
||||||
#define TAILQ_ENTRY(type) _TAILQ_ENTRY(struct type,)
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Tail queue access methods.
|
|
||||||
*/
|
|
||||||
#define TAILQ_FIRST(head) ((head)->tqh_first)
|
|
||||||
#define TAILQ_END(head) (NULL)
|
|
||||||
#define TAILQ_NEXT(elm, field) ((elm)->field.tqe_next)
|
|
||||||
#define TAILQ_LAST(head, headname) \
|
|
||||||
(*(((struct headname *)((head)->tqh_last))->tqh_last))
|
|
||||||
#define TAILQ_PREV(elm, headname, field) \
|
|
||||||
(*(((struct headname *)((elm)->field.tqe_prev))->tqh_last))
|
|
||||||
#define TAILQ_EMPTY(head) (TAILQ_FIRST(head) == TAILQ_END(head))
|
|
||||||
|
|
||||||
|
|
||||||
#define TAILQ_FOREACH(var, head, field) \
|
|
||||||
for ((var) = ((head)->tqh_first); \
|
|
||||||
(var) != TAILQ_END(head); \
|
|
||||||
(var) = ((var)->field.tqe_next))
|
|
||||||
|
|
||||||
#define TAILQ_FOREACH_SAFE(var, head, field, next) \
|
|
||||||
for ((var) = ((head)->tqh_first); \
|
|
||||||
(var) != TAILQ_END(head) && \
|
|
||||||
((next) = TAILQ_NEXT(var, field), 1); (var) = (next))
|
|
||||||
|
|
||||||
#define TAILQ_FOREACH_REVERSE(var, head, headname, field) \
|
|
||||||
for ((var) = (*(((struct headname *)((head)->tqh_last))->tqh_last));\
|
|
||||||
(var) != TAILQ_END(head); \
|
|
||||||
(var) = (*(((struct headname *)((var)->field.tqe_prev))->tqh_last)))
|
|
||||||
|
|
||||||
#define TAILQ_FOREACH_REVERSE_SAFE(var, head, headname, field, prev) \
|
|
||||||
for ((var) = TAILQ_LAST((head), headname); \
|
|
||||||
(var) != TAILQ_END(head) && \
|
|
||||||
((prev) = TAILQ_PREV((var), headname, field), 1); (var) = (prev))
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Tail queue functions.
|
|
||||||
*/
|
|
||||||
#if defined(QUEUEDEBUG)
|
|
||||||
#define QUEUEDEBUG_TAILQ_INSERT_HEAD(head, elm, field) \
|
|
||||||
if ((head)->tqh_first && \
|
|
||||||
(head)->tqh_first->field.tqe_prev != &(head)->tqh_first) \
|
|
||||||
QUEUEDEBUG_ABORT("TAILQ_INSERT_HEAD %p %s:%d", (head), \
|
|
||||||
__FILE__, __LINE__);
|
|
||||||
#define QUEUEDEBUG_TAILQ_INSERT_TAIL(head, elm, field) \
|
|
||||||
if (*(head)->tqh_last != NULL) \
|
|
||||||
QUEUEDEBUG_ABORT("TAILQ_INSERT_TAIL %p %s:%d", (head), \
|
|
||||||
__FILE__, __LINE__);
|
|
||||||
#define QUEUEDEBUG_TAILQ_OP(elm, field) \
|
|
||||||
if ((elm)->field.tqe_next && \
|
|
||||||
(elm)->field.tqe_next->field.tqe_prev != \
|
|
||||||
&(elm)->field.tqe_next) \
|
|
||||||
QUEUEDEBUG_ABORT("TAILQ_* forw %p %s:%d", (elm), \
|
|
||||||
__FILE__, __LINE__); \
|
|
||||||
if (*(elm)->field.tqe_prev != (elm)) \
|
|
||||||
QUEUEDEBUG_ABORT("TAILQ_* back %p %s:%d", (elm), \
|
|
||||||
__FILE__, __LINE__);
|
|
||||||
#define QUEUEDEBUG_TAILQ_PREREMOVE(head, elm, field) \
|
|
||||||
if ((elm)->field.tqe_next == NULL && \
|
|
||||||
(head)->tqh_last != &(elm)->field.tqe_next) \
|
|
||||||
QUEUEDEBUG_ABORT("TAILQ_PREREMOVE head %p elm %p %s:%d",\
|
|
||||||
(head), (elm), __FILE__, __LINE__);
|
|
||||||
#define QUEUEDEBUG_TAILQ_POSTREMOVE(elm, field) \
|
|
||||||
(elm)->field.tqe_next = (void *)1L; \
|
|
||||||
(elm)->field.tqe_prev = (void *)1L;
|
|
||||||
#else
|
|
||||||
#define QUEUEDEBUG_TAILQ_INSERT_HEAD(head, elm, field)
|
|
||||||
#define QUEUEDEBUG_TAILQ_INSERT_TAIL(head, elm, field)
|
|
||||||
#define QUEUEDEBUG_TAILQ_OP(elm, field)
|
|
||||||
#define QUEUEDEBUG_TAILQ_PREREMOVE(head, elm, field)
|
|
||||||
#define QUEUEDEBUG_TAILQ_POSTREMOVE(elm, field)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define TAILQ_INIT(head) do { \
|
|
||||||
(head)->tqh_first = TAILQ_END(head); \
|
|
||||||
(head)->tqh_last = &(head)->tqh_first; \
|
|
||||||
} while (/*CONSTCOND*/0)
|
|
||||||
|
|
||||||
#define TAILQ_INSERT_HEAD(head, elm, field) do { \
|
|
||||||
QUEUEDEBUG_TAILQ_INSERT_HEAD((head), (elm), field) \
|
|
||||||
if (((elm)->field.tqe_next = (head)->tqh_first) != TAILQ_END(head))\
|
|
||||||
(head)->tqh_first->field.tqe_prev = \
|
|
||||||
&(elm)->field.tqe_next; \
|
|
||||||
else \
|
|
||||||
(head)->tqh_last = &(elm)->field.tqe_next; \
|
|
||||||
(head)->tqh_first = (elm); \
|
|
||||||
(elm)->field.tqe_prev = &(head)->tqh_first; \
|
|
||||||
} while (/*CONSTCOND*/0)
|
|
||||||
|
|
||||||
#define TAILQ_INSERT_TAIL(head, elm, field) do { \
|
|
||||||
QUEUEDEBUG_TAILQ_INSERT_TAIL((head), (elm), field) \
|
|
||||||
(elm)->field.tqe_next = TAILQ_END(head); \
|
|
||||||
(elm)->field.tqe_prev = (head)->tqh_last; \
|
|
||||||
*(head)->tqh_last = (elm); \
|
|
||||||
(head)->tqh_last = &(elm)->field.tqe_next; \
|
|
||||||
} while (/*CONSTCOND*/0)
|
|
||||||
|
|
||||||
#define TAILQ_INSERT_AFTER(head, listelm, elm, field) do { \
|
|
||||||
QUEUEDEBUG_TAILQ_OP((listelm), field) \
|
|
||||||
if (((elm)->field.tqe_next = (listelm)->field.tqe_next) != \
|
|
||||||
TAILQ_END(head)) \
|
|
||||||
(elm)->field.tqe_next->field.tqe_prev = \
|
|
||||||
&(elm)->field.tqe_next; \
|
|
||||||
else \
|
|
||||||
(head)->tqh_last = &(elm)->field.tqe_next; \
|
|
||||||
(listelm)->field.tqe_next = (elm); \
|
|
||||||
(elm)->field.tqe_prev = &(listelm)->field.tqe_next; \
|
|
||||||
} while (/*CONSTCOND*/0)
|
|
||||||
|
|
||||||
#define TAILQ_INSERT_BEFORE(listelm, elm, field) do { \
|
|
||||||
QUEUEDEBUG_TAILQ_OP((listelm), field) \
|
|
||||||
(elm)->field.tqe_prev = (listelm)->field.tqe_prev; \
|
|
||||||
(elm)->field.tqe_next = (listelm); \
|
|
||||||
*(listelm)->field.tqe_prev = (elm); \
|
|
||||||
(listelm)->field.tqe_prev = &(elm)->field.tqe_next; \
|
|
||||||
} while (/*CONSTCOND*/0)
|
|
||||||
|
|
||||||
#define TAILQ_REMOVE(head, elm, field) do { \
|
|
||||||
QUEUEDEBUG_TAILQ_PREREMOVE((head), (elm), field) \
|
|
||||||
QUEUEDEBUG_TAILQ_OP((elm), field) \
|
|
||||||
if (((elm)->field.tqe_next) != TAILQ_END(head)) \
|
|
||||||
(elm)->field.tqe_next->field.tqe_prev = \
|
|
||||||
(elm)->field.tqe_prev; \
|
|
||||||
else \
|
|
||||||
(head)->tqh_last = (elm)->field.tqe_prev; \
|
|
||||||
*(elm)->field.tqe_prev = (elm)->field.tqe_next; \
|
|
||||||
QUEUEDEBUG_TAILQ_POSTREMOVE((elm), field); \
|
|
||||||
} while (/*CONSTCOND*/0)
|
|
||||||
|
|
||||||
#define TAILQ_REPLACE(head, elm, elm2, field) do { \
|
|
||||||
if (((elm2)->field.tqe_next = (elm)->field.tqe_next) != \
|
|
||||||
TAILQ_END(head)) \
|
|
||||||
(elm2)->field.tqe_next->field.tqe_prev = \
|
|
||||||
&(elm2)->field.tqe_next; \
|
|
||||||
else \
|
|
||||||
(head)->tqh_last = &(elm2)->field.tqe_next; \
|
|
||||||
(elm2)->field.tqe_prev = (elm)->field.tqe_prev; \
|
|
||||||
*(elm2)->field.tqe_prev = (elm2); \
|
|
||||||
QUEUEDEBUG_TAILQ_POSTREMOVE((elm), field); \
|
|
||||||
} while (/*CONSTCOND*/0)
|
|
||||||
|
|
||||||
#define TAILQ_CONCAT(head1, head2, field) do { \
|
|
||||||
if (!TAILQ_EMPTY(head2)) { \
|
|
||||||
*(head1)->tqh_last = (head2)->tqh_first; \
|
|
||||||
(head2)->tqh_first->field.tqe_prev = (head1)->tqh_last; \
|
|
||||||
(head1)->tqh_last = (head2)->tqh_last; \
|
|
||||||
TAILQ_INIT((head2)); \
|
|
||||||
} \
|
|
||||||
} while (/*CONSTCOND*/0)
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Singly-linked Tail queue declarations.
|
|
||||||
*/
|
|
||||||
#define STAILQ_HEAD(name, type) \
|
|
||||||
struct name { \
|
|
||||||
struct type *stqh_first; /* first element */ \
|
|
||||||
struct type **stqh_last; /* addr of last next element */ \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define STAILQ_HEAD_INITIALIZER(head) \
|
|
||||||
{ NULL, &(head).stqh_first }
|
|
||||||
|
|
||||||
#define STAILQ_ENTRY(type) \
|
|
||||||
struct { \
|
|
||||||
struct type *stqe_next; /* next element */ \
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Singly-linked Tail queue access methods.
|
|
||||||
*/
|
|
||||||
#define STAILQ_FIRST(head) ((head)->stqh_first)
|
|
||||||
#define STAILQ_END(head) NULL
|
|
||||||
#define STAILQ_NEXT(elm, field) ((elm)->field.stqe_next)
|
|
||||||
#define STAILQ_EMPTY(head) (STAILQ_FIRST(head) == STAILQ_END(head))
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Singly-linked Tail queue functions.
|
|
||||||
*/
|
|
||||||
#define STAILQ_INIT(head) do { \
|
|
||||||
(head)->stqh_first = NULL; \
|
|
||||||
(head)->stqh_last = &(head)->stqh_first; \
|
|
||||||
} while (/*CONSTCOND*/0)
|
|
||||||
|
|
||||||
/*
 * Insert elm at the front of the queue. When the queue was empty, elm is
 * also the last element, so the tail pointer is moved to elm's next slot.
 */
#define	STAILQ_INSERT_HEAD(head, elm, field) do {			\
	if (((elm)->field.stqe_next = (head)->stqh_first) == NULL)	\
		(head)->stqh_last = &(elm)->field.stqe_next;		\
	(head)->stqh_first = (elm);					\
} while (/*CONSTCOND*/0)
|
|
||||||
|
|
||||||
/*
 * Append elm to the queue: terminate it, store it through the tail
 * pointer, then advance the tail pointer to elm's next slot.
 */
#define	STAILQ_INSERT_TAIL(head, elm, field) do {			\
	(elm)->field.stqe_next = NULL;					\
	*(head)->stqh_last = (elm);					\
	(head)->stqh_last = &(elm)->field.stqe_next;			\
} while (/*CONSTCOND*/0)
|
|
||||||
|
|
||||||
/*
 * Insert elm directly behind listelm. When listelm was the last element,
 * the tail pointer is moved to elm's next slot.
 */
#define	STAILQ_INSERT_AFTER(head, listelm, elm, field) do {		\
	if (((elm)->field.stqe_next = (listelm)->field.stqe_next) == NULL)\
		(head)->stqh_last = &(elm)->field.stqe_next;		\
	(listelm)->field.stqe_next = (elm);				\
} while (/*CONSTCOND*/0)
|
|
||||||
|
|
||||||
/*
 * Unlink the first element. The queue must not be empty, because the
 * first element is dereferenced unconditionally. When the queue becomes
 * empty, the tail pointer is reset to the head's stqh_first slot.
 */
#define	STAILQ_REMOVE_HEAD(head, field) do {				\
	if (((head)->stqh_first = (head)->stqh_first->field.stqe_next) == NULL) \
		(head)->stqh_last = &(head)->stqh_first;		\
} while (/*CONSTCOND*/0)
|
|
||||||
|
|
||||||
/*
 * Unlink elm from the queue. The head case is delegated to
 * STAILQ_REMOVE_HEAD; otherwise the list is walked to find elm's
 * predecessor. elm must be a member of the queue: the walk has no
 * end-of-list check and would dereference NULL otherwise.
 * The expansion declares a local named `curelm`, so arguments must not
 * refer to a caller variable of that name.
 */
#define	STAILQ_REMOVE(head, elm, type, field) do {			\
	if ((head)->stqh_first == (elm)) {				\
		STAILQ_REMOVE_HEAD((head), field);			\
	} else {							\
		struct type *curelm = (head)->stqh_first;		\
		while (curelm->field.stqe_next != (elm))		\
			curelm = curelm->field.stqe_next;		\
		if ((curelm->field.stqe_next =				\
			curelm->field.stqe_next->field.stqe_next) == NULL) \
			    (head)->stqh_last = &(curelm)->field.stqe_next; \
	}								\
} while (/*CONSTCOND*/0)
|
|
||||||
|
|
||||||
/*
 * Iterate var over every element from first to last. The loop body must
 * not unlink var, because its next pointer is read after the body runs.
 */
#define	STAILQ_FOREACH(var, head, field)				\
	for ((var) = ((head)->stqh_first);				\
		(var);							\
		(var) = ((var)->field.stqe_next))
|
|
||||||
|
|
||||||
/*
 * Like STAILQ_FOREACH, but the successor is saved in tvar before the body
 * runs, so the body may unlink or free var.
 */
#define	STAILQ_FOREACH_SAFE(var, head, field, tvar)			\
	for ((var) = STAILQ_FIRST((head));				\
	    (var) && ((tvar) = STAILQ_NEXT((var), field), 1);		\
	    (var) = (tvar))
|
|
||||||
|
|
||||||
/*
 * Append all elements of head2 to the end of head1, then re-initialise
 * head2 with STAILQ_INIT. Does nothing when head2 is empty.
 */
#define	STAILQ_CONCAT(head1, head2) do {				\
	if (!STAILQ_EMPTY((head2))) {					\
		*(head1)->stqh_last = (head2)->stqh_first;		\
		(head1)->stqh_last = (head2)->stqh_last;		\
		STAILQ_INIT((head2));					\
	}								\
} while (/*CONSTCOND*/0)
|
|
||||||
|
|
||||||
/*
 * Last element of the queue, or NULL when it is empty. The tail pointer
 * addresses the `field` link inside the last element, so subtracting the
 * link's offset within `struct type` recovers the element itself.
 * Requires offsetof to be available at the point of use.
 */
#define	STAILQ_LAST(head, type, field)					\
	(STAILQ_EMPTY((head)) ?						\
		NULL :							\
	        ((struct type *)(void *)				\
		((char *)((head)->stqh_last) - offsetof(struct type, field))))
|
|
||||||
|
|
||||||
#endif /* !_UHYVE_QUEUE_H_ */
|
|
|
@ -1,503 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2018, Stefan Lankes, RWTH Aachen University
|
|
||||||
* All rights reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions are met:
|
|
||||||
* * Redistributions of source code must retain the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of the University nor the names of its contributors
|
|
||||||
* may be used to endorse or promote products derived from this
|
|
||||||
* software without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
||||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
||||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
||||||
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
|
||||||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
||||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
|
||||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifdef __aarch64__
|
|
||||||
|
|
||||||
#define _GNU_SOURCE
|
|
||||||
|
|
||||||
#include <unistd.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <stdbool.h>
|
|
||||||
#include <errno.h>
|
|
||||||
#include <fcntl.h>
|
|
||||||
#include <sched.h>
|
|
||||||
#include <signal.h>
|
|
||||||
#include <limits.h>
|
|
||||||
#include <pthread.h>
|
|
||||||
#include <semaphore.h>
|
|
||||||
#include <elf.h>
|
|
||||||
#include <err.h>
|
|
||||||
#include <poll.h>
|
|
||||||
#include <sys/wait.h>
|
|
||||||
#include <sys/ioctl.h>
|
|
||||||
#include <sys/mman.h>
|
|
||||||
#include <sys/ioctl.h>
|
|
||||||
#include <sys/mman.h>
|
|
||||||
#include <sys/stat.h>
|
|
||||||
#include <sys/types.h>
|
|
||||||
#include <sys/time.h>
|
|
||||||
#include <sys/eventfd.h>
|
|
||||||
#include <linux/const.h>
|
|
||||||
#include <linux/kvm.h>
|
|
||||||
|
|
||||||
#include "uhyve.h"
|
|
||||||
#include "proxy.h"
|
|
||||||
|
|
||||||
/* Subtracted from guest-physical addresses to index into guest memory. */
#define GUEST_OFFSET		0x0

#define GIC_SPI_IRQ_BASE	32

/*
 * Guest-physical placement of the GICv2 distributor (GICD) followed
 * directly by the CPU interface (GICC). Sizes are defined first so that
 * every macro reads top-down.
 */
#define GICD_SIZE		0x10000ULL
#define GICC_SIZE		0x20000ULL
#define GIC_SIZE		(GICD_SIZE + GICC_SIZE)
#define GICD_BASE		(1ULL << 39)
#define GICC_BASE		(GICD_BASE + GICD_SIZE)

#define KVM_GAP_SIZE		(GIC_SIZE)
#define KVM_GAP_START		GICD_BASE

#define PAGE_SIZE		0x1000

#ifndef offsetof
#define offsetof(TYPE, MEMBER)	((size_t) &((TYPE *)0)->MEMBER)
#endif

/* KVM register id of the 64-bit core register named by member path x. */
#define ARM64_CORE_REG(x)	(KVM_REG_ARM64 | KVM_REG_SIZE_U64 |\
				 KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
/* Leading ARM64_SYS_REG() arguments shared by the CPU id registers. */
#define ARM_CPU_ID		3, 0, 0, 0
#define ARM_CPU_ID_MPIDR	5

/* Capabilities probed in init_kvm_arch(). */
static bool cap_irqfd = false;
static bool cap_read_only = false;
/* Descriptor of the in-kernel GIC device; stays -1 while none was created. */
static int gic_fd = -1;

/* State owned by the architecture-independent part of uhyve. */
extern size_t guest_size;
extern uint64_t elf_entry;
extern uint8_t* klog;
extern bool verbose;
extern uint32_t ncores;
extern uint8_t* guest_mem;
extern int kvm, vmfd, netfd, efd;
extern uint8_t* mboot;
extern __thread struct kvm_run *run;
extern __thread int vcpufd;
extern __thread uint32_t cpuid;
|
|
||||||
|
|
||||||
void print_registers(void)
|
|
||||||
{
|
|
||||||
struct kvm_one_reg reg;
|
|
||||||
uint64_t data;
|
|
||||||
|
|
||||||
fprintf(stderr, "\n Dump state of CPU %d\n\n", cpuid);
|
|
||||||
fprintf(stderr, " Registers\n");
|
|
||||||
fprintf(stderr, " =========\n");
|
|
||||||
|
|
||||||
reg.addr = (uint64_t)&data;
|
|
||||||
reg.id = ARM64_CORE_REG(regs.pc);
|
|
||||||
kvm_ioctl(vcpufd, KVM_GET_ONE_REG, ®);
|
|
||||||
fprintf(stderr, " PC: 0x%016lx\n", data);
|
|
||||||
|
|
||||||
reg.id = ARM64_CORE_REG(regs.pstate);
|
|
||||||
kvm_ioctl(vcpufd, KVM_GET_ONE_REG, ®);
|
|
||||||
fprintf(stderr, " PSTATE: 0x%016lx\n", data);
|
|
||||||
|
|
||||||
reg.id = ARM64_CORE_REG(sp_el1);
|
|
||||||
kvm_ioctl(vcpufd, KVM_GET_ONE_REG, ®);
|
|
||||||
fprintf(stderr, " SP_EL1: 0x%016lx\n", data);
|
|
||||||
|
|
||||||
reg.id = ARM64_CORE_REG(regs.regs[30]);
|
|
||||||
kvm_ioctl(vcpufd, KVM_GET_ONE_REG, ®);
|
|
||||||
fprintf(stderr, " LR: 0x%016lx\n", data);
|
|
||||||
|
|
||||||
reg.id = ARM64_SYS_REG(ARM_CPU_ID, ARM_CPU_ID_MPIDR);
|
|
||||||
kvm_ioctl(vcpufd, KVM_GET_ONE_REG, ®);
|
|
||||||
fprintf(stderr, " MPIDR: 0x%016lx\n", data);
|
|
||||||
|
|
||||||
for(int i=0; i<=29; i+=2)
|
|
||||||
{
|
|
||||||
reg.id = ARM64_CORE_REG(regs.regs[i]);
|
|
||||||
kvm_ioctl(vcpufd, KVM_GET_ONE_REG, ®);
|
|
||||||
fprintf(stderr, " X%d:\t 0x%016lx\t", i, data);
|
|
||||||
|
|
||||||
reg.id = ARM64_CORE_REG(regs.regs[i+1]);
|
|
||||||
kvm_ioctl(vcpufd, KVM_GET_ONE_REG, ®);
|
|
||||||
fprintf(stderr, " X%d:\t0x%016lx\n", i+1, data);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/*
 * Migration is not implemented on aarch64: terminate the process.
 * errx() is used instead of err() because no system call failed, so
 * appending strerror(errno) would print an unrelated error text.
 */
vcpu_state_t read_cpu_state(void)
{
	errx(1, "Migration is currently not supported!");
}
|
|
||||||
|
|
||||||
/*
 * Migration is not implemented on aarch64: terminate the process.
 * errx() avoids the unrelated errno text that err() would append.
 */
void* migration_handler(void* arg)
{
	(void) arg;

	errx(1, "Migration is currently not supported!");
}
|
|
||||||
|
|
||||||
/*
 * Checkpointing is not implemented on aarch64: terminate the process.
 * errx() avoids the unrelated errno text that err() would append.
 */
void timer_handler(int signum)
{
	(void) signum;

	errx(1, "Checkpointing is currently not supported!");
}
|
|
||||||
|
|
||||||
/*
 * Checkpointing is not implemented on aarch64: terminate the process.
 * errx() avoids the unrelated errno text that err() would append.
 */
void restore_cpu_state(vcpu_state_t state)
{
	(void) state;

	errx(1, "Checkpointing is currently not supported!");
}
|
|
||||||
|
|
||||||
/*
 * Checkpointing is not implemented on aarch64: terminate the process.
 * errx() avoids the unrelated errno text that err() would append.
 */
vcpu_state_t save_cpu_state(void)
{
	errx(1, "Checkpointing is currently not supported!");
}
|
|
||||||
|
|
||||||
|
|
||||||
/*
 * Checkpointing is not implemented on aarch64: terminate the process.
 * errx() avoids the unrelated errno text that err() would append.
 */
void write_cpu_state(void)
{
	errx(1, "Checkpointing is currently not supported!");
}
|
|
||||||
|
|
||||||
/*
 * Checkpointing is not implemented on aarch64: terminate the process.
 * errx() avoids the unrelated errno text that err() would append.
 */
int load_checkpoint(uint8_t* mem, char* path)
{
	(void) mem;
	(void) path;

	errx(1, "Checkpointing is currently not supported!");
}
|
|
||||||
|
|
||||||
/*
 * Migration is not implemented on aarch64: terminate the process.
 * The message now names migration (it previously said "Checkpointing"),
 * and errx() avoids the unrelated errno text that err() would append.
 */
int load_migration_data(uint8_t* mem)
{
	(void) mem;

	errx(1, "Migration is currently not supported!");
}
|
|
||||||
|
|
||||||
/*
 * Migration is not implemented on aarch64: terminate the process.
 * The message now names migration (it previously said "Checkpointing"),
 * and errx() avoids the unrelated errno text that err() would append.
 */
void wait_for_incomming_migration(migration_metadata_t *metadata, uint16_t listen_portno)
{
	(void) metadata;
	(void) listen_portno;

	errx(1, "Migration is currently not supported!");
}
|
|
||||||
|
|
||||||
void init_cpu_state(uint64_t elf_entry)
|
|
||||||
{
|
|
||||||
struct kvm_vcpu_init vcpu_init = {
|
|
||||||
.features = 0,
|
|
||||||
};
|
|
||||||
struct kvm_vcpu_init preferred_init;
|
|
||||||
|
|
||||||
if (!ioctl(vmfd, KVM_ARM_PREFERRED_TARGET, &preferred_init)) {
|
|
||||||
if ((preferred_init.target == KVM_ARM_TARGET_CORTEX_A57) ||
|
|
||||||
(preferred_init.target == KVM_ARM_TARGET_CORTEX_A53)) {
|
|
||||||
vcpu_init.target = preferred_init.target;
|
|
||||||
} else {
|
|
||||||
vcpu_init.target = KVM_ARM_TARGET_GENERIC_V8;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
vcpu_init.target = KVM_ARM_TARGET_GENERIC_V8;
|
|
||||||
}
|
|
||||||
|
|
||||||
kvm_ioctl(vcpufd, KVM_ARM_VCPU_INIT, &vcpu_init);
|
|
||||||
|
|
||||||
// be sure that the multiprocessor is runable
|
|
||||||
struct kvm_mp_state mp_state = { KVM_MP_STATE_RUNNABLE };
|
|
||||||
kvm_ioctl(vcpufd, KVM_SET_MP_STATE, &mp_state);
|
|
||||||
|
|
||||||
struct kvm_one_reg reg;
|
|
||||||
uint64_t data;
|
|
||||||
|
|
||||||
/* pstate = all interrupts masked */
|
|
||||||
data = PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | PSR_MODE_EL1h;
|
|
||||||
reg.id = ARM64_CORE_REG(regs.pstate);
|
|
||||||
reg.addr = (uint64_t)&data;
|
|
||||||
kvm_ioctl(vcpufd, KVM_SET_ONE_REG, ®);
|
|
||||||
|
|
||||||
#if 0
|
|
||||||
/* x0...x3 = 0 */
|
|
||||||
data = 0;
|
|
||||||
reg.id = ARM64_CORE_REG(regs.regs[0]);
|
|
||||||
kvm_ioctl(vcpufd, KVM_SET_ONE_REG, ®);
|
|
||||||
|
|
||||||
reg.id = ARM64_CORE_REG(regs.regs[1]);
|
|
||||||
kvm_ioctl(vcpufd, KVM_SET_ONE_REG, ®);
|
|
||||||
|
|
||||||
reg.id = ARM64_CORE_REG(regs.regs[2]);
|
|
||||||
kvm_ioctl(vcpufd, KVM_SET_ONE_REG, ®);
|
|
||||||
|
|
||||||
reg.id = ARM64_CORE_REG(regs.regs[3]);
|
|
||||||
kvm_ioctl(vcpufd, KVM_SET_ONE_REG, ®);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* set start address */
|
|
||||||
data = elf_entry;
|
|
||||||
reg.id = ARM64_CORE_REG(regs.pc);
|
|
||||||
kvm_ioctl(vcpufd, KVM_SET_ONE_REG, ®);
|
|
||||||
|
|
||||||
if (gic_fd > 0) {
|
|
||||||
int lines = 1;
|
|
||||||
uint32_t nr_irqs = lines * 32 + GIC_SPI_IRQ_BASE;
|
|
||||||
struct kvm_device_attr nr_irqs_attr = {
|
|
||||||
.group = KVM_DEV_ARM_VGIC_GRP_NR_IRQS,
|
|
||||||
.addr = (uint64_t)&nr_irqs,
|
|
||||||
};
|
|
||||||
struct kvm_device_attr vgic_init_attr = {
|
|
||||||
.group = KVM_DEV_ARM_VGIC_GRP_CTRL,
|
|
||||||
.attr = KVM_DEV_ARM_VGIC_CTRL_INIT,
|
|
||||||
};
|
|
||||||
|
|
||||||
kvm_ioctl(gic_fd, KVM_SET_DEVICE_ATTR, &nr_irqs_attr);
|
|
||||||
kvm_ioctl(gic_fd, KVM_SET_DEVICE_ATTR, &vgic_init_attr);
|
|
||||||
}
|
|
||||||
|
|
||||||
// only one core is able to enter startup code
|
|
||||||
// => the wait for the predecessor core
|
|
||||||
while (*((volatile uint32_t*) (mboot + 0x120)) < cpuid)
|
|
||||||
pthread_yield();
|
|
||||||
*((volatile uint32_t*) (mboot + 0x130)) = cpuid;
|
|
||||||
}
|
|
||||||
|
|
||||||
void init_kvm_arch(void)
|
|
||||||
{
|
|
||||||
guest_mem = mmap(NULL, guest_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
|
||||||
if (guest_mem == MAP_FAILED)
|
|
||||||
err(1, "mmap failed");
|
|
||||||
|
|
||||||
const char* merge = getenv("HERMIT_MERGEABLE");
|
|
||||||
if (merge && (strcmp(merge, "0") != 0)) {
|
|
||||||
/*
|
|
||||||
* The KSM feature is intended for applications that generate
|
|
||||||
* many instances of the same data (e.g., virtualization systems
|
|
||||||
* such as KVM). It can consume a lot of processing power!
|
|
||||||
*/
|
|
||||||
madvise(guest_mem, guest_size, MADV_MERGEABLE);
|
|
||||||
if (verbose)
|
|
||||||
fprintf(stderr, "VM uses KSN feature \"mergeable\" to reduce the memory footprint.\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
const char* hugepage = getenv("HERMIT_HUGEPAGE");
|
|
||||||
if (merge && (strcmp(merge, "0") != 0)) {
|
|
||||||
madvise(guest_mem, guest_size, MADV_HUGEPAGE);
|
|
||||||
if (verbose)
|
|
||||||
fprintf(stderr, "VM uses huge pages to improve the performance.\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
cap_read_only = kvm_ioctl(vmfd, KVM_CHECK_EXTENSION, KVM_CAP_READONLY_MEM) <= 0 ? false : true;
|
|
||||||
if (!cap_read_only)
|
|
||||||
err(1, "the support of KVM_CAP_READONLY_MEM is curently required");
|
|
||||||
|
|
||||||
struct kvm_userspace_memory_region kvm_region = {
|
|
||||||
.slot = 0,
|
|
||||||
.guest_phys_addr = 0,
|
|
||||||
.memory_size = PAGE_SIZE,
|
|
||||||
.userspace_addr = (uint64_t) guest_mem,
|
|
||||||
.flags = KVM_MEM_READONLY,
|
|
||||||
};
|
|
||||||
kvm_ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &kvm_region);
|
|
||||||
|
|
||||||
kvm_region = (struct kvm_userspace_memory_region) {
|
|
||||||
.slot = 1,
|
|
||||||
.guest_phys_addr = PAGE_SIZE,
|
|
||||||
.memory_size = guest_size - PAGE_SIZE,
|
|
||||||
.userspace_addr = (uint64_t) guest_mem + PAGE_SIZE,
|
|
||||||
#ifdef USE_DIRTY_LOG
|
|
||||||
.flags = KVM_MEM_LOG_DIRTY_PAGES,
|
|
||||||
#else
|
|
||||||
.flags = 0,
|
|
||||||
#endif
|
|
||||||
};
|
|
||||||
kvm_ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &kvm_region);
|
|
||||||
|
|
||||||
#if 0
|
|
||||||
/* Create interrupt controller GICv2 */
|
|
||||||
uint64_t cpu_if_addr = GICC_BASE;
|
|
||||||
uint64_t dist_addr = GICD_BASE;
|
|
||||||
struct kvm_device_attr cpu_if_attr = {
|
|
||||||
.group = KVM_DEV_ARM_VGIC_GRP_ADDR,
|
|
||||||
.attr = KVM_VGIC_V2_ADDR_TYPE_CPU,
|
|
||||||
.addr = (uint64_t)&cpu_if_addr,
|
|
||||||
};
|
|
||||||
struct kvm_create_device gic_device = {
|
|
||||||
.flags = 0,
|
|
||||||
.type = KVM_DEV_TYPE_ARM_VGIC_V2,
|
|
||||||
};
|
|
||||||
struct kvm_device_attr dist_attr = {
|
|
||||||
.group = KVM_DEV_ARM_VGIC_GRP_ADDR,
|
|
||||||
.attr = KVM_VGIC_V2_ADDR_TYPE_DIST,
|
|
||||||
.addr = (uint64_t)&dist_addr,
|
|
||||||
};
|
|
||||||
kvm_ioctl(vmfd, KVM_CREATE_DEVICE, &gic_device);
|
|
||||||
|
|
||||||
gic_fd = gic_device.fd;
|
|
||||||
kvm_ioctl(gic_fd, KVM_SET_DEVICE_ATTR, &cpu_if_attr);
|
|
||||||
kvm_ioctl(gic_fd, KVM_SET_DEVICE_ATTR, &dist_attr);
|
|
||||||
#else
|
|
||||||
/* Create interrupt controller GICv2 */
|
|
||||||
struct kvm_arm_device_addr gic_addr[] = {
|
|
||||||
[0] = {
|
|
||||||
.id = KVM_VGIC_V2_ADDR_TYPE_DIST |
|
|
||||||
(KVM_ARM_DEVICE_VGIC_V2 << KVM_ARM_DEVICE_ID_SHIFT),
|
|
||||||
.addr = GICD_BASE,
|
|
||||||
},
|
|
||||||
[1] = {
|
|
||||||
.id = KVM_VGIC_V2_ADDR_TYPE_CPU |
|
|
||||||
(KVM_ARM_DEVICE_VGIC_V2 << KVM_ARM_DEVICE_ID_SHIFT),
|
|
||||||
.addr = GICC_BASE,
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
kvm_ioctl(vmfd, KVM_CREATE_IRQCHIP, NULL);
|
|
||||||
kvm_ioctl(vmfd, KVM_ARM_SET_DEVICE_ADDR, &gic_addr[0]);
|
|
||||||
kvm_ioctl(vmfd, KVM_ARM_SET_DEVICE_ADDR, &gic_addr[1]);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
//fprintf(stderr, "Create gicd at 0x%llx\n", GICD_BASE);
|
|
||||||
//fprintf(stderr, "Create gicc at 0x%llx\n", GICC_BASE);
|
|
||||||
|
|
||||||
cap_irqfd = ioctl(vmfd, KVM_CHECK_EXTENSION, KVM_CAP_IRQFD) <= 0 ? false : true;
|
|
||||||
if (!cap_irqfd)
|
|
||||||
err(1, "the support of KVM_CAP_IRQFD is curently required");
|
|
||||||
}
|
|
||||||
|
|
||||||
int load_kernel(uint8_t* mem, char* path)
|
|
||||||
{
|
|
||||||
Elf64_Ehdr hdr;
|
|
||||||
Elf64_Phdr *phdr = NULL;
|
|
||||||
size_t buflen;
|
|
||||||
size_t pstart = 0;
|
|
||||||
int fd, ret;
|
|
||||||
|
|
||||||
fd = open(path, O_RDONLY);
|
|
||||||
if (fd == -1)
|
|
||||||
{
|
|
||||||
perror("Unable to open file");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
ret = pread_in_full(fd, &hdr, sizeof(hdr), 0);
|
|
||||||
if (ret < 0)
|
|
||||||
goto out;
|
|
||||||
|
|
||||||
// check if the program is a HermitCore file
|
|
||||||
if (hdr.e_ident[EI_MAG0] != ELFMAG0
|
|
||||||
|| hdr.e_ident[EI_MAG1] != ELFMAG1
|
|
||||||
|| hdr.e_ident[EI_MAG2] != ELFMAG2
|
|
||||||
|| hdr.e_ident[EI_MAG3] != ELFMAG3
|
|
||||||
|| hdr.e_ident[EI_CLASS] != ELFCLASS64
|
|
||||||
|| hdr.e_ident[EI_OSABI] != HERMIT_ELFOSABI
|
|
||||||
|| hdr.e_type != ET_EXEC || hdr.e_machine != EM_AARCH64) {
|
|
||||||
fprintf(stderr, "Invalid HermitCore file!\n");
|
|
||||||
ret = -1;
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
elf_entry = hdr.e_entry;
|
|
||||||
|
|
||||||
buflen = hdr.e_phentsize * hdr.e_phnum;
|
|
||||||
phdr = malloc(buflen);
|
|
||||||
if (!phdr) {
|
|
||||||
fprintf(stderr, "Not enough memory\n");
|
|
||||||
ret = -1;
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
ret = pread_in_full(fd, phdr, buflen, hdr.e_phoff);
|
|
||||||
if (ret < 0)
|
|
||||||
goto out;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Load all segments with type "LOAD" from the file at offset
|
|
||||||
* p_offset, and copy that into in memory.
|
|
||||||
*/
|
|
||||||
for (Elf64_Half ph_i = 0; ph_i < hdr.e_phnum; ph_i++)
|
|
||||||
{
|
|
||||||
uint64_t paddr = phdr[ph_i].p_paddr;
|
|
||||||
size_t offset = phdr[ph_i].p_offset;
|
|
||||||
size_t filesz = phdr[ph_i].p_filesz;
|
|
||||||
size_t memsz = phdr[ph_i].p_memsz;
|
|
||||||
|
|
||||||
if (phdr[ph_i].p_type != PT_LOAD)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
//fprintf(stderr, "Kernel location 0x%zx, file size 0x%zx, memory size 0x%zx\n", paddr, filesz, memsz);
|
|
||||||
|
|
||||||
ret = pread_in_full(fd, mem+paddr-GUEST_OFFSET, filesz, offset);
|
|
||||||
if (ret < 0)
|
|
||||||
goto out;
|
|
||||||
if (!klog)
|
|
||||||
klog = mem+paddr+0x1000-GUEST_OFFSET;
|
|
||||||
if (!mboot)
|
|
||||||
mboot = mem+paddr-GUEST_OFFSET;
|
|
||||||
//fprintf(stderr, "mboot at %p, klog at %p\n", mboot, klog);
|
|
||||||
|
|
||||||
if (!pstart) {
|
|
||||||
pstart = paddr;
|
|
||||||
|
|
||||||
// initialize kernel
|
|
||||||
*((uint64_t*) (mem+paddr-GUEST_OFFSET + 0x100)) = paddr; // physical start address
|
|
||||||
*((uint64_t*) (mem+paddr-GUEST_OFFSET + 0x108)) = guest_size - PAGE_SIZE; // physical limit
|
|
||||||
*((uint32_t*) (mem+paddr-GUEST_OFFSET + 0x110)) = get_cpufreq();
|
|
||||||
*((uint32_t*) (mem+paddr-GUEST_OFFSET + 0x128)) = ncores; // number of used cpus
|
|
||||||
*((uint32_t*) (mem+paddr-GUEST_OFFSET + 0x130)) = 0; // cpuid
|
|
||||||
*((uint32_t*) (mem+paddr-GUEST_OFFSET + 0x148)) = 1; // announce uhyve
|
|
||||||
|
|
||||||
|
|
||||||
char* str = getenv("HERMIT_IP");
|
|
||||||
if (str) {
|
|
||||||
uint32_t ip[4];
|
|
||||||
|
|
||||||
sscanf(str, "%u.%u.%u.%u", ip+0, ip+1, ip+2, ip+3);
|
|
||||||
*((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB0)) = (uint8_t) ip[0];
|
|
||||||
*((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB1)) = (uint8_t) ip[1];
|
|
||||||
*((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB2)) = (uint8_t) ip[2];
|
|
||||||
*((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB3)) = (uint8_t) ip[3];
|
|
||||||
}
|
|
||||||
|
|
||||||
str = getenv("HERMIT_GATEWAY");
|
|
||||||
if (str) {
|
|
||||||
uint32_t ip[4];
|
|
||||||
|
|
||||||
sscanf(str, "%u.%u.%u.%u", ip+0, ip+1, ip+2, ip+3);
|
|
||||||
*((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB4)) = (uint8_t) ip[0];
|
|
||||||
*((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB5)) = (uint8_t) ip[1];
|
|
||||||
*((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB6)) = (uint8_t) ip[2];
|
|
||||||
*((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB7)) = (uint8_t) ip[3];
|
|
||||||
}
|
|
||||||
str = getenv("HERMIT_MASK");
|
|
||||||
if (str) {
|
|
||||||
uint32_t ip[4];
|
|
||||||
|
|
||||||
sscanf(str, "%u.%u.%u.%u", ip+0, ip+1, ip+2, ip+3);
|
|
||||||
*((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB8)) = (uint8_t) ip[0];
|
|
||||||
*((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xB9)) = (uint8_t) ip[1];
|
|
||||||
*((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xBA)) = (uint8_t) ip[2];
|
|
||||||
*((uint8_t*) (mem+paddr-GUEST_OFFSET + 0xBB)) = (uint8_t) ip[3];
|
|
||||||
}
|
|
||||||
|
|
||||||
*((uint64_t*) (mem+paddr-GUEST_OFFSET + 0xbc)) = (uint64_t) guest_mem;
|
|
||||||
if (verbose)
|
|
||||||
*((uint32_t*) (mem+paddr-GUEST_OFFSET + 0x174)) = (uint32_t) UHYVE_UART_PORT;
|
|
||||||
}
|
|
||||||
*((uint64_t*) (mem+pstart-GUEST_OFFSET + 0x158)) = paddr + memsz - pstart; // total kernel size
|
|
||||||
}
|
|
||||||
|
|
||||||
ret = 0;
|
|
||||||
|
|
||||||
out:
|
|
||||||
if (phdr)
|
|
||||||
free(phdr);
|
|
||||||
|
|
||||||
close(fd);
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
#endif
|
|
|
@ -1,72 +0,0 @@
|
||||||
/*
|
|
||||||
* This file was adapted from the solo5/ukvm code base, initial copyright block
|
|
||||||
* follows:
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Copyright (c) 2015-2017 Contributors as noted in the AUTHORS file
|
|
||||||
*
|
|
||||||
* This file is part of ukvm, a unikernel monitor.
|
|
||||||
*
|
|
||||||
* Permission to use, copy, modify, and/or distribute this software
|
|
||||||
* for any purpose with or without fee is hereby granted, provided
|
|
||||||
* that the above copyright notice and this permission notice appear
|
|
||||||
* in all copies.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
|
|
||||||
* WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
|
|
||||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
|
|
||||||
* AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
|
|
||||||
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
|
||||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
|
||||||
* NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
|
||||||
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Based on binutils-gdb/gdb/stubs/i386-stub.c, which is:
|
|
||||||
* Not copyrighted.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifdef __aarch64__
|
|
||||||
#define _GNU_SOURCE
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <unistd.h>
|
|
||||||
#include <sys/ioctl.h>
|
|
||||||
#include <sys/mman.h>
|
|
||||||
#include <err.h>
|
|
||||||
#include <inttypes.h>
|
|
||||||
|
|
||||||
#include <sys/socket.h>
|
|
||||||
#include <netinet/in.h>
|
|
||||||
#include <netinet/tcp.h>
|
|
||||||
#include <arpa/inet.h>
|
|
||||||
#include <netdb.h>
|
|
||||||
#include <assert.h>
|
|
||||||
#include <stdbool.h>
|
|
||||||
#include <ctype.h>
|
|
||||||
#include <linux/kvm.h>
|
|
||||||
|
|
||||||
#include "uhyve.h"
|
|
||||||
#include "uhyve-gdb.h"
|
|
||||||
#include "queue.h"
|
|
||||||
|
|
||||||
/* The GDB stub is not implemented on aarch64: intentionally a no-op. */
void uhyve_gdb_handle_exception(int vcpufd, int sigval)
{
	(void) vcpufd;
	(void) sigval;
}
|
|
||||||
|
|
||||||
/* The GDB stub is not implemented on aarch64: intentionally a no-op. */
void uhyve_gdb_handle_term(void)
{
}
|
|
||||||
|
|
||||||
/*
 * The GDB stub is not implemented on aarch64: always reports failure (-1)
 * without touching vcpufd.
 */
int uhyve_gdb_init(int vcpufd)
{
	(void) vcpufd;

	return -1;
}
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -1,40 +0,0 @@
|
||||||
/*
|
|
||||||
* This file was adapted from the solo5/ukvm code base, initial copyright block
|
|
||||||
* follows:
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Copyright (c) 2015-2017 Contributors as noted in the AUTHORS file
|
|
||||||
*
|
|
||||||
* This file is part of ukvm, a unikernel monitor.
|
|
||||||
*
|
|
||||||
* Permission to use, copy, modify, and/or distribute this software
|
|
||||||
* for any purpose with or without fee is hereby granted, provided
|
|
||||||
* that the above copyright notice and this permission notice appear
|
|
||||||
* in all copies.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
|
|
||||||
* WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
|
|
||||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
|
|
||||||
* AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
|
|
||||||
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
|
||||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
|
||||||
* NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
|
||||||
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef UHYVE_GDB_AARCH64_H
|
|
||||||
#define UHYVE_GDB_AARCH64_H
|
|
||||||
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <inttypes.h>
|
|
||||||
|
|
||||||
/*
 * Register file exchanged with the debugger on aarch64.
 * NOTE(review): regs[31] presumably holds x0..x30, which would make the
 * separate `lr` field overlap with x30 - confirm the intended layout
 * before the aarch64 stub is implemented.
 */
struct uhyve_gdb_regs {
	uint64_t regs[31];
	uint64_t lr;
	uint64_t pc;
	uint64_t pstate;
	uint64_t sp;
};
|
|
||||||
|
|
||||||
#endif /* UHYVE_GDB_AARCH64_H */
|
|
|
@ -1,993 +0,0 @@
|
||||||
/*
|
|
||||||
* This file was adapted from the solo5/ukvm code base, initial copyright block
|
|
||||||
* follows:
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Copyright (c) 2015-2017 Contributors as noted in the AUTHORS file
|
|
||||||
*
|
|
||||||
* This file is part of ukvm, a unikernel monitor.
|
|
||||||
*
|
|
||||||
* Permission to use, copy, modify, and/or distribute this software
|
|
||||||
* for any purpose with or without fee is hereby granted, provided
|
|
||||||
* that the above copyright notice and this permission notice appear
|
|
||||||
* in all copies.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
|
|
||||||
* WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
|
|
||||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
|
|
||||||
* AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
|
|
||||||
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
|
||||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
|
||||||
* NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
|
||||||
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Based on binutils-gdb/gdb/stubs/i386-stub.c, which is:
|
|
||||||
* Not copyrighted.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifdef __x86_64__
|
|
||||||
#define _GNU_SOURCE
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <unistd.h>
|
|
||||||
#include <sys/ioctl.h>
|
|
||||||
#include <sys/mman.h>
|
|
||||||
#include <err.h>
|
|
||||||
#include <inttypes.h>
|
|
||||||
|
|
||||||
#include <sys/socket.h>
|
|
||||||
#include <netinet/in.h>
|
|
||||||
#include <netinet/tcp.h>
|
|
||||||
#include <arpa/inet.h>
|
|
||||||
#include <netdb.h>
|
|
||||||
#include <assert.h>
|
|
||||||
#include <stdbool.h>
|
|
||||||
#include <ctype.h>
|
|
||||||
#include <linux/kvm.h>
|
|
||||||
|
|
||||||
#include "uhyve.h"
|
|
||||||
#include "uhyve-gdb.h"
|
|
||||||
#include "queue.h"
|
|
||||||
|
|
||||||
struct breakpoint_t {
|
|
||||||
gdb_breakpoint_type type;
|
|
||||||
uint64_t addr;
|
|
||||||
size_t len;
|
|
||||||
uint32_t refcount;
|
|
||||||
uint8_t saved_insn; /* for software breakpoints */
|
|
||||||
|
|
||||||
SLIST_ENTRY(breakpoint_t) entries;
|
|
||||||
};
|
|
||||||
|
|
||||||
SLIST_HEAD(breakpoints_head, breakpoint_t);
|
|
||||||
static struct breakpoints_head sw_breakpoints;
|
|
||||||
static struct breakpoints_head hw_breakpoints;
|
|
||||||
|
|
||||||
/* The Intel SDM specifies that the DR7 has space for 4 breakpoints. */
|
|
||||||
#define MAX_HW_BREAKPOINTS 4
|
|
||||||
static uint32_t nr_hw_breakpoints = 0;
|
|
||||||
|
|
||||||
/* Stepping is disabled by default. */
|
|
||||||
static bool stepping = false;
|
|
||||||
/* This is the trap instruction used for software breakpoints. */
|
|
||||||
static const uint8_t int3 = 0xcc;
|
|
||||||
|
|
||||||
static int socket_fd = 0;
|
|
||||||
static int portno = 1234; /* Default port number */
|
|
||||||
static const char hexchars[] = "0123456789abcdef";
|
|
||||||
|
|
||||||
#define BUFMAX 4096
|
|
||||||
static char in_buffer[BUFMAX];
|
|
||||||
static unsigned char registers[BUFMAX];
|
|
||||||
|
|
||||||
/* uhyve variables */
|
|
||||||
extern size_t guest_size;
|
|
||||||
extern uint8_t *guest_mem;
|
|
||||||
|
|
||||||
void *uhyve_checked_gpa_p(uint64_t gpa, size_t sz, uint8_t * chk_guest_mem,
|
|
||||||
size_t chk_guest_size, const char *file, int line);
|
|
||||||
|
|
||||||
/* The actual error code is ignored by GDB, so any number will do. */
|
|
||||||
#define GDB_ERROR_MSG "E01"
|
|
||||||
|
|
||||||
/*
 * Convert one hexadecimal digit (either case) to its value 0..15.
 * Returns -1 when ch is not a hexadecimal digit.
 */
static int hex(unsigned char ch)
{
	if ((ch >= 'a') && (ch <= 'f'))
		return (ch - 'a' + 10);
	if ((ch >= '0') && (ch <= '9'))
		return (ch - '0');
	if ((ch >= 'A') && (ch <= 'F'))
		return (ch - 'A' + 10);

	return -1;
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Converts the (count) bytes of memory pointed to by mem into an hex string in
|
|
||||||
* buf. Returns a pointer to the last char put in buf (null).
|
|
||||||
*/
|
|
||||||
static char *mem2hex(const unsigned char *mem, char *buf, size_t count)
|
|
||||||
{
|
|
||||||
size_t i;
|
|
||||||
unsigned char ch;
|
|
||||||
|
|
||||||
for (i = 0; i < count; i++) {
|
|
||||||
ch = *mem++;
|
|
||||||
*buf++ = hexchars[ch >> 4];
|
|
||||||
*buf++ = hexchars[ch % 16];
|
|
||||||
}
|
|
||||||
*buf = 0;
|
|
||||||
return buf;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Converts the hex string in buf into (count) bytes of binary in mem.
 * buf must hold at least 2 * count characters (asserted).
 * Returns a pointer to the byte AFTER the last byte written.
 *
 * Characters that are not hex digits are not rejected: hex() yields -1
 * for them and the stored byte is then meaningless. The nibbles are
 * combined as unsigned values, because left-shifting the negative result
 * directly would be undefined behaviour.
 */
static unsigned char *hex2mem(const char *buf, unsigned char *mem, size_t count)
{
	size_t i;

	assert(strlen(buf) >= (2 * count));

	for (i = 0; i < count; i++) {
		unsigned int hi = (unsigned int) hex(*buf++);
		unsigned int lo = (unsigned int) hex(*buf++);

		*mem++ = (unsigned char) ((hi << 4) + lo);
	}
	return mem;
}
|
|
||||||
|
|
||||||
static int wait_for_connect(void)
|
|
||||||
{
|
|
||||||
int listen_socket_fd;
|
|
||||||
struct sockaddr_in server_addr, client_addr;
|
|
||||||
struct protoent *protoent;
|
|
||||||
struct in_addr ip_addr;
|
|
||||||
socklen_t len;
|
|
||||||
int opt;
|
|
||||||
|
|
||||||
listen_socket_fd = socket(AF_INET, SOCK_STREAM, 0);
|
|
||||||
if (listen_socket_fd == -1) {
|
|
||||||
err(1, "Could not create socket");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
opt = 1;
|
|
||||||
if (setsockopt(listen_socket_fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) == -1)
|
|
||||||
err(1, "setsockopt(SO_REUSEADDR) failed");
|
|
||||||
|
|
||||||
server_addr.sin_family = AF_INET;
|
|
||||||
server_addr.sin_addr.s_addr = htonl(INADDR_ANY);
|
|
||||||
server_addr.sin_port = htons(portno);
|
|
||||||
|
|
||||||
if (bind(listen_socket_fd, (struct sockaddr *)&server_addr,
|
|
||||||
sizeof(server_addr)) == -1) {
|
|
||||||
err(1, "bind failed");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (listen(listen_socket_fd, 0) == -1) {
|
|
||||||
err(1, "listen failed");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
warnx("Waiting for a debugger. Connect to it like this:");
|
|
||||||
warnx("\tgdb --ex=\"target remote localhost:%d\" UNIKERNEL", portno);
|
|
||||||
|
|
||||||
len = sizeof(client_addr);
|
|
||||||
socket_fd =
|
|
||||||
accept(listen_socket_fd, (struct sockaddr *)&client_addr, &len);
|
|
||||||
if (socket_fd == -1) {
|
|
||||||
err(1, "accept failed");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
close(listen_socket_fd);
|
|
||||||
|
|
||||||
protoent = getprotobyname("tcp");
|
|
||||||
if (!protoent) {
|
|
||||||
err(1, "getprotobyname (\"tcp\") failed");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
opt = 1;
|
|
||||||
if (setsockopt(socket_fd, protoent->p_proto, TCP_NODELAY, &opt,
|
|
||||||
sizeof(opt)) == -1)
|
|
||||||
err(1, "setsockopt(TCP_NODELAY) failed");
|
|
||||||
|
|
||||||
ip_addr.s_addr = client_addr.sin_addr.s_addr;
|
|
||||||
warnx("Connection from debugger at %s", inet_ntoa(ip_addr));
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline int send_char(char ch)
|
|
||||||
{
|
|
||||||
/* TCP is already buffering, so no need to buffer here as well. */
|
|
||||||
return send(socket_fd, &ch, 1, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
static char recv_char(void)
|
|
||||||
{
|
|
||||||
unsigned char ch;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
ret = recv(socket_fd, &ch, 1, 0);
|
|
||||||
if (ret < 0) {
|
|
||||||
return -1;
|
|
||||||
} else if (ret == 0) {
|
|
||||||
/* The peer has performed an orderly shutdown (from "man recv"). */
|
|
||||||
warnx("GDB: Connection closed from client");
|
|
||||||
close(socket_fd);
|
|
||||||
socket_fd = -1;
|
|
||||||
return -1;
|
|
||||||
} else {
|
|
||||||
assert(ret == 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* All GDB remote packets are encoded in ASCII. */
|
|
||||||
assert(isascii(ch));
|
|
||||||
|
|
||||||
return (char)ch;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Scan for the sequence $<data>#<checksum>
|
|
||||||
* Returns a null terminated string.
|
|
||||||
*/
|
|
||||||
static char *recv_packet(void)
|
|
||||||
{
|
|
||||||
char *buffer = &in_buffer[0];
|
|
||||||
unsigned char checksum;
|
|
||||||
unsigned char xmitcsum;
|
|
||||||
char ch;
|
|
||||||
int count;
|
|
||||||
|
|
||||||
while (1) {
|
|
||||||
/* wait around for the start character, ignore all other characters */
|
|
||||||
do {
|
|
||||||
ch = recv_char();
|
|
||||||
if (ch == -1)
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
while (ch != '$');
|
|
||||||
|
|
||||||
retry:
|
|
||||||
checksum = 0;
|
|
||||||
xmitcsum = -1;
|
|
||||||
count = 0;
|
|
||||||
|
|
||||||
/* now, read until a # or end of buffer is found */
|
|
||||||
while (count < BUFMAX - 1) {
|
|
||||||
ch = recv_char();
|
|
||||||
if (ch == -1)
|
|
||||||
return NULL;
|
|
||||||
if (ch == '$')
|
|
||||||
goto retry;
|
|
||||||
if (ch == '#')
|
|
||||||
break;
|
|
||||||
checksum = checksum + ch;
|
|
||||||
buffer[count] = ch;
|
|
||||||
count = count + 1;
|
|
||||||
}
|
|
||||||
/* Let's make this a C string. */
|
|
||||||
buffer[count] = '\0';
|
|
||||||
|
|
||||||
if (ch == '#') {
|
|
||||||
ch = recv_char();
|
|
||||||
if (ch == -1)
|
|
||||||
return NULL;
|
|
||||||
xmitcsum = hex(ch) << 4;
|
|
||||||
ch = recv_char();
|
|
||||||
if (ch == -1)
|
|
||||||
return NULL;
|
|
||||||
xmitcsum += hex(ch);
|
|
||||||
|
|
||||||
if (checksum != xmitcsum) {
|
|
||||||
warnx("Failed checksum from GDB. "
|
|
||||||
"My count = 0x%x, sent=0x%x. buf=%s",
|
|
||||||
checksum, xmitcsum, buffer);
|
|
||||||
if (send_char('-') == -1)
|
|
||||||
/* Unsuccessful reply to a failed checksum */
|
|
||||||
err(1,
|
|
||||||
"GDB: Could not send an ACK to the debugger.");
|
|
||||||
} else {
|
|
||||||
if (send_char('+') == -1)
|
|
||||||
/* Unsuccessful reply to a successful transfer */
|
|
||||||
err(1,
|
|
||||||
"GDB: Could not send an ACK to the debugger.");
|
|
||||||
|
|
||||||
/* if a sequence char is present, reply the sequence ID */
|
|
||||||
if (buffer[2] == ':') {
|
|
||||||
send_char(buffer[0]);
|
|
||||||
send_char(buffer[1]);
|
|
||||||
|
|
||||||
return &buffer[3];
|
|
||||||
}
|
|
||||||
|
|
||||||
return &buffer[0];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Send packet of the form $<packet info>#<checksum> without waiting for an ACK
|
|
||||||
* from the debugger. Only send_response
|
|
||||||
*/
|
|
||||||
static void send_packet_no_ack(char *buffer)
|
|
||||||
{
|
|
||||||
unsigned char checksum;
|
|
||||||
int count;
|
|
||||||
char ch;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* We ignore all send_char errors as we either: (1) care about sending our
|
|
||||||
* packet and we will keep sending it until we get a good ACK from the
|
|
||||||
* debugger, or (2) not care and just send it as a best-effort notification
|
|
||||||
* when dying.
|
|
||||||
*/
|
|
||||||
|
|
||||||
send_char('$');
|
|
||||||
checksum = 0;
|
|
||||||
count = 0;
|
|
||||||
|
|
||||||
ch = buffer[count];
|
|
||||||
while (ch) {
|
|
||||||
send_char(ch);
|
|
||||||
checksum += ch;
|
|
||||||
count += 1;
|
|
||||||
ch = buffer[count];
|
|
||||||
}
|
|
||||||
|
|
||||||
send_char('#');
|
|
||||||
send_char(hexchars[checksum >> 4]);
|
|
||||||
send_char(hexchars[checksum % 16]);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Send a packet and wait for a successful ACK of '+' from the debugger.
 * Any other reply (notably '-') causes the packet to be sent again.
 * Gives up silently when the connection fails or is closed.
 */
static void send_packet(char *buffer)
{
	char ch;

	do {
		send_packet_no_ack(buffer);
		ch = recv_char();
		/*
		 * recv_char() signals failure with -1 returned as a plain char.
		 * Comparing against (char)-1 also works where plain char is
		 * unsigned; a bare "== -1" would never match there and this loop
		 * would resend forever on a dead connection.
		 */
		if (ch == (char)-1)
			return;
	} while (ch != '+');
}
|
|
||||||
|
|
||||||
/* Canned replies: error ("E01"), unsupported request (empty packet), "OK". */
#define send_error_msg()		send_packet(GDB_ERROR_MSG)
#define send_not_supported_msg()	send_packet("")
#define send_okay_msg()			send_packet("OK")
|
|
||||||
|
|
||||||
/*
|
|
||||||
* This is a response to 'c' and 's'. In other words, the VM was
|
|
||||||
* running and it stopped for some reason. This message is to tell the
|
|
||||||
* debugger that whe stopped (and why). The argument code can take these
|
|
||||||
* and some other values:
|
|
||||||
* - 'S AA' received signal AA
|
|
||||||
* - 'W AA' exited with return code AA
|
|
||||||
* - 'X AA' exited with signal AA
|
|
||||||
* https://sourceware.org/gdb/onlinedocs/gdb/Stop-Reply-Packets.html
|
|
||||||
*/
|
|
||||||
static void send_response(char code, int sigval, bool wait_for_ack)
|
|
||||||
{
|
|
||||||
char obuf[BUFMAX];
|
|
||||||
snprintf(obuf, sizeof(obuf), "%c%02x", code, sigval);
|
|
||||||
if (wait_for_ack)
|
|
||||||
send_packet(obuf);
|
|
||||||
else
|
|
||||||
send_packet_no_ack(obuf);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void gdb_handle_exception(int vcpufd, int sigval)
|
|
||||||
{
|
|
||||||
char *packet;
|
|
||||||
char obuf[BUFMAX];
|
|
||||||
|
|
||||||
/* Notify the debugger of our last signal */
|
|
||||||
send_response('S', sigval, true);
|
|
||||||
|
|
||||||
for (;;) {
|
|
||||||
uint64_t addr = 0, result;
|
|
||||||
gdb_breakpoint_type type;
|
|
||||||
size_t len;
|
|
||||||
int command, ret;
|
|
||||||
|
|
||||||
packet = recv_packet();
|
|
||||||
if (packet == NULL)
|
|
||||||
/* Without a packet with instructions with what to do next there is
|
|
||||||
* really nothing we can do to recover. So, dying. */
|
|
||||||
errx(1,
|
|
||||||
"GDB: Exiting as we could not receive the next command from "
|
|
||||||
"the debugger.");
|
|
||||||
|
|
||||||
/*
|
|
||||||
* From the GDB manual:
|
|
||||||
* "At a minimum, a stub is required to support the ‘g’ and ‘G’
|
|
||||||
* commands for register access, and the ‘m’ and ‘M’ commands
|
|
||||||
* for memory access. Stubs that only control single-threaded
|
|
||||||
* targets can implement run control with the ‘c’ (continue),
|
|
||||||
* and ‘s’ (step) commands."
|
|
||||||
*/
|
|
||||||
command = packet[0];
|
|
||||||
switch (command) {
|
|
||||||
case 's':
|
|
||||||
{
|
|
||||||
/* Step */
|
|
||||||
if (sscanf(packet, "s%" PRIx64, &addr) == 1) {
|
|
||||||
/* not supported, but that's OK as GDB will retry with the
|
|
||||||
* slower version of this: update all registers. */
|
|
||||||
send_not_supported_msg();
|
|
||||||
break; /* Wait for another command. */
|
|
||||||
}
|
|
||||||
if (uhyve_gdb_enable_ss(vcpufd) == -1) {
|
|
||||||
send_error_msg();
|
|
||||||
break; /* Wait for another command. */
|
|
||||||
}
|
|
||||||
return; /* Continue with program */
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'c':
|
|
||||||
{
|
|
||||||
/* Continue (and disable stepping for the next instruction) */
|
|
||||||
if (sscanf(packet, "c%" PRIx64, &addr) == 1) {
|
|
||||||
/* not supported, but that's OK as GDB will retry with the
|
|
||||||
* slower version of this: update all registers. */
|
|
||||||
send_not_supported_msg();
|
|
||||||
break; /* Wait for another command. */
|
|
||||||
}
|
|
||||||
if (uhyve_gdb_disable_ss(vcpufd) == -1) {
|
|
||||||
send_error_msg();
|
|
||||||
break; /* Wait for another command. */
|
|
||||||
}
|
|
||||||
return; /* Continue with program */
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'm':
|
|
||||||
{
|
|
||||||
/* Read memory content */
|
|
||||||
if (sscanf(packet, "m%" PRIx64 ",%zx", &addr, &len) != 2) {
|
|
||||||
send_error_msg();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
/* translate addr into guest phys first. it is
|
|
||||||
* needed if the address falls into the non directly mapped
|
|
||||||
* part of the virtual address space (ex: heap/stack) */
|
|
||||||
uint64_t phys_addr;
|
|
||||||
|
|
||||||
if (uhyve_gdb_guest_virt_to_phys(vcpufd, addr, &phys_addr)) {
|
|
||||||
send_error_msg();
|
|
||||||
} else {
|
|
||||||
mem2hex(guest_mem + phys_addr, obuf, len);
|
|
||||||
send_packet(obuf);
|
|
||||||
}
|
|
||||||
break; /* Wait for another command. */
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'M':
|
|
||||||
{
|
|
||||||
/* Write memory content */
|
|
||||||
uint64_t phys_addr;
|
|
||||||
|
|
||||||
assert(strlen(packet) <= sizeof(obuf));
|
|
||||||
if (sscanf(packet, "M%" PRIx64 ",%zx:%s", &addr, &len, obuf) != 3) {
|
|
||||||
send_error_msg();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* translate to guest physical address first */
|
|
||||||
if (uhyve_gdb_guest_virt_to_phys(vcpufd, addr, &phys_addr)) {
|
|
||||||
send_error_msg();
|
|
||||||
} else {
|
|
||||||
hex2mem(obuf, guest_mem + phys_addr,
|
|
||||||
len);
|
|
||||||
send_okay_msg();
|
|
||||||
}
|
|
||||||
break; /* Wait for another command. */
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'g':
|
|
||||||
{
|
|
||||||
/* Read general registers */
|
|
||||||
len = BUFMAX;
|
|
||||||
if (uhyve_gdb_read_registers(vcpufd, registers, &len) == -1) {
|
|
||||||
send_error_msg();
|
|
||||||
} else {
|
|
||||||
mem2hex(registers, obuf, len);
|
|
||||||
send_packet(obuf);
|
|
||||||
}
|
|
||||||
break; /* Wait for another command. */
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'G':
|
|
||||||
{
|
|
||||||
/* Write general registers */
|
|
||||||
len = BUFMAX;
|
|
||||||
/* Call read_registers just to get len (not very efficient). */
|
|
||||||
if (uhyve_gdb_read_registers(vcpufd, registers, &len) == -1) {
|
|
||||||
send_error_msg();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
/* Packet looks like 'Gxxxxx', so we have to skip the first char */
|
|
||||||
hex2mem(packet + 1, registers, len);
|
|
||||||
if (uhyve_gdb_write_registers(vcpufd, registers, len) == -1) {
|
|
||||||
send_error_msg();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
send_okay_msg();
|
|
||||||
break; /* Wait for another command. */
|
|
||||||
}
|
|
||||||
|
|
||||||
case '?':
|
|
||||||
{
|
|
||||||
/* Return last signal */
|
|
||||||
send_response('S', sigval, true);
|
|
||||||
break; /* Wait for another command. */
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'Z':
|
|
||||||
/* Insert a breakpoint */
|
|
||||||
case 'z':
|
|
||||||
{
|
|
||||||
/* Remove a breakpoint */
|
|
||||||
packet++;
|
|
||||||
if (sscanf(packet, "%" PRIx32 ",%" PRIx64 ",%zx",
|
|
||||||
&type, &addr, &len) != 3) {
|
|
||||||
send_error_msg();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
uint64_t phys_addr;
|
|
||||||
if (uhyve_gdb_guest_virt_to_phys(vcpufd, addr, &phys_addr)) {
|
|
||||||
send_error_msg();
|
|
||||||
} else {
|
|
||||||
if (command == 'Z')
|
|
||||||
ret = uhyve_gdb_add_breakpoint(vcpufd, type, phys_addr, len);
|
|
||||||
else
|
|
||||||
ret = uhyve_gdb_remove_breakpoint(vcpufd, type, phys_addr, len);
|
|
||||||
|
|
||||||
if (ret == -1)
|
|
||||||
send_error_msg();
|
|
||||||
else
|
|
||||||
send_okay_msg();
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'k':
|
|
||||||
{
|
|
||||||
warnx("Debugger asked us to quit");
|
|
||||||
send_okay_msg();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'D':
|
|
||||||
{
|
|
||||||
warnx("Debugger detached");
|
|
||||||
send_okay_msg();
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
default:
|
|
||||||
send_not_supported_msg();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Public entry point used by uhyve.c: hands control to the debugger command
 * loop, reporting `sigval` as the reason the guest stopped.
 */
void uhyve_gdb_handle_exception(int vcpufd, int sigval)
{
	gdb_handle_exception(vcpufd, sigval);
	return;
}
|
|
||||||
|
|
||||||
static void gdb_stub_start(int vcpufd)
|
|
||||||
{
|
|
||||||
wait_for_connect();
|
|
||||||
gdb_handle_exception(vcpufd, GDB_SIGNAL_FIRST);
|
|
||||||
}
|
|
||||||
|
|
||||||
int uhyve_gdb_init(int vcpufd)
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* GDB clients can change memory, and software breakpoints work by
|
|
||||||
* replacing instructions with int3's.
|
|
||||||
*/
|
|
||||||
if (mprotect(guest_mem, guest_size, PROT_READ | PROT_WRITE | PROT_EXEC) == -1)
|
|
||||||
err(1, "GDB: Cannot remove guest memory protection");
|
|
||||||
|
|
||||||
/* Notify the debugger that we are dying. */
|
|
||||||
atexit(uhyve_gdb_handle_term);
|
|
||||||
|
|
||||||
gdb_stub_start(vcpufd);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Exit-time hook: tell the debugger that the process exited.
 *
 * TODO: this is a graceful shutdown that forces the reported return value to
 * zero; is there any way to pass an error code when things go wrong?
 */
void uhyve_gdb_handle_term(void)
{
	const int exit_status = 0;

	send_response('W', exit_status, true);
}
|
|
||||||
|
|
||||||
static int kvm_arch_insert_sw_breakpoint(struct breakpoint_t *bp)
|
|
||||||
{
|
|
||||||
uint8_t *insn = bp->addr + guest_mem;
|
|
||||||
bp->saved_insn = *insn;
|
|
||||||
/*
|
|
||||||
* We just modify the first byte even if the instruction is multi-byte.
|
|
||||||
* The debugger keeps track of the length of the instruction. The
|
|
||||||
* consequence of this is that we don't have to set all other bytes as
|
|
||||||
* NOP's.
|
|
||||||
*/
|
|
||||||
*insn = int3;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int kvm_arch_remove_sw_breakpoint(struct breakpoint_t *bp)
|
|
||||||
{
|
|
||||||
uint8_t *insn = bp->addr + guest_mem;
|
|
||||||
assert(*insn == int3);
|
|
||||||
*insn = bp->saved_insn;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int uhyve_gdb_update_guest_debug(int vcpufd)
|
|
||||||
{
|
|
||||||
struct kvm_guest_debug dbg = { 0 };
|
|
||||||
struct breakpoint_t *bp;
|
|
||||||
const uint8_t type_code[] = {
|
|
||||||
/* Break on instruction execution only. */
|
|
||||||
[GDB_BREAKPOINT_HW] = 0x0,
|
|
||||||
/* Break on data writes only. */
|
|
||||||
[GDB_WATCHPOINT_WRITE] = 0x1,
|
|
||||||
/* Break on data reads only. */
|
|
||||||
[GDB_WATCHPOINT_READ] = 0x2,
|
|
||||||
/* Break on data reads or writes but not instruction fetches. */
|
|
||||||
[GDB_WATCHPOINT_ACCESS] = 0x3
|
|
||||||
};
|
|
||||||
const uint8_t len_code[] = {
|
|
||||||
/*
|
|
||||||
* 00 — 1-byte length.
|
|
||||||
* 01 — 2-byte length.
|
|
||||||
* 10 — 8-byte length.
|
|
||||||
* 11 — 4-byte length.
|
|
||||||
*/
|
|
||||||
[1] = 0x0,[2] = 0x1,[4] = 0x3,[8] = 0x2
|
|
||||||
};
|
|
||||||
int n = 0;
|
|
||||||
|
|
||||||
if (stepping)
|
|
||||||
dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
|
|
||||||
|
|
||||||
if (!SLIST_EMPTY(&sw_breakpoints))
|
|
||||||
dbg.control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
|
|
||||||
|
|
||||||
if (!SLIST_EMPTY(&hw_breakpoints)) {
|
|
||||||
dbg.control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
|
|
||||||
|
|
||||||
/* Enable global breakpointing (across all threads) on the control
|
|
||||||
* debug register. */
|
|
||||||
dbg.arch.debugreg[7] = 1 << 9;
|
|
||||||
dbg.arch.debugreg[7] |= 1 << 10;
|
|
||||||
SLIST_FOREACH(bp, &hw_breakpoints, entries) {
|
|
||||||
assert(bp->type != GDB_BREAKPOINT_SW);
|
|
||||||
dbg.arch.debugreg[n] = bp->addr;
|
|
||||||
/* global breakpointing */
|
|
||||||
dbg.arch.debugreg[7] |= (2 << (n * 2));
|
|
||||||
/* read/write fields */
|
|
||||||
dbg.arch.debugreg[7] |=
|
|
||||||
(type_code[bp->type] << (16 + n * 4));
|
|
||||||
/* Length fields */
|
|
||||||
dbg.arch.debugreg[7] |=
|
|
||||||
((uint32_t) len_code[bp->len] << (18 + n * 4));
|
|
||||||
n++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
kvm_ioctl(vcpufd, KVM_SET_GUEST_DEBUG, &dbg);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Look up a breakpoint by address and length. Software breakpoints are
 * searched in `sw_breakpoints`; every other supported type is searched in
 * `hw_breakpoints` (the stored type is not compared). Returns the matching
 * entry or NULL. An unknown type trips an assert.
 */
static struct breakpoint_t *bp_list_find(gdb_breakpoint_type type,
					 uint64_t addr, size_t len)
{
	struct breakpoint_t *bp;

	if (type == GDB_BREAKPOINT_SW) {
		SLIST_FOREACH(bp, &sw_breakpoints, entries) {
			if (bp->addr == addr && bp->len == len)
				return bp;
		}
	} else if (type == GDB_BREAKPOINT_HW ||
		   type == GDB_WATCHPOINT_WRITE ||
		   type == GDB_WATCHPOINT_READ ||
		   type == GDB_WATCHPOINT_ACCESS) {
		/* We only support hardware watchpoints. */
		SLIST_FOREACH(bp, &hw_breakpoints, entries) {
			if (bp->addr == addr && bp->len == len)
				return bp;
		}
	} else {
		assert(0);
	}

	return NULL;
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Adds a new breakpoint to the list of breakpoints. Returns the found or
|
|
||||||
* created breakpoint. Returns NULL in case of failure or if we reached the max
|
|
||||||
* number of allowed hardware breakpoints (4).
|
|
||||||
*/
|
|
||||||
static struct breakpoint_t *bp_list_insert(gdb_breakpoint_type type,
|
|
||||||
uint64_t addr, size_t len)
|
|
||||||
{
|
|
||||||
struct breakpoint_t *bp;
|
|
||||||
|
|
||||||
bp = bp_list_find(type, addr, len);
|
|
||||||
if (bp) {
|
|
||||||
bp->refcount++;
|
|
||||||
return bp;
|
|
||||||
}
|
|
||||||
|
|
||||||
bp = malloc(sizeof(struct breakpoint_t));
|
|
||||||
if (bp == NULL)
|
|
||||||
return NULL;
|
|
||||||
|
|
||||||
bp->addr = addr;
|
|
||||||
bp->type = type;
|
|
||||||
bp->len = len;
|
|
||||||
bp->refcount = 1;
|
|
||||||
|
|
||||||
switch (type) {
|
|
||||||
case GDB_BREAKPOINT_SW:
|
|
||||||
SLIST_INSERT_HEAD(&sw_breakpoints, bp, entries);
|
|
||||||
break;
|
|
||||||
|
|
||||||
case GDB_BREAKPOINT_HW:
|
|
||||||
case GDB_WATCHPOINT_WRITE:
|
|
||||||
case GDB_WATCHPOINT_READ:
|
|
||||||
case GDB_WATCHPOINT_ACCESS:
|
|
||||||
/* We only support hardware watchpoints. */
|
|
||||||
if (nr_hw_breakpoints == MAX_HW_BREAKPOINTS)
|
|
||||||
return NULL;
|
|
||||||
nr_hw_breakpoints++;
|
|
||||||
SLIST_INSERT_HEAD(&hw_breakpoints, bp, entries);
|
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
|
||||||
assert(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
return bp;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Removes a breakpoint from the list of breakpoints.
|
|
||||||
* Returns -1 if the breakpoint is not in the list.
|
|
||||||
*/
|
|
||||||
static int bp_list_remove(gdb_breakpoint_type type, uint64_t addr, size_t len)
|
|
||||||
{
|
|
||||||
struct breakpoint_t *bp = NULL;
|
|
||||||
|
|
||||||
bp = bp_list_find(type, addr, len);
|
|
||||||
if (!bp)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
bp->refcount--;
|
|
||||||
if (bp->refcount > 0)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
switch (type) {
|
|
||||||
case GDB_BREAKPOINT_SW:
|
|
||||||
SLIST_REMOVE(&sw_breakpoints, bp, breakpoint_t, entries);
|
|
||||||
break;
|
|
||||||
|
|
||||||
case GDB_BREAKPOINT_HW:
|
|
||||||
case GDB_WATCHPOINT_WRITE:
|
|
||||||
case GDB_WATCHPOINT_READ:
|
|
||||||
case GDB_WATCHPOINT_ACCESS:
|
|
||||||
/* We only support hardware watchpoints. */
|
|
||||||
SLIST_REMOVE(&hw_breakpoints, bp, breakpoint_t, entries);
|
|
||||||
nr_hw_breakpoints--;
|
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
|
||||||
assert(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
free(bp);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int uhyve_gdb_read_registers(int vcpufd, uint8_t * registers, size_t * len)
|
|
||||||
{
|
|
||||||
struct kvm_regs kregs;
|
|
||||||
struct kvm_sregs sregs;
|
|
||||||
struct uhyve_gdb_regs *gregs = (struct uhyve_gdb_regs *)registers;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
kvm_ioctl(vcpufd, KVM_GET_REGS, &kregs);
|
|
||||||
kvm_ioctl(vcpufd, KVM_GET_SREGS, &sregs);
|
|
||||||
|
|
||||||
if (*len < sizeof(struct uhyve_gdb_regs))
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
*len = sizeof(struct uhyve_gdb_regs);
|
|
||||||
|
|
||||||
gregs->rax = kregs.rax;
|
|
||||||
gregs->rbx = kregs.rbx;
|
|
||||||
gregs->rcx = kregs.rcx;
|
|
||||||
gregs->rdx = kregs.rdx;
|
|
||||||
|
|
||||||
gregs->rsi = kregs.rsi;
|
|
||||||
gregs->rdi = kregs.rdi;
|
|
||||||
gregs->rbp = kregs.rbp;
|
|
||||||
gregs->rsp = kregs.rsp;
|
|
||||||
|
|
||||||
gregs->r8 = kregs.r8;
|
|
||||||
gregs->r9 = kregs.r9;
|
|
||||||
gregs->r10 = kregs.r10;
|
|
||||||
gregs->r11 = kregs.r11;
|
|
||||||
|
|
||||||
gregs->rip = kregs.rip;
|
|
||||||
gregs->eflags = kregs.rflags;
|
|
||||||
|
|
||||||
gregs->cs = sregs.cs.selector;
|
|
||||||
gregs->ss = sregs.ss.selector;
|
|
||||||
gregs->ds = sregs.ds.selector;
|
|
||||||
gregs->es = sregs.es.selector;
|
|
||||||
gregs->fs = sregs.fs.selector;
|
|
||||||
gregs->gs = sregs.gs.selector;
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int uhyve_gdb_write_registers(int vcpufd, uint8_t * registers, size_t len)
|
|
||||||
{
|
|
||||||
struct kvm_regs kregs;
|
|
||||||
struct kvm_sregs sregs;
|
|
||||||
struct uhyve_gdb_regs *gregs = (struct uhyve_gdb_regs *)registers;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
/* Let's read all registers just in case we miss filling one of them. */
|
|
||||||
kvm_ioctl(vcpufd, KVM_GET_REGS, &kregs);
|
|
||||||
kvm_ioctl(vcpufd, KVM_GET_SREGS, &sregs);
|
|
||||||
|
|
||||||
if (len < sizeof(struct uhyve_gdb_regs))
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
kregs.rax = gregs->rax;
|
|
||||||
kregs.rbx = gregs->rbx;
|
|
||||||
kregs.rcx = gregs->rcx;
|
|
||||||
kregs.rdx = gregs->rdx;
|
|
||||||
|
|
||||||
kregs.rsi = gregs->rsi;
|
|
||||||
kregs.rdi = gregs->rdi;
|
|
||||||
kregs.rbp = gregs->rbp;
|
|
||||||
kregs.rsp = gregs->rsp;
|
|
||||||
|
|
||||||
kregs.r8 = gregs->r8;
|
|
||||||
kregs.r9 = gregs->r9;
|
|
||||||
kregs.r10 = gregs->r10;
|
|
||||||
kregs.r11 = gregs->r11;
|
|
||||||
|
|
||||||
kregs.rip = gregs->rip;
|
|
||||||
kregs.rflags = gregs->eflags;
|
|
||||||
|
|
||||||
/* XXX: not sure if just setting .selector is enough. */
|
|
||||||
sregs.cs.selector = gregs->cs;
|
|
||||||
sregs.ss.selector = gregs->ss;
|
|
||||||
sregs.ds.selector = gregs->ds;
|
|
||||||
sregs.es.selector = gregs->es;
|
|
||||||
sregs.fs.selector = gregs->fs;
|
|
||||||
sregs.gs.selector = gregs->gs;
|
|
||||||
|
|
||||||
kvm_ioctl(vcpufd, KVM_SET_REGS, &kregs);
|
|
||||||
kvm_ioctl(vcpufd, KVM_SET_SREGS, &sregs);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Insert a breakpoint or watchpoint of the given type at guest address
 * `addr` covering `len` bytes and push the new debug state to KVM.
 * Inserting an already existing breakpoint is a no-op.
 *
 * Returns 0 on success, -1 when the request is invalid (unknown type, or a
 * hardware breakpoint length other than 1, 2, 4 or 8), when the breakpoint
 * cannot be recorded, or when the debug state cannot be updated.
 */
int uhyve_gdb_add_breakpoint(int vcpufd, gdb_breakpoint_type type,
			     uint64_t addr, size_t len)
{
	struct breakpoint_t *bp;

	/* The type comes from the debugger: report an error, do not abort. */
	if ((unsigned int)type >= GDB_BREAKPOINT_MAX)
		return -1;

	/*
	 * uhyve_gdb_update_guest_debug() uses the length of a hardware
	 * breakpoint as an index into a table that only defines entries for
	 * 1, 2, 4 and 8; any other value would select a wrong entry or index
	 * past the end of that table.
	 */
	if (type != GDB_BREAKPOINT_SW &&
	    len != 1 && len != 2 && len != 4 && len != 8)
		return -1;

	if (bp_list_find(type, addr, len))
		return 0;

	bp = bp_list_insert(type, addr, len);
	if (bp == NULL)
		return -1;

	if (type == GDB_BREAKPOINT_SW)
		kvm_arch_insert_sw_breakpoint(bp);

	if (uhyve_gdb_update_guest_debug(vcpufd) == -1)
		return -1;

	return 0;
}
|
|
||||||
|
|
||||||
/*
 * Remove a breakpoint or watchpoint and push the new debug state to KVM.
 * For a software breakpoint the original instruction byte is restored first.
 * Returns 0 on success, -1 if the breakpoint is unknown or the debug state
 * cannot be updated. Asserts that `type` is a valid breakpoint type.
 */
int uhyve_gdb_remove_breakpoint(int vcpufd, gdb_breakpoint_type type,
				uint64_t addr, size_t len)
{
	assert(type < GDB_BREAKPOINT_MAX);

	if (type == GDB_BREAKPOINT_SW) {
		struct breakpoint_t *planted = bp_list_find(type, addr, len);

		if (planted != NULL)
			kvm_arch_remove_sw_breakpoint(planted);
	}

	if (bp_list_remove(type, addr, len) == -1)
		return -1;

	return (uhyve_gdb_update_guest_debug(vcpufd) == -1) ? -1 : 0;
}
|
|
||||||
|
|
||||||
int uhyve_gdb_enable_ss(int vcpufd)
|
|
||||||
{
|
|
||||||
stepping = true;
|
|
||||||
|
|
||||||
if (uhyve_gdb_update_guest_debug(vcpufd) == -1)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int uhyve_gdb_disable_ss(int vcpufd)
|
|
||||||
{
|
|
||||||
stepping = false;
|
|
||||||
|
|
||||||
if (uhyve_gdb_update_guest_debug(vcpufd) == -1)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Convert a guest virtual address into the correspondign physical address */
|
|
||||||
int uhyve_gdb_guest_virt_to_phys(int vcpufd, const uint64_t virt, uint64_t * phys)
|
|
||||||
{
|
|
||||||
struct kvm_translation kt;
|
|
||||||
|
|
||||||
kt.linear_address = virt;
|
|
||||||
kvm_ioctl(vcpufd, KVM_TRANSLATE, &kt);
|
|
||||||
|
|
||||||
*phys = kt.physical_address;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -1,77 +0,0 @@
|
||||||
/*
|
|
||||||
* This file was adapted from the solo5/ukvm code base, initial copyright block
|
|
||||||
* follows:
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Copyright (c) 2015-2017 Contributors as noted in the AUTHORS file
|
|
||||||
*
|
|
||||||
* This file is part of ukvm, a unikernel monitor.
|
|
||||||
*
|
|
||||||
* Permission to use, copy, modify, and/or distribute this software
|
|
||||||
* for any purpose with or without fee is hereby granted, provided
|
|
||||||
* that the above copyright notice and this permission notice appear
|
|
||||||
* in all copies.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
|
|
||||||
* WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
|
|
||||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
|
|
||||||
* AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
|
|
||||||
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
|
||||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
|
||||||
* NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
|
||||||
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef UHYVE_GDB_X86_64_H
|
|
||||||
#define UHYVE_GDB_X86_64_H
|
|
||||||
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <inttypes.h>
|
|
||||||
|
|
||||||
/*
|
|
||||||
* X86_64
|
|
||||||
* XXX: Can't find any gdb include file with the list of registers per
|
|
||||||
* architecture (something like ia64_regs.h). The closest I can get is a
|
|
||||||
* list of the registers from gdb (debugging an ordinary x86_64 binary):
|
|
||||||
*
|
|
||||||
* (gdb) info registers
|
|
||||||
* rax 0x0 0
|
|
||||||
* rbx 0x0 0
|
|
||||||
* rcx 0x0 0
|
|
||||||
* ...
|
|
||||||
* fs 0x0 0
|
|
||||||
* gs 0x0 0
|
|
||||||
* (gdb)
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
 * Register block exchanged with the debugger by the 'g' and 'G' commands.
 * The member order and sizes define the byte layout of that exchange, so
 * they must not be changed.
 */
struct uhyve_gdb_regs {
	/* 64-bit general purpose registers */
	uint64_t rax, rbx, rcx, rdx;
	uint64_t rsi, rdi, rbp, rsp;
	uint64_t r8, r9, r10, r11;
	uint64_t r12, r13, r14, r15;
	uint64_t rip;

	/* flags and segment selectors, 32 bits each */
	uint32_t eflags;
	uint32_t cs, ss, ds, es, fs, gs;

	/* eight 10-byte slots named st */
	uint8_t st[8][10];
};
|
|
||||||
|
|
||||||
#endif /* UHYVE_GDB_X86_64_H */
|
|
|
@ -1,76 +0,0 @@
|
||||||
/*
|
|
||||||
* This file was adapted from the solo5/ukvm code base, initial copyright block
|
|
||||||
* follows:
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Copyright (c) 2015-2017 Contributors as noted in the AUTHORS file
|
|
||||||
*
|
|
||||||
* This file is part of ukvm, a unikernel monitor.
|
|
||||||
*
|
|
||||||
* Permission to use, copy, modify, and/or distribute this software
|
|
||||||
* for any purpose with or without fee is hereby granted, provided
|
|
||||||
* that the above copyright notice and this permission notice appear
|
|
||||||
* in all copies.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
|
|
||||||
* WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
|
|
||||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
|
|
||||||
* AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
|
|
||||||
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
|
||||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
|
||||||
* NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
|
||||||
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef UHYVE_GDB_H
|
|
||||||
#define UHYVE_GDB_H
|
|
||||||
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <inttypes.h>
|
|
||||||
|
|
||||||
/*
 * GDB breakpoint/watchpoint types.
 * Do not change these values: they have to match the GDB client side.
 */
typedef enum _gdb_breakpoint_type {
	GDB_BREAKPOINT_SW     = 0,
	GDB_BREAKPOINT_HW     = 1,
	GDB_WATCHPOINT_WRITE  = 2,
	GDB_WATCHPOINT_READ   = 3,
	GDB_WATCHPOINT_ACCESS = 4,
	GDB_BREAKPOINT_MAX    = 5	/* number of types, not a type itself */
} gdb_breakpoint_type;
|
|
||||||
|
|
||||||
#define GDB_SIGNAL_FIRST 0
|
|
||||||
#define GDB_SIGNAL_QUIT 3
|
|
||||||
#define GDB_SIGNAL_KILL 9
|
|
||||||
#define GDB_SIGNAL_TRAP 5
|
|
||||||
#define GDB_SIGNAL_SEGV 11
|
|
||||||
#define GDB_SIGNAL_TERM 15
|
|
||||||
#define GDB_SIGNAL_IO 23
|
|
||||||
#define GDB_SIGNAL_DEFAULT 144
|
|
||||||
|
|
||||||
/* prototypes */
|
|
||||||
int uhyve_gdb_enable_ss(int vcpufd);
|
|
||||||
int uhyve_gdb_disable_ss(int vcpufd);
|
|
||||||
int uhyve_gdb_read_registers(int vcpufd, uint8_t *reg, size_t *len);
|
|
||||||
int uhyve_gdb_write_registers(int vcpufd, uint8_t *reg, size_t len);
|
|
||||||
int uhyve_gdb_add_breakpoint(int vcpufd, gdb_breakpoint_type type,
|
|
||||||
uint64_t addr, size_t len);
|
|
||||||
int uhyve_gdb_remove_breakpoint(int vcpufd, gdb_breakpoint_type type,
|
|
||||||
uint64_t addr, size_t len);
|
|
||||||
int uhyve_gdb_guest_virt_to_phys(int vcpufd, const uint64_t virt,
|
|
||||||
uint64_t *phys);
|
|
||||||
|
|
||||||
/* interface with uhyve.c */
|
|
||||||
void uhyve_gdb_handle_exception(int vcpufd, int sigval);
|
|
||||||
void uhyve_gdb_handle_term(void);
|
|
||||||
int uhyve_gdb_init(int vcpufd);
|
|
||||||
|
|
||||||
#ifdef __x86_64__
|
|
||||||
#include "uhyve-gdb-x86_64.h"
|
|
||||||
#else
|
|
||||||
#include "uhyve-gdb-aarch64.h"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* UHYVE_GDB_H */
|
|
|
@ -1,873 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2018, Simon Pickartz, RWTH Aachen University
|
|
||||||
* All rights reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions are met:
|
|
||||||
* * Redistributions of source code must retain the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of the University nor the names of its contributors
|
|
||||||
* may be used to endorse or promote products derived from this
|
|
||||||
* software without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
||||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
||||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
||||||
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
|
||||||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
||||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
|
||||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define _GNU_SOURCE
|
|
||||||
|
|
||||||
#include <stdbool.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <arpa/inet.h>
|
|
||||||
#include <infiniband/verbs.h>
|
|
||||||
#include <errno.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <unistd.h>
|
|
||||||
|
|
||||||
|
|
||||||
#include "uhyve-migration.h"
|
|
||||||
#include "uhyve.h"
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef __RDMA_MIGRATION__
|
|
||||||
/* non-zero: register the memory regions with on-demand paging if the device
 * reports support for it */
#define IB_USE_ODP		(0)

/* completion queue / queue pair configuration */
#define IB_CQ_ENTRIES		(1)
#define IB_MAX_INLINE_DATA	(0)
#define IB_MAX_DEST_RD_ATOMIC	(1)
#define IB_MIN_RNR_TIMER	(1)

/* TODO: should be com_hndl.dev_attr_ex.orig_attr.max_qp_wr
 *       (fix for mlx_5 adapter) */
#define IB_MAX_SEND_WR		(8192)
#define IB_MAX_RECV_WR		(1)
#define IB_MAX_SEND_SGE		(1)
#define IB_MAX_RECV_SGE		(1)
|
|
||||||
|
|
||||||
/* reserved work request IDs; regular WRs get IDs above IB_WR_BASE_ID */
typedef enum ib_wr_ids {
	IB_WR_NO_ID              = 0,
	IB_WR_WRITE_LAST_PAGE_ID = 1,
	IB_WR_RECV_LAST_PAGE_ID  = 2,
	IB_WR_BASE_ID            = 3
} ib_wr_ids_t;

/* ID counter: incremented before each new send WR is tagged */
uint64_t cur_wr_id = IB_WR_BASE_ID;
|
|
||||||
|
|
||||||
/* connection parameters of one side of the QP; exchanged as raw bytes
 * between source and destination during the connection setup */
typedef struct qp_info {
	uint32_t qpn;	/* queue pair number */
	uint16_t lid;	/* local identifier of the used port */
	uint16_t psn;	/* packet sequence number
			 * NOTE(review): the value is generated with a 24 bit
			 * mask but stored in 16 bits - confirm this is intended */
	uint32_t *keys;	/* one remote key per memory region (heap-allocated) */
	uint64_t addr;	/* start address of the communication buffer */
} qp_info_t;
|
|
||||||
|
|
||||||
typedef struct com_hndl {
|
|
||||||
struct ibv_context *ctx; /* device context */
|
|
||||||
struct ibv_device_attr_ex dev_attr_ex; /* extended device attributes */
|
|
||||||
struct ibv_port_attr port_attr; /* port attributes */
|
|
||||||
struct ibv_pd *pd; /* protection domain */
|
|
||||||
struct ibv_mr **mrs; /* memory regions */
|
|
||||||
struct ibv_cq *cq; /* completion queue */
|
|
||||||
struct ibv_qp *qp; /* queue pair */
|
|
||||||
struct ibv_comp_channel *comp_chan; /* comp. event channel */
|
|
||||||
qp_info_t loc_qp_info;
|
|
||||||
qp_info_t rem_qp_info;
|
|
||||||
uint8_t used_port; /* port of the IB device */
|
|
||||||
uint8_t *buf; /* the guest memory (with potential gaps!) */
|
|
||||||
size_t mr_cnt; /* number of memory regions */
|
|
||||||
} com_hndl_t;
|
|
||||||
|
|
||||||
|
|
||||||
/* the one and only communication handle of this process */
static com_hndl_t com_hndl;

/* singly-linked list of send WRs: head, tail and number of elements
 * (file-scope statics are zero-initialized, i.e. NULL/NULL/0) */
static struct ibv_send_wr *send_list;
static struct ibv_send_wr *send_list_last;
static size_t send_list_length;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* \brief Prints info of a send_wr
|
|
||||||
*
|
|
||||||
* \param id the ID of the send_wr
|
|
||||||
*/
|
|
||||||
static inline
|
|
||||||
void print_send_wr_info(uint64_t id)
|
|
||||||
{
|
|
||||||
struct ibv_send_wr *search_wr = send_list;
|
|
||||||
|
|
||||||
/* find send_wr with id */
|
|
||||||
while(search_wr) {
|
|
||||||
if (search_wr->wr_id == id) {
|
|
||||||
fprintf(stderr, "[INFO] WR_ID: %llu; LADDR: 0x%llx; RADDR: 0x%llx; SIZE: %llu\n",
|
|
||||||
search_wr->wr_id,
|
|
||||||
search_wr->sg_list->addr,
|
|
||||||
search_wr->wr.rdma.remote_addr,
|
|
||||||
search_wr->sg_list->length);
|
|
||||||
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
search_wr = search_wr->next;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (search_wr == NULL) {
|
|
||||||
fprintf(stderr, "[ERROR] Could not find send_wr with ID %llu\n", id);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* \brief Initializes the IB communication structures
|
|
||||||
*
|
|
||||||
* \param com_hndl the structure containing all communication relevant infos
|
|
||||||
* \param buf the buffer that should be registrered with the QP
|
|
||||||
*
|
|
||||||
* This function sets up the IB communication channel. It registers the 'buf'
|
|
||||||
* with a new protection domain. On its termination there is a QP in the INIT
|
|
||||||
* state ready to be connected with the remote side.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
init_com_hndl(size_t mem_chunk_cnt, mem_chunk_t *mem_chunks)
|
|
||||||
{
|
|
||||||
/* initialize com_hndl */
|
|
||||||
memset(&com_hndl, 0, sizeof(com_hndl));
|
|
||||||
|
|
||||||
/* the guest physical memory is the communication buffer */
|
|
||||||
com_hndl.buf = guest_mem;
|
|
||||||
com_hndl.mr_cnt = mem_chunk_cnt;
|
|
||||||
|
|
||||||
struct ibv_device **device_list = NULL;
|
|
||||||
int num_devices = 0;
|
|
||||||
bool active_port_found = false;
|
|
||||||
|
|
||||||
/* determine first available device */
|
|
||||||
if ((device_list = ibv_get_device_list(&num_devices)) == NULL) {
|
|
||||||
fprintf(stderr,
|
|
||||||
"[ERROR] Could not determine available IB devices "
|
|
||||||
"- %d (%s). Abort!\n",
|
|
||||||
errno,
|
|
||||||
strerror(errno));
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* find device with active port */
|
|
||||||
size_t cur_dev = 0;
|
|
||||||
for (cur_dev=0; cur_dev<num_devices; ++cur_dev){
|
|
||||||
/* open the device context */
|
|
||||||
if ((com_hndl.ctx = ibv_open_device(device_list[cur_dev])) == NULL) {
|
|
||||||
fprintf(stderr,
|
|
||||||
"[ERROR] Could not open the device context "
|
|
||||||
"- %d (%s). Abort!\n",
|
|
||||||
errno,
|
|
||||||
strerror(errno));
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* query extended device capabilities (e.g., to check for ODP support */
|
|
||||||
struct ibv_query_device_ex_input device_ex_input;
|
|
||||||
if (ibv_query_device_ex(com_hndl.ctx, &device_ex_input, &com_hndl.dev_attr_ex) < 0) {
|
|
||||||
fprintf(stderr,
|
|
||||||
"[ERROR] Could not query extended device attributes "
|
|
||||||
"- %d (%s). Abort!\n",
|
|
||||||
errno,
|
|
||||||
strerror(errno));
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* determine port count via normal device query (necessary for mlx_5) */
|
|
||||||
if (ibv_query_device(com_hndl.ctx, &com_hndl.dev_attr_ex.orig_attr) < 0) {
|
|
||||||
fprintf(stderr,
|
|
||||||
"[ERROR] Could not query normal device attributes "
|
|
||||||
"- %d (%s). Abort!\n",
|
|
||||||
errno,
|
|
||||||
strerror(errno));
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/* check all ports */
|
|
||||||
size_t num_ports = com_hndl.dev_attr_ex.orig_attr.phys_port_cnt;
|
|
||||||
for (size_t cur_port=0; cur_port<=num_ports; ++cur_port) {
|
|
||||||
/* query current port */
|
|
||||||
if (ibv_query_port(com_hndl.ctx, cur_port, &com_hndl.port_attr) < 0){
|
|
||||||
fprintf(stderr,
|
|
||||||
"[ERROR] Could not query port %u "
|
|
||||||
"- %d (%s). Abort!\n",
|
|
||||||
cur_port,
|
|
||||||
errno,
|
|
||||||
strerror(errno));
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (com_hndl.port_attr.state == IBV_PORT_ACTIVE) {
|
|
||||||
active_port_found = 1;
|
|
||||||
com_hndl.used_port = cur_port;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* close this device if no active port was found */
|
|
||||||
if (!active_port_found) {
|
|
||||||
if (ibv_close_device(com_hndl.ctx) < 0) {
|
|
||||||
fprintf(stderr,
|
|
||||||
"[ERROR] Could not close the device context "
|
|
||||||
"- %d (%s). Abort!\n",
|
|
||||||
errno,
|
|
||||||
strerror(errno));
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!active_port_found) {
|
|
||||||
fprintf(stderr, "[ERROR] No active port found. Abort!\n");
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
fprintf(stderr, "[INFO] Using device '%s' and port %u\n",
|
|
||||||
ibv_get_device_name(device_list[cur_dev]),
|
|
||||||
com_hndl.used_port);
|
|
||||||
/* allocate protection domain */
|
|
||||||
if ((com_hndl.pd = ibv_alloc_pd(com_hndl.ctx)) == NULL) {
|
|
||||||
fprintf(stderr,
|
|
||||||
"[ERROR] Could not allocate protection domain "
|
|
||||||
"- %d (%s). Abort!\n",
|
|
||||||
errno,
|
|
||||||
strerror(errno));
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* register guest memory chunks with the protection domain */
|
|
||||||
int i = 0;
|
|
||||||
com_hndl.mrs = (struct ibv_mr**)malloc(sizeof(struct ibv_mr*)*com_hndl.mr_cnt);
|
|
||||||
|
|
||||||
int access_flags = (IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE);
|
|
||||||
if ((IB_USE_ODP) &&
|
|
||||||
(com_hndl.dev_attr_ex.odp_caps.general_caps & IBV_ODP_SUPPORT) &&
|
|
||||||
(com_hndl.dev_attr_ex.odp_caps.per_transport_caps.rc_odp_caps & IBV_ODP_SUPPORT_WRITE)) {
|
|
||||||
access_flags |= IBV_ACCESS_ON_DEMAND;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i=0; i<com_hndl.mr_cnt; ++i) {
|
|
||||||
if ((com_hndl.mrs[i] = ibv_reg_mr(com_hndl.pd,
|
|
||||||
mem_chunks[i].ptr,
|
|
||||||
mem_chunks[i].size,
|
|
||||||
access_flags)) == NULL) {
|
|
||||||
fprintf(stderr,
|
|
||||||
"[ERROR] Could not register the memory region #%d (ptr: %llx; size: %llu) "
|
|
||||||
"- %d (%s). Abort!\n",
|
|
||||||
i,
|
|
||||||
mem_chunks[i].ptr,
|
|
||||||
mem_chunks[i].size,
|
|
||||||
errno,
|
|
||||||
strerror(errno));
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
fprintf(stderr, "[INFO] com_hndl.mrs[%d]->addr = 0x%llx; com_hndl->mrs[%d].length = %llu\n",
|
|
||||||
i,
|
|
||||||
com_hndl.mrs[i]->addr,
|
|
||||||
i,
|
|
||||||
com_hndl.mrs[i]->length);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* create completion event channel */
|
|
||||||
if ((com_hndl.comp_chan =
|
|
||||||
ibv_create_comp_channel(com_hndl.ctx)) == NULL) {
|
|
||||||
fprintf(stderr,
|
|
||||||
"[ERROR] Could not create the completion channel "
|
|
||||||
"- %d (%s). Abort!\n",
|
|
||||||
errno,
|
|
||||||
strerror(errno));
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* create the completion queue */
|
|
||||||
if ((com_hndl.cq = ibv_create_cq(com_hndl.ctx,
|
|
||||||
IB_CQ_ENTRIES,
|
|
||||||
NULL,
|
|
||||||
com_hndl.comp_chan,
|
|
||||||
0)) == NULL) {
|
|
||||||
fprintf(stderr,
|
|
||||||
"[ERROR] Could not create the completion queue "
|
|
||||||
"- %d (%s). Abort!\n",
|
|
||||||
errno,
|
|
||||||
strerror(errno));
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* create send and recv queue pair and initialize it */
|
|
||||||
struct ibv_qp_init_attr init_attr = {
|
|
||||||
.send_cq = com_hndl.cq,
|
|
||||||
.recv_cq = com_hndl.cq,
|
|
||||||
.cap = {
|
|
||||||
.max_send_wr = IB_MAX_SEND_WR,
|
|
||||||
.max_recv_wr = IB_MAX_RECV_WR,
|
|
||||||
.max_send_sge = IB_MAX_SEND_SGE,
|
|
||||||
.max_recv_sge = IB_MAX_RECV_SGE,
|
|
||||||
.max_inline_data = IB_MAX_INLINE_DATA
|
|
||||||
},
|
|
||||||
.qp_type = IBV_QPT_RC,
|
|
||||||
.sq_sig_all = 0 /* we do not want a CQE for each WR */
|
|
||||||
};
|
|
||||||
if ((com_hndl.qp = ibv_create_qp(com_hndl.pd, &init_attr)) == NULL) {
|
|
||||||
fprintf(stderr,
|
|
||||||
"[ERROR] Could not create the queue pair "
|
|
||||||
"- %d (%s). Abort!\n",
|
|
||||||
errno,
|
|
||||||
strerror(errno));
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
struct ibv_qp_attr attr = {
|
|
||||||
.qp_state = IBV_QPS_INIT,
|
|
||||||
.pkey_index = 0,
|
|
||||||
.port_num = com_hndl.used_port,
|
|
||||||
.qp_access_flags = (IBV_ACCESS_REMOTE_WRITE)
|
|
||||||
};
|
|
||||||
if (ibv_modify_qp(com_hndl.qp,
|
|
||||||
&attr,
|
|
||||||
IBV_QP_STATE |
|
|
||||||
IBV_QP_PKEY_INDEX |
|
|
||||||
IBV_QP_PORT |
|
|
||||||
IBV_QP_ACCESS_FLAGS) < 0) {
|
|
||||||
fprintf(stderr,
|
|
||||||
"[ERROR] Could not set QP into init state "
|
|
||||||
"- %d (%s). Abort!\n",
|
|
||||||
errno,
|
|
||||||
strerror(errno));
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* fill in local qp_info */
|
|
||||||
com_hndl.loc_qp_info.qpn = com_hndl.qp->qp_num;
|
|
||||||
com_hndl.loc_qp_info.psn = lrand48() & 0xffffff;
|
|
||||||
com_hndl.loc_qp_info.addr = (uint64_t)com_hndl.buf;
|
|
||||||
com_hndl.loc_qp_info.lid = com_hndl.port_attr.lid;
|
|
||||||
|
|
||||||
com_hndl.loc_qp_info.keys = (uint32_t*)malloc(sizeof(uint32_t)*com_hndl.mr_cnt);
|
|
||||||
for (i=0; i<com_hndl.mr_cnt; ++i) {
|
|
||||||
com_hndl.loc_qp_info.keys[i] = com_hndl.mrs[i]->rkey;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* \brief Frees IB related resources
|
|
||||||
*
|
|
||||||
* \param com_hndl the structure containing all communication relevant infos
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
destroy_com_hndl(void)
|
|
||||||
{
|
|
||||||
if (ibv_destroy_qp(com_hndl.qp) < 0) {
|
|
||||||
fprintf(stderr,
|
|
||||||
"[ERROR] Could not destroy the queue pair "
|
|
||||||
"- %d (%s). Abort!\n",
|
|
||||||
errno,
|
|
||||||
strerror(errno));
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ibv_destroy_cq(com_hndl.cq) < 0) {
|
|
||||||
fprintf(stderr,
|
|
||||||
"[ERROR] Could not deallocate the protection domain "
|
|
||||||
"- %d (%s). Abort!\n",
|
|
||||||
errno,
|
|
||||||
strerror(errno));
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ibv_destroy_comp_channel(com_hndl.comp_chan) < 0) {
|
|
||||||
fprintf(stderr,
|
|
||||||
"[ERROR] Could not destroy the completion channel "
|
|
||||||
"- %d (%s). Abort!\n",
|
|
||||||
errno,
|
|
||||||
strerror(errno));
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
int i = 0;
|
|
||||||
for (i=0; i<com_hndl.mr_cnt; ++i) {
|
|
||||||
if (ibv_dereg_mr(com_hndl.mrs[i]) < 0) {
|
|
||||||
fprintf(stderr,
|
|
||||||
"[ERROR] Could not deregister MR #%d "
|
|
||||||
"- %d (%s). Abort!\n",
|
|
||||||
i,
|
|
||||||
errno,
|
|
||||||
strerror(errno));
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
if (ibv_dealloc_pd(com_hndl.pd) < 0) {
|
|
||||||
fprintf(stderr,
|
|
||||||
"[ERROR] Could not deallocate the protection domain "
|
|
||||||
"- %d (%s). Abort!\n",
|
|
||||||
errno,
|
|
||||||
strerror(errno));
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ibv_close_device(com_hndl.ctx) < 0) {
|
|
||||||
fprintf(stderr,
|
|
||||||
"[ERROR] Could not close the device context "
|
|
||||||
"- %d (%s). Abort!\n",
|
|
||||||
errno,
|
|
||||||
strerror(errno));
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* free dynamic data structures */
|
|
||||||
free(com_hndl.loc_qp_info.keys);
|
|
||||||
free(com_hndl.rem_qp_info.keys);
|
|
||||||
free(com_hndl.mrs);
|
|
||||||
|
|
||||||
com_hndl.loc_qp_info.keys = NULL;
|
|
||||||
com_hndl.rem_qp_info.keys = NULL;
|
|
||||||
com_hndl.mrs = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* \brief Connects the QP created within init_com_hndl
|
|
||||||
*
|
|
||||||
* \param com_hndl the structure containing all communication relevant infos
|
|
||||||
*
|
|
||||||
* This function performs the actual connection setup between the two QPs.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
con_com_buf(void) {
|
|
||||||
/* transistion to ready-to-receive state */
|
|
||||||
struct ibv_qp_attr qp_attr = {
|
|
||||||
.qp_state = IBV_QPS_RTR,
|
|
||||||
.path_mtu = IBV_MTU_2048,
|
|
||||||
.dest_qp_num = com_hndl.rem_qp_info.qpn,
|
|
||||||
.rq_psn = com_hndl.rem_qp_info.psn,
|
|
||||||
.max_dest_rd_atomic = IB_MAX_DEST_RD_ATOMIC,
|
|
||||||
.min_rnr_timer = IB_MIN_RNR_TIMER,
|
|
||||||
.ah_attr = {
|
|
||||||
.is_global = 0,
|
|
||||||
.sl = 0,
|
|
||||||
.src_path_bits = 0,
|
|
||||||
.dlid = com_hndl.rem_qp_info.lid,
|
|
||||||
.port_num = com_hndl.used_port,
|
|
||||||
}
|
|
||||||
};
|
|
||||||
if (ibv_modify_qp(com_hndl.qp,
|
|
||||||
&qp_attr,
|
|
||||||
IBV_QP_STATE |
|
|
||||||
IBV_QP_PATH_MTU |
|
|
||||||
IBV_QP_DEST_QPN |
|
|
||||||
IBV_QP_RQ_PSN |
|
|
||||||
IBV_QP_MAX_DEST_RD_ATOMIC |
|
|
||||||
IBV_QP_MIN_RNR_TIMER |
|
|
||||||
IBV_QP_AV)) {
|
|
||||||
fprintf(stderr,
|
|
||||||
"[ERROR] Could not put QP into RTR state"
|
|
||||||
"- %d (%s). Abort!\n",
|
|
||||||
errno,
|
|
||||||
strerror(errno));
|
|
||||||
exit(errno);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* transistion to ready-to-send state */
|
|
||||||
qp_attr.qp_state = IBV_QPS_RTS;
|
|
||||||
qp_attr.timeout = 14;
|
|
||||||
qp_attr.retry_cnt = 7;
|
|
||||||
qp_attr.rnr_retry = 7; /* infinite retrys on RNR NACK */
|
|
||||||
qp_attr.sq_psn = com_hndl.loc_qp_info.psn;
|
|
||||||
qp_attr.max_rd_atomic = 1;
|
|
||||||
if (ibv_modify_qp(com_hndl.qp, &qp_attr,
|
|
||||||
IBV_QP_STATE |
|
|
||||||
IBV_QP_TIMEOUT |
|
|
||||||
IBV_QP_RETRY_CNT |
|
|
||||||
IBV_QP_RNR_RETRY |
|
|
||||||
IBV_QP_SQ_PSN |
|
|
||||||
IBV_QP_MAX_QP_RD_ATOMIC)) {
|
|
||||||
fprintf(stderr,
|
|
||||||
"[ERROR] Could not put QP into RTS state"
|
|
||||||
"- %d (%s). Abort!\n",
|
|
||||||
errno,
|
|
||||||
strerror(errno));
|
|
||||||
exit(errno);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* \brief Set the destination node for a migration
|
|
||||||
*
|
|
||||||
* \param ip_str a string containing the IPv4 addr of the destination
|
|
||||||
* \param port the migration port
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
exchange_qp_info(bool server)
|
|
||||||
{
|
|
||||||
size_t keys_size = sizeof(uint32_t)*com_hndl.mr_cnt;
|
|
||||||
|
|
||||||
int res = 0;
|
|
||||||
if (server) {
|
|
||||||
/* general QP info */
|
|
||||||
res = recv_data(&com_hndl.rem_qp_info, sizeof(qp_info_t));
|
|
||||||
res = send_data(&com_hndl.loc_qp_info, sizeof(qp_info_t));
|
|
||||||
|
|
||||||
/* remote keys */
|
|
||||||
com_hndl.rem_qp_info.keys = (uint32_t*)malloc(keys_size);
|
|
||||||
res = recv_data(com_hndl.rem_qp_info.keys, keys_size);
|
|
||||||
res = send_data(com_hndl.loc_qp_info.keys, keys_size);
|
|
||||||
} else {
|
|
||||||
/* general QP info */
|
|
||||||
res = send_data(&com_hndl.loc_qp_info, sizeof(qp_info_t));
|
|
||||||
res = recv_data(&com_hndl.rem_qp_info, sizeof(qp_info_t));
|
|
||||||
|
|
||||||
/* remote keys */
|
|
||||||
com_hndl.rem_qp_info.keys = (uint32_t*)malloc(keys_size);
|
|
||||||
res = send_data(com_hndl.loc_qp_info.keys, keys_size);
|
|
||||||
res = recv_data(com_hndl.rem_qp_info.keys, keys_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
fprintf(stderr, "[INFO] loc_qp_info (QPN: %lu; LID: %lu; PSN: %lu; ADDR: 0x%x ",
|
|
||||||
com_hndl.loc_qp_info.qpn,
|
|
||||||
com_hndl.loc_qp_info.lid,
|
|
||||||
com_hndl.loc_qp_info.psn,
|
|
||||||
com_hndl.loc_qp_info.addr);
|
|
||||||
int i = 0;
|
|
||||||
for (i=0; i<com_hndl.mr_cnt; ++i) {
|
|
||||||
fprintf(stderr, "KEY[%d]: %lu; ", i, com_hndl.loc_qp_info.keys[i]);
|
|
||||||
}
|
|
||||||
printf("\b\b)\n");
|
|
||||||
|
|
||||||
fprintf(stderr, "[INFO] rem_qp_info (QPN: %lu; LID: %lu; PSN: %lu; ADDR: 0x%x ",
|
|
||||||
com_hndl.rem_qp_info.qpn,
|
|
||||||
com_hndl.rem_qp_info.lid,
|
|
||||||
com_hndl.rem_qp_info.psn,
|
|
||||||
com_hndl.rem_qp_info.addr);
|
|
||||||
for (i=0; i<com_hndl.mr_cnt; ++i) {
|
|
||||||
fprintf(stderr, "KEY[%d]: %lu; ", i, com_hndl.rem_qp_info.keys[i]);
|
|
||||||
}
|
|
||||||
printf("\b\b)\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* \brief Prepares the an 'ibv_send_wr'
|
|
||||||
*
|
|
||||||
* This function prepares an 'ibv_send_wr' structure that is prepared for the
|
|
||||||
* transmission of a single memory page using the IBV_WR_RDMA_WRITE verb.
|
|
||||||
*/
|
|
||||||
static inline struct ibv_send_wr *
|
|
||||||
prepare_send_list_elem(void)
|
|
||||||
{
|
|
||||||
/* create work request */
|
|
||||||
struct ibv_send_wr *send_wr = (struct ibv_send_wr*)calloc(1, sizeof(struct ibv_send_wr));
|
|
||||||
struct ibv_sge *sge = (struct ibv_sge*)calloc(1, sizeof(struct ibv_sge));
|
|
||||||
|
|
||||||
/* basic work request configuration */
|
|
||||||
send_wr->next = NULL;
|
|
||||||
send_wr->sg_list = sge;
|
|
||||||
send_wr->num_sge = 1;
|
|
||||||
send_wr->wr_id = ++cur_wr_id;
|
|
||||||
send_wr->opcode = IBV_WR_RDMA_WRITE;
|
|
||||||
|
|
||||||
return send_wr;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* \brief Creates an 'ibv_send_wr' and appends it to the send_list
|
|
||||||
*
|
|
||||||
* \param addr the page table entry of the memory page
|
|
||||||
* \param addr_size the size of the page table entry
|
|
||||||
* \param page the buffer to be send in this WR
|
|
||||||
* \param page_size the size of the buffer
|
|
||||||
*
|
|
||||||
* This function creates an 'ibv_send_wr' structure and appends this to the
|
|
||||||
* global send_list. It sets the source/destination information and sets the
|
|
||||||
* IBV_SEND_SIGNALED flag as appropriate.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
create_send_list_entry (void *addr, size_t addr_size, void *page, size_t page_size)
|
|
||||||
{
|
|
||||||
/* create work request */
|
|
||||||
struct ibv_send_wr *send_wr = prepare_send_list_elem();
|
|
||||||
|
|
||||||
/* configure source buffer */
|
|
||||||
int i = 0;
|
|
||||||
for (i=0; i<com_hndl.mr_cnt; ++i) {
|
|
||||||
if (((uint64_t)page >= (uint64_t)com_hndl.mrs[i]->addr) &&
|
|
||||||
((uint64_t)page < ((uint64_t)com_hndl.mrs[i]->addr + (uint64_t)com_hndl.mrs[i]->length))) {
|
|
||||||
send_wr->sg_list->addr = (uintptr_t)page;
|
|
||||||
send_wr->sg_list->length = page_size;
|
|
||||||
send_wr->sg_list->lkey = com_hndl.mrs[i]->lkey;
|
|
||||||
|
|
||||||
send_wr->wr.rdma.rkey = com_hndl.rem_qp_info.keys[i];
|
|
||||||
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* did we find the correct memory region? */
|
|
||||||
if (i == com_hndl.mr_cnt) {
|
|
||||||
fprintf(stderr, "[ERROR] Could not find a valid MR for address 0x%llx!\n", page);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* configure destination buffer */
|
|
||||||
if (addr) {
|
|
||||||
send_wr->wr.rdma.remote_addr = com_hndl.rem_qp_info.addr + determine_dest_offset(*(size_t*)addr);
|
|
||||||
} else {
|
|
||||||
send_wr->wr.rdma.remote_addr = com_hndl.rem_qp_info.addr;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* apped work request to send list */
|
|
||||||
if (send_list == NULL) {
|
|
||||||
send_list = send_list_last = send_wr;
|
|
||||||
} else {
|
|
||||||
send_list_last->next = send_wr;
|
|
||||||
send_list_last = send_list_last->next;
|
|
||||||
}
|
|
||||||
/* we have to request a CQE if max_send_wr is reached to avoid overflows */
|
|
||||||
if ((++send_list_length%com_hndl.dev_attr_ex.orig_attr.max_qp_wr) == 0) {
|
|
||||||
send_list_last->send_flags = IBV_SEND_SIGNALED;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* \brief Prepares a send_list containing all memory defined by com_hndl.mrs
|
|
||||||
*
|
|
||||||
* This function creates as many send_wr items as required to cover all
|
|
||||||
* com_hndl.mrs in accordance with the maximum message size that can be
|
|
||||||
* transmitted per send_sr (com_hndl.port_attr.max_msg_sz).
|
|
||||||
*/
|
|
||||||
static inline
|
|
||||||
void enqueue_all_mrs(void)
|
|
||||||
{
|
|
||||||
uint64_t max_msg_sz = com_hndl.port_attr.max_msg_sz;
|
|
||||||
int i = 0;
|
|
||||||
|
|
||||||
/* send all MRs */
|
|
||||||
for (i=0; i<com_hndl.mr_cnt; ++i) {
|
|
||||||
uint64_t cur_mr_length = com_hndl.mrs[i]->length;
|
|
||||||
|
|
||||||
/* split the MR if it exceed the max_msg_sz */
|
|
||||||
size_t cur_chunk = 0, max_chunks = cur_mr_length/max_msg_sz;
|
|
||||||
for (cur_chunk; cur_chunk < max_chunks; ++cur_chunk) {
|
|
||||||
size_t cur_offset = cur_chunk*max_msg_sz;
|
|
||||||
size_t cur_glob_offset = cur_offset + (uint64_t)com_hndl.mrs[i]->addr - (uint64_t)guest_mem;
|
|
||||||
create_send_list_entry((void*)&cur_glob_offset, 0, (void*)((uint64_t)com_hndl.mrs[i]->addr+cur_offset), max_msg_sz);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* do we have a remainder? */
|
|
||||||
uint64_t remainder = cur_mr_length%max_msg_sz;
|
|
||||||
if (remainder) {
|
|
||||||
size_t cur_offset = cur_mr_length-remainder;
|
|
||||||
size_t cur_glob_offset = cur_offset + (uint64_t)com_hndl.mrs[i]->addr - (uint64_t)guest_mem;
|
|
||||||
create_send_list_entry((void*)&cur_glob_offset, 0, (void*)((uint64_t)com_hndl.mrs[i]->addr+cur_offset), remainder);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* \brief Sends the guest memory to the destination
|
|
||||||
*
|
|
||||||
* \param mode MIG_MODE_COMPLETE_DUMP sends the complete memory and
|
|
||||||
* MIG_MODE_INCREMENTAL_DUMP only the mapped guest pages
|
|
||||||
*/
|
|
||||||
void send_guest_mem(mig_mode_t mode, bool final_dump, size_t mem_chunk_cnt, mem_chunk_t *mem_chunks)
|
|
||||||
{
|
|
||||||
int res = 0, i = 0;
|
|
||||||
static bool ib_initialized = false;
|
|
||||||
|
|
||||||
/* prepare IB channel */
|
|
||||||
if (!ib_initialized) {
|
|
||||||
init_com_hndl(mem_chunk_cnt, mem_chunks);
|
|
||||||
exchange_qp_info(false);
|
|
||||||
con_com_buf();
|
|
||||||
|
|
||||||
ib_initialized = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* determine migration mode */
|
|
||||||
switch (mode) {
|
|
||||||
case MIG_MODE_COMPLETE_DUMP:
|
|
||||||
enqueue_all_mrs();
|
|
||||||
break;
|
|
||||||
case MIG_MODE_INCREMENTAL_DUMP:
|
|
||||||
/* iterate guest page tables */
|
|
||||||
determine_dirty_pages(create_send_list_entry);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
fprintf(stderr, "[ERROR] Unknown migration mode. Abort!\n");
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* create a dumy WR request if there is nothing to send */
|
|
||||||
if (send_list_length == 0)
|
|
||||||
create_send_list_entry(NULL, 0, NULL, 0);
|
|
||||||
|
|
||||||
/* we have to wait for the last WR before informing dest */
|
|
||||||
if ((mode == MIG_MODE_COMPLETE_DUMP) || final_dump) {
|
|
||||||
send_list_last->wr_id = IB_WR_WRITE_LAST_PAGE_ID;
|
|
||||||
send_list_last->opcode = IBV_WR_RDMA_WRITE_WITH_IMM;
|
|
||||||
send_list_last->send_flags = IBV_SEND_SIGNALED | IBV_SEND_SOLICITED;
|
|
||||||
send_list_last->imm_data = htonl(0x1);
|
|
||||||
} else {
|
|
||||||
send_list_last->wr_id = IB_WR_WRITE_LAST_PAGE_ID;
|
|
||||||
send_list_last->send_flags = IBV_SEND_SIGNALED;
|
|
||||||
}
|
|
||||||
|
|
||||||
printf("DEBUG: Send list length %d\n", send_list_length);
|
|
||||||
|
|
||||||
/* we have to call ibv_post_send() as long as 'send_list' contains elements */
|
|
||||||
struct ibv_wc wc;
|
|
||||||
struct ibv_send_wr *remaining_send_wr = NULL;
|
|
||||||
do {
|
|
||||||
/* send data */
|
|
||||||
remaining_send_wr = NULL;
|
|
||||||
if (ibv_post_send(com_hndl.qp, send_list, &remaining_send_wr) && (errno != ENOMEM)) {
|
|
||||||
fprintf(stderr,
|
|
||||||
"[ERROR] Could not post send"
|
|
||||||
"- %d (%s). Abort!\n",
|
|
||||||
errno,
|
|
||||||
strerror(errno));
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* wait for send WRs if CQ is full */
|
|
||||||
do {
|
|
||||||
if ((res = ibv_poll_cq(com_hndl.cq, 1, &wc)) < 0) {
|
|
||||||
fprintf(stderr,
|
|
||||||
"[ERROR] Could not poll on CQ"
|
|
||||||
"- %d (%s). Abort!\n",
|
|
||||||
errno,
|
|
||||||
strerror(errno));
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
} while (res < 1);
|
|
||||||
if (wc.status != IBV_WC_SUCCESS) {
|
|
||||||
fprintf(stderr,
|
|
||||||
"[ERROR] WR failed status %s (%d) for wr_id %llu\n",
|
|
||||||
ibv_wc_status_str(wc.status),
|
|
||||||
wc.status,
|
|
||||||
wc.wr_id);
|
|
||||||
|
|
||||||
print_send_wr_info(wc.wr_id);
|
|
||||||
}
|
|
||||||
send_list = remaining_send_wr;
|
|
||||||
} while (remaining_send_wr);
|
|
||||||
|
|
||||||
|
|
||||||
/* ensure that we receive the CQE for the last page */
|
|
||||||
if (wc.wr_id != IB_WR_WRITE_LAST_PAGE_ID) {
|
|
||||||
fprintf(stderr,
|
|
||||||
"[ERROR] WR failed status %s (%d) for wr_id %d\n",
|
|
||||||
ibv_wc_status_str(wc.status),
|
|
||||||
wc.status,
|
|
||||||
(int)wc.wr_id);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* cleanup send_list */
|
|
||||||
struct ibv_send_wr *cur_send_wr = send_list;
|
|
||||||
struct ibv_send_wr *tmp_send_wr = NULL;
|
|
||||||
while (cur_send_wr != NULL) {
|
|
||||||
free(cur_send_wr->sg_list);
|
|
||||||
tmp_send_wr = cur_send_wr;
|
|
||||||
cur_send_wr = cur_send_wr->next;
|
|
||||||
free(tmp_send_wr);
|
|
||||||
}
|
|
||||||
send_list_length = 0;
|
|
||||||
|
|
||||||
/* do not close the channel in a pre-dump */
|
|
||||||
if (!final_dump)
|
|
||||||
return;
|
|
||||||
|
|
||||||
/* free IB-related resources */
|
|
||||||
destroy_com_hndl();
|
|
||||||
ib_initialized = false;
|
|
||||||
|
|
||||||
fprintf(stderr, "Guest memory sent!\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* \brief Receives the guest memory from the source
|
|
||||||
*
|
|
||||||
* The receive participates in the IB connection setup and waits for the
|
|
||||||
* 'solicited' event sent with the last WR issued by the sender.
|
|
||||||
*/
|
|
||||||
void recv_guest_mem(size_t mem_chunk_cnt, mem_chunk_t *mem_chunks)
|
|
||||||
{
|
|
||||||
int res = 0;
|
|
||||||
|
|
||||||
/* prepare IB channel */
|
|
||||||
init_com_hndl(mem_chunk_cnt, mem_chunks);
|
|
||||||
exchange_qp_info(true);
|
|
||||||
con_com_buf();
|
|
||||||
|
|
||||||
/* request notification on the event channel */
|
|
||||||
if (ibv_req_notify_cq(com_hndl.cq, 1) < 0) {
|
|
||||||
fprintf(stderr,
|
|
||||||
"[ERROR] Could request notify for completion queue "
|
|
||||||
"- %d (%s). Abort!\n",
|
|
||||||
errno,
|
|
||||||
strerror(errno));
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* post recv matching IBV_RDMA_WRITE_WITH_IMM */
|
|
||||||
struct ibv_cq *ev_cq;
|
|
||||||
void *ev_ctx;
|
|
||||||
struct ibv_sge sg;
|
|
||||||
struct ibv_recv_wr recv_wr;
|
|
||||||
struct ibv_recv_wr *bad_wr;
|
|
||||||
uint32_t recv_buf = 0;
|
|
||||||
|
|
||||||
memset(&sg, 0, sizeof(sg));
|
|
||||||
sg.addr = (uintptr_t)&recv_buf;
|
|
||||||
sg.length = sizeof(recv_buf);
|
|
||||||
sg.lkey = com_hndl.mrs[0]->lkey;
|
|
||||||
|
|
||||||
memset(&recv_wr, 0, sizeof(recv_wr));
|
|
||||||
recv_wr.wr_id = 0;
|
|
||||||
recv_wr.sg_list = &sg;
|
|
||||||
recv_wr.num_sge = 1;
|
|
||||||
|
|
||||||
if (ibv_post_recv(com_hndl.qp, &recv_wr, &bad_wr) < 0) {
|
|
||||||
fprintf(stderr,
|
|
||||||
"[ERROR] Could post recv - %d (%s). Abort!\n",
|
|
||||||
errno,
|
|
||||||
strerror(errno));
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* wait for requested event */
|
|
||||||
if (ibv_get_cq_event(com_hndl.comp_chan, &ev_cq, &ev_ctx) < 0) {
|
|
||||||
fprintf(stderr,
|
|
||||||
"[ERROR] Could get event from completion channel "
|
|
||||||
"- %d (%s). Abort!\n",
|
|
||||||
errno,
|
|
||||||
strerror(errno));
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* acknowledge the event */
|
|
||||||
ibv_ack_cq_events(com_hndl.cq, 1);
|
|
||||||
|
|
||||||
/* free IB-related resources */
|
|
||||||
destroy_com_hndl();
|
|
||||||
|
|
||||||
fprintf(stderr, "Guest memory received!\n");
|
|
||||||
}
|
|
||||||
#endif /* __RDMA_MIGRATION__ */
|
|
|
@ -1,277 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2018, Simon Pickartz, RWTH Aachen University
|
|
||||||
* All rights reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions are met:
|
|
||||||
* * Redistributions of source code must retain the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of the University nor the names of its contributors
|
|
||||||
* may be used to endorse or promote products derived from this
|
|
||||||
* software without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
||||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
||||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
||||||
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
|
||||||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
||||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
|
||||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifdef __x86_64__
|
|
||||||
#define _GNU_SOURCE
|
|
||||||
|
|
||||||
#include <arpa/inet.h>
|
|
||||||
#include <errno.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <unistd.h>
|
|
||||||
|
|
||||||
#include "uhyve-migration.h"
|
|
||||||
#include "uhyve.h"
|
|
||||||
|
|
||||||
static struct sockaddr_in mig_server;
|
|
||||||
static int com_sock = 0;
|
|
||||||
static int listen_sock = 0;
|
|
||||||
|
|
||||||
static mig_type_t mig_type = MIG_TYPE_COLD;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* \brief Returns the configured migration type
|
|
||||||
*/
|
|
||||||
mig_type_t
|
|
||||||
get_migration_type(void)
|
|
||||||
{
|
|
||||||
return mig_type;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* \brief Sets the migration type
|
|
||||||
*
|
|
||||||
* \param mig_type_str A string defining the migration type
|
|
||||||
*/
|
|
||||||
void
|
|
||||||
set_migration_type(const char *mig_type_str)
|
|
||||||
{
|
|
||||||
if (mig_type_str == NULL)
|
|
||||||
return;
|
|
||||||
|
|
||||||
int i;
|
|
||||||
bool found_type = false;
|
|
||||||
for (i=0; i<sizeof(mig_type_conv)/sizeof(mig_type_conv[0]); ++i) {
|
|
||||||
if (!strcmp (mig_type_str, mig_type_conv[i].str)) {
|
|
||||||
mig_type = mig_type_conv[i].mig_type;
|
|
||||||
found_type = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* we do not know this migration type */
|
|
||||||
if (!found_type) {
|
|
||||||
fprintf(stderr, "ERROR: Migration type '%s' not supported. Fallback to 'cold'\n", mig_type_str);
|
|
||||||
}
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * \brief Closes a socket and terminates the process if that fails
 *
 * \param sock the socket to be closed
 */
static inline void
close_sock(int sock)
{
	/* nothing else to do when close() succeeds */
	if (close(sock) >= 0)
		return;

	fprintf(stderr,
		"ERROR: Could not close the communication socket "
		"- %d (%s). Abort!\n",
		errno,
		strerror(errno));
	exit(EXIT_FAILURE);
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* \brief Set the destination node for a migration
|
|
||||||
*
|
|
||||||
* \param ip_str a string containing the IPv4 addr of the destination
|
|
||||||
* \param port the migration port
|
|
||||||
*/
|
|
||||||
void set_migration_target(const char *ip_str, int port)
|
|
||||||
{
|
|
||||||
/* determine server address */
|
|
||||||
memset(&mig_server, '0', sizeof(mig_server));
|
|
||||||
mig_server.sin_family = AF_INET;
|
|
||||||
mig_server.sin_port = htons(port);
|
|
||||||
|
|
||||||
int res = inet_pton(AF_INET, ip_str, &mig_server.sin_addr);
|
|
||||||
if (res == 0) {
|
|
||||||
fprintf(stderr, "'%s' is not a valid server address\n", ip_str);
|
|
||||||
} else if (res < 0) {
|
|
||||||
fprintf(stderr, "An error occured while retrieving the migration server address\n");
|
|
||||||
perror("inet_pton");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* \brief Connects to a migration target via TCP/IP
|
|
||||||
*/
|
|
||||||
void connect_to_server(void)
|
|
||||||
{
|
|
||||||
int res = 0;
|
|
||||||
char buf[INET_ADDRSTRLEN];
|
|
||||||
if (inet_ntop(AF_INET, (const void*)&mig_server.sin_addr, buf, INET_ADDRSTRLEN) == NULL) {
|
|
||||||
perror("inet_ntop");
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
if((com_sock = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
|
|
||||||
perror("socket");
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
fprintf(stderr, "Trying to connect to migration server: %s\n", buf);
|
|
||||||
if (connect(com_sock, (struct sockaddr *)&mig_server, sizeof(mig_server)) < 0) {
|
|
||||||
perror("connect");
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
fprintf(stderr, "Successfully connected to: %s\n", buf);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* \brief Waits for a migration source to connect via TCP/IP
|
|
||||||
*
|
|
||||||
* \param listen_portno the port of the migration socket
|
|
||||||
*/
|
|
||||||
void wait_for_client(uint16_t listen_portno)
|
|
||||||
{
|
|
||||||
int client_addr_len = 0, res = 0;
|
|
||||||
struct sockaddr_in serv_addr;
|
|
||||||
struct sockaddr_in client_addr;
|
|
||||||
|
|
||||||
/* open migration socket */
|
|
||||||
fprintf(stderr, "Waiting for incomming migration request ...\n");
|
|
||||||
listen_sock = socket(AF_INET, SOCK_STREAM, 0);
|
|
||||||
memset(&serv_addr, '0', sizeof(serv_addr));
|
|
||||||
|
|
||||||
serv_addr.sin_family = AF_INET;
|
|
||||||
serv_addr.sin_addr.s_addr = htonl(INADDR_ANY);
|
|
||||||
serv_addr.sin_port = htons(listen_portno);
|
|
||||||
|
|
||||||
bind(listen_sock, (struct sockaddr*)&serv_addr, sizeof(serv_addr));
|
|
||||||
|
|
||||||
listen(listen_sock, 10);
|
|
||||||
|
|
||||||
client_addr_len = sizeof(struct sockaddr_in);
|
|
||||||
if ((com_sock = accept(listen_sock, &client_addr, &client_addr_len)) < 0) {
|
|
||||||
perror("accept");
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
char buf[INET_ADDRSTRLEN];
|
|
||||||
if (inet_ntop(AF_INET, (const void*)&client_addr.sin_addr, buf, INET_ADDRSTRLEN) == NULL) {
|
|
||||||
perror("inet_ntop");
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
fprintf(stderr, "Incomming migration from: %s\n", buf);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* \brief Receives data from the migration socket
|
|
||||||
*
|
|
||||||
* \param buffer the destination buffer
|
|
||||||
* \param length the buffer size
|
|
||||||
*/
|
|
||||||
int recv_data(void *buffer, size_t length)
|
|
||||||
{
|
|
||||||
size_t bytes_received = 0;
|
|
||||||
while(bytes_received < length) {
|
|
||||||
bytes_received += recv(
|
|
||||||
com_sock,
|
|
||||||
(void*)((uint64_t)buffer+bytes_received),
|
|
||||||
length-bytes_received,
|
|
||||||
0);
|
|
||||||
}
|
|
||||||
|
|
||||||
return bytes_received;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* \brief Sends data via the migration socket
|
|
||||||
*
|
|
||||||
* \param buffer the source buffer
|
|
||||||
* \param length the buffer size
|
|
||||||
*/
|
|
||||||
int send_data(void *buffer, size_t length)
|
|
||||||
{
|
|
||||||
size_t bytes_sent = 0;
|
|
||||||
while(bytes_sent < length) {
|
|
||||||
bytes_sent += send(
|
|
||||||
com_sock,
|
|
||||||
(void*)((uint64_t)buffer+bytes_sent),
|
|
||||||
length-bytes_sent,
|
|
||||||
0);
|
|
||||||
}
|
|
||||||
|
|
||||||
return bytes_sent;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* \brief Closes the TCP connection
|
|
||||||
*/
|
|
||||||
void close_migration_channel(void)
|
|
||||||
{
|
|
||||||
if (listen_sock) {
|
|
||||||
close_sock(listen_sock);
|
|
||||||
}
|
|
||||||
close_sock(com_sock);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
#ifndef __RDMA_MIGRATION__
|
|
||||||
/**
 * \brief Sends the guest memory via the TCP migration socket
 *
 * This variant always transfers the whole guest memory; final_dump,
 * mem_chunk_cnt and mem_chunks are not evaluated here.
 *
 * \param mode the requested dump mode; an incremental dump is downgraded to
 *             a complete dump, any other unknown mode aborts the process
 */
void send_guest_mem(mig_mode_t mode, bool final_dump, size_t mem_chunk_cnt, mem_chunk_t *mem_chunks)
{
	/* determine migration mode */
	if (mode == MIG_MODE_INCREMENTAL_DUMP) {
		/* only warn, then continue with the complete dump below */
		fprintf(stderr, "ERROR: Incremental dumps currently not supported via TCP/IP. Fallback to complete dump!\n");
	} else if (mode != MIG_MODE_COMPLETE_DUMP) {
		fprintf(stderr, "ERROR: Unknown migration mode. Abort!\n");
		exit(EXIT_FAILURE);
	}

	send_data(guest_mem, guest_size);

	fprintf(stderr, "Guest memory sent!\n");
}
|
|
||||||
|
|
||||||
/**
 * \brief Receives the guest memory via the TCP migration socket
 *
 * Reads guest_size bytes into guest_mem; both parameters are unused in this
 * variant.
 */
void recv_guest_mem(size_t mem_chunk_cnt, mem_chunk_t *mem_chunks)
{
	(void)mem_chunk_cnt;
	(void)mem_chunks;

	recv_data(guest_mem, guest_size);

	fprintf(stderr, "Guest memory received!\n");
}
|
|
||||||
#endif /* __RDMA_MIGRATION__ */
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
/* dummy implementation for aarch64 */
|
|
||||||
|
|
||||||
/* Stub used when __x86_64__ is not defined: intentionally does nothing. */
void set_migration_target(const char *ip_str, int port)
{
	(void)ip_str;
	(void)port;
}
|
|
||||||
|
|
||||||
/* Stub used when __x86_64__ is not defined: intentionally does nothing. */
void set_migration_type(const char *mig_type_str)
{
	(void)mig_type_str;
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
|
@ -1,91 +0,0 @@
|
||||||
#ifndef __UHYVE_MIGRATION_H__
|
|
||||||
/*
|
|
||||||
* Copyright (c) 2018, Simon Pickartz, RWTH Aachen University
|
|
||||||
* All rights reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions are met:
|
|
||||||
* * Redistributions of source code must retain the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of the University nor the names of its contributors
|
|
||||||
* may be used to endorse or promote products derived from this
|
|
||||||
* software without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
||||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
||||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
||||||
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
|
||||||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
||||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
|
||||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @author Simon Pickartz
|
|
||||||
* @file tools/uhyve-migration.h
|
|
||||||
* @brief Migration-related functions
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define __UHYVE_MIGRATION_H__
|
|
||||||
|
|
||||||
/* size_t and the fixed-width integer types are used below */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* guest memory region; defined outside of this header */
extern size_t guest_size;
extern uint8_t* guest_mem;

#define MIGRATION_PORT 1337

/** Dump modes accepted by send_guest_mem() */
typedef enum {
	MIG_MODE_COMPLETE_DUMP = 1,
	MIG_MODE_INCREMENTAL_DUMP,
} mig_mode_t;

/** Migration types selectable via set_migration_type() */
typedef enum {
	MIG_TYPE_COLD = 0,
	MIG_TYPE_LIVE,
} mig_type_t;

/** Maps the textual name of a migration type to its enum value */
static const struct {
	mig_type_t mig_type;
	const char *str;
} mig_type_conv [] = {
	{MIG_TYPE_COLD, "cold"},
	{MIG_TYPE_LIVE, "live"},
};

/** A chunk of memory handed to send_guest_mem() / recv_guest_mem() */
typedef struct _mem_chunk {
	size_t size;
	uint8_t *ptr;
} mem_chunk_t;

/* NOTE(review): field meanings are not visible in this header - confirm against the users */
typedef struct _migration_metadata {
	uint32_t ncores;
	size_t guest_size;
	uint32_t no_checkpoint;
	uint64_t elf_entry;
	bool full_checkpoint;
} migration_metadata_t;

void set_migration_type(const char *mig_type_str);
mig_type_t get_migration_type(void);

void wait_for_client(uint16_t listen_portno);
void set_migration_target(const char *ip_str, int port);
void connect_to_server(void);
void close_migration_channel(void);

int recv_data(void *buffer, size_t length);
int send_data(void *buffer, size_t length);

void send_guest_mem(mig_mode_t mode, bool final_dump, size_t mem_chunk_cnt, mem_chunk_t *mem_chunks);
void recv_guest_mem(size_t mem_chunk_cnt, mem_chunk_t *mem_chunks);
|
|
||||||
#endif /* __UHYVE_MIGRATION_H__ */
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,189 +0,0 @@
|
||||||
/* Copyright (c) 2015, IBM
|
|
||||||
* Author(s): Dan Williams <djwillia@us.ibm.com>
|
|
||||||
* Ricardo Koller <kollerr@us.ibm.com>
|
|
||||||
* Copyright (c) 2017, RWTH Aachen University
|
|
||||||
* Author(s): Tim van de Kamp <tim.van.de.kamp@rwth-aachen.de>
|
|
||||||
*
|
|
||||||
* Permission to use, copy, modify, and/or distribute this software
|
|
||||||
* for any purpose with or without fee is hereby granted, provided
|
|
||||||
* that the above copyright notice and this permission notice appear
|
|
||||||
* in all copies.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
|
|
||||||
* WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
|
|
||||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
|
|
||||||
* AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
|
|
||||||
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
|
||||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
|
||||||
* NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
|
||||||
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* We used several existing projects as guides
|
|
||||||
* kvmtest.c: http://lwn.net/Articles/658512/
|
|
||||||
* lkvm: http://github.com/clearlinux/kvmtool
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* 15.1.2017: extend original version (https://github.com/Solo5/solo5)
|
|
||||||
* for HermitCore
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "uhyve-net.h"
|
|
||||||
#include <ctype.h>
|
|
||||||
|
|
||||||
/* TODO: create an array or similar for more than one netif */
|
|
||||||
static uhyve_netinfo_t netinfo;
|
|
||||||
|
|
||||||
//-------------------------------------- ATTACH LINUX TAP -----------------------------------------//
/*
 * Attaches to the TAP device 'dev' and returns a non-blocking file
 * descriptor for it. A name of the form "@<number>" is interpreted as an
 * already opened descriptor.
 *
 * Returns the descriptor on success, or -1 with errno set on failure
 * (EINVAL for a missing or too long name, ENODEV if the device cannot be
 * used).
 */
int attach_linux_tap(const char *dev)
{
	struct ifreq ifr;
	int fd, err;

	if (dev == NULL) {
		errno = EINVAL;
		return -1;
	}

	// @<number> indicates a pre-existing open fd onto the correct device.
	if (dev[0] == '@') {
		fd = atoi(&dev[1]);

		if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1)
			return -1;
		return fd;
	}

	// Validate the name before opening anything, so that no descriptor is
	// leaked on this path. The name must leave room for the terminating NUL
	// inside ifr_name.
	if (strlen(dev) >= IFNAMSIZ) {
		errno = EINVAL;
		return -1;
	}

	fd = open("/dev/net/tun", O_RDWR | O_NONBLOCK);
	if (fd == -1)
		return -1;

	// Initialize interface request for TAP interface
	memset(&ifr, 0x00, sizeof(ifr));

	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
	strncpy(ifr.ifr_name, dev, IFNAMSIZ - 1);

	// Try to create OR attach to an existing device. The Linux API has no way
	// to differentiate between the two

	// create before a tap device with these commands:
	//
	// sudo ip tuntap add <devname> mode tap user <user>
	// sudo ip addr add 10.0.5.1/24 broadcast 10.0.5.255
	// sudo ip link set dev <devname> up
	//

	if (ioctl(fd, TUNSETIFF, (void *)&ifr) < 0) {
		// keep the errno of ioctl(), close() may overwrite it
		err = errno;
		close(fd);
		errno = err;
		return -1;
	}

	// If we got back a different device than the one requested, e.g. because
	// the caller mistakenly passed in '%d' (yes, that's really in the Linux API)
	// then fail

	if (strncmp(ifr.ifr_name, dev, IFNAMSIZ) != 0) {
		close(fd);
		errno = ENODEV;
		return -1;
	}

	// Attempt a zero-sized write to the device. If the device was freshly created
	// (as opposed to attached to an existing one) this will fail with EIO. Ignore
	// any other error return since that may indicate the device is up
	//
	// If this check produces a false positive then caller's later writes to fd will
	// fail with EIO, which is not great but at least we tried

	char buf[1] = { 0 };
	if (write(fd, buf, 0) == -1 && errno == EIO) {
		close(fd);
		errno = ENODEV;
		return -1;
	}

	return fd;
}
|
|
||||||
|
|
||||||
//---------------------------------- GET MAC ----------------------------------------------//
|
|
||||||
char* uhyve_get_mac(void)
|
|
||||||
{
|
|
||||||
return netinfo.mac_str;
|
|
||||||
}
|
|
||||||
|
|
||||||
//---------------------------------- SET MAC ----------------------------------------------//
|
|
||||||
|
|
||||||
int uhyve_set_mac(void)
|
|
||||||
{
|
|
||||||
int mac_is_set = 0;
|
|
||||||
uint8_t guest_mac[6];
|
|
||||||
|
|
||||||
char* str = getenv("HERMIT_NETIF_MAC");
|
|
||||||
if (str)
|
|
||||||
{
|
|
||||||
const char *macptr = str;
|
|
||||||
const char *v_macptr = macptr;
|
|
||||||
// checking str is a valid MAC address
|
|
||||||
int i = 0;
|
|
||||||
int s = 0;
|
|
||||||
while(*v_macptr) {
|
|
||||||
if(isxdigit(*v_macptr)) {
|
|
||||||
i++;
|
|
||||||
} else if (*v_macptr == ':') {
|
|
||||||
if (i / 2 - 1 != s++)
|
|
||||||
break;
|
|
||||||
} else {
|
|
||||||
s = -1;
|
|
||||||
}
|
|
||||||
v_macptr++;
|
|
||||||
}
|
|
||||||
if (i != 12 || s != 5) {
|
|
||||||
warnx("Malformed mac address: %s\n", macptr);
|
|
||||||
} else {
|
|
||||||
snprintf(netinfo.mac_str, sizeof(netinfo.mac_str), "%s", macptr);
|
|
||||||
mac_is_set = 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!mac_is_set) {
|
|
||||||
int rfd = open("/dev/urandom", O_RDONLY);
|
|
||||||
if(rfd == -1)
|
|
||||||
err(1, "Could not open /dev/urandom\n");
|
|
||||||
int ret;
|
|
||||||
ret = read(rfd, guest_mac, sizeof(guest_mac));
|
|
||||||
// compare the number of bytes read with the size of guest_mac
|
|
||||||
assert(ret == sizeof(guest_mac));
|
|
||||||
close(rfd);
|
|
||||||
|
|
||||||
guest_mac[0] &= 0xfe; // creats a random MAC-address in the locally administered
|
|
||||||
guest_mac[0] |= 0x02; // address range which can be used without conflict with other public devices
|
|
||||||
// save the MAC address in the netinfo
|
|
||||||
snprintf(netinfo.mac_str, sizeof(netinfo.mac_str),
|
|
||||||
"%02x:%02x:%02x:%02x:%02x:%02x",
|
|
||||||
guest_mac[0], guest_mac[1], guest_mac[2],
|
|
||||||
guest_mac[3], guest_mac[4], guest_mac[5]);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
//-------------------------------------- SETUP NETWORK ---------------------------------------------//
|
|
||||||
int uhyve_net_init(const char *netif)
|
|
||||||
{
|
|
||||||
if (netif == NULL) {
|
|
||||||
err(1, "ERROR: no netif defined\n");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// attaching netif
|
|
||||||
netfd = attach_linux_tap(netif);
|
|
||||||
if (netfd < 0) {
|
|
||||||
err(1, "Could not attach interface: %s\n", netif);
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
uhyve_set_mac();
|
|
||||||
|
|
||||||
return netfd;
|
|
||||||
}
|
|
|
@ -1,60 +0,0 @@
|
||||||
#ifndef __UHYVE_NET_H__
#define __UHYVE_NET_H__

#include <linux/kvm.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <errno.h>
#include <unistd.h>

#include <sys/select.h>
#include <sys/stat.h>

/* network interface */
#include <sys/socket.h>
#include <linux/if.h>
#include <linux/if_tun.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <err.h>

/* descriptor of the attached network device; defined outside of this header */
extern int netfd;

/*
 * Argument structures of the network-related uhyve ports. All of them are
 * packed; the IN/OUT markers are taken over from the original declarations.
 */

/* UHYVE_PORT_NETINFO */
typedef struct {
	char mac_str[18];	/* OUT */
} __attribute__((packed)) uhyve_netinfo_t;

/* UHYVE_PORT_NETWRITE */
typedef struct {
	const void* data;	/* IN */
	size_t len;		/* IN */
	int ret;		/* OUT */
} __attribute__((packed)) uhyve_netwrite_t;

/* UHYVE_PORT_NETREAD */
typedef struct {
	void* data;		/* IN */
	size_t len;		/* IN / OUT */
	int ret;		/* OUT */
} __attribute__((packed)) uhyve_netread_t;

/* UHYVE_PORT_NETSTAT */
typedef struct {
	int status;		/* IN */
} __attribute__((packed)) uhyve_netstat_t;

int uhyve_net_init(const char *hermit_netif);
char* uhyve_get_mac(void);

#endif
|
|
|
@ -1,55 +0,0 @@
|
||||||
/* Copyright (c) 2017, RWTH Aachen University
|
|
||||||
* Author(s): Daniel Krebs <github@daniel-krebs.net>
|
|
||||||
*
|
|
||||||
* Permission to use, copy, modify, and/or distribute this software
|
|
||||||
* for any purpose with or without fee is hereby granted, provided
|
|
||||||
* that the above copyright notice and this permission notice appear
|
|
||||||
* in all copies.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
|
|
||||||
* WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
|
|
||||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
|
|
||||||
* AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
|
|
||||||
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
|
||||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
|
||||||
* NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
|
||||||
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
||||||
*/
|
|
||||||
#ifndef UHYVE_SYSCALLS_H
|
|
||||||
#define UHYVE_SYSCALLS_H
|
|
||||||
|
|
||||||
#include <unistd.h>
|
|
||||||
#include <stddef.h>
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
int fd;
|
|
||||||
const char* buf;
|
|
||||||
size_t len;
|
|
||||||
} __attribute__((packed)) uhyve_write_t;
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
const char* name;
|
|
||||||
int flags;
|
|
||||||
int mode;
|
|
||||||
int ret;
|
|
||||||
} __attribute__((packed)) uhyve_open_t;
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
int fd;
|
|
||||||
int ret;
|
|
||||||
} __attribute__((packed)) uhyve_close_t;
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
int fd;
|
|
||||||
char* buf;
|
|
||||||
size_t len;
|
|
||||||
ssize_t ret;
|
|
||||||
} __attribute__((packed)) uhyve_read_t;
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
int fd;
|
|
||||||
off_t offset;
|
|
||||||
int whence;
|
|
||||||
} __attribute__((packed)) uhyve_lseek_t;
|
|
||||||
|
|
||||||
#endif // UHYVE_SYSCALLS_H
|
|
1255
tools/uhyve-x86_64.c
1255
tools/uhyve-x86_64.c
File diff suppressed because it is too large
Load diff
|
@ -1,98 +0,0 @@
|
||||||
#ifndef __UHYVE_CPU_H__
#define __UHYVE_CPU_H__

/*
 * Fallback definitions of the constant helpers; only used if _BITUL has not
 * been defined by a previously included header.
 */
#ifndef _BITUL

#ifdef __ASSEMBLY__
#define _AC(X,Y)	X
#define _AT(T,X)	X
#else
#define __AC(X,Y)	(X##Y)
#define _AC(X,Y)	__AC(X,Y)
#define _AT(T,X)	((T)(X))
#endif

#define _BITUL(x)	(_AC(1,UL) << (x))
#define _BITULL(x)	(_AC(1,ULL) << (x))

#endif

/*
 * EFLAGS bits
 */
#define X86_EFLAGS_CF	0x00000001 /* Carry Flag */

/*
 * Basic CPU control in CR0
 */
#define X86_CR0_PE_BIT	0 /* Protection Enable */
#define X86_CR0_PE	_BITUL(X86_CR0_PE_BIT)
#define X86_CR0_PG_BIT	31 /* Paging */
#define X86_CR0_PG	_BITUL(X86_CR0_PG_BIT)

/*
 * Intel CPU features in CR4
 */
#define X86_CR4_PAE_BIT	5 /* enable physical address extensions */
#define X86_CR4_PAE	_BITUL(X86_CR4_PAE_BIT)

/*
 * Intel long mode page directory/table entries
 */
#define X86_PDPT_P_BIT	0 /* Present */
#define X86_PDPT_P	_BITUL(X86_PDPT_P_BIT)
#define X86_PDPT_RW_BIT	1 /* Writable */
#define X86_PDPT_RW	_BITUL(X86_PDPT_RW_BIT)
#define X86_PDPT_PS_BIT	7 /* Page size */
#define X86_PDPT_PS	_BITUL(X86_PDPT_PS_BIT)

/*
 * GDT and KVM segment manipulation
 */

/* byte offset of the n-th descriptor (8 bytes each) */
#define GDT_DESC_OFFSET(n) ((n) * 0x8)

/* extracts the base address (descriptor bits 16-39 and 56-63) */
#define GDT_GET_BASE(x) ( \
	(((x) & 0xFF00000000000000) >> 32) | \
	(((x) & 0x000000FF00000000) >> 16) | \
	(((x) & 0x00000000FFFF0000) >> 16))

/* extracts the limit (descriptor bits 0-15 and 48-51) */
#define GDT_GET_LIMIT(x) (__u32)( \
	(((x) & 0x000F000000000000) >> 32) | \
	(((x) & 0x000000000000FFFF)))

/* Constructor for a conventional segment GDT (or LDT) entry */
/* This is a macro so it can be used in initializers */
#define GDT_ENTRY(flags, base, limit) \
	((((base) & _AC(0xff000000, ULL)) << (56-24)) | \
	 (((flags) & _AC(0x0000f0ff, ULL)) << 40) | \
	 (((limit) & _AC(0x000f0000, ULL)) << (48-16)) | \
	 (((base) & _AC(0x00ffffff, ULL)) << 16) | \
	 (((limit) & _AC(0x0000ffff, ULL))))

/* accessors for the single flag fields of a descriptor */
#define GDT_GET_G(x)   (__u8)(((x) & 0x0080000000000000) >> 55)
#define GDT_GET_DB(x)  (__u8)(((x) & 0x0040000000000000) >> 54)
#define GDT_GET_L(x)   (__u8)(((x) & 0x0020000000000000) >> 53)
#define GDT_GET_AVL(x) (__u8)(((x) & 0x0010000000000000) >> 52)
#define GDT_GET_P(x)   (__u8)(((x) & 0x0000800000000000) >> 47)
#define GDT_GET_DPL(x) (__u8)(((x) & 0x0000600000000000) >> 45)
#define GDT_GET_S(x)   (__u8)(((x) & 0x0000100000000000) >> 44)
#define GDT_GET_TYPE(x)(__u8)(((x) & 0x00000F0000000000) >> 40)

/*
 * Fills the segment 'seg' from entry 'sel' of 'gdt_table'.
 *
 * All macro arguments are parenthesized so that expressions such as
 * 'idx + 1' expand correctly (e.g. in '(sel) * 8').
 * Note: 'sel' is evaluated twice.
 */
#define GDT_TO_KVM_SEGMENT(seg, gdt_table, sel) \
	do { \
		__u64 gdt_ent = (gdt_table)[(sel)]; \
		(seg).base = GDT_GET_BASE(gdt_ent); \
		(seg).limit = GDT_GET_LIMIT(gdt_ent); \
		(seg).selector = (sel) * 8; \
		(seg).type = GDT_GET_TYPE(gdt_ent); \
		(seg).present = GDT_GET_P(gdt_ent); \
		(seg).dpl = GDT_GET_DPL(gdt_ent); \
		(seg).db = GDT_GET_DB(gdt_ent); \
		(seg).s = GDT_GET_S(gdt_ent); \
		(seg).l = GDT_GET_L(gdt_ent); \
		(seg).g = GDT_GET_G(gdt_ent); \
		(seg).avl = GDT_GET_AVL(gdt_ent); \
	} while (0)

#endif
|
|
813
tools/uhyve.c
813
tools/uhyve.c
|
@ -1,813 +0,0 @@
|
||||||
/* Copyright (c) 2015, IBM
|
|
||||||
* Author(s): Dan Williams <djwillia@us.ibm.com>
|
|
||||||
* Ricardo Koller <kollerr@us.ibm.com>
|
|
||||||
* Copyright (c) 2017, RWTH Aachen University
|
|
||||||
* Author(s): Stefan Lankes <slankes@eonerc.rwth-aachen.de>
|
|
||||||
*
|
|
||||||
* Permission to use, copy, modify, and/or distribute this software
|
|
||||||
* for any purpose with or without fee is hereby granted, provided
|
|
||||||
* that the above copyright notice and this permission notice appear
|
|
||||||
* in all copies.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
|
|
||||||
* WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
|
|
||||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
|
|
||||||
* AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
|
|
||||||
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
|
||||||
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
|
||||||
* NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
|
||||||
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* We used several existing projects as guides
|
|
||||||
* kvmtest.c: http://lwn.net/Articles/658512/
|
|
||||||
* Solo5: https://github.com/Solo5/solo5
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* 15.1.2017: extend original version (https://github.com/Solo5/solo5)
|
|
||||||
* for HermitCore
|
|
||||||
* 25.2.2017: add SMP support to enable more than one core
|
|
||||||
* 24.4.2017: add checkpoint/restore support,
|
|
||||||
* remove memory limit
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define _GNU_SOURCE
|
|
||||||
|
|
||||||
#include <arpa/inet.h>
|
|
||||||
#include <unistd.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <stdbool.h>
|
|
||||||
#include <errno.h>
|
|
||||||
#include <fcntl.h>
|
|
||||||
#include <sched.h>
|
|
||||||
#include <signal.h>
|
|
||||||
#include <limits.h>
|
|
||||||
#include <pthread.h>
|
|
||||||
#include <semaphore.h>
|
|
||||||
#include <elf.h>
|
|
||||||
#include <err.h>
|
|
||||||
#include <poll.h>
|
|
||||||
#include <sys/wait.h>
|
|
||||||
#include <sys/ioctl.h>
|
|
||||||
#include <sys/mman.h>
|
|
||||||
#include <sys/ioctl.h>
|
|
||||||
#include <sys/mman.h>
|
|
||||||
#include <sys/stat.h>
|
|
||||||
#include <sys/types.h>
|
|
||||||
#include <sys/time.h>
|
|
||||||
#include <sys/eventfd.h>
|
|
||||||
#include <linux/const.h>
|
|
||||||
#include <linux/kvm.h>
|
|
||||||
|
|
||||||
#include "uhyve.h"
|
|
||||||
#include "uhyve-syscalls.h"
|
|
||||||
#include "uhyve-migration.h"
|
|
||||||
#include "uhyve-net.h"
|
|
||||||
#include "uhyve-gdb.h"
|
|
||||||
#include "proxy.h"
|
|
||||||
|
|
||||||
static bool restart = false;
|
|
||||||
static bool migration = false;
|
|
||||||
static pthread_t net_thread;
|
|
||||||
static int* vcpu_fds = NULL;
|
|
||||||
static pthread_mutex_t kvm_lock = PTHREAD_MUTEX_INITIALIZER;
|
|
||||||
|
|
||||||
extern bool verbose;
|
|
||||||
|
|
||||||
static char* guest_path = NULL;
|
|
||||||
static bool uhyve_gdb_enabled = false;
|
|
||||||
size_t guest_size = 0x20000000ULL;
|
|
||||||
bool full_checkpoint = false;
|
|
||||||
pthread_barrier_t barrier;
|
|
||||||
pthread_barrier_t migration_barrier;
|
|
||||||
pthread_t* vcpu_threads = NULL;
|
|
||||||
uint8_t* klog = NULL;
|
|
||||||
uint8_t* guest_mem = NULL;
|
|
||||||
uint32_t no_checkpoint = 0;
|
|
||||||
uint32_t ncores = 1;
|
|
||||||
uint64_t elf_entry;
|
|
||||||
int kvm = -1, vmfd = -1, netfd = -1, efd = -1;
|
|
||||||
uint8_t* mboot = NULL;
|
|
||||||
__thread struct kvm_run *run = NULL;
|
|
||||||
__thread int vcpufd = -1;
|
|
||||||
__thread uint32_t cpuid = 0;
|
|
||||||
static sem_t net_sem;
|
|
||||||
|
|
||||||
int uhyve_argc = -1;
|
|
||||||
int uhyve_envc = -1;
|
|
||||||
char **uhyve_argv = NULL;
|
|
||||||
extern char **environ;
|
|
||||||
char **uhyve_envp = NULL;
|
|
||||||
|
|
||||||
vcpu_state_t *vcpu_thread_states = NULL;
|
|
||||||
static sigset_t signal_mask;
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
int argc;
|
|
||||||
int argsz[MAX_ARGC_ENVC];
|
|
||||||
int envc;
|
|
||||||
int envsz[MAX_ARGC_ENVC];
|
|
||||||
} __attribute__ ((packed)) uhyve_cmdsize_t;
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
char **argv;
|
|
||||||
char **envp;
|
|
||||||
} __attribute__ ((packed)) uhyve_cmdval_t;
|
|
||||||
|
|
||||||
/*
 * Parse a size such as "512", "0x200", "64M" or "2g".
 *
 * The number is read by strtoull() with base 0. The first character after
 * it may be one of K, M, G, T, P, E (either case); each step up multiplies
 * the value by another 1024. Any other character leaves the value as is.
 */
static uint64_t memparse(const char *ptr)
{
	char *suffix;
	unsigned int steps;	/* how many times to multiply by 1024 */
	uint64_t value = strtoull(ptr, &suffix, 0);

	switch (*suffix) {
	case 'E': case 'e': steps = 6; break;
	case 'P': case 'p': steps = 5; break;
	case 'T': case 't': steps = 4; break;
	case 'G': case 'g': steps = 3; break;
	case 'M': case 'm': steps = 2; break;
	case 'K': case 'k': steps = 1; break;
	default:            steps = 0; break;
	}

	while (steps-- > 0)
		value <<= 10;

	return value;
}
|
|
||||||
|
|
||||||
/*
 * Close the descriptor stored in *fd unless it is already marked closed
 * (-1), and mark it closed afterwards so a second call does nothing.
 */
static void close_fd(int* fd)
{
	if (*fd == -1)
		return;

	close(*fd);
	*fd = -1;
}
|
|
||||||
|
|
||||||
static void uhyve_exit(void* arg)
|
|
||||||
{
|
|
||||||
//print_registers();
|
|
||||||
|
|
||||||
if (pthread_mutex_trylock(&kvm_lock))
|
|
||||||
{
|
|
||||||
close_fd(&vcpufd);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// only the main thread will execute this
|
|
||||||
if (vcpu_threads) {
|
|
||||||
for(uint32_t i=0; i<ncores; i++) {
|
|
||||||
if (pthread_self() == vcpu_threads[i])
|
|
||||||
continue;
|
|
||||||
|
|
||||||
pthread_kill(vcpu_threads[i], SIGTERM);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (netfd > 0)
|
|
||||||
pthread_kill(net_thread, SIGTERM);
|
|
||||||
}
|
|
||||||
|
|
||||||
close_fd(&vcpufd);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void uhyve_atexit(void)
|
|
||||||
{
|
|
||||||
uhyve_exit(NULL);
|
|
||||||
|
|
||||||
if (vcpu_threads) {
|
|
||||||
for(uint32_t i = 0; i < ncores; i++) {
|
|
||||||
if (pthread_self() == vcpu_threads[i])
|
|
||||||
continue;
|
|
||||||
pthread_join(vcpu_threads[i], NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
free(vcpu_threads);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (vcpu_fds)
|
|
||||||
free(vcpu_fds);
|
|
||||||
|
|
||||||
// clean up and close KVM
|
|
||||||
close_fd(&vmfd);
|
|
||||||
close_fd(&kvm);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void* wait_for_packet(void* arg)
|
|
||||||
{
|
|
||||||
int ret;
|
|
||||||
struct pollfd fds = { .fd = netfd,
|
|
||||||
.events = POLLIN,
|
|
||||||
.revents = 0};
|
|
||||||
|
|
||||||
while(1)
|
|
||||||
{
|
|
||||||
fds.revents = 0;
|
|
||||||
|
|
||||||
ret = poll(&fds, 1, -1000);
|
|
||||||
|
|
||||||
if (ret < 0 && errno == EINTR)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
if (ret < 0)
|
|
||||||
perror("poll()");
|
|
||||||
else if (ret) {
|
|
||||||
uint64_t event_counter = 1;
|
|
||||||
write(efd, &event_counter, sizeof(event_counter));
|
|
||||||
sem_wait(&net_sem);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void check_network(void)
|
|
||||||
{
|
|
||||||
// should we start the network thread?
|
|
||||||
if ((efd < 0) && (getenv("HERMIT_NETIF"))) {
|
|
||||||
struct kvm_irqfd irqfd = {};
|
|
||||||
|
|
||||||
efd = eventfd(0, 0);
|
|
||||||
irqfd.fd = efd;
|
|
||||||
irqfd.gsi = UHYVE_IRQ;
|
|
||||||
kvm_ioctl(vmfd, KVM_IRQFD, &irqfd);
|
|
||||||
|
|
||||||
sem_init(&net_sem, 0, 0);
|
|
||||||
|
|
||||||
if (pthread_create(&net_thread, NULL, wait_for_packet, NULL))
|
|
||||||
err(1, "unable to create thread");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static int vcpu_loop(void)
|
|
||||||
{
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
pthread_barrier_wait(&barrier);
|
|
||||||
|
|
||||||
if (restart) {
|
|
||||||
vcpu_state_t cpu_state = read_cpu_state();
|
|
||||||
restore_cpu_state(cpu_state);
|
|
||||||
} else if (vcpu_thread_states) {
|
|
||||||
restore_cpu_state(vcpu_thread_states[cpuid]);
|
|
||||||
} else {
|
|
||||||
init_cpu_state(elf_entry);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (cpuid == 0) {
|
|
||||||
if (restart) {
|
|
||||||
no_checkpoint++;
|
|
||||||
} else if (migration) {
|
|
||||||
free(vcpu_thread_states);
|
|
||||||
vcpu_thread_states = NULL;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* init uhyve gdb support */
|
|
||||||
if (uhyve_gdb_enabled) {
|
|
||||||
if (cpuid == 0)
|
|
||||||
uhyve_gdb_init(vcpufd);
|
|
||||||
|
|
||||||
pthread_barrier_wait(&barrier);
|
|
||||||
}
|
|
||||||
|
|
||||||
while (1) {
|
|
||||||
ret = ioctl(vcpufd, KVM_RUN, NULL);
|
|
||||||
|
|
||||||
if(ret == -1) {
|
|
||||||
switch(errno) {
|
|
||||||
case EINTR:
|
|
||||||
continue;
|
|
||||||
|
|
||||||
case EFAULT: {
|
|
||||||
struct kvm_regs regs;
|
|
||||||
kvm_ioctl(vcpufd, KVM_GET_REGS, ®s);
|
|
||||||
#ifdef __x86_64__
|
|
||||||
err(1, "KVM: host/guest translation fault: rip=0x%llx", regs.rip);
|
|
||||||
#else
|
|
||||||
err(1, "KVM: host/guest translation fault: elr_el1=0x%llx", regs.elr_el1);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
default:
|
|
||||||
err(1, "KVM: ioctl KVM_RUN in vcpu_loop for cpuid %d failed", cpuid);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
uint64_t port = 0;
|
|
||||||
unsigned raddr = 0;
|
|
||||||
|
|
||||||
/* handle requests */
|
|
||||||
switch (run->exit_reason) {
|
|
||||||
case KVM_EXIT_HLT:
|
|
||||||
fprintf(stderr, "Guest has halted the CPU, this is considered as a normal exit.\n");
|
|
||||||
if (uhyve_gdb_enabled)
|
|
||||||
uhyve_gdb_handle_term();
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
case KVM_EXIT_MMIO:
|
|
||||||
port = run->mmio.phys_addr;
|
|
||||||
if (run->mmio.is_write)
|
|
||||||
memcpy(&raddr, run->mmio.data, sizeof(raddr) /*run->mmio.len*/);
|
|
||||||
//printf("KVM: handled KVM_EXIT_MMIO at 0x%lx (data %u)\n", port, raddr);
|
|
||||||
|
|
||||||
case KVM_EXIT_IO:
|
|
||||||
if (!port) {
|
|
||||||
port = run->io.port;
|
|
||||||
raddr = *((unsigned*)((size_t)run+run->io.data_offset));
|
|
||||||
}
|
|
||||||
|
|
||||||
//printf("port 0x%x\n", run->io.port);
|
|
||||||
switch (port) {
|
|
||||||
case UHYVE_UART_PORT:
|
|
||||||
if (verbose)
|
|
||||||
putc((unsigned char) raddr, stderr);
|
|
||||||
break;
|
|
||||||
case UHYVE_PORT_WRITE: {
|
|
||||||
uhyve_write_t* uhyve_write = (uhyve_write_t*) (guest_mem+raddr);
|
|
||||||
|
|
||||||
uhyve_write->len = write(uhyve_write->fd, guest_mem+(size_t)uhyve_write->buf, uhyve_write->len);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case UHYVE_PORT_READ: {
|
|
||||||
uhyve_read_t* uhyve_read = (uhyve_read_t*) (guest_mem+raddr);
|
|
||||||
|
|
||||||
uhyve_read->ret = read(uhyve_read->fd, guest_mem+(size_t)uhyve_read->buf, uhyve_read->len);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case UHYVE_PORT_EXIT: {
|
|
||||||
if (cpuid)
|
|
||||||
pthread_exit((int*)(guest_mem+raddr));
|
|
||||||
else
|
|
||||||
exit(*(int*)(guest_mem+raddr));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case UHYVE_PORT_OPEN: {
|
|
||||||
uhyve_open_t* uhyve_open = (uhyve_open_t*) (guest_mem+raddr);
|
|
||||||
char rpath[PATH_MAX];
|
|
||||||
|
|
||||||
// forbid to open the kvm device
|
|
||||||
if (realpath((const char*)guest_mem+(size_t)uhyve_open->name, rpath) < 0)
|
|
||||||
uhyve_open->ret = -1;
|
|
||||||
else if (strcmp(rpath, "/dev/kvm") == 0)
|
|
||||||
uhyve_open->ret = -1;
|
|
||||||
else
|
|
||||||
uhyve_open->ret = open((const char*)guest_mem+(size_t)uhyve_open->name, uhyve_open->flags, uhyve_open->mode);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case UHYVE_PORT_CLOSE: {
|
|
||||||
uhyve_close_t* uhyve_close = (uhyve_close_t*) (guest_mem+raddr);
|
|
||||||
|
|
||||||
if (uhyve_close->fd > 2)
|
|
||||||
uhyve_close->ret = close(uhyve_close->fd);
|
|
||||||
else
|
|
||||||
uhyve_close->ret = 0;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case UHYVE_PORT_NETINFO: {
|
|
||||||
uhyve_netinfo_t* uhyve_netinfo = (uhyve_netinfo_t*)(guest_mem+raddr);
|
|
||||||
memcpy(uhyve_netinfo->mac_str, uhyve_get_mac(), 18);
|
|
||||||
// guest configure the ethernet device => start network thread
|
|
||||||
check_network();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case UHYVE_PORT_NETWRITE: {
|
|
||||||
uhyve_netwrite_t* uhyve_netwrite = (uhyve_netwrite_t*)(guest_mem + raddr);
|
|
||||||
uhyve_netwrite->ret = 0;
|
|
||||||
ret = write(netfd, guest_mem + (size_t)uhyve_netwrite->data, uhyve_netwrite->len);
|
|
||||||
if (ret >= 0) {
|
|
||||||
uhyve_netwrite->ret = 0;
|
|
||||||
uhyve_netwrite->len = ret;
|
|
||||||
} else {
|
|
||||||
uhyve_netwrite->ret = -1;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case UHYVE_PORT_NETREAD: {
|
|
||||||
uhyve_netread_t* uhyve_netread = (uhyve_netread_t*)(guest_mem + raddr);
|
|
||||||
ret = read(netfd, guest_mem + (size_t)uhyve_netread->data, uhyve_netread->len);
|
|
||||||
if (ret > 0) {
|
|
||||||
uhyve_netread->len = ret;
|
|
||||||
uhyve_netread->ret = 0;
|
|
||||||
} else {
|
|
||||||
uhyve_netread->ret = -1;
|
|
||||||
sem_post(&net_sem);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case UHYVE_PORT_NETSTAT: {
|
|
||||||
uhyve_netstat_t* uhyve_netstat = (uhyve_netstat_t*)(guest_mem + raddr);
|
|
||||||
char* str = getenv("HERMIT_NETIF");
|
|
||||||
if (str)
|
|
||||||
uhyve_netstat->status = 1;
|
|
||||||
else
|
|
||||||
uhyve_netstat->status = 0;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case UHYVE_PORT_LSEEK: {
|
|
||||||
uhyve_lseek_t* uhyve_lseek = (uhyve_lseek_t*) (guest_mem+raddr);
|
|
||||||
|
|
||||||
uhyve_lseek->offset = lseek(uhyve_lseek->fd, uhyve_lseek->offset, uhyve_lseek->whence);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case UHYVE_PORT_CMDSIZE: {
|
|
||||||
int i;
|
|
||||||
uhyve_cmdsize_t *val = (uhyve_cmdsize_t *) (guest_mem+raddr);
|
|
||||||
|
|
||||||
val->argc = uhyve_argc;
|
|
||||||
for(i=0; i<uhyve_argc; i++)
|
|
||||||
val->argsz[i] = strlen(uhyve_argv[i]) + 1;
|
|
||||||
|
|
||||||
val->envc = uhyve_envc;
|
|
||||||
for(i=0; i<uhyve_envc; i++)
|
|
||||||
val->envsz[i] = strlen(uhyve_envp[i]) + 1;
|
|
||||||
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case UHYVE_PORT_CMDVAL: {
|
|
||||||
int i;
|
|
||||||
char **argv_ptr, **env_ptr;
|
|
||||||
uhyve_cmdval_t *val = (uhyve_cmdval_t *) (guest_mem+raddr);
|
|
||||||
|
|
||||||
/* argv */
|
|
||||||
argv_ptr = (char **)(guest_mem + (size_t)val->argv);
|
|
||||||
for(i=0; i<uhyve_argc; i++)
|
|
||||||
strcpy(guest_mem + (size_t)argv_ptr[i], uhyve_argv[i]);
|
|
||||||
|
|
||||||
/* env */
|
|
||||||
env_ptr = (char **)(guest_mem + (size_t)val->envp);
|
|
||||||
for(i=0; i<uhyve_envc; i++)
|
|
||||||
strcpy(guest_mem + (size_t)env_ptr[i], uhyve_envp[i]);
|
|
||||||
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
default:
|
|
||||||
err(1, "KVM: unhandled KVM_EXIT_IO / KVM_EXIT_MMIO at port 0x%lx\n", port);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
case KVM_EXIT_FAIL_ENTRY:
|
|
||||||
if (uhyve_gdb_enabled)
|
|
||||||
uhyve_gdb_handle_exception(vcpufd, GDB_SIGNAL_SEGV);
|
|
||||||
err(1, "KVM: entry failure: hw_entry_failure_reason=0x%llx\n",
|
|
||||||
run->fail_entry.hardware_entry_failure_reason);
|
|
||||||
break;
|
|
||||||
|
|
||||||
case KVM_EXIT_INTERNAL_ERROR:
|
|
||||||
if (uhyve_gdb_enabled)
|
|
||||||
uhyve_gdb_handle_exception(vcpufd, GDB_SIGNAL_SEGV);
|
|
||||||
err(1, "KVM: internal error exit: suberror = 0x%x\n", run->internal.suberror);
|
|
||||||
break;
|
|
||||||
|
|
||||||
case KVM_EXIT_SHUTDOWN:
|
|
||||||
fprintf(stderr, "KVM: receive shutdown command\n");
|
|
||||||
|
|
||||||
case KVM_EXIT_DEBUG:
|
|
||||||
if (uhyve_gdb_enabled) {
|
|
||||||
uhyve_gdb_handle_exception(vcpufd, GDB_SIGNAL_TRAP);
|
|
||||||
break;
|
|
||||||
} else print_registers();
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
|
|
||||||
default:
|
|
||||||
fprintf(stderr, "KVM: unhandled exit: exit_reason = 0x%x\n", run->exit_reason);
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
close(vcpufd);
|
|
||||||
vcpufd = -1;
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int vcpu_init(void)
|
|
||||||
{
|
|
||||||
vcpu_fds[cpuid] = vcpufd = kvm_ioctl(vmfd, KVM_CREATE_VCPU, cpuid);
|
|
||||||
|
|
||||||
/* Map the shared kvm_run structure and following data. */
|
|
||||||
size_t mmap_size = (size_t) kvm_ioctl(kvm, KVM_GET_VCPU_MMAP_SIZE, NULL);
|
|
||||||
|
|
||||||
if (mmap_size < sizeof(*run))
|
|
||||||
err(1, "KVM: invalid VCPU_MMAP_SIZE: %zd", mmap_size);
|
|
||||||
|
|
||||||
run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpufd, 0);
|
|
||||||
if (run == MAP_FAILED)
|
|
||||||
err(1, "KVM: VCPU mmap failed");
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void sigusr_handler(int signum)
|
|
||||||
{
|
|
||||||
pthread_barrier_wait(&barrier);
|
|
||||||
write_cpu_state();
|
|
||||||
|
|
||||||
pthread_barrier_wait(&barrier);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void vcpu_thread_mig_handler(int signum)
|
|
||||||
{
|
|
||||||
/* memory should be allocated at this point */
|
|
||||||
assert(vcpu_thread_states != NULL);
|
|
||||||
|
|
||||||
/* ensure consistency among VCPUs */
|
|
||||||
pthread_barrier_wait(&barrier);
|
|
||||||
|
|
||||||
/* save state */
|
|
||||||
vcpu_thread_states[cpuid] = save_cpu_state();
|
|
||||||
|
|
||||||
/* synchronize with migration thread */
|
|
||||||
pthread_barrier_wait(&migration_barrier);
|
|
||||||
|
|
||||||
/* wait to be killed */
|
|
||||||
pthread_barrier_wait(&migration_barrier);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void* uhyve_thread(void* arg)
|
|
||||||
{
|
|
||||||
size_t ret;
|
|
||||||
struct sigaction sa;
|
|
||||||
|
|
||||||
pthread_cleanup_push(uhyve_exit, NULL);
|
|
||||||
|
|
||||||
cpuid = (size_t) arg;
|
|
||||||
|
|
||||||
/* install signal handler for checkpoint */
|
|
||||||
memset(&sa, 0x00, sizeof(sa));
|
|
||||||
sa.sa_handler = &sigusr_handler;
|
|
||||||
sigaction(SIGTHRCHKP, &sa, NULL);
|
|
||||||
|
|
||||||
/* install signal handler for migration */
|
|
||||||
memset(&sa, 0x00, sizeof(sa));
|
|
||||||
sa.sa_handler = &vcpu_thread_mig_handler;
|
|
||||||
sigaction(SIGTHRMIG, &sa, NULL);
|
|
||||||
|
|
||||||
// create new cpu
|
|
||||||
vcpu_init();
|
|
||||||
|
|
||||||
pthread_barrier_wait(&barrier);
|
|
||||||
|
|
||||||
// run cpu loop until thread gets killed
|
|
||||||
ret = vcpu_loop();
|
|
||||||
|
|
||||||
pthread_cleanup_pop(1);
|
|
||||||
|
|
||||||
return (void*) ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* SIGTERM handler: terminate only the thread that received the signal. */
void sigterm_handler(int signum)
{
	(void) signum;

	pthread_exit(NULL);
}
|
|
||||||
|
|
||||||
int uhyve_init(char *path)
|
|
||||||
{
|
|
||||||
FILE *f = NULL;
|
|
||||||
guest_path = path;
|
|
||||||
|
|
||||||
signal(SIGTERM, sigterm_handler);
|
|
||||||
|
|
||||||
// register routine to close the VM
|
|
||||||
atexit(uhyve_atexit);
|
|
||||||
|
|
||||||
const char *start_mig_server = getenv("HERMIT_MIGRATION_SERVER");
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Three startups
|
|
||||||
* a) incoming migration
|
|
||||||
* b) load existing checkpoint
|
|
||||||
* c) normal run
|
|
||||||
*/
|
|
||||||
if (start_mig_server) {
|
|
||||||
migration = true;
|
|
||||||
migration_metadata_t metadata;
|
|
||||||
wait_for_incomming_migration(&metadata, MIGRATION_PORT);
|
|
||||||
|
|
||||||
ncores = metadata.ncores;
|
|
||||||
guest_size = metadata.guest_size;
|
|
||||||
elf_entry = metadata.elf_entry;
|
|
||||||
full_checkpoint = metadata.full_checkpoint;
|
|
||||||
} else if ((f = fopen("checkpoint/chk_config.txt", "r")) != NULL) {
|
|
||||||
int tmp = 0;
|
|
||||||
restart = true;
|
|
||||||
|
|
||||||
fscanf(f, "number of cores: %u\n", &ncores);
|
|
||||||
fscanf(f, "memory size: 0x%zx\n", &guest_size);
|
|
||||||
fscanf(f, "checkpoint number: %u\n", &no_checkpoint);
|
|
||||||
fscanf(f, "entry point: 0x%zx", &elf_entry);
|
|
||||||
fscanf(f, "full checkpoint: %d", &tmp);
|
|
||||||
full_checkpoint = tmp ? true : false;
|
|
||||||
|
|
||||||
if (verbose)
|
|
||||||
fprintf(stderr,
|
|
||||||
"Restart from checkpoint %u "
|
|
||||||
"(ncores %d, mem size 0x%zx)\n",
|
|
||||||
no_checkpoint, ncores, guest_size);
|
|
||||||
fclose(f);
|
|
||||||
} else {
|
|
||||||
const char* hermit_memory = getenv("HERMIT_MEM");
|
|
||||||
if (hermit_memory)
|
|
||||||
guest_size = memparse(hermit_memory);
|
|
||||||
|
|
||||||
const char* hermit_cpus = getenv("HERMIT_CPUS");
|
|
||||||
if (hermit_cpus)
|
|
||||||
ncores = (uint32_t) atoi(hermit_cpus);
|
|
||||||
|
|
||||||
const char* full_chk = getenv("HERMIT_FULLCHECKPOINT");
|
|
||||||
if (full_chk && (strcmp(full_chk, "0") != 0))
|
|
||||||
full_checkpoint = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
vcpu_threads = (pthread_t*) calloc(ncores, sizeof(pthread_t));
|
|
||||||
if (!vcpu_threads)
|
|
||||||
err(1, "Not enough memory");
|
|
||||||
|
|
||||||
vcpu_fds = (int*) calloc(ncores, sizeof(int));
|
|
||||||
if (!vcpu_fds)
|
|
||||||
err(1, "Not enough memory");
|
|
||||||
|
|
||||||
kvm = open("/dev/kvm", O_RDWR | O_CLOEXEC);
|
|
||||||
if (kvm < 0)
|
|
||||||
err(1, "Could not open: /dev/kvm");
|
|
||||||
|
|
||||||
/* Make sure we have the stable version of the API */
|
|
||||||
int kvm_api_version = kvm_ioctl(kvm, KVM_GET_API_VERSION, NULL);
|
|
||||||
if (kvm_api_version != 12)
|
|
||||||
err(1, "KVM: API version is %d, uhyve requires version 12", kvm_api_version);
|
|
||||||
|
|
||||||
/* Create the virtual machine */
|
|
||||||
vmfd = kvm_ioctl(kvm, KVM_CREATE_VM, 0);
|
|
||||||
|
|
||||||
#ifdef __x86_64__
|
|
||||||
init_kvm_arch();
|
|
||||||
if (restart) {
|
|
||||||
if (load_checkpoint(guest_mem, path) != 0)
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
} else if (start_mig_server) {
|
|
||||||
load_migration_data(guest_mem);
|
|
||||||
close_migration_channel();
|
|
||||||
} else {
|
|
||||||
if (load_kernel(guest_mem, path) != 0)
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
pthread_barrier_init(&barrier, NULL, ncores);
|
|
||||||
pthread_barrier_init(&migration_barrier, NULL, ncores+1);
|
|
||||||
cpuid = 0;
|
|
||||||
|
|
||||||
// create first CPU, it will be the boot processor by default
|
|
||||||
int ret = vcpu_init();
|
|
||||||
|
|
||||||
const char* netif_str = getenv("HERMIT_NETIF");
|
|
||||||
if (netif_str)
|
|
||||||
{
|
|
||||||
// TODO: strncmp for different network interfaces
|
|
||||||
// for example tun/tap device or uhyvetap device
|
|
||||||
netfd = uhyve_net_init(netif_str);
|
|
||||||
if (netfd < 0)
|
|
||||||
err(1, "unable to initialized network");
|
|
||||||
}
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
int uhyve_loop(int argc, char **argv)
|
|
||||||
{
|
|
||||||
const char* hermit_check = getenv("HERMIT_CHECKPOINT");
|
|
||||||
const char* hermit_mig_support = getenv("HERMIT_MIGRATION_SUPPORT");
|
|
||||||
const char* hermit_mig_type = getenv("HERMIT_MIGRATION_TYPE");
|
|
||||||
const char* hermit_debug = getenv("HERMIT_DEBUG");
|
|
||||||
int ts = 0, i = 0;
|
|
||||||
|
|
||||||
if (hermit_debug && (atoi(hermit_debug) != 0))
|
|
||||||
uhyve_gdb_enabled = true;
|
|
||||||
|
|
||||||
/* argv[0] is 'proxy', do not count it */
|
|
||||||
uhyve_argc = argc-1;
|
|
||||||
uhyve_argv = &argv[1];
|
|
||||||
uhyve_envp = environ;
|
|
||||||
while(uhyve_envp[i] != NULL)
|
|
||||||
i++;
|
|
||||||
uhyve_envc = i;
|
|
||||||
|
|
||||||
if (uhyve_argc > MAX_ARGC_ENVC) {
|
|
||||||
fprintf(stderr, "uhyve downsiize envc from %d to %d\n", uhyve_argc, MAX_ARGC_ENVC);
|
|
||||||
uhyve_argc = MAX_ARGC_ENVC;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (uhyve_envc > MAX_ARGC_ENVC-1) {
|
|
||||||
fprintf(stderr, "uhyve downsiize envc from %d to %d\n", uhyve_envc, MAX_ARGC_ENVC-1);
|
|
||||||
uhyve_envc = MAX_ARGC_ENVC-1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (uhyve_argc > MAX_ARGC_ENVC || uhyve_envc > MAX_ARGC_ENVC) {
|
|
||||||
fprintf(stderr, "uhyve cannot forward more than %d command line "
|
|
||||||
"arguments or environment variables, please consider increasing "
|
|
||||||
"the MAX_ARGC_ENVP cmake argument\n", MAX_ARGC_ENVC);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (hermit_check)
|
|
||||||
ts = atoi(hermit_check);
|
|
||||||
|
|
||||||
if (hermit_mig_support) {
|
|
||||||
set_migration_target(hermit_mig_support, MIGRATION_PORT);
|
|
||||||
set_migration_type(hermit_mig_type);
|
|
||||||
|
|
||||||
/* block SIGUSR1 in main thread */
|
|
||||||
sigemptyset (&signal_mask);
|
|
||||||
sigaddset (&signal_mask, SIGUSR1);
|
|
||||||
pthread_sigmask (SIG_BLOCK, &signal_mask, NULL);
|
|
||||||
|
|
||||||
/* start migration thread; handles SIGUSR1 */
|
|
||||||
pthread_t sig_thr_id;
|
|
||||||
pthread_create (&sig_thr_id, NULL, migration_handler, (void *)&signal_mask);
|
|
||||||
|
|
||||||
/* install signal handler for migration */
|
|
||||||
struct sigaction sa;
|
|
||||||
memset(&sa, 0x00, sizeof(sa));
|
|
||||||
sa.sa_handler = &vcpu_thread_mig_handler;
|
|
||||||
sigaction(SIGTHRMIG, &sa, NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// First CPU is special because it will boot the system. Other CPUs will
|
|
||||||
// be booted linearily after the first one.
|
|
||||||
vcpu_threads[0] = pthread_self();
|
|
||||||
|
|
||||||
// start threads to create VCPUs
|
|
||||||
for(size_t i = 1; i < ncores; i++)
|
|
||||||
pthread_create(&vcpu_threads[i], NULL, uhyve_thread, (void*) i);
|
|
||||||
|
|
||||||
pthread_barrier_wait(&barrier);
|
|
||||||
|
|
||||||
#ifdef __aarch64__
|
|
||||||
init_kvm_arch();
|
|
||||||
if (restart) {
|
|
||||||
if (load_checkpoint(guest_mem, guest_path) != 0)
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
} else {
|
|
||||||
if (load_kernel(guest_mem, guest_path) != 0)
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
*((uint32_t*) (mboot+0x24)) = ncores;
|
|
||||||
|
|
||||||
if (ts > 0)
|
|
||||||
{
|
|
||||||
struct sigaction sa;
|
|
||||||
struct itimerval timer;
|
|
||||||
|
|
||||||
/* Install timer_handler as the signal handler for SIGVTALRM. */
|
|
||||||
memset(&sa, 0x00, sizeof(sa));
|
|
||||||
sa.sa_handler = &timer_handler;
|
|
||||||
sigaction(SIGALRM, &sa, NULL);
|
|
||||||
|
|
||||||
/* Configure the timer to expire after "ts" sec... */
|
|
||||||
timer.it_value.tv_sec = ts;
|
|
||||||
timer.it_value.tv_usec = 0;
|
|
||||||
/* ... and every "ts" sec after that. */
|
|
||||||
timer.it_interval.tv_sec = ts;
|
|
||||||
timer.it_interval.tv_usec = 0;
|
|
||||||
/* Start a virtual timer. It counts down whenever this process is executing. */
|
|
||||||
setitimer(ITIMER_REAL, &timer, NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Run first CPU
|
|
||||||
return vcpu_loop();
|
|
||||||
}
|
|
109
tools/uhyve.h
109
tools/uhyve.h
|
@ -1,109 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2018, Stefan Lankes, RWTH Aachen University
|
|
||||||
* All rights reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions are met:
|
|
||||||
* * Redistributions of source code must retain the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of the University nor the names of its contributors
|
|
||||||
* may be used to endorse or promote products derived from this
|
|
||||||
* software without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
||||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
||||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
||||||
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
|
||||||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
||||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
|
||||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef __UHYVE_H__
#define __UHYVE_H__

/* Everything this header itself relies on, so it can be included first. */
#include <err.h>
#include <signal.h>
#include <stddef.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Hypercall ports: file I/O and process exit */
#define UHYVE_PORT_WRITE		0x400
#define UHYVE_PORT_OPEN			0x440
#define UHYVE_PORT_CLOSE		0x480
#define UHYVE_PORT_READ			0x500
#define UHYVE_PORT_EXIT			0x540
#define UHYVE_PORT_LSEEK		0x580

// Networkports
#define UHYVE_PORT_NETINFO		0x600
#define UHYVE_PORT_NETWRITE		0x640
#define UHYVE_PORT_NETREAD		0x680
#define UHYVE_PORT_NETSTAT		0x700

/* Ports and data structures for uhyve command line arguments and envp
 * forwarding */
#define UHYVE_PORT_CMDSIZE		0x740
#define UHYVE_PORT_CMDVAL		0x780

#define UHYVE_UART_PORT			0x800

/* Interrupt line (GSI) used for the network irqfd */
#define UHYVE_IRQ			11

/* Real-time signals sent to the vcpu threads for checkpointing / migration */
#define SIGTHRCHKP			(SIGRTMIN+0)
#define SIGTHRMIG			(SIGRTMIN+1)

/*
 * ioctl() wrapper that terminates the process through err() when the call
 * returns -1 and otherwise evaluates to the ioctl() result.
 * Uses a GNU statement expression.
 */
#define kvm_ioctl(fd, cmd, arg) ({ \
	const int ret = ioctl(fd, cmd, arg); \
	if(ret == -1) \
		err(1, "KVM: ioctl " #cmd " failed"); \
	ret; \
	})

#ifdef __x86_64__
#define MAX_MSR_ENTRIES 25
/* kvm_msrs header immediately followed by storage for its entries */
struct msr_data {
	struct kvm_msrs info;
	struct kvm_msr_entry entries[MAX_MSR_ENTRIES];
};

/* Complete state of one x86_64 vcpu as saved and restored by uhyve */
typedef struct _vcpu_state {
	struct msr_data msr_data;
	struct kvm_regs regs;
	struct kvm_sregs sregs;
	struct kvm_fpu fpu;
	struct kvm_lapic_state lapic;
	struct kvm_xsave xsave;
	struct kvm_xcrs xcrs;
	struct kvm_vcpu_events events;
	struct kvm_mp_state mp_state;
} vcpu_state_t;
#else
/* Placeholder for architectures without a vcpu state definition */
typedef struct _vcpu_state {
	int dummy;
} vcpu_state_t;
#endif

typedef struct _migration_metadata migration_metadata_t;

void print_registers(void);
void timer_handler(int signum);
void *migration_handler(void *arg);
void restore_cpu_state(vcpu_state_t cpu_state);
vcpu_state_t read_cpu_state(void);
vcpu_state_t save_cpu_state(void);
void write_cpu_state(void);
void init_cpu_state(uint64_t elf_entry);
int load_kernel(uint8_t* mem, char* path);
int load_checkpoint(uint8_t* mem, char* path);
int load_migration_data(uint8_t* mem);
void wait_for_incomming_migration(migration_metadata_t *metadata, uint16_t listen_portno);
void init_kvm_arch(void);
size_t determine_dest_offset(size_t src_addr);
void determine_dirty_pages(void (*save_page_handler)(void*, size_t, void*, size_t));

#endif
|
|
175
tools/utils.c
175
tools/utils.c
|
@ -1,175 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2017, Stefan Lankes, RWTH Aachen University
|
|
||||||
* All rights reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions are met:
|
|
||||||
* * Redistributions of source code must retain the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of the University nor the names of its contributors
|
|
||||||
* may be used to endorse or promote products derived from this
|
|
||||||
* software without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
||||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
||||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
||||||
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
|
||||||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
||||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
|
||||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define _GNU_SOURCE
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <errno.h>
|
|
||||||
#include <limits.h>
|
|
||||||
|
|
||||||
#include "proxy.h"
|
|
||||||
|
|
||||||
#ifdef __x86_64__
|
|
||||||
// Execute the cpuid instruction.
//   code : value loaded into eax before the instruction runs
//   c    : in/out - *c is loaded into ecx before the instruction runs,
//          so the caller has to initialize it
//   a, b, c, d : receive eax, ebx, ecx and edx after the instruction
inline static void __cpuid(uint32_t code, uint32_t* a, uint32_t* b, uint32_t* c, uint32_t* d)
{
	uint32_t eax, ebx, edx;
	uint32_t ecx = *c;

	__asm__ __volatile__ ("cpuid"
		: "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
		: "0"(code), "2"(ecx));

	*a = eax;
	*b = ebx;
	*c = ecx;
	*d = edx;
}
|
|
||||||
|
|
||||||
// Map one character of the brand string to its decimal value.
// A blank counts as 0 because numbers with fewer than four digits are
// preceded by spaces. Every other character yields -1.
static int brand_digit(char ch)
{
	if ((ch >= '0') && (ch <= '9'))
		return ch - '0';
	if (ch == ' ')
		return 0;

	return -1;
}

// Try to determine the frequency from the CPU brand.
// Code is derived from the manual "Intel Processor
// Identification and the CPUID Instruction".
//
// Returns the frequency in MHz, or 0 if the brand string is not
// available or does not contain a well-formed frequency. Returning 0
// lets the caller fall back to another source.
static uint32_t get_frequency_from_brand(void)
{
	// cpuid writes 32 bit registers => collect them in a properly typed
	// and aligned buffer and copy the bytes into the string afterwards
	uint32_t regs[4*3] = {0};
	char cpu_brand[sizeof(regs)+1];
	uint32_t extended = 0, ebx = 0, ecx = 0, edx = 0;
	uint32_t multiplier;
	size_t index;

	// eax is compared against the highest leaf that is needed below
	__cpuid(0x80000000, &extended, &ebx, &ecx, &edx);
	if (extended < 0x80000004)
		return 0;

	__cpuid(0x80000002, regs+0, regs+1, regs+2, regs+3);
	__cpuid(0x80000003, regs+4, regs+5, regs+6, regs+7);
	__cpuid(0x80000004, regs+8, regs+9, regs+10, regs+11);

	memcpy(cpu_brand, regs, sizeof(regs));
	cpu_brand[sizeof(regs)] = '\0';

	// The number occupies the four characters in front of the unit prefix,
	// so a unit found at an index below 4 cannot be preceded by a complete
	// number. Starting at 4 keeps every cpu_brand[index-N] access in bounds.
	for(index=4; index+2<sizeof(cpu_brand); index++)
	{
		int d1, d2, d3, d4;

		if ((cpu_brand[index+1] != 'H') || (cpu_brand[index+2] != 'z'))
			continue;

		if (cpu_brand[index] == 'M')
			multiplier = 1;
		else if (cpu_brand[index] == 'G')
			multiplier = 1000;
		else if (cpu_brand[index] == 'T')
			multiplier = 1000000;
		else
			continue;

		d1 = brand_digit(cpu_brand[index-1]);
		d2 = brand_digit(cpu_brand[index-2]);
		d4 = brand_digit(cpu_brand[index-4]);

		// Compute frequency (in MHz) from brand string
		if (cpu_brand[index-3] == '.') { // If format is "x.xx"
			if ((d1 < 0) || (d2 < 0) || (d4 < 0))
				return 0;

			return (uint32_t)(d4 * 100 + d2 * 10 + d1) * multiplier / 100;
		}

		// If format is xxxx
		d3 = brand_digit(cpu_brand[index-3]);
		if ((d1 < 0) || (d2 < 0) || (d3 < 0) || (d4 < 0))
			return 0;

		return (uint32_t)(d4 * 1000 + d3 * 100 + d2 * 10 + d1) * multiplier;
	}

	return 0;
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Parse the unsigned decimal number at the beginning of str.
// Returns 0 if str does not start with a number or if the value
// is larger than UINT32_MAX.
static uint32_t parse_u32(const char* str)
{
	char* end;
	unsigned long val = strtoul(str, &end, 10);

	if ((end == str) || (val > UINT32_MAX))
		return 0;

	return (uint32_t) val;
}

// Read the frequency (in MHz) from sysfs. Returns 0 on failure.
// TODO: on some systems is cpuinfo_max_freq the turbo frequency
// => wrong value
static uint32_t cpufreq_from_sysfs(void)
{
	char line[128];
	uint32_t freq = 0;
	FILE* fp = fopen("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", "r");

	if (fp == NULL)
		return 0;

	if (fgets(line, sizeof(line), fp) != NULL) {
		// cpuinfo_max_freq is in kHz
		freq = parse_u32(line) / 1000;
	}

	fclose(fp);

	return freq;
}

// Read the frequency (in MHz) from the first "cpu MHz" line of
// /proc/cpuinfo. Returns 0 on failure.
static uint32_t cpufreq_from_cpuinfo(void)
{
	char line[128];
	uint32_t freq = 0;
	char* match;
	FILE* fp = fopen("/proc/cpuinfo", "r");

	if (fp == NULL)
		return 0;

	// read until we find the line indicating cpu frequency
	while(fgets(line, sizeof(line), fp) != NULL) {
		match = strstr(line, "cpu MHz");
		if (match == NULL)
			continue;

		// advance pointer to beginning of number
		while( ((*match < '0') || (*match > '9')) && (*match != '\0') )
			match++;

		freq = parse_u32(match);
		break;
	}

	fclose(fp);

	return freq;
}

// Determine the CPU frequency in MHz. Returns 0 if no source
// delivers a usable value.
//
// The sources are tried in this order:
//  1. the CPU brand string (x86_64 only)
//  2. cpuinfo_max_freq from sysfs
//  3. /proc/cpuinfo
uint32_t get_cpufreq(void)
{
	uint32_t freq = 0;

#ifdef __x86_64__
	freq = get_frequency_from_brand();
	if (freq > 0)
		return freq;
#endif

	// fallback solution
	freq = cpufreq_from_sysfs();
	if (freq > 0)
		return freq;

	// Resorting to /proc/cpuinfo, however on most systems this will only
	// return the current frequency that might change over time.
	// It is used whenever sysfs delivers no usable value, e.g. when
	// running inside a VM.
	return cpufreq_from_cpuinfo();
}
|
|
||||||
|
|
||||||
// Read up to count bytes from fd, starting at the file position offset,
// into buf. pread() is called repeatedly until count bytes have been
// read or pread() returns 0; calls interrupted by a signal (EINTR)
// are restarted.
//
// Returns the number of bytes stored in buf, which is smaller than
// count if pread() returned 0 early. Returns -1 if pread() fails
// (errno is left as set by pread) or if count exceeds SSIZE_MAX
// (errno is set to E2BIG).
ssize_t pread_in_full(int fd, void *buf, size_t count, off_t offset)
{
	char *cursor = buf;
	size_t remaining = count;
	ssize_t done = 0;

	if (count > SSIZE_MAX) {
		errno = E2BIG;
		return -1;
	}

	while (remaining > 0) {
		ssize_t got = pread(fd, cursor, remaining, offset);

		if (got == -1) {
			// interrupted by a signal => just try again
			if (errno == EINTR)
				continue;

			return -1;
		}

		// nothing left to read
		if (got == 0)
			break;

		cursor += got;
		offset += got;
		done += got;
		remaining -= got;
	}

	return done;
}
|
|
Loading…
Add table
Reference in a new issue