1
0
Fork 0
mirror of https://github.com/hermitcore/libhermit.git synced 2025-03-30 00:00:15 +01:00

use interrupts to notify of incoming packets

=> avoid busy waiting in uhyve's network adapter
This commit is contained in:
Stefan Lankes 2017-09-18 00:23:09 +02:00
parent 28606578e4
commit c6157e3ef7
7 changed files with 167 additions and 62 deletions

View file

@ -238,8 +238,9 @@ host system. For instance, the following command establish the tap device
```bash
$ sudo ip tuntap add tap100 mode tap
$ sudo ip addr add 10.0.5.1/24 dev tap100
$ sudo ip addr add 10.0.5.1/24 broadcast 10.0.5.255 dev tap100
$ sudo ip link set dev tap100 up
$ sudo bash -c 'echo 1 > /proc/sys/net/ipv4/conf/tap100/proxy_arp'
```
Per default, `uhyve`'s network interface uses `10.0.5.2` as IP address, `10.0.5.1`

View file

@ -44,6 +44,7 @@
#include <hermit/mailbox.h>
#include <hermit/logging.h>
#include <asm/io.h>
#include <asm/irq.h>
#include <sys/poll.h>
#include <lwip/sys.h>
#include <lwip/netif.h>
@ -57,6 +58,8 @@
#include "uhyve-net.h"
#define UHYVE_IRQ 11
static int8_t uhyve_net_init_ok = 0;
static struct netif* mynetif = NULL;
@ -72,20 +75,24 @@ static int uhyve_net_write_sync(uint8_t *data, int n)
return uhyve_netwrite.ret;
}
int uhyve_net_stat(void) {
/*
 * Query the hypervisor for the state of the network interface.
 *
 * The physical address of a request structure on the stack is written to
 * UHYVE_PORT_NETSTAT; the hypervisor fills in the status field before the
 * port write returns. uhyve's handler for this port reports 1 if
 * HERMIT_NETIF is set in the host environment and 0 otherwise.
 *
 * Returns the status value stored by the hypervisor.
 */
int uhyve_net_stat(void)
{
	/* volatile: the structure is written by the host, not by this code */
	volatile uhyve_netstat_t query;
	unsigned query_paddr = (unsigned)virt_to_phys((size_t)&query);

	outportl(UHYVE_PORT_NETSTAT, query_paddr);

	return query.status;
}
static int uhyve_net_read_sync(uint8_t *data, int *n)
{
volatile uhyve_netread_t uhyve_netread;
uhyve_netread.data = (uint8_t*)virt_to_phys((size_t)data);
uhyve_netread.len = *n;
uhyve_netread.ret = 0;
outportl(UHYVE_PORT_NETREAD, (unsigned)virt_to_phys((size_t)&uhyve_netread));
outportl(UHYVE_PORT_NETREAD, (unsigned)virt_to_phys((size_t)&uhyve_netread));
*n = uhyve_netread.len;
return uhyve_netread.ret;
@ -122,10 +129,12 @@ static err_t uhyve_netif_output(struct netif* netif, struct pbuf* p)
uint8_t transmitid = uhyve_netif->tx_queue % TX_BUF_NUM;
uint32_t i;
struct pbuf *q;
if(BUILTIN_EXPECT((uhyve_netif->tx_queue - uhyve_netif->tx_complete) > (TX_BUF_NUM - 1), 0)) {
LOG_ERROR("uhyve_netif_output: too many packets at once\n");
return ERR_IF;
}
if(BUILTIN_EXPECT(p->tot_len > 1792, 0)) {
LOG_ERROR("uhyve_netif_output: packet (%i bytes) is longer than 1792 bytes\n", p->tot_len);
return ERR_IF;
@ -163,25 +172,24 @@ static err_t uhyve_netif_output(struct netif* netif, struct pbuf* p)
uhyve_netif->tx_complete++;
uhyve_netif->tx_inuse[transmitid] = 0;
// LOG_INFO("Transmit OK | queue = %i, complete = %i \n", uhyve_netif->tx_queue, uhyve_netif->tx_complete);
return ERR_OK;
return ERR_OK;
}
//------------------------------- POLLING ----------------------------------------
//uint64_t last_poll = 0;
static int polling;
void uhyve_netif_poll(void) {
if (!uhyve_net_init_ok || polling) {
static void uhyve_netif_poll(struct netif* netif)
{
if (!uhyve_net_init_ok)
return;
}
polling = 1;
uhyve_netif_t* uhyve_netif = mynetif->state;
uhyve_netif_t* uhyve_netif = netif->state;
int len = RX_BUF_LEN;
struct pbuf *p = NULL;
struct pbuf *q;
if (uhyve_net_read_sync(uhyve_netif->rx_buf, &len) == 0) {
if (uhyve_net_read_sync(uhyve_netif->rx_buf, &len) == 0)
{
#if ETH_PAD_SIZE
len += ETH_PAD_SIZE; /*allow room for Ethernet padding */
#endif
@ -200,19 +208,24 @@ void uhyve_netif_poll(void) {
#endif
LINK_STATS_INC(link.recv);
//forward packet to LwIP
mynetif->input(p, mynetif);
netif->input(p, mynetif);
} else {
LOG_ERROR("uhyve_netif_poll: not enough memory!\n");
LINK_STATS_INC(link.memerr);
LINK_STATS_INC(link.drop);
}
}
polling = 0;
}
/*
 * Interrupt handler of the uhyve network device.
 *
 * uhyve_netif_init() installs it for vector 32+UHYVE_IRQ. The handler does
 * not inspect the saved state `s`; it only polls the interface stored in
 * `mynetif` for an incoming packet.
 */
static void uhyve_irqhandler(struct state* s)
{
// hand the work over to the regular receive path
uhyve_netif_poll(mynetif);
}
//--------------------------------- INIT -----------------------------------------
err_t uhyve_netif_init (struct netif* netif) {
err_t uhyve_netif_init (struct netif* netif)
{
uhyve_netif_t* uhyve_netif;
uint8_t tmp8 = 0;
static uint8_t num = 0;
@ -231,7 +244,7 @@ err_t uhyve_netif_init (struct netif* netif) {
kfree(uhyve_netif);
return ERR_MEM;
}
memset(uhyve_netif->rx_buf, 0x00, RX_BUF_LEN +16);
memset(uhyve_netif->rx_buf, 0x00, RX_BUF_LEN + 16);
uhyve_netif->tx_buf[0] = page_alloc(TX_BUF_NUM * TX_BUF_LEN, VMA_READ|VMA_WRITE);
if (!(uhyve_netif->tx_buf[0])) {
@ -258,16 +271,13 @@ err_t uhyve_netif_init (struct netif* netif) {
netif->hwaddr[tmp8] = dehex(*hermit_mac++) << 4;
netif->hwaddr[tmp8] |= dehex(*hermit_mac++);
hermit_mac++;
LWIP_DEBUGF(NETIF_DEBUG, ("%02x ", netif->hwaddr[tmp8]));
LWIP_DEBUGF(NETIF_DEBUG, ("%02x ", netif->hwaddr[tmp8]));
}
LWIP_DEBUGF(NETIF_DEBUG, ("\n"));
uhyve_netif->ethaddr = (struct eth_addr *)netif->hwaddr;
if (ETHARP_SUPPORT_VLAN) {
LOG_INFO("ETHARP_SUPPORT_VLAN: enabled\n");
} else {
LOG_INFO("ETHARP_SUPPORT_VLAN: disabled\n");
}
LOG_INFO("uhye_netif uses irq %d\n", UHYVE_IRQ);
irq_install_handler(32+UHYVE_IRQ, uhyve_irqhandler);
netif->name[0] = 'e';
netif->name[1] = 'n';
@ -278,7 +288,7 @@ err_t uhyve_netif_init (struct netif* netif) {
/* maximum transfer unit */
netif->mtu = 1500;
/* broadcast capability */
netif->flags |= NETIF_FLAG_BROADCAST | NETIF_FLAG_ETHARP | NETIF_FLAG_IGMP | NETIF_FLAG_LINK_UP | NETIF_FLAG_MLD6;
netif->flags |= NETIF_FLAG_BROADCAST | NETIF_FLAG_ETHARP | NETIF_FLAG_IGMP | NETIF_FLAG_LINK_UP | NETIF_FLAG_MLD6;
#if LWIP_IPV6
netif->output_ip6 = ethip6_output;
@ -288,5 +298,6 @@ err_t uhyve_netif_init (struct netif* netif) {
LOG_INFO("uhyve_netif_init: OK\n");
uhyve_net_init_ok = 1;
return ERR_OK;
}

View file

@ -91,6 +91,5 @@ typedef struct uhyve_netif {
err_t uhyve_netif_init(struct netif* netif);
int uhyve_net_stat(void);
void uhyve_netif_poll(void);
#endif

View file

@ -254,7 +254,6 @@ void check_ticks(void);
*/
void shutdown_system(void);
extern volatile uint32_t go_down;
static inline void check_workqueues_in_irqhandler(int irq)
{

View file

@ -181,6 +181,8 @@ static int init_netifs(void)
LOG_INFO("set_default\n");
netifapi_netif_set_up(&default_netif);
LOG_INFO("set_up\n");
} else {
return -ENODEV;
}
} else if (!is_single_kernel()) {
LOG_INFO("HermitCore is running side-by-side to Linux!\n");

View file

@ -36,7 +36,8 @@
static uhyve_netinfo_t netinfo;
//-------------------------------------- ATTACH LINUX TAP -----------------------------------------//
int attach_linux_tap(const char *dev) {
int attach_linux_tap(const char *dev)
{
struct ifreq ifr;
int fd, err;
@ -52,7 +53,7 @@ int attach_linux_tap(const char *dev) {
fd = open("/dev/net/tun", O_RDWR | O_NONBLOCK);
// Initialize interface request for TAP interface
memset(&ifr, 0, sizeof(ifr));
memset(&ifr, 0x00, sizeof(ifr));
ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
if (strlen(dev) > IFNAMSIZ) {
@ -67,16 +68,17 @@ int attach_linux_tap(const char *dev) {
// create before a tap device with these commands:
//
// sudo ip tuntap add <devname> mode tap user <user>
// sudo ip addr add 10.0.5.1/24 dev <devname>
// sudo ip addr add 10.0.5.1/24 broadcast 10.0.5.255 dev <devname>
// sudo ip link set dev <devname> up
//
if (ioctl(fd, TUNSETIFF, (void *)&ifr) == -1) {
if (ioctl(fd, TUNSETIFF, (void *)&ifr) < 0) {
err = errno;
close(fd);
errno = err;
return -1;
}
// If we got back a different device than the one requested, e.g. because
// the caller mistakenly passed in '%d' (yes, that's really in the Linux API)
// then fail
@ -100,19 +102,23 @@ int attach_linux_tap(const char *dev) {
errno = ENODEV;
return -1;
}
return fd;
}
//---------------------------------- GET MAC ----------------------------------------------//
char* uhyve_get_mac(void) {
/*
 * Return the MAC address string kept in the file-local `netinfo` record.
 *
 * The returned pointer refers to static storage, so it stays valid after the
 * call and must not be freed by the caller.
 */
char* uhyve_get_mac(void)
{
return netinfo.mac_str;
}
//---------------------------------- SET MAC ----------------------------------------------//
int uhyve_set_mac(void) {
int uhyve_set_mac(void)
{
int mac_is_set = 0;
uint8_t guest_mac[6];
char* str = getenv("HERMIT_NETIF_MAC");
if (str)
{
@ -125,7 +131,7 @@ int uhyve_set_mac(void) {
if(isxdigit(*v_macptr)) {
i++;
} else if (*v_macptr == ':') {
if (i /2 - 1 != s++ )
if (i / 2 - 1 != s++)
break;
} else {
s = -1;

View file

@ -45,10 +45,10 @@
#include <sched.h>
#include <signal.h>
#include <limits.h>
#include <assert.h>
#include <pthread.h>
#include <elf.h>
#include <err.h>
#include <poll.h>
#include <sys/wait.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
@ -57,6 +57,7 @@
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/eventfd.h>
#include <linux/const.h>
#include <linux/kvm.h>
#include <asm/msr-index.h>
@ -158,10 +159,18 @@
#define UHYVE_PORT_NETREAD 0x507
#define UHYVE_PORT_NETSTAT 0x508
#define UHYVE_IRQ 11
#define IOAPIC_DEFAULT_BASE 0xfec00000
#define APIC_DEFAULT_BASE 0xfee00000
static bool restart = false;
static bool cap_tsc_deadline = false;
static bool cap_irqchip = false;
static bool cap_adjust_clock_stable = false;
static bool cap_irqfd = false;
static bool cap_vapic = false;
static bool verbose = false;
static bool full_checkpoint = false;
static uint32_t ncores = 1;
@ -171,8 +180,9 @@ static uint8_t* mboot = NULL;
static size_t guest_size = 0x20000000ULL;
static uint64_t elf_entry;
static pthread_t* vcpu_threads = NULL;
static pthread_t net_thread;
static int* vcpu_fds = NULL;
static int kvm = -1, vmfd = -1, netfd = -1;
static int kvm = -1, vmfd = -1, netfd = -1, efd = -1;
static uint32_t no_checkpoint = 0;
static pthread_mutex_t kvm_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_barrier_t barrier;
@ -241,6 +251,9 @@ static void uhyve_exit(void* arg)
pthread_kill(vcpu_threads[i], SIGTERM);
}
if (netfd > 0)
pthread_kill(net_thread, SIGTERM);
}
close_fd(&vcpufd);
@ -766,6 +779,49 @@ static void setup_cpuid(int kvm, int vcpufd)
free(kvm_cpuid);
}
/*
 * Body of the network notification thread.
 *
 * Blocks in poll() until the network descriptor `netfd` becomes readable and
 * then writes to the eventfd `efd`. check_network() binds `efd` to the
 * guest interrupt UHYVE_IRQ via KVM_IRQFD, so every write raises that
 * interrupt in the guest.
 *
 * arg is unused. The loop never terminates on its own; uhyve_exit() stops the
 * thread with pthread_kill(). The trailing return only satisfies the
 * pthread start-routine signature.
 */
static void* wait_for_packet(void* arg)
{
	struct pollfd fds = { .fd = netfd,
			      .events = POLLIN,
			      .revents = 0 };

	(void)arg;

	while (1)
	{
		fds.revents = 0;

		// -1 is the only timeout value POSIX defines as "block forever"
		int ret = poll(&fds, 1, -1);

		if (ret < 0) {
			// an interrupted call is simply restarted, everything else is reported
			if (errno != EINTR)
				perror("poll()");
			continue;
		}

		if (ret > 0) {
			// an eventfd expects exactly one 64 bit counter increment per write
			uint64_t event_counter = 1;

			if (write(efd, &event_counter, sizeof(event_counter)) != (ssize_t)sizeof(event_counter))
				perror("write(efd)");
		}
	}

	return NULL;
}
/*
 * Start the network notification thread on first use.
 *
 * Does nothing if the eventfd already exists (efd >= 0) or if HERMIT_NETIF is
 * not set in the environment. Otherwise it creates the eventfd, registers it
 * with KVM as the trigger for guest interrupt UHYVE_IRQ (KVM_IRQFD) and spawns
 * wait_for_packet(), which writes to the eventfd whenever a packet arrives.
 *
 * Terminates the process via err() if the eventfd or the thread cannot be
 * created.
 */
static inline void check_network(void)
{
	// should we start the network thread?
	if ((efd >= 0) || !getenv("HERMIT_NETIF"))
		return;

	efd = eventfd(0, 0);
	// fail here instead of handing an invalid descriptor to KVM_IRQFD
	if (efd < 0)
		err(1, "unable to create eventfd");

	struct kvm_irqfd irqfd = { .fd = efd, .gsi = UHYVE_IRQ };
	kvm_ioctl(vmfd, KVM_IRQFD, &irqfd);

	if (pthread_create(&net_thread, NULL, wait_for_packet, NULL))
		err(1, "unable to create thread");
}
static int vcpu_loop(void)
{
int ret;
@ -854,47 +910,48 @@ static int vcpu_loop(void)
break;
}
case UHYVE_PORT_NETINFO: {
case UHYVE_PORT_NETINFO: {
unsigned data = *((unsigned*)((size_t)run+run->io.data_offset));
uhyve_netinfo_t* uhyve_netinfo = (uhyve_netinfo_t*)(guest_mem+data);
memcpy(uhyve_netinfo->mac_str, uhyve_get_mac(), 18);
// guest configure the ethernet device => start network thread
check_network();
break;
}
case UHYVE_PORT_NETWRITE: {
case UHYVE_PORT_NETWRITE: {
unsigned data = *((unsigned*)((size_t)run+run->io.data_offset));
uhyve_netwrite_t* uhyve_netwrite = (uhyve_netwrite_t*)(guest_mem + data);
int ret;
ret = write(netfd, guest_mem + (size_t)uhyve_netwrite->data, uhyve_netwrite->len);
assert(uhyve_netwrite->len == ret);
uhyve_netwrite->ret = 0;
ret = write(netfd, guest_mem + (size_t)uhyve_netwrite->data, uhyve_netwrite->len);
if (ret >= 0) {
uhyve_netwrite->ret = 0;
uhyve_netwrite->len = ret;
} else {
uhyve_netwrite->ret = -1;
}
break;
}
case UHYVE_PORT_NETREAD: {
case UHYVE_PORT_NETREAD: {
unsigned data = *((unsigned*)((size_t)run+run->io.data_offset));
uhyve_netread_t* uhyve_netread = (uhyve_netread_t*)(guest_mem + data);
int ret;
ret = read(netfd, guest_mem + (size_t)uhyve_netread->data, uhyve_netread->len);
if ((ret == 0) || (ret == -1 && errno == EAGAIN)) {
uhyve_netread->ret = -1;
break;
}
assert(ret > 0);
uhyve_netread->len = ret;
uhyve_netread->ret = 0;
if (ret > 0) {
uhyve_netread->len = ret;
uhyve_netread->ret = 0;
} else uhyve_netread->ret = -1;
break;
}
case UHYVE_PORT_NETSTAT: {
case UHYVE_PORT_NETSTAT: {
unsigned status = *((unsigned*)((size_t)run+run->io.data_offset));
uhyve_netstat_t* uhyve_netstat = (uhyve_netstat_t*)(guest_mem + status);
char* str = getenv("HERMIT_NETIF");
if (str) {
if (str)
uhyve_netstat->status = 1;
} else {
else
uhyve_netstat->status = 0;
}
break;
}
@ -905,6 +962,7 @@ static int vcpu_loop(void)
uhyve_lseek->offset = lseek(uhyve_lseek->fd, uhyve_lseek->offset, uhyve_lseek->whence);
break;
}
default:
err(1, "KVM: unhandled KVM_EXIT_IO at port 0x%x, direction %d\n", run->io.port, run->io.direction);
break;
@ -958,6 +1016,7 @@ static int vcpu_init(void)
if (run == MAP_FAILED)
err(1, "KVM: VCPU mmap failed");
run->apic_base = APIC_DEFAULT_BASE;
setup_cpuid(kvm, vcpufd);
if (restart) {
@ -1220,7 +1279,7 @@ int uhyve_init(char *path)
if (guest_mem == MAP_FAILED)
err(1, "mmap failed");
} else {
guest_size += + KVM_32BIT_GAP_SIZE;
guest_size += KVM_32BIT_GAP_SIZE;
guest_mem = mmap(NULL, guest_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (guest_mem == MAP_FAILED)
err(1, "mmap failed");
@ -1269,14 +1328,6 @@ int uhyve_init(char *path)
kvm_ioctl(vmfd, KVM_CREATE_IRQCHIP, NULL);
const char* netif_str = getenv("HERMIT_NETIF");
if (netif_str)
{
//TODO: strncmp for different network interfaces
// for example tun/tap device or uhyvetap device
netfd = uhyve_net_init(netif_str);
}
#ifdef KVM_CAP_X2APIC_API
// enable x2APIC support
struct kvm_enable_cap cap = {
@ -1287,12 +1338,36 @@ int uhyve_init(char *path)
kvm_ioctl(vmfd, KVM_ENABLE_CAP, &cap);
#endif
// initialize the IOAPIC with HermitCore's default settings
struct kvm_irqchip chip;
chip.chip_id = KVM_IRQCHIP_IOAPIC;
kvm_ioctl(vmfd, KVM_GET_IRQCHIP, &chip);
for(int i=0; i<KVM_IOAPIC_NUM_PINS; i++) {
chip.chip.ioapic.redirtbl[i].fields.vector = 0x20+i;
chip.chip.ioapic.redirtbl[i].fields.delivery_mode = 0;
chip.chip.ioapic.redirtbl[i].fields.dest_mode = 0;
chip.chip.ioapic.redirtbl[i].fields.delivery_status = 0;
chip.chip.ioapic.redirtbl[i].fields.polarity = 0;
chip.chip.ioapic.redirtbl[i].fields.remote_irr = 0;
chip.chip.ioapic.redirtbl[i].fields.trig_mode = 0;
chip.chip.ioapic.redirtbl[i].fields.mask = i != 2 ? 0 : 1;
chip.chip.ioapic.redirtbl[i].fields.dest_id = 0;
}
kvm_ioctl(vmfd, KVM_SET_IRQCHIP, &chip);
// try to detect KVM extensions
cap_tsc_deadline = kvm_ioctl(vmfd, KVM_CHECK_EXTENSION, KVM_CAP_TSC_DEADLINE_TIMER) <= 0 ? false : true;
cap_irqchip = kvm_ioctl(vmfd, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP) <= 0 ? false : true;
#ifdef KVM_CLOCK_TSC_STABLE
cap_adjust_clock_stable = kvm_ioctl(vmfd, KVM_CHECK_EXTENSION, KVM_CAP_ADJUST_CLOCK) == KVM_CLOCK_TSC_STABLE ? true : false;
#endif
cap_irqfd = kvm_ioctl(vmfd, KVM_CHECK_EXTENSION, KVM_CAP_IRQFD) <= 0 ? false : true;
if (!cap_irqfd)
err(1, "the support of KVM_CAP_IRQFD is curently required");
// TODO: add VAPIC support
cap_vapic = kvm_ioctl(vmfd, KVM_CHECK_EXTENSION, KVM_CAP_VAPIC) <= 0 ? false : true;
//if (cap_vapic)
// printf("System supports vapic\n");
if (restart) {
if (load_checkpoint(guest_mem, path) != 0)
@ -1306,7 +1381,19 @@ int uhyve_init(char *path)
cpuid = 0;
// create first CPU, it will be the boot processor by default
return vcpu_init();
int ret = vcpu_init();
const char* netif_str = getenv("HERMIT_NETIF");
if (netif_str)
{
// TODO: strncmp for different network interfaces
// for example tun/tap device or uhyvetap device
netfd = uhyve_net_init(netif_str);
if (netfd < 0)
err(1, "unable to initialized network");
}
return ret;
}
static void timer_handler(int signum)