From c6157e3ef766dbbd4f31f150338bb3d1ca8cd13f Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Mon, 18 Sep 2017 00:23:09 +0200 Subject: [PATCH] use interrupts to notify incoming packets => avoid busy waiting in uhyve's network adapter --- README.md | 3 +- drivers/net/uhyve-net.c | 55 +++++++++------ drivers/net/uhyve-net.h | 1 - include/hermit/tasks.h | 1 - kernel/main.c | 2 + tools/uhyve-net.c | 20 ++++-- tools/uhyve.c | 147 ++++++++++++++++++++++++++++++++-------- 7 files changed, 167 insertions(+), 62 deletions(-) diff --git a/README.md b/README.md index f91e7b7be..0e3521608 100644 --- a/README.md +++ b/README.md @@ -238,8 +238,9 @@ host system. For instance, the following command establish the tap device ```bash $ sudo ip tuntap add tap100 mode tap -$ sudo ip addr add 10.0.5.1/24 dev tap100 +$ sudo ip addr add 10.0.5.1/24 broadcast 10.0.5.255 dev tap100 $ sudo ip link set dev tap100 up +$ sudo bash -c 'echo 1 > /proc/sys/net/ipv4/conf/tap100/proxy_arp' ``` Per default, `uhyve`'s network interface uses `10.0.5.2`as IP address, `10.0.5.1` diff --git a/drivers/net/uhyve-net.c b/drivers/net/uhyve-net.c index fd61054c9..46f8b3db6 100755 --- a/drivers/net/uhyve-net.c +++ b/drivers/net/uhyve-net.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include @@ -57,6 +58,8 @@ #include "uhyve-net.h" +#define UHYVE_IRQ 11 + static int8_t uhyve_net_init_ok = 0; static struct netif* mynetif = NULL; @@ -72,20 +75,24 @@ static int uhyve_net_write_sync(uint8_t *data, int n) return uhyve_netwrite.ret; } -int uhyve_net_stat(void) { +int uhyve_net_stat(void) +{ volatile uhyve_netstat_t uhyve_netstat; + outportl(UHYVE_PORT_NETSTAT, (unsigned)virt_to_phys((size_t)&uhyve_netstat)); + return uhyve_netstat.status; } static int uhyve_net_read_sync(uint8_t *data, int *n) { volatile uhyve_netread_t uhyve_netread; + uhyve_netread.data = (uint8_t*)virt_to_phys((size_t)data); uhyve_netread.len = *n; uhyve_netread.ret = 0; - outportl(UHYVE_PORT_NETREAD, 
(unsigned)virt_to_phys((size_t)&uhyve_netread)); + outportl(UHYVE_PORT_NETREAD, (unsigned)virt_to_phys((size_t)&uhyve_netread)); *n = uhyve_netread.len; return uhyve_netread.ret; @@ -122,10 +129,12 @@ static err_t uhyve_netif_output(struct netif* netif, struct pbuf* p) uint8_t transmitid = uhyve_netif->tx_queue % TX_BUF_NUM; uint32_t i; struct pbuf *q; + if(BUILTIN_EXPECT((uhyve_netif->tx_queue - uhyve_netif->tx_complete) > (TX_BUF_NUM - 1), 0)) { LOG_ERROR("uhyve_netif_output: too many packets at once\n"); return ERR_IF; } + if(BUILTIN_EXPECT(p->tot_len > 1792, 0)) { LOG_ERROR("uhyve_netif_output: packet (%i bytes) is longer than 1792 bytes\n", p->tot_len); return ERR_IF; @@ -163,25 +172,24 @@ static err_t uhyve_netif_output(struct netif* netif, struct pbuf* p) uhyve_netif->tx_complete++; uhyve_netif->tx_inuse[transmitid] = 0; // LOG_INFO("Transmit OK | queue = %i, complete = %i \n", uhyve_netif->tx_queue, uhyve_netif->tx_complete); - return ERR_OK; + return ERR_OK; } //------------------------------- POLLING ---------------------------------------- -//uint64_t last_poll = 0; -static int polling; -void uhyve_netif_poll(void) { - if (!uhyve_net_init_ok || polling) { +static void uhyve_netif_poll(struct netif* netif) +{ + if (!uhyve_net_init_ok) return; - } - polling = 1; - uhyve_netif_t* uhyve_netif = mynetif->state; + + uhyve_netif_t* uhyve_netif = netif->state; int len = RX_BUF_LEN; struct pbuf *p = NULL; struct pbuf *q; - if (uhyve_net_read_sync(uhyve_netif->rx_buf, &len) == 0) { + if (uhyve_net_read_sync(uhyve_netif->rx_buf, &len) == 0) + { #if ETH_PAD_SIZE len += ETH_PAD_SIZE; /*allow room for Ethernet padding */ #endif @@ -200,19 +208,24 @@ void uhyve_netif_poll(void) { #endif LINK_STATS_INC(link.recv); //forward packet to LwIP - mynetif->input(p, mynetif); + netif->input(p, mynetif); } else { LOG_ERROR("uhyve_netif_poll: not enough memory!\n"); LINK_STATS_INC(link.memerr); LINK_STATS_INC(link.drop); } } - polling = 0; +} + +static void 
uhyve_irqhandler(struct state* s) +{ + uhyve_netif_poll(mynetif); } //--------------------------------- INIT ----------------------------------------- -err_t uhyve_netif_init (struct netif* netif) { +err_t uhyve_netif_init (struct netif* netif) +{ uhyve_netif_t* uhyve_netif; uint8_t tmp8 = 0; static uint8_t num = 0; @@ -231,7 +244,7 @@ err_t uhyve_netif_init (struct netif* netif) { kfree(uhyve_netif); return ERR_MEM; } - memset(uhyve_netif->rx_buf, 0x00, RX_BUF_LEN +16); + memset(uhyve_netif->rx_buf, 0x00, RX_BUF_LEN + 16); uhyve_netif->tx_buf[0] = page_alloc(TX_BUF_NUM * TX_BUF_LEN, VMA_READ|VMA_WRITE); if (!(uhyve_netif->tx_buf[0])) { @@ -258,16 +271,13 @@ err_t uhyve_netif_init (struct netif* netif) { netif->hwaddr[tmp8] = dehex(*hermit_mac++) << 4; netif->hwaddr[tmp8] |= dehex(*hermit_mac++); hermit_mac++; - LWIP_DEBUGF(NETIF_DEBUG, ("%02x ", netif->hwaddr[tmp8])); + LWIP_DEBUGF(NETIF_DEBUG, ("%02x ", netif->hwaddr[tmp8])); } LWIP_DEBUGF(NETIF_DEBUG, ("\n")); uhyve_netif->ethaddr = (struct eth_addr *)netif->hwaddr; - if (ETHARP_SUPPORT_VLAN) { - LOG_INFO("ETHARP_SUPPORT_VLAN: enabled\n"); - } else { - LOG_INFO("ETHARP_SUPPORT_VLAN: disabled\n"); - } + LOG_INFO("uhye_netif uses irq %d\n", UHYVE_IRQ); + irq_install_handler(32+UHYVE_IRQ, uhyve_irqhandler); netif->name[0] = 'e'; netif->name[1] = 'n'; @@ -278,7 +288,7 @@ err_t uhyve_netif_init (struct netif* netif) { /* maximum transfer unit */ netif->mtu = 1500; /* broadcast capability */ - netif->flags |= NETIF_FLAG_BROADCAST | NETIF_FLAG_ETHARP | NETIF_FLAG_IGMP | NETIF_FLAG_LINK_UP | NETIF_FLAG_MLD6; + netif->flags |= NETIF_FLAG_BROADCAST | NETIF_FLAG_ETHARP | NETIF_FLAG_IGMP | NETIF_FLAG_LINK_UP | NETIF_FLAG_MLD6; #if LWIP_IPV6 netif->output_ip6 = ethip6_output; @@ -288,5 +298,6 @@ err_t uhyve_netif_init (struct netif* netif) { LOG_INFO("uhyve_netif_init: OK\n"); uhyve_net_init_ok = 1; + return ERR_OK; } diff --git a/drivers/net/uhyve-net.h b/drivers/net/uhyve-net.h index 92792cd10..9b45b156a 100755 --- 
a/drivers/net/uhyve-net.h +++ b/drivers/net/uhyve-net.h @@ -91,6 +91,5 @@ typedef struct uhyve_netif { err_t uhyve_netif_init(struct netif* netif); int uhyve_net_stat(void); -void uhyve_netif_poll(void); #endif diff --git a/include/hermit/tasks.h b/include/hermit/tasks.h index 9842bd7aa..e31f5298b 100644 --- a/include/hermit/tasks.h +++ b/include/hermit/tasks.h @@ -254,7 +254,6 @@ void check_ticks(void); */ void shutdown_system(void); - extern volatile uint32_t go_down; static inline void check_workqueues_in_irqhandler(int irq) { diff --git a/kernel/main.c b/kernel/main.c index 702037a88..5b0a8f698 100644 --- a/kernel/main.c +++ b/kernel/main.c @@ -181,6 +181,8 @@ static int init_netifs(void) LOG_INFO("set_default\n"); netifapi_netif_set_up(&default_netif); LOG_INFO("set_up\n"); + } else { + return -ENODEV; } } else if (!is_single_kernel()) { LOG_INFO("HermitCore is running side-by-side to Linux!\n"); diff --git a/tools/uhyve-net.c b/tools/uhyve-net.c index bb8d08acb..fbd1bf980 100755 --- a/tools/uhyve-net.c +++ b/tools/uhyve-net.c @@ -36,7 +36,8 @@ static uhyve_netinfo_t netinfo; //-------------------------------------- ATTACH LINUX TAP -----------------------------------------// -int attach_linux_tap(const char *dev) { +int attach_linux_tap(const char *dev) +{ struct ifreq ifr; int fd, err; @@ -52,7 +53,7 @@ int attach_linux_tap(const char *dev) { fd = open("/dev/net/tun", O_RDWR | O_NONBLOCK); // Initialize interface request for TAP interface - memset(&ifr, 0, sizeof(ifr)); + memset(&ifr, 0x00, sizeof(ifr)); ifr.ifr_flags = IFF_TAP | IFF_NO_PI; if (strlen(dev) > IFNAMSIZ) { @@ -67,16 +68,17 @@ int attach_linux_tap(const char *dev) { // create before a tap device with these commands: // // sudo ip tuntap add mode tap user - // sudo ip addr add 10.0.5.1/24 dev + // sudo ip addr add 10.0.5.1/24 broadcast 10.0.5.255 // sudo ip link set dev up // - if (ioctl(fd, TUNSETIFF, (void *)&ifr) == -1) { + if (ioctl(fd, TUNSETIFF, (void *)&ifr) < 0) { err = errno; close(fd); 
errno = err; return -1; } + // If we got back a different device than the one requested, e.g. because // the caller mistakenly passed in '%d' (yes, that's really in the Linux API) // then fail @@ -100,19 +102,23 @@ int attach_linux_tap(const char *dev) { errno = ENODEV; return -1; } + return fd; } //---------------------------------- GET MAC ----------------------------------------------// -char* uhyve_get_mac(void) { +char* uhyve_get_mac(void) +{ return netinfo.mac_str; } //---------------------------------- SET MAC ----------------------------------------------// -int uhyve_set_mac(void) { +int uhyve_set_mac(void) +{ int mac_is_set = 0; uint8_t guest_mac[6]; + char* str = getenv("HERMIT_NETIF_MAC"); if (str) { @@ -125,7 +131,7 @@ int uhyve_set_mac(void) { if(isxdigit(*v_macptr)) { i++; } else if (*v_macptr == ':') { - if (i /2 - 1 != s++ ) + if (i / 2 - 1 != s++) break; } else { s = -1; diff --git a/tools/uhyve.c b/tools/uhyve.c index c0a2c9ac5..4e4c736fb 100644 --- a/tools/uhyve.c +++ b/tools/uhyve.c @@ -45,10 +45,10 @@ #include #include #include -#include #include #include #include +#include #include #include #include @@ -57,6 +57,7 @@ #include #include #include +#include #include #include #include @@ -158,10 +159,18 @@ #define UHYVE_PORT_NETREAD 0x507 #define UHYVE_PORT_NETSTAT 0x508 +#define UHYVE_IRQ 11 + +#define IOAPIC_DEFAULT_BASE 0xfec00000 +#define APIC_DEFAULT_BASE 0xfee00000 + + static bool restart = false; static bool cap_tsc_deadline = false; static bool cap_irqchip = false; static bool cap_adjust_clock_stable = false; +static bool cap_irqfd = false; +static bool cap_vapic = false; static bool verbose = false; static bool full_checkpoint = false; static uint32_t ncores = 1; @@ -171,8 +180,9 @@ static uint8_t* mboot = NULL; static size_t guest_size = 0x20000000ULL; static uint64_t elf_entry; static pthread_t* vcpu_threads = NULL; +static pthread_t net_thread; static int* vcpu_fds = NULL; -static int kvm = -1, vmfd = -1, netfd = -1; +static int kvm = 
-1, vmfd = -1, netfd = -1, efd = -1; static uint32_t no_checkpoint = 0; static pthread_mutex_t kvm_lock = PTHREAD_MUTEX_INITIALIZER; static pthread_barrier_t barrier; @@ -241,6 +251,9 @@ static void uhyve_exit(void* arg) pthread_kill(vcpu_threads[i], SIGTERM); } + + if (netfd > 0) + pthread_kill(net_thread, SIGTERM); } close_fd(&vcpufd); @@ -766,6 +779,49 @@ static void setup_cpuid(int kvm, int vcpufd) free(kvm_cpuid); } +static void* wait_for_packet(void* arg) +{ + int ret; + struct pollfd fds = { .fd = netfd, + .events = POLLIN, + .revents = 0}; + + while(1) + { + fds.revents = 0; + + ret = poll(&fds, 1, -1000); + + if (ret < 0 && errno == EINTR) + continue; + + if (ret < 0) + perror("poll()"); + else if (ret) { + uint64_t event_counter = 1; + write(efd, &event_counter, sizeof(event_counter)); + } + } + + return NULL; +} + +static inline void check_network(void) +{ + // should we start the network thread? + if ((efd < 0) && (getenv("HERMIT_NETIF"))) { + struct kvm_irqfd irqfd = {}; + + efd = eventfd(0, 0); + irqfd.fd = efd; + irqfd.gsi = UHYVE_IRQ; + kvm_ioctl(vmfd, KVM_IRQFD, &irqfd); + + if (pthread_create(&net_thread, NULL, wait_for_packet, NULL)) + err(1, "unable to create thread"); + } +} + static int vcpu_loop(void) { int ret; @@ -854,47 +910,48 @@ static int vcpu_loop(void) break; } - case UHYVE_PORT_NETINFO: { + case UHYVE_PORT_NETINFO: { unsigned data = *((unsigned*)((size_t)run+run->io.data_offset)); uhyve_netinfo_t* uhyve_netinfo = (uhyve_netinfo_t*)(guest_mem+data); memcpy(uhyve_netinfo->mac_str, uhyve_get_mac(), 18); + // guest configure the ethernet device => start network thread + check_network(); break; } - case UHYVE_PORT_NETWRITE: { + case UHYVE_PORT_NETWRITE: { unsigned data = *((unsigned*)((size_t)run+run->io.data_offset)); uhyve_netwrite_t* uhyve_netwrite = (uhyve_netwrite_t*)(guest_mem + data); - int ret; - ret = write(netfd, guest_mem + (size_t)uhyve_netwrite->data, uhyve_netwrite->len); - assert(uhyve_netwrite->len == ret); 
uhyve_netwrite->ret = 0; + ret = write(netfd, guest_mem + (size_t)uhyve_netwrite->data, uhyve_netwrite->len); + if (ret >= 0) { + uhyve_netwrite->ret = 0; + uhyve_netwrite->len = ret; + } else { + uhyve_netwrite->ret = -1; + } break; } - case UHYVE_PORT_NETREAD: { + case UHYVE_PORT_NETREAD: { unsigned data = *((unsigned*)((size_t)run+run->io.data_offset)); uhyve_netread_t* uhyve_netread = (uhyve_netread_t*)(guest_mem + data); - int ret; ret = read(netfd, guest_mem + (size_t)uhyve_netread->data, uhyve_netread->len); - if ((ret == 0) || (ret == -1 && errno == EAGAIN)) { - uhyve_netread->ret = -1; - break; - } - assert(ret > 0); - uhyve_netread->len = ret; - uhyve_netread->ret = 0; + if (ret > 0) { + uhyve_netread->len = ret; + uhyve_netread->ret = 0; + } else uhyve_netread->ret = -1; break; } - case UHYVE_PORT_NETSTAT: { + case UHYVE_PORT_NETSTAT: { unsigned status = *((unsigned*)((size_t)run+run->io.data_offset)); uhyve_netstat_t* uhyve_netstat = (uhyve_netstat_t*)(guest_mem + status); char* str = getenv("HERMIT_NETIF"); - if (str) { + if (str) uhyve_netstat->status = 1; - } else { + else uhyve_netstat->status = 0; - } break; } @@ -905,6 +962,7 @@ static int vcpu_loop(void) uhyve_lseek->offset = lseek(uhyve_lseek->fd, uhyve_lseek->offset, uhyve_lseek->whence); break; } + default: err(1, "KVM: unhandled KVM_EXIT_IO at port 0x%x, direction %d\n", run->io.port, run->io.direction); break; @@ -958,6 +1016,7 @@ static int vcpu_init(void) if (run == MAP_FAILED) err(1, "KVM: VCPU mmap failed"); + run->apic_base = APIC_DEFAULT_BASE; setup_cpuid(kvm, vcpufd); if (restart) { @@ -1220,7 +1279,7 @@ int uhyve_init(char *path) if (guest_mem == MAP_FAILED) err(1, "mmap failed"); } else { - guest_size += + KVM_32BIT_GAP_SIZE; + guest_size += KVM_32BIT_GAP_SIZE; guest_mem = mmap(NULL, guest_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (guest_mem == MAP_FAILED) err(1, "mmap failed"); @@ -1269,14 +1328,6 @@ int uhyve_init(char *path) kvm_ioctl(vmfd, 
KVM_CREATE_IRQCHIP, NULL); - const char* netif_str = getenv("HERMIT_NETIF"); - if (netif_str) - { - //TODO: strncmp for different network interfaces - // for example tun/tap device or uhyvetap device - netfd = uhyve_net_init(netif_str); - } - #ifdef KVM_CAP_X2APIC_API // enable x2APIC support struct kvm_enable_cap cap = { @@ -1287,12 +1338,36 @@ int uhyve_init(char *path) kvm_ioctl(vmfd, KVM_ENABLE_CAP, &cap); #endif + // initialize IOAPIC with HermitCore's default settings + struct kvm_irqchip chip; + chip.chip_id = KVM_IRQCHIP_IOAPIC; + kvm_ioctl(vmfd, KVM_GET_IRQCHIP, &chip); + for(int i=0; i