diff --git a/common/include/villas/kernel/vfio.hpp b/common/include/villas/kernel/vfio.hpp deleted file mode 100644 index 89a2d7d15..000000000 --- a/common/include/villas/kernel/vfio.hpp +++ /dev/null @@ -1,175 +0,0 @@ -/** Virtual Function IO wrapper around kernel API - * - * @file - * @author Steffen Vogel - * @author Daniel Krebs - * @copyright 2014-2021, Steffen Vogel - * @copyright 2018, Daniel Krebs - *********************************************************************************/ - -#pragma once - -#include -#include -#include - -#include -#include - -#define VFIO_PATH "/dev/vfio/" -#define VFIO_DEV VFIO_PATH "vfio" - -#ifndef VFIO_NOIOMMU_IOMMU - #define VFIO_NOIOMMU_IOMMU 8 -#endif - -namespace villas { -namespace kernel { - -namespace pci { -namespace vfio { - -class Device { -public: - Device(const std::string &name, Group &group) : - name(name), - fd(-1), - pci_device(nullptr), - group(group) - { } - - ~Device(); - - bool reset(); - - /** Map a device memory region to the application address space (e.g. 
PCI BARs) */ - void* regionMap(size_t index); - - /** munmap() a region which has been mapped by vfio_map_region() */ - bool regionUnmap(size_t index); - - /** Get the size of a device memory region */ - size_t regionGetSize(size_t index); - - - /** Enable memory accesses and bus mastering for PCI device */ - bool pciEnable(); - - bool pciHotReset(); - int pciMsiInit(int efds[32]); - int pciMsiDeinit(int efds[32]); - bool pciMsiFind(int nos[32]); - - bool isVfioPciDevice() const; - -private: - /// Name of the device as listed under - /// /sys/kernel/iommu_groups/[vfio_group::index]/devices/ - std::string name; - - /// VFIO device file descriptor - int fd; - - struct vfio_device_info info; - - std::vector irqs; - std::vector regions; - std::vector mappings; - - /**< libpci handle of the device */ - const kernel::pci::Device *pci_device; - - Group &group; /**< The VFIO group this device belongs to */ -}; - - - -class Group { - friend class Container; - friend Device; -private: - Group(int index) : - fd(-1), - index(index), - container(nullptr) - { } -public: - ~Group(); - - static std::unique_ptr - attach(Container &container, int groupIndex); - - std::list>& getDevices() - { return devices; } - - int getFd() - { return fd; } - -private: - /// VFIO group file descriptor - int fd; - - /// Index of the IOMMU group as listed under /sys/kernel/iommu_groups/ - int index; - - /// Status of group - struct vfio_group_status status; - - /// All devices owned by this group - std::list> devices; - - Container* container; /**< The VFIO container to which this group is belonging */ -}; - - -class Container { -private: - Container(); -public: - ~Container(); - - static std::shared_ptr - create(); - - void dump(); - - Device & attachDevice(const char *name, int groupIndex); - Device & attachDevice(const pci::Device &pdev); - - /** - * @brief Map VM to an IOVA, which is accessible by devices in the container - * @param virt virtual address of memory - * @param phys IOVA where to map 
@p virt, -1 to use VFIO internal allocator - * @param length size of memory region in bytes - * @return IOVA address, UINTPTR_MAX on failure - */ - uintptr_t memoryMap(uintptr_t virt, uintptr_t phys, size_t length); - - /** munmap() a region which has been mapped by vfio_map_region() */ - bool memoryUnmap(uintptr_t phys, size_t length); - - bool isIommuEnabled() const - { return this->hasIommu; } - - const int &getFd() const - { return fd; } - - std::list> &getGroups() - { return groups; } - -private: - Group & getOrAttachGroup(int index); - - int fd; - int version; - int extensions; - uint64_t iova_next; /**< Next free IOVA address */ - bool hasIommu; - - /// All groups bound to this container - std::list> groups; -}; - -} /* namespace vfio */ -} /* namespace kernel */ -} /* namespace villas */ diff --git a/common/include/villas/kernel/vfio_container.hpp b/common/include/villas/kernel/vfio_container.hpp new file mode 100644 index 000000000..17b48853e --- /dev/null +++ b/common/include/villas/kernel/vfio_container.hpp @@ -0,0 +1,82 @@ +/** Virtual Function IO wrapper around kernel API + * + * @file + * @author Niklas Eiling + * @author Steffen Vogel + * @author Daniel Krebs + * @copyright 2022, Niklas Eiling + * @copyright 2014-2021, Steffen Vogel + * @copyright 2018, Daniel Krebs + *********************************************************************************/ + +#pragma once + +#include +#include +#include + +#include +#include + +#include "vfio_group.hpp" + +namespace villas { +namespace kernel { +namespace vfio { + +class Container { +private: + //This is a singleton: There can only be one container to rule them all. + Container(); +public: + //The Container instance is lazily initialized and correctly destroyed. 
+ static Container* getInstance() + { + static Container instance; + return &instance; + }; + //No copying allowed + Container(Container const&) = delete; + void operator=(Container const&) = delete; + + ~Container(); + + void dump(); + + void attachGroup(std::shared_ptr group); + + std::shared_ptr attachDevice(const std::string& name, int groupIndex); + std::shared_ptr attachDevice(const pci::Device &pdev); + + /** + * @brief Map VM to an IOVA, which is accessible by devices in the container + * @param virt virtual address of memory + * @param phys IOVA where to map @p virt, -1 to use VFIO internal allocator + * @param length size of memory region in bytes + * @return IOVA address, UINTPTR_MAX on failure + */ + uintptr_t memoryMap(uintptr_t virt, uintptr_t phys, size_t length); + + /** munmap() a region which has been mapped by vfio_map_region() */ + bool memoryUnmap(uintptr_t phys, size_t length); + + bool isIommuEnabled() const + { return this->hasIommu; } +private: + std::shared_ptr getOrAttachGroup(int index); + + int fd; + int version; + int extensions; + uint64_t iova_next; /**< Next free IOVA address */ + bool hasIommu; + + /// All groups bound to this container + std::list> groups; + + Logger log; +}; + +} /* namespace vfio */ +} /* namespace kernel */ +} /* namespace villas */ diff --git a/common/include/villas/kernel/vfio_device.hpp b/common/include/villas/kernel/vfio_device.hpp new file mode 100644 index 000000000..f275a839b --- /dev/null +++ b/common/include/villas/kernel/vfio_device.hpp @@ -0,0 +1,98 @@ +/** Virtual Function IO wrapper around kernel API + * + * @file + * @author Niklas Eiling + * @author Steffen Vogel + * @author Daniel Krebs + * @copyright 2022, Niklas Eiling + * @copyright 2014-2021, Steffen Vogel + * @copyright 2018, Daniel Krebs + *********************************************************************************/ + +#pragma once + +#include +#include +#include +#include + +#include +#include + +#include "pci.hpp" +#include + 
+#define VFIO_PATH "/dev/vfio/" +#define VFIO_DEV VFIO_PATH "vfio" + +#ifndef VFIO_NOIOMMU_IOMMU + #define VFIO_NOIOMMU_IOMMU 8 +#endif + +namespace villas { +namespace kernel { +namespace vfio { + +class Device { +public: + Device(const std::string &name, int groupFileDescriptor, const kernel::pci::Device *pci_device = nullptr); + + ~Device(); + + bool reset(); + + /** Map a device memory region to the application address space (e.g. PCI BARs) */ + void* regionMap(size_t index); + + /** munmap() a region which has been mapped by vfio_map_region() */ + bool regionUnmap(size_t index); + + /** Get the size of a device memory region */ + size_t regionGetSize(size_t index); + + + /** Enable memory accesses and bus mastering for PCI device */ + bool pciEnable(); + + int pciMsiInit(int efds[32]); + int pciMsiDeinit(int efds[32]); + bool pciMsiFind(int nos[32]); + + bool isVfioPciDevice() const; + bool pciHotReset(); + + int getFileDescriptor() const + { return fd; } + + void dump(); + + bool isAttachedToGroup() const + { return attachedToGroup; } + + void setAttachedToGroup() + { this->attachedToGroup = true; } + +private: + /// Name of the device as listed under + /// /sys/kernel/iommu_groups/[vfio_group::index]/devices/ + std::string name; + + /// VFIO device file descriptor + int fd; + + bool attachedToGroup; + + struct vfio_device_info info; + + std::vector irqs; + std::vector regions; + std::vector mappings; + + /**< libpci handle of the device */ + const kernel::pci::Device *pci_device; + + Logger log; +}; +} /* namespace vfio */ +} /* namespace kernel */ +} /* namespace villas */ diff --git a/common/include/villas/kernel/vfio_group.hpp b/common/include/villas/kernel/vfio_group.hpp new file mode 100644 index 000000000..e9c831847 --- /dev/null +++ b/common/include/villas/kernel/vfio_group.hpp @@ -0,0 +1,71 @@ +/** Virtual Function IO wrapper around kernel API + * + * @file + * @author Niklas Eiling + * @author Steffen Vogel + * @author Daniel Krebs + * @copyright 
2022, Niklas Eiling + * @copyright 2014-2021, Steffen Vogel + * @copyright 2018, Daniel Krebs + *********************************************************************************/ + +#pragma once + +#include +#include +#include + +#include +#include + +#include "vfio_device.hpp" +#include + +namespace villas { +namespace kernel { +namespace vfio { + +class Group { +public: + Group(int index, bool iommuEnabled); + + ~Group(); + + void setAttachedToContainer() + { attachedToContainer = true; }; + + bool isAttachedToContainer() + { return attachedToContainer; }; + + int getFileDescriptor() + { return fd; }; + + int getIndex() + { return index; }; + + std::shared_ptr attachDevice(std::shared_ptr device); + std::shared_ptr attachDevice(const std::string& name, const kernel::pci::Device *pci_device = nullptr); + + bool checkStatus(); + void dump(); +private: + /// VFIO group file descriptor + int fd; + + /// Index of the IOMMU group as listed under /sys/kernel/iommu_groups/ + int index; + + bool attachedToContainer; + + /// Status of group + struct vfio_group_status status; + + /// All devices owned by this group + std::list> devices; + + Logger log; +}; + +} /* namespace vfio */ +} /* namespace kernel */ +} /* namespace villas */ diff --git a/common/lib/CMakeLists.txt b/common/lib/CMakeLists.txt index 91163f0c7..987ef30b5 100644 --- a/common/lib/CMakeLists.txt +++ b/common/lib/CMakeLists.txt @@ -41,7 +41,9 @@ endif() if(CMAKE_SYSTEM_NAME STREQUAL Linux) target_sources(villas-common PRIVATE kernel/pci.cpp - kernel/vfio.cpp + kernel/vfio_device.cpp + kernel/vfio_group.cpp + kernel/vfio_container.cpp ) endif() diff --git a/common/lib/kernel/vfio_container.cpp b/common/lib/kernel/vfio_container.cpp new file mode 100644 index 000000000..6131cc414 --- /dev/null +++ b/common/lib/kernel/vfio_container.cpp @@ -0,0 +1,300 @@ +/** Virtual Function IO wrapper around kernel API + * + * @author Steffen Vogel + * @author Daniel Krebs + * @copyright 2014-2021, Steffen Vogel + * 
@copyright 2018, Daniel Krebs + * @license Apache License 2.0 + *********************************************************************************/ + +#define _DEFAULT_SOURCE + +#if defined(__arm__) || defined(__aarch64__) + #define _LARGEFILE64_SOURCE 1 + #define _FILE_OFFSET_BITS 64 +#endif + +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +using namespace villas::kernel::vfio; + + +Container::Container() : + fd(-1), + version(0), + extensions(0), + iova_next(0), + hasIommu(false), + groups(), + log(logging.get("kernel:vfio::Container")) +{ + spdlog::set_level(spdlog::level::debug); + + static constexpr const char* requiredKernelModules[] = { + "vfio", "vfio_pci", "vfio_iommu_type1" + }; + + for (const char* module : requiredKernelModules) { + if (kernel::loadModule(module) != 0) + throw RuntimeError("Kernel module '{}' required but could not be loaded. " + "Please load manually!", module); + } + + /* Open VFIO API */ + fd = open(VFIO_DEV, O_RDWR); + if (fd < 0) + throw RuntimeError("Failed to open VFIO container"); + + /* Check VFIO API version */ + version = ioctl(fd, VFIO_GET_API_VERSION); + if (version < 0 || version != VFIO_API_VERSION) + throw RuntimeError("Failed to get VFIO version"); + + /* Check available VFIO extensions (IOMMU types) */ + extensions = 0; + for (unsigned int i = VFIO_TYPE1_IOMMU; i <= VFIO_NOIOMMU_IOMMU; i++) { + int ret = ioctl(fd, VFIO_CHECK_EXTENSION, i); + if (ret < 0) + throw RuntimeError("Failed to get VFIO extensions"); + else if (ret > 0) + extensions |= (1 << i); + } + + hasIommu = false; + + if (not (extensions & (1 << VFIO_NOIOMMU_IOMMU))) { + if (not (extensions & (1 << VFIO_TYPE1_IOMMU))) + throw RuntimeError("No supported IOMMU extension found"); + else + hasIommu = true; + } + + log->debug("Version: {:#x}", version); + log->debug("Extensions: {:#x}", extensions); + log->debug("IOMMU: {}", hasIommu ? 
"yes" : "no"); +} + + +Container::~Container() +{ + log->debug("Clean up container with fd {}", fd); + + /* Release memory and close fds */ + groups.clear(); + + /* Close container */ + int ret = close(fd); + if (ret < 0) + log->error("Error closing vfio container fd {}: {}", fd, ret); +} + +/** + * @brief Attach a VFIO group to this container and set the IOMMU type of the container. + * + * Throws a RuntimeError if the group is already marked as attached, if one of the + * ioctls fails or if Group::checkStatus() reports a bad group status. + * + * @param group Group to attach; it is appended to the list of groups on success. + */ +void Container::attachGroup(std::shared_ptr<Group> group) +{ + if (group->isAttachedToContainer()) + throw RuntimeError("Group is already attached to a container"); + + /* Claim group ownership */ + int ret = ioctl(group->getFileDescriptor(), VFIO_GROUP_SET_CONTAINER, &fd); + if (ret < 0) { + log->error("Failed to attach VFIO group {} to container fd {} (error {})", + group->getIndex(), fd, ret); + throw RuntimeError("Failed to attach VFIO group to container"); + } + + /* Record the successful VFIO_GROUP_SET_CONTAINER. Group::checkStatus() below + * returns false for any group that is not marked as attached. + */ + group->setAttachedToContainer(); + + /* Set IOMMU type */ + int iommu_type = isIommuEnabled() ? VFIO_TYPE1_IOMMU : VFIO_NOIOMMU_IOMMU; + + ret = ioctl(fd, VFIO_SET_IOMMU, iommu_type); + if (ret < 0) { + log->error("Failed to set IOMMU type of container: {}", ret); + throw RuntimeError("Failed to set IOMMU type of container"); + } + if (!group->checkStatus()) + throw RuntimeError("bad VFIO group status for group {}.", group->getIndex()); + else + log->debug("Attached new group {} to VFIO container", group->getIndex()); + + /* Push to our list */ + groups.push_back(std::move(group)); +} + +std::shared_ptr<Group> Container::getOrAttachGroup(int index) +{ + /* Search if group with index already exists */ + for (auto &group : groups) { + if (group->getIndex() == index) { + return group; + } + } + + /* Group not yet part of this container, so acquire ownership */ + auto group = std::make_shared<Group>(index, isIommuEnabled()); + attachGroup(group); + + + return group; +} + + + +void +Container::dump() +{ + log->info("File descriptor: {}", fd); + log->info("Version: {}", version); + log->info("Extensions: 0x{:x}", extensions); + + for (auto &group : groups) { + group->dump(); + } +} + + +std::shared_ptr<Device> +Container::attachDevice(const std::string& name, int index) +{ + auto 
group = getOrAttachGroup(index); + auto device = group->attachDevice(name); + + return device; +} + + +std::shared_ptr +Container::attachDevice(const pci::Device &pdev) +{ + int ret; + char name[32], iommu_state[4]; + static constexpr const char* kernelDriver = "vfio-pci"; + + /* Load PCI bus driver for VFIO */ + if (kernel::loadModule("vfio_pci")) + throw RuntimeError("Failed to load kernel driver: vfio_pci"); + + /* Bind PCI card to vfio-pci driver if not already bound */ + if (pdev.getDriver() != kernelDriver) { + log->debug("Bind PCI card to kernel driver '{}'", kernelDriver); + pdev.attachDriver(kernelDriver); + } + + /* Get IOMMU group of device */ + int index = isIommuEnabled() ? pdev.getIOMMUGroup() : 0; + if (index < 0) { + ret = kernel::getCmdlineParam("intel_iommu", iommu_state, sizeof(iommu_state)); + if (ret != 0 || strcmp("on", iommu_state) != 0) + log->warn("Kernel booted without command line parameter " + "'intel_iommu' set to 'on'. Please check documentation " + "(https://villas.fein-aachen.org/doc/fpga-setup.html) " + "for help with troubleshooting."); + + throw RuntimeError("Failed to get IOMMU group of device"); + } + + /* VFIO device name consists of PCI BDF */ + snprintf(name, sizeof(name), "%04x:%02x:%02x.%x", pdev.slot.domain, + pdev.slot.bus, pdev.slot.device, pdev.slot.function); + + log->info("Attach to device {} with index {}", std::string(name), index); + auto group = getOrAttachGroup(index); + auto device = group->attachDevice(name, &pdev); + + + /* Check if this is really a vfio-pci device */ + if (!device->isVfioPciDevice()) + throw RuntimeError("Device is not a vfio-pci device"); + + return device; +} + + +uintptr_t +Container::memoryMap(uintptr_t virt, uintptr_t phys, size_t length) +{ + int ret; + + if (not hasIommu) { + log->error("DMA mapping not supported without IOMMU"); + return UINTPTR_MAX; + } + + if (length & 0xFFF) { + length += 0x1000; + length &= ~0xFFF; + } + + /* Super stupid allocator */ + size_t iovaIncrement = 0; + 
if (phys == UINTPTR_MAX) { + phys = this->iova_next; + iovaIncrement = length; + } + + struct vfio_iommu_type1_dma_map dmaMap; + memset(&dmaMap, 0, sizeof(dmaMap)); + + dmaMap.argsz = sizeof(dmaMap); + dmaMap.vaddr = virt; + dmaMap.iova = phys; + dmaMap.size = length; + dmaMap.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; + + ret = ioctl(this->fd, VFIO_IOMMU_MAP_DMA, &dmaMap); + if (ret) { + log->error("Failed to create DMA mapping: {}", ret); + return UINTPTR_MAX; + } + + log->debug("DMA map size={:#x}, iova={:#x}, vaddr={:#x}", + dmaMap.size, dmaMap.iova, dmaMap.vaddr); + + /* Mapping successful, advance IOVA allocator */ + this->iova_next += iovaIncrement; + + /* We intentionally don't return the actual mapped length, the users are + * only guaranteed to have their demanded memory mapped correctly + */ + return dmaMap.iova; +} + + +bool +Container::memoryUnmap(uintptr_t phys, size_t length) +{ + int ret; + + if (not hasIommu) + return true; + + struct vfio_iommu_type1_dma_unmap dmaUnmap; + dmaUnmap.argsz = sizeof(struct vfio_iommu_type1_dma_unmap); + dmaUnmap.flags = 0; + dmaUnmap.iova = phys; + dmaUnmap.size = length; + + ret = ioctl(this->fd, VFIO_IOMMU_UNMAP_DMA, &dmaUnmap); + if (ret) { + log->error("Failed to unmap DMA mapping"); + return false; + } + + return true; +} + diff --git a/common/lib/kernel/vfio_device.cpp b/common/lib/kernel/vfio_device.cpp new file mode 100644 index 000000000..568921759 --- /dev/null +++ b/common/lib/kernel/vfio_device.cpp @@ -0,0 +1,469 @@ +/** Virtual Function IO wrapper around kernel API + * + * @author Niklas Eiling + * @author Steffen Vogel + * @author Daniel Krebs + * @copyright 2022, Niklas Eiling + * @copyright 2014-2021, Steffen Vogel + * @copyright 2018, Daniel Krebs + * @license Apache License 2.0 + *********************************************************************************/ + +#define _DEFAULT_SOURCE + +#if defined(__arm__) || defined(__aarch64__) + #define _LARGEFILE64_SOURCE 1 + #define 
_FILE_OFFSET_BITS 64 +#endif + +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +using namespace villas::kernel::vfio; + +/* Names printed by Device::dump(), indexed by VFIO PCI region index */ +static const char *vfio_pci_region_names[] = { + "PCI_BAR0", /* VFIO_PCI_BAR0_REGION_INDEX */ + "PCI_BAR1", /* VFIO_PCI_BAR1_REGION_INDEX */ + "PCI_BAR2", /* VFIO_PCI_BAR2_REGION_INDEX */ + "PCI_BAR3", /* VFIO_PCI_BAR3_REGION_INDEX */ + "PCI_BAR4", /* VFIO_PCI_BAR4_REGION_INDEX */ + "PCI_BAR5", /* VFIO_PCI_BAR5_REGION_INDEX */ + "PCI_ROM", /* VFIO_PCI_ROM_REGION_INDEX */ + "PCI_CONFIG", /* VFIO_PCI_CONFIG_REGION_INDEX */ + "PCI_VGA" /* VFIO_PCI_VGA_REGION_INDEX */ +}; + +/* Names printed by Device::dump(), indexed by VFIO PCI IRQ index */ +static const char *vfio_pci_irq_names[] = { + "PCI_INTX", /* VFIO_PCI_INTX_IRQ_INDEX */ + "PCI_MSI", /* VFIO_PCI_MSI_IRQ_INDEX */ + "PCI_MSIX", /* VFIO_PCI_MSIX_IRQ_INDEX */ + "PCI_ERR", /* VFIO_PCI_ERR_IRQ_INDEX */ + "PCI_REQ" /* VFIO_PCI_REQ_IRQ_INDEX */ +}; + +/** + * @brief Open a VFIO device through its group and query its info, regions and IRQs. + * + * Throws a RuntimeError if the group file descriptor is negative or if one of + * the ioctls fails. + * + * @param name Device name passed to VFIO_GROUP_GET_DEVICE_FD + * @param groupFileDescriptor File descriptor of the VFIO group used to open the device + * @param pci_device Optional PCI device handle, stored as is + */ +Device::Device(const std::string &name, int groupFileDescriptor, const kernel::pci::Device *pci_device) : + name(name), + fd(-1), + attachedToGroup(false), + info(), + irqs(), + regions(), + mappings(), + pci_device(pci_device), + log(logging.get("kernel:vfio::Device")) +{ + if (groupFileDescriptor < 0) + throw RuntimeError("Invalid group file descriptor"); + + /* Open device fd */ + fd = ioctl(groupFileDescriptor, VFIO_GROUP_GET_DEVICE_FD, name.c_str()); + if (fd < 0) + throw RuntimeError("Failed to open VFIO device: {}", name.c_str()); + + /* Get device info */ + info.argsz = sizeof(info); + + int ret = ioctl(fd, VFIO_DEVICE_GET_INFO, &info); + if (ret < 0) + throw RuntimeError("Failed to get VFIO device info for: {}", name); + + log->debug("Device has {} regions", info.num_regions); + log->debug("Device has {} IRQs", info.num_irqs); + + /* Reserve slots already so that we can use the []-operator for access */ + irqs.resize(info.num_irqs); + regions.resize(info.num_regions); + mappings.resize(info.num_regions); + + /* Get 
device regions */ + for (size_t i = 0; i < info.num_regions && i < 8; i++) { + struct vfio_region_info region; + memset(&region, 0, sizeof (region)); + + region.argsz = sizeof(region); + region.index = i; + + ret = ioctl(fd, VFIO_DEVICE_GET_REGION_INFO, &region); + if (ret < 0) + throw RuntimeError("Failed to get region of VFIO device: {}", name); + + regions[i] = region; + } + + + /* Get device irqs */ + for (size_t i = 0; i < info.num_irqs; i++) { + struct vfio_irq_info irq; + memset(&irq, 0, sizeof (irq)); + + irq.argsz = sizeof(irq); + irq.index = i; + + ret = ioctl(fd, VFIO_DEVICE_GET_IRQ_INFO, &irq); + if (ret < 0) + throw RuntimeError("Failed to get IRQs of VFIO device: {}", name); + + irqs[i] = irq; + } + +} + +Device::~Device() +{ + log->debug("Clean up device {} with fd {}", this->name, this->fd); + + for (auto &region : regions) { + regionUnmap(region.index); + } + reset(); + + int ret = close(fd); + if (ret != 0) { + log->error("Closing device fd {} failed", fd); + } +} + + +bool +Device::reset() +{ + log->debug("Resetting device."); + if (this->info.flags & VFIO_DEVICE_FLAGS_RESET) + return ioctl(this->fd, VFIO_DEVICE_RESET) == 0; + else + return false; /* not supported by this device */ +} + + +/** + * @brief mmap() a device region into the address space of the process. + * + * @param index Index of the region to map + * @return Address of the mapping, or MAP_FAILED if the index is out of range, + * the region does not support mmap or mmap() itself fails + */ +void* +Device::regionMap(size_t index) +{ + /* Reject indices the device does not have instead of reading past the vectors */ + if (index >= regions.size()) { + log->error("Index out of range: {} >= {}", index, regions.size()); + return MAP_FAILED; + } + + struct vfio_region_info *r = &regions[index]; + + if (!(r->flags & VFIO_REGION_INFO_FLAG_MMAP)) + return MAP_FAILED; + + int flags = MAP_SHARED; + +#if !(defined(__arm__) || defined(__aarch64__)) + flags |= MAP_32BIT; +#endif + + void *mapping = mmap(nullptr, r->size, + PROT_READ | PROT_WRITE, + flags, fd, r->offset); + + /* Only remember successful mappings, so that regionUnmap() never passes + * MAP_FAILED to munmap() + */ + if (mapping != MAP_FAILED) + mappings[index] = mapping; + + return mapping; +} + + +/** + * @brief munmap() a region previously mapped with regionMap(). + * + * @param index Index of the region to unmap + * @return true if the region was unmapped; false if the index is out of range, + * the region was not mapped or munmap() fails + */ +bool +Device::regionUnmap(size_t index) +{ + int ret; + + if (index >= regions.size()) + return false; /* no such region */ + + struct vfio_region_info *r = &regions[index]; + + if (!mappings[index]) + return false; /* was not mapped */ + + log->debug("Unmap region {} from device {}", index, name); + + ret = munmap(mappings[index], r->size); + if (ret) + return false; + + mappings[index] = nullptr; + + return true; +} + + +size_t 
+Device::regionGetSize(size_t index) +{ + if (index >= regions.size()) { + log->error("Index out of range: {} >= {}", index, regions.size()); + throw std::out_of_range("Index out of range"); + } + + return regions[index].size; +} + +void Device::dump() +{ + log->info("Device {}: regions={}, irqs={}, flags={}", + name, + info.num_regions, + info.num_irqs, + info.flags + ); + + for (size_t i = 0; i < info.num_regions && i < 8; i++) { + struct vfio_region_info *region = ®ions[i]; + + if (region->size > 0) { + log->info("Region {} {}: size={}, offset={}, flags={}", + region->index, + (info.flags & VFIO_DEVICE_FLAGS_PCI) ? + vfio_pci_region_names[i] : "", + region->size, + region->offset, + region->flags + ); + } + } + + for (size_t i = 0; i < info.num_irqs; i++) { + struct vfio_irq_info *irq = &irqs[i]; + + if (irq->count > 0) { + log->info("IRQ {} {}: count={}, flags={}", + irq->index, + (info.flags & VFIO_DEVICE_FLAGS_PCI ) ? + vfio_pci_irq_names[i] : "", + irq->count, + irq->flags + ); + } + } +} + +bool +Device::pciEnable() +{ + int ret; + uint32_t reg; + const off64_t offset = PCI_COMMAND + + (static_cast(VFIO_PCI_CONFIG_REGION_INDEX) << 40); + + /* Check if this is really a vfio-pci device */ + if (!(this->info.flags & VFIO_DEVICE_FLAGS_PCI)) + return false; + + ret = pread64(this->fd, ®, sizeof(reg), offset); + if (ret != sizeof(reg)) + return false; + + /* Enable memory access and PCI bus mastering which is required for DMA */ + reg |= PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER; + + ret = pwrite64(this->fd, ®, sizeof(reg), offset); + if (ret != sizeof(reg)) + return false; + + return true; +} + + +int +Device::pciMsiInit(int efds[]) +{ + /* Check if this is really a vfio-pci device */ + if (not isVfioPciDevice()) + return -1; + + const size_t irqCount = irqs[VFIO_PCI_MSI_IRQ_INDEX].count; + const size_t irqSetSize = sizeof(struct vfio_irq_set) + + sizeof(int) * irqCount; + + auto *irqSetBuf = new char[irqSetSize]; + if (!irqSetBuf) + throw MemoryAllocationError(); 
+ + auto *irqSet = reinterpret_cast<struct vfio_irq_set *>(irqSetBuf); + + irqSet->argsz = irqSetSize; + // DATA_EVENTFD binds the interrupt to the provided eventfd. + // SET_ACTION_TRIGGER enables kernel->userspace signalling. + irqSet->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; + irqSet->index = VFIO_PCI_MSI_IRQ_INDEX; + irqSet->start = 0; + irqSet->count = irqCount; + + /* Now set the new eventfds */ + for (size_t i = 0; i < irqCount; i++) { + efds[i] = eventfd(0, 0); + if (efds[i] < 0) { + /* Do not leak the eventfds which have been created so far */ + for (size_t j = 0; j < i; j++) { + close(efds[j]); + efds[j] = -1; + } + + delete[] irqSetBuf; + return -1; + } + } + + memcpy(irqSet->data, efds, sizeof(int) * irqCount); + + if (ioctl(fd, VFIO_DEVICE_SET_IRQS, irqSet) != 0) { + /* The caller only sees -1 and cannot tell which fds are open, + * so release all of them here + */ + for (size_t i = 0; i < irqCount; i++) { + close(efds[i]); + efds[i] = -1; + } + + delete[] irqSetBuf; + return -1; + } + + delete[] irqSetBuf; + + return irqCount; +} + + +/** + * @brief Close the MSI eventfds and pass the resulting -1 entries to VFIO_DEVICE_SET_IRQS. + * + * @param efds Eventfds as filled in by pciMsiInit(); every used entry is set to -1 + * @return Number of MSI IRQs, or -1 if this is not a vfio-pci device or the ioctl fails + */ +int +Device::pciMsiDeinit(int efds[]) +{ + /* Check if this is really a vfio-pci device */ + if (not isVfioPciDevice()) + return -1; + + const size_t irqCount = irqs[VFIO_PCI_MSI_IRQ_INDEX].count; + const size_t irqSetSize = sizeof(struct vfio_irq_set) + + sizeof(int) * irqCount; + + auto *irqSetBuf = new char[irqSetSize]; + if (!irqSetBuf) + throw MemoryAllocationError(); + + auto *irqSet = reinterpret_cast<struct vfio_irq_set *>(irqSetBuf); + + irqSet->argsz = irqSetSize; + irqSet->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; + irqSet->index = VFIO_PCI_MSI_IRQ_INDEX; + irqSet->count = irqCount; + irqSet->start = 0; + + for (size_t i = 0; i < irqCount; i++) { + close(efds[i]); + efds[i] = -1; + } + + memcpy(irqSet->data, efds, sizeof(int) * irqCount); + + if (ioctl(fd, VFIO_DEVICE_SET_IRQS, irqSet) != 0) { + delete[] irqSetBuf; + return -1; + } + + delete[] irqSetBuf; + + return irqCount; +} + + +bool +Device::pciMsiFind(int nos[]) +{ + int ret, idx, irq; + char *end, *col, *last, line[1024], name[13]; + FILE *f; + + f = fopen("/proc/interrupts", "r"); + if (!f) + return false; + + for (int i = 0; i < 32; i++) + nos[i] = -1; + + /* For each line in /proc/interrupts */ + while (fgets(line, sizeof(line), f)) { + col = strtok(line, " "); + 
/* IRQ number is in first column */ + if (!col) + continue; /* blank line: strtok() found no token */ + + irq = strtol(col, &end, 10); + if (col == end) + continue; + + /* Find last column of line */ + do { + last = col; + } while ((col = strtok(nullptr, " "))); + + + ret = sscanf(last, "vfio-msi[%d](%12[0-9:])", &idx, name); + if (ret == 2) { + /* idx is parsed from the file: only accept slots which exist in + * nos[] (32 entries, see the reset loop above) + */ + if (idx >= 0 && idx < 32 && + strstr(this->name.c_str(), name) == this->name.c_str()) + nos[idx] = irq; + } + } + + fclose(f); + + return true; +} + + +bool +Device::isVfioPciDevice() const +{ + return info.flags & VFIO_DEVICE_FLAGS_PCI; +} + +bool Device::pciHotReset() +{ + /* Check if this is really a vfio-pci device */ + if (!isVfioPciDevice()) + return false; + + const size_t reset_info_len = sizeof(struct vfio_pci_hot_reset_info) + + sizeof(struct vfio_pci_dependent_device) * 64; + + auto *reset_info_buf = new char[reset_info_len]; + if (!reset_info_buf) + throw MemoryAllocationError(); + + auto *reset_info = reinterpret_cast<struct vfio_pci_hot_reset_info *>(reset_info_buf); + + reset_info->argsz = reset_info_len; + + if (ioctl(fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, reset_info) != 0) { + delete[] reset_info_buf; + return false; + } + + log->debug("Dependent devices for hot-reset:"); + for (size_t i = 0; i < reset_info->count; i++) { + struct vfio_pci_dependent_device *dd = &reset_info->devices[i]; + log->debug(" {:04x}:{:02x}:{:02x}.{:01x}: iommu_group={}", + dd->segment, dd->bus, + PCI_SLOT(dd->devfn), PCI_FUNC(dd->devfn), dd->group_id); + + /*if (static_cast(dd->group_id) != index) { + delete[] reset_info_buf; + return false; + }*/ + } + + delete[] reset_info_buf; + + const size_t reset_len = sizeof(struct vfio_pci_hot_reset) + + sizeof(int32_t) * 1; + auto *reset_buf = new char[reset_len]; + if (!reset_buf) + throw MemoryAllocationError(); + + auto *reset = reinterpret_cast<struct vfio_pci_hot_reset *>(reset_buf); + + reset->argsz = reset_len; + reset->count = 1; + /* NOTE(review): the field is named group_fds, but fd is the device fd obtained + * via VFIO_GROUP_GET_DEVICE_FD - presumably the fd of the owning group is + * expected here; confirm against linux/vfio.h + */ + reset->group_fds[0] = fd; + + int ret = ioctl(fd, VFIO_DEVICE_PCI_HOT_RESET, reset); + const bool success = (ret == 0); + + delete[] reset_buf; + + if (!success) { + log->warn("PCI hot reset failed, maybe 
not IOMMU available?"); + return true; + } + + return success; +} diff --git a/common/lib/kernel/vfio_group.cpp b/common/lib/kernel/vfio_group.cpp new file mode 100644 index 000000000..7f1240f9b --- /dev/null +++ b/common/lib/kernel/vfio_group.cpp @@ -0,0 +1,138 @@ +/** Virtual Function IO wrapper around kernel API + * + * @author Steffen Vogel + * @author Daniel Krebs + * @copyright 2014-2021, Steffen Vogel + * @copyright 2018, Daniel Krebs + * @license Apache License 2.0 + *********************************************************************************/ + +#define _DEFAULT_SOURCE + +#if defined(__arm__) || defined(__aarch64__) + #define _LARGEFILE64_SOURCE 1 + #define _FILE_OFFSET_BITS 64 +#endif + +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +using namespace villas::kernel::vfio; + +Group::Group(int index, bool iommuEnabled) : + fd(-1), + index(index), + attachedToContainer(false), + status(), + devices(), + log(logging.get("kernel:vfio::Group")) +{ + /* Open group fd */ + std::stringstream groupPath; + groupPath << VFIO_PATH + << (iommuEnabled ? 
"" : "noiommu-") + << index; + + log->debug("path: {}", groupPath.str().c_str()); + fd = open(groupPath.str().c_str(), O_RDWR); + if (fd < 0) { + log->error("Failed to open VFIO group {}", index); + throw RuntimeError("Failed to open VFIO group"); + } + + log->debug("VFIO group {} (fd {}) has path {}", + index, fd, groupPath.str()); + +} + +std::shared_ptr Group::attachDevice(std::shared_ptr device) +{ + if (device->isAttachedToGroup()) + throw RuntimeError("Device is already attached to a group"); + + devices.push_back(device); + + device->setAttachedToGroup(); + + return device; +} + +std::shared_ptr Group::attachDevice(const std::string& name, const kernel::pci::Device *pci_device) +{ + auto device = std::make_shared(name, fd, pci_device); + return attachDevice(device); +} + + +bool Group::checkStatus() +{ + int ret; + if (!attachedToContainer) { + log->debug("Group {} is not attached to a container", index); + return false; + } + + /* Check group viability and features */ + status.argsz = sizeof(status); + + ret = ioctl(fd, VFIO_GROUP_GET_STATUS, &status); + if (ret < 0) { + log->error("Failed to get VFIO group status"); + return false; + } + + if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE)) { + log->error("VFIO group is not available: bind all devices to the VFIO driver!"); + return false; + } + return true; +} + +void Group::dump() +{ + log->info("VFIO Group {}, viable={}, container={}", + index, + (status.flags & VFIO_GROUP_FLAGS_VIABLE) > 0, + (status.flags & VFIO_GROUP_FLAGS_CONTAINER_SET) > 0 + ); + + for (auto& device : devices) { + device->dump(); + } +} + +Group::~Group() +{ + log->debug("Clean up group {} with fd {}", this->index, this->fd); + + /* Release memory and close fds */ + devices.clear(); + + if (fd < 0) + log->debug("Destructing group that has not been attached"); + else { + log->debug("unsetting group container"); + int ret = ioctl(fd, VFIO_GROUP_UNSET_CONTAINER); + if (ret != 0) + log->error("Cannot unset container for group fd {}", fd); + 
+ ret = close(fd); + if (ret != 0) + log->error("Cannot close group fd {}", fd); + } +}