From b66733640a98399e82d434aa9af9fb8ec4aec331 Mon Sep 17 00:00:00 2001 From: Niklas Eiling Date: Thu, 5 Jan 2023 09:51:11 +0100 Subject: [PATCH 01/15] format and comment fixes Signed-off-by: Niklas Eiling --- fpga/lib/utils.cpp | 1 + fpga/src/villas-fpga-ctrl.cpp | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/fpga/lib/utils.cpp b/fpga/lib/utils.cpp index 2a97be3a0..32107a40a 100644 --- a/fpga/lib/utils.cpp +++ b/fpga/lib/utils.cpp @@ -50,6 +50,7 @@ const std::shared_ptr portStringToStreamVertex(std::stri return aurora_channels[port]; } } + // parses a string lik "1->2" or "1<->stdout" and configures the crossbar void fpga::configCrossBarUsingConnectString(std::string connectString, std::shared_ptr dma, diff --git a/fpga/src/villas-fpga-ctrl.cpp b/fpga/src/villas-fpga-ctrl.cpp index 5cf1f9196..2ad64557e 100644 --- a/fpga/src/villas-fpga-ctrl.cpp +++ b/fpga/src/villas-fpga-ctrl.cpp @@ -33,7 +33,7 @@ using namespace villas; static std::shared_ptr pciDevices; -static auto logger = villas::logging.get("cat"); +static auto logger = villas::logging.get("ctrl"); @@ -66,9 +66,9 @@ int main(int argc, char* argv[]) return 1; } - //FIXME: This must be called before card is intialized, because the card descructor - // still accesses the allocated memory. This order ensures that the allocator - // is destroyed AFTER the card. + // This must be called before card is intialized, because the card descructor + // still accesses the allocated memory. This order ensures that the allocator + // is destroyed AFTER the card. auto &alloc = villas::HostRam::getAllocator(); villas::MemoryAccessor mem[] = {alloc.allocate(0x200), alloc.allocate(0x200)}; const villas::MemoryBlock block[] = {mem[0].getMemoryBlock(), mem[1].getMemoryBlock()}; From 40d0452b0a260d96dfc1b1c2f4958e88794d8a38 Mon Sep 17 00:00:00 2001 From: Niklas Eiling Date: Thu, 5 Jan 2023 10:50:27 +0100 Subject: [PATCH 02/15] move connectString parsing into separate class Signed-off-by: Niklas Eiling --- fpga/include/villas/fpga/utils.hpp | 56 +++++++++++++--- fpga/lib/utils.cpp | 102 +++++++++++++++++++++-------- fpga/src/villas-fpga-ctrl.cpp | 3 +- 3 files changed, 124 insertions(+), 37 deletions(-) diff --git a/fpga/include/villas/fpga/utils.hpp b/fpga/include/villas/fpga/utils.hpp index d21ef0ebc..fae15afde 100644 --- a/fpga/include/villas/fpga/utils.hpp +++ b/fpga/include/villas/fpga/utils.hpp @@ -1,9 +1,24 @@ /** Helper function for directly using VILLASfpga outside of VILLASnode * - * Author: Niklas Eiling - * SPDX-FileCopyrightText: 2022 Steffen Vogel - * SPDX-FileCopyrightText: 2022 Niklas Eiling - * SPDX-License-Identifier: Apache-2.0 + * @file + * @author Niklas Eiling + * @copyright 2022, Steffen Vogel, Niklas Eiling + * @license GNU General Public License (version 3) + * + * VILLASfpga + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . *********************************************************************************/ #pragma once @@ -17,11 +32,36 @@ namespace fpga { std::shared_ptr setupFpgaCard(const std::string &configFile, const std::string &fpgaName); -void configCrossBarUsingConnectString(std::string connectString, - std::shared_ptr dma, - std::vector>& aurora_channels); - void setupColorHandling(); +class ConnectString { +public: + ConnectString(std::string& connectString, int maxPortNum = 7); + + void parseString(std::string& connectString); + int portStringToInt(std::string &str) const; + + void configCrossBar(std::shared_ptr dma, + std::vector>& aurora_channels) const; + + bool isBidirectional() const { return bidirectional; }; + bool isDmaLoopback() const { return dmaLoopback; }; + bool isSrcStdin() const { return srcIsStdin; }; + bool isDstStdout() const { return dstIsStdout; }; + int getSrcAsInt() const { return srcAsInt; }; + int getDstAsInt() const { return dstAsInt; }; +protected: + villas::Logger log; + int maxPortNum; + bool bidirectional; + bool invert; + int srcAsInt; + int dstAsInt; + bool srcIsStdin; + bool dstIsStdout; + bool dmaLoopback; +}; + + } /* namespace fpga */ } /* namespace villas */ diff --git a/fpga/lib/utils.cpp b/fpga/lib/utils.cpp index 32107a40a..071336cef 100644 --- a/fpga/lib/utils.cpp +++ b/fpga/lib/utils.cpp @@ -35,46 +35,42 @@ using namespace villas; static std::shared_ptr pciDevices; static auto logger = villas::logging.get("streamer"); -const std::shared_ptr portStringToStreamVertex(std::string &str, - std::shared_ptr dma, - std::vector>& aurora_channels) +fpga::ConnectString::ConnectString(std::string& connectString, int maxPortNum) : + log(villas::logging.get("ConnectString")), + maxPortNum(maxPortNum), + bidirectional(false), + invert(false), + srcAsInt(-1), + dstAsInt(-1), + srcIsStdin(false), + dstIsStdout(false), + dmaLoopback(false) { - if (str == "stdin" || str == "stdout") { - return dma; - } else { - int port = std::stoi(str); - - if (port > 7 || port < 0) - throw std::runtime_error("Invalid port number"); - - return aurora_channels[port]; - } + parseString(connectString); } -// parses a string lik "1->2" or "1<->stdout" and configures the crossbar -void fpga::configCrossBarUsingConnectString(std::string connectString, - std::shared_ptr dma, - std::vector>& aurora_channels) +void fpga::ConnectString::parseString(std::string& connectString) { - bool bidirectional = false; - bool invert = false; - if (connectString.empty()) return; if (connectString == "loopback") { logger->info("Connecting loopback"); - // is this working? - dma->connectLoopback(); + srcIsStdin = true; + dstIsStdout = true; + bidirectional = true; + dmaLoopback = true; + return; } - static std::regex re("([0-9]+)([<\\->]+)([0-9]+|stdin|stdout)"); + std::regex regex("([0-9]+)([<\\->]+)([0-9]+|stdin|stdout)"); std::smatch match; - if (!std::regex_match(connectString, match, re)) { + if (!std::regex_match(connectString, match, regex) || match.size() != 4) { logger->error("Invalid connect string: {}", connectString); throw std::runtime_error("Invalid connect string"); } + if (match[2] == "<->") { bidirectional = true; } else if(match[2] == "<-") { @@ -83,11 +79,61 @@ void fpga::configCrossBarUsingConnectString(std::string connectString, std::string srcStr = (invert ? match[3] : match[1]); std::string dstStr = (invert ? match[1] : match[3]); - logger->info("Connect string {}: Connecting {} to {}, {}directional", - connectString, srcStr, dstStr, + + srcAsInt = portStringToInt(srcStr); + dstAsInt = portStringToInt(dstStr); + if (srcAsInt == -1) { + srcIsStdin = true; + } + if (dstAsInt == -1) { + dstIsStdout = true; + } +} + +int fpga::ConnectString::portStringToInt(std::string &str) const +{ + if (str == "stdin" || str == "stdout") { + return -1; + } else { + const int port = std::stoi(str); + + if (port > maxPortNum || port < 0) + throw std::runtime_error("Invalid port number"); + + return port; + } +} + + +// parses a string lik "1->2" or "1<->stdout" and configures the crossbar +void fpga::ConnectString::configCrossBar(std::shared_ptr dma, + std::vector>& aurora_channels) const +{ + if (dmaLoopback) { + log->info("Configuring DMA loopback"); + dma->connectLoopback(); + return; + } + + log->info("Connecting {} to {}, {}directional", + (srcAsInt==-1 ? "stdin" : std::to_string(srcAsInt)), + (dstAsInt==-1 ? "stdout" : std::to_string(dstAsInt)), (bidirectional ? "bi" : "uni")); - auto src = portStringToStreamVertex(srcStr, dma, aurora_channels); - auto dest = portStringToStreamVertex(dstStr, dma, aurora_channels); + + std::shared_ptr src; + std::shared_ptr dest; + if (srcIsStdin) { + src = dma; + } else { + src = aurora_channels[srcAsInt]; + } + + if (dstIsStdout) { + dest = dma; + } else { + dest = aurora_channels[dstAsInt]; + } + src->connect(src->getDefaultMasterPort(), dest->getDefaultSlavePort()); if (bidirectional) { dest->connect(dest->getDefaultMasterPort(), src->getDefaultSlavePort()); diff --git a/fpga/src/villas-fpga-ctrl.cpp b/fpga/src/villas-fpga-ctrl.cpp index 2ad64557e..bb5a93b61 100644 --- a/fpga/src/villas-fpga-ctrl.cpp +++ b/fpga/src/villas-fpga-ctrl.cpp @@ -99,7 +99,8 @@ int main(int argc, char* argv[]) aurora->dump(); // Configure Crossbar switch - fpga::configCrossBarUsingConnectString(connectStr, dma, aurora_channels); + const fpga::ConnectString parsedConnectString(connectStr); + parsedConnectString.configCrossBar(dma, aurora_channels); if (!noDma) { for (auto b : block) { From 14f924b6c5afe5e4b20040bfd430432192c6e389 Mon Sep 17 00:00:00 2001 From: Niklas Eiling Date: Thu, 5 Jan 2023 17:08:48 +0100 Subject: [PATCH 03/15] rework MemoryBlock use to make use of shared_ptr so the lifetime of the objects is properly tracked this fixes that the wrong order of allocating and PciCard destruction causes an undefined state. Signed-off-by: Niklas Eiling --- fpga/.vscode/launch.json | 2 +- fpga/include/villas/fpga/card.hpp | 8 +- fpga/include/villas/fpga/ips/dma.hpp | 6 +- fpga/lib/card.cpp | 129 +++++++++++++-------------- fpga/lib/ips/dma.cpp | 14 +-- fpga/src/villas-fpga-ctrl.cpp | 97 ++++++++++---------- fpga/tests/unit/dma.cpp | 11 ++- 7 files changed, 135 insertions(+), 132 deletions(-) diff --git a/fpga/.vscode/launch.json b/fpga/.vscode/launch.json index 42ab0f641..1bce5a80d 100644 --- a/fpga/.vscode/launch.json +++ b/fpga/.vscode/launch.json @@ -10,7 +10,7 @@ "request": "launch", "program": "${workspaceFolder}/build/src/villas-fpga-ctrl", "args": [ - "-c", "${workspaceFolder}/etc/fpgas.json", "--connect", "\"2<->stdout\"", "--no-dma" + "-c", "${workspaceFolder}/etc/fpgas.json", "--connect", "\"2<->stdout\"" ], "stopAtEntry": false, "cwd": "${workspaceFolder}", diff --git a/fpga/include/villas/fpga/card.hpp b/fpga/include/villas/fpga/card.hpp index 3ad51e3fa..e1b8a4566 100644 --- a/fpga/include/villas/fpga/card.hpp +++ b/fpga/include/villas/fpga/card.hpp @@ -36,17 +36,19 @@ public: virtual ~Card(); + + std::shared_ptr lookupIp(const std::string &name) const; std::shared_ptr lookupIp(const Vlnv &vlnv) const; std::shared_ptr lookupIp(const ip::IpIdentifier &id) const; - bool mapMemoryBlock(const MemoryBlock &block); + bool mapMemoryBlock(const std::shared_ptr block); bool unmapMemoryBlock(const MemoryBlock &block); std::shared_ptr vfioContainer; protected: - // Cache a set of already mapped memory blocks - std::set memoryBlocksMapped; + // Keep a map of already mapped memory blocks + std::map> memoryBlocksMapped; Logger logger; }; diff --git a/fpga/include/villas/fpga/ips/dma.hpp b/fpga/include/villas/fpga/ips/dma.hpp index 8b9d49c04..5f4f21a0f 100644 --- a/fpga/include/villas/fpga/ips/dma.hpp +++ b/fpga/include/villas/fpga/ips/dma.hpp @@ -50,7 +50,7 @@ public: bool memcpy(const MemoryBlock &src, const MemoryBlock &dst, size_t len); - void makeAccesibleFromVA(const MemoryBlock &mem); + void makeAccesibleFromVA(std::shared_ptr mem); bool makeInaccesibleFromVA(const MemoryBlock &mem); inline @@ -134,8 +134,8 @@ private: // Depending on alignment, the actual number of BDs usable can be smaller static constexpr size_t requestedRingBdSize = 2048; uint32_t actualRingBdSize = XAxiDma_BdRingCntCalc(XAXIDMA_BD_MINIMUM_ALIGNMENT, requestedRingBdSize); - MemoryBlock::UniquePtr sgRingTx; - MemoryBlock::UniquePtr sgRingRx; + std::shared_ptr sgRingTx; + std::shared_ptr sgRingRx; }; class DmaFactory : NodeFactory { diff --git a/fpga/lib/card.cpp b/fpga/lib/card.cpp index 3d8dda65c..ac97e1f07 100644 --- a/fpga/lib/card.cpp +++ b/fpga/lib/card.cpp @@ -22,13 +22,13 @@ Card::~Card() // Unmap all memory blocks for (auto &mappedMemoryBlock : memoryBlocksMapped) { - auto translation = mm.getTranslation(addrSpaceIdDeviceToHost, mappedMemoryBlock); + auto translation = mm.getTranslation(addrSpaceIdDeviceToHost, mappedMemoryBlock.first); const uintptr_t iova = translation.getLocalAddr(0); const size_t size = translation.getSize(); logger->debug("Unmap block {} at IOVA {:#x} of size {:#x}", - mappedMemoryBlock, iova, size); + mappedMemoryBlock.first, iova, size); vfioContainer->memoryUnmap(iova, size); } } @@ -57,82 +57,73 @@ std::shared_ptr Card::lookupIp(const Vlnv &vlnv) const std::shared_ptr Card::lookupIp(const ip::IpIdentifier &id) const { - for(auto &ip : ips) { - if(*ip == id) { - return ip; - } - } + for (auto &ip : ips) { + if (*ip == id) { + return ip; + } + } - return nullptr; + return nullptr; } -bool Card::unmapMemoryBlock(const MemoryBlock &block) +bool Card::unmapMemoryBlock(const MemoryBlock& block) { - if(memoryBlocksMapped.find(block.getAddrSpaceId()) - == memoryBlocksMapped.end()) { - throw std::runtime_error( - "Block " + std::to_string(block.getAddrSpaceId()) - + " is not mapped but was requested to be unmapped."); - } + if (memoryBlocksMapped.find(block.getAddrSpaceId()) == memoryBlocksMapped.end()) { + throw std::runtime_error("Block " + std::to_string(block.getAddrSpaceId()) + " is not mapped but was requested to be unmapped."); + } - auto &mm = MemoryManager::get(); + auto &mm = MemoryManager::get(); - auto translation = mm.getTranslation(addrSpaceIdDeviceToHost, - block.getAddrSpaceId()); + auto translation = mm.getTranslation(addrSpaceIdDeviceToHost, block.getAddrSpaceId()); - const uintptr_t iova = translation.getLocalAddr(0); - const size_t size = translation.getSize(); + const uintptr_t iova = translation.getLocalAddr(0); + const size_t size = translation.getSize(); - logger->debug("Unmap block {} at IOVA {:#x} of size {:#x}", - block.getAddrSpaceId(), - iova, - size); - vfioContainer->memoryUnmap(iova, size); + logger->debug("Unmap block {} at IOVA {:#x} of size {:#x}", + block.getAddrSpaceId(), iova, size); + vfioContainer->memoryUnmap(iova, size); - memoryBlocksMapped.erase(block.getAddrSpaceId()); - - return true; -} - -bool Card::mapMemoryBlock(const MemoryBlock &block) -{ - if(not vfioContainer->isIommuEnabled()) { - logger->warn("VFIO mapping not supported without IOMMU"); - return false; - } - - auto &mm = MemoryManager::get(); - const auto &addrSpaceId = block.getAddrSpaceId(); - - if(memoryBlocksMapped.find(addrSpaceId) != memoryBlocksMapped.end()) - // Block already mapped - return true; - else - logger->debug("Create VFIO mapping for {}", addrSpaceId); - - auto translationFromProcess - = mm.getTranslationFromProcess(addrSpaceId); - uintptr_t processBaseAddr = translationFromProcess.getLocalAddr(0); - uintptr_t iovaAddr = vfioContainer->memoryMap(processBaseAddr, - UINTPTR_MAX, - block.getSize()); - - if(iovaAddr == UINTPTR_MAX) { - logger->error("Cannot map memory at {:#x} of size {:#x}", - processBaseAddr, - block.getSize()); - return false; - } - - mm.createMapping(iovaAddr, - 0, - block.getSize(), - "VFIO-D2H", - this->addrSpaceIdDeviceToHost, - addrSpaceId); - - // Remember that this block has already been mapped for later - memoryBlocksMapped.insert(addrSpaceId); + memoryBlocksMapped.erase(block.getAddrSpaceId()); + + return true; +} + + +bool Card::mapMemoryBlock(const std::shared_ptr block) +{ + if (not vfioContainer->isIommuEnabled()) { + logger->warn("VFIO mapping not supported without IOMMU"); + return false; + } + + auto &mm = MemoryManager::get(); + const auto &addrSpaceId = block->getAddrSpaceId(); + + if (memoryBlocksMapped.find(addrSpaceId) != memoryBlocksMapped.end()) + // Block already mapped + return true; + else + logger->debug("Create VFIO mapping for {}", addrSpaceId); + + auto translationFromProcess = mm.getTranslationFromProcess(addrSpaceId); + uintptr_t processBaseAddr = translationFromProcess.getLocalAddr(0); + uintptr_t iovaAddr = vfioContainer->memoryMap(processBaseAddr, + UINTPTR_MAX, + block->getSize()); + + if (iovaAddr == UINTPTR_MAX) { + logger->error("Cannot map memory at {:#x} of size {:#x}", + processBaseAddr, block->getSize()); + return false; + } + + mm.createMapping(iovaAddr, 0, block->getSize(), + "VFIO-D2H", + this->addrSpaceIdDeviceToHost, + addrSpaceId); + + // Remember that this block has already been mapped for later + memoryBlocksMapped.insert({addrSpaceId, block}); return true; } diff --git a/fpga/lib/ips/dma.cpp b/fpga/lib/ips/dma.cpp index da5ff4a30..c129b6f82 100644 --- a/fpga/lib/ips/dma.cpp +++ b/fpga/lib/ips/dma.cpp @@ -84,7 +84,7 @@ void Dma::setupScatterGatherRingRx() auto &alloc = villas::HostRam::getAllocator(); sgRingRx = alloc.allocateBlock(requestedRingBdSize * sizeof(uint16_t) * XAXIDMA_BD_NUM_WORDS); - if (not card->mapMemoryBlock(*sgRingRx)) + if (not card->mapMemoryBlock(sgRingRx)) throw RuntimeError("Memory not accessible by DMA"); auto &mm = MemoryManager::get(); @@ -129,7 +129,7 @@ void Dma::setupScatterGatherRingTx() auto &alloc = villas::HostRam::getAllocator(); sgRingTx = alloc.allocateBlock(requestedRingBdSize * sizeof(uint16_t) * XAXIDMA_BD_NUM_WORDS); - if (not card->mapMemoryBlock(*sgRingTx)) + if (not card->mapMemoryBlock(sgRingTx)) throw RuntimeError("Memory not accessible by DMA"); auto &mm = MemoryManager::get(); @@ -564,11 +564,11 @@ Dma::readCompleteSimple() return bytesRead; } -void Dma::makeAccesibleFromVA(const MemoryBlock &mem) +void Dma::makeAccesibleFromVA(std::shared_ptr mem) { // Only symmetric mapping supported currently - if (isMemoryBlockAccesible(mem, s2mmInterface) and - isMemoryBlockAccesible(mem, mm2sInterface)) + if (isMemoryBlockAccesible(*mem, s2mmInterface) and + isMemoryBlockAccesible(*mem, mm2sInterface)) return; // Try mapping via FPGA-card (VFIO) @@ -576,8 +576,8 @@ void Dma::makeAccesibleFromVA(const MemoryBlock &mem) throw RuntimeError("Memory not accessible by DMA"); // Sanity-check if mapping worked, this shouldn't be neccessary - if (not isMemoryBlockAccesible(mem, s2mmInterface) or - not isMemoryBlockAccesible(mem, mm2sInterface)) + if (not isMemoryBlockAccesible(*mem, s2mmInterface) or + not isMemoryBlockAccesible(*mem, mm2sInterface)) throw RuntimeError("Mapping memory via card didn't work, but reported success?!"); } diff --git a/fpga/src/villas-fpga-ctrl.cpp b/fpga/src/villas-fpga-ctrl.cpp index bb5a93b61..a15112bb1 100644 --- a/fpga/src/villas-fpga-ctrl.cpp +++ b/fpga/src/villas-fpga-ctrl.cpp @@ -36,6 +36,56 @@ static std::shared_ptr pciDevices; static auto logger = villas::logging.get("ctrl"); +void readFromDmaToStdOut(std::shared_ptr dma) +{ + auto &alloc = villas::HostRam::getAllocator(); + + const std::shared_ptr block[] = { + alloc.allocateBlock(0x200 * sizeof(uint32_t)), + alloc.allocateBlock(0x200 * sizeof(uint32_t)) + }; + villas::MemoryAccessor mem[] = {*block[0], *block[1]}; + + for (auto b : block) { + dma->makeAccesibleFromVA(b); + } + + auto &mm = MemoryManager::get(); + mm.getGraph().dump("graph.dot"); + + // Setup read transfer + dma->read(*block[0], block[0]->getSize()); + size_t cur = 0, next = 1; + while (true) { + dma->read(*block[next], block[next]->getSize()); + auto bytesRead = dma->readComplete(); + // Setup read transfer + + //auto valuesRead = bytesRead / sizeof(int32_t); + //logger->info("Read {} bytes", bytesRead); + + //for (size_t i = 0; i < valuesRead; i++) + // std::cerr << std::hex << mem[i] << ";"; + //std::cerr << std::endl; + + for (size_t i = 0; i*4 < bytesRead; i++) { + int32_t ival = mem[cur][i]; + float fval = *((float*)(&ival)); // cppcheck-suppress invalidPointerCast + //std::cerr << std::hex << ival << ","; + std::cerr << fval << std::endl; + /*int64_t ival = (int64_t)(mem[1] & 0xFFFF) << 48 | + (int64_t)(mem[1] & 0xFFFF0000) << 16 | + (int64_t)(mem[0] & 0xFFFF) << 16 | + (int64_t)(mem[0] & 0xFFFF0000) >> 16; + double dval = *((double*)(&ival)); + std::cerr << std::hex << ival << "," << dval << std::endl; + bytesRead -= 8;*/ + //logger->info("Read value: {}", dval); + } + cur = next; + next = (next + 1) % (sizeof(mem)/sizeof(mem[0])); + } +} int main(int argc, char* argv[]) { @@ -66,12 +116,7 @@ int main(int argc, char* argv[]) return 1; } - // This must be called before card is intialized, because the card descructor - // still accesses the allocated memory. This order ensures that the allocator - // is destroyed AFTER the card. - auto &alloc = villas::HostRam::getAllocator(); - villas::MemoryAccessor mem[] = {alloc.allocate(0x200), alloc.allocate(0x200)}; - const villas::MemoryBlock block[] = {mem[0].getMemoryBlock(), mem[1].getMemoryBlock()}; + auto card = fpga::setupFpgaCard(configFile, fpgaName); @@ -103,45 +148,7 @@ int main(int argc, char* argv[]) parsedConnectString.configCrossBar(dma, aurora_channels); if (!noDma) { - for (auto b : block) { - dma->makeAccesibleFromVA(b); - } - - auto &mm = MemoryManager::get(); - mm.getGraph().dump("graph.dot"); - - // Setup read transfer - dma->read(block[0], block[0].getSize()); - size_t cur = 0, next = 1; - while (true) { - dma->read(block[next], block[next].getSize()); - auto bytesRead = dma->readComplete(); - // Setup read transfer - - //auto valuesRead = bytesRead / sizeof(int32_t); - //logger->info("Read {} bytes", bytesRead); - - //for (size_t i = 0; i < valuesRead; i++) - // std::cerr << std::hex << mem[i] << ";"; - //std::cerr << std::endl; - - for (size_t i = 0; i*4 < bytesRead; i++) { - int32_t ival = mem[cur][i]; - float fval = *((float*)(&ival)); // cppcheck-suppress invalidPointerCast - //std::cerr << std::hex << ival << ","; - std::cerr << fval << std::endl; - /*int64_t ival = (int64_t)(mem[1] & 0xFFFF) << 48 | - (int64_t)(mem[1] & 0xFFFF0000) << 16 | - (int64_t)(mem[0] & 0xFFFF) << 16 | - (int64_t)(mem[0] & 0xFFFF0000) >> 16; - double dval = *((double*)(&ival)); - std::cerr << std::hex << ival << "," << dval << std::endl; - bytesRead -= 8;*/ - //logger->info("Read value: {}", dval); - } - cur = next; - next = (next + 1) % (sizeof(mem)/sizeof(mem[0])); - } + readFromDmaToStdOut(std::move(dma)); } } catch (const RuntimeError &e) { logger->error("Error: {}", e.what()); diff --git a/fpga/tests/unit/dma.cpp b/fpga/tests/unit/dma.cpp index d208d47cb..d396b1885 100644 --- a/fpga/tests/unit/dma.cpp +++ b/fpga/tests/unit/dma.cpp @@ -63,12 +63,15 @@ Test(fpga, dma, .description = "DMA") auto dst = bram->getAllocator().allocate(len); #else // ... only works with IOMMU enabled currently - auto src = HostRam::getAllocator().allocate(len); - auto dst = HostRam::getAllocator().allocate(len); + auto &alloc = villas::HostRam::getAllocator(); + const std::shared_ptr srcBlock = alloc.allocateBlock(len); + const std::shared_ptr dstBlock = alloc.allocateBlock(len); + villas::MemoryAccessor src(*srcBlock); + villas::MemoryAccessor dst(*dstBlock); #endif // Make sure memory is accessible for DMA - dma->makeAccesibleFromVA(src.getMemoryBlock()); - dma->makeAccesibleFromVA(dst.getMemoryBlock()); + dma->makeAccesibleFromVA(srcBlock); + dma->makeAccesibleFromVA(dstBlock); // Get new random data const size_t lenRandom = utils::readRandom(&src, len); From 5ab6007909c371d6dc29ef1e2ffc485a3e02618b Mon Sep 17 00:00:00 2001 From: Niklas Eiling Date: Fri, 6 Jan 2023 11:14:34 +0100 Subject: [PATCH 04/15] small code review Signed-off-by: Niklas Eiling --- fpga/include/villas/fpga/card.hpp | 8 +++----- fpga/lib/utils.cpp | 2 +- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/fpga/include/villas/fpga/card.hpp b/fpga/include/villas/fpga/card.hpp index e1b8a4566..3f0e3891d 100644 --- a/fpga/include/villas/fpga/card.hpp +++ b/fpga/include/villas/fpga/card.hpp @@ -36,16 +36,14 @@ public: virtual ~Card(); - + bool mapMemoryBlock(const std::shared_ptr block); + bool unmapMemoryBlock(const MemoryBlock &block); + std::shared_ptr vfioContainer; std::shared_ptr lookupIp(const std::string &name) const; std::shared_ptr lookupIp(const Vlnv &vlnv) const; std::shared_ptr lookupIp(const ip::IpIdentifier &id) const; - bool mapMemoryBlock(const std::shared_ptr block); - bool unmapMemoryBlock(const MemoryBlock &block); - std::shared_ptr vfioContainer; - protected: // Keep a map of already mapped memory blocks std::map> memoryBlocksMapped; diff --git a/fpga/lib/utils.cpp b/fpga/lib/utils.cpp index 071336cef..6fdede233 100644 --- a/fpga/lib/utils.cpp +++ b/fpga/lib/utils.cpp @@ -63,7 +63,7 @@ void fpga::ConnectString::parseString(std::string& connectString) return; } - std::regex regex("([0-9]+)([<\\->]+)([0-9]+|stdin|stdout)"); + static const std::regex regex("([0-9]+)([<\\->]+)([0-9]+|stdin|stdout)"); std::smatch match; if (!std::regex_match(connectString, match, regex) || match.size() != 4) { From 498af9fd1c5b23e084382adfa51dd95677527a14 Mon Sep 17 00:00:00 2001 From: Niklas Eiling Date: Fri, 6 Jan 2023 17:21:47 +0100 Subject: [PATCH 05/15] ips/dma: make read correctly wait on interrupts Modify villas-fpga-ctrl to fit the new behavior of Dma. Makes reading from DMA work even when we are too slow and only receive partial batches of BDs. Signed-off-by: Niklas Eiling --- fpga/include/villas/fpga/ips/dma.hpp | 3 +- fpga/lib/ips/dma.cpp | 42 ++++++++-------------------- fpga/src/villas-fpga-ctrl.cpp | 41 ++++++++++++++------------- 3 files changed, 35 insertions(+), 51 deletions(-) diff --git a/fpga/include/villas/fpga/ips/dma.hpp b/fpga/include/villas/fpga/ips/dma.hpp index 5f4f21a0f..ed29786f2 100644 --- a/fpga/include/villas/fpga/ips/dma.hpp +++ b/fpga/include/villas/fpga/ips/dma.hpp @@ -133,7 +133,8 @@ private: // When using SG: ringBdSize is the maximum number of BDs usable in the ring // Depending on alignment, the actual number of BDs usable can be smaller static constexpr size_t requestedRingBdSize = 2048; - uint32_t actualRingBdSize = XAxiDma_BdRingCntCalc(XAXIDMA_BD_MINIMUM_ALIGNMENT, requestedRingBdSize); + static constexpr size_t requestedRingBdSizeMemory = requestedRingBdSize * sizeof(XAxiDma_Bd); + uint32_t actualRingBdSize = XAxiDma_BdRingCntCalc(XAXIDMA_BD_MINIMUM_ALIGNMENT, requestedRingBdSizeMemory); std::shared_ptr sgRingTx; std::shared_ptr sgRingRx; }; diff --git a/fpga/lib/ips/dma.cpp b/fpga/lib/ips/dma.cpp index c129b6f82..92aae09ae 100644 --- a/fpga/lib/ips/dma.cpp +++ b/fpga/lib/ips/dma.cpp @@ -82,7 +82,7 @@ void Dma::setupScatterGatherRingRx() // Allocate and map space for BD ring in host RAM auto &alloc = villas::HostRam::getAllocator(); - sgRingRx = alloc.allocateBlock(requestedRingBdSize * sizeof(uint16_t) * XAXIDMA_BD_NUM_WORDS); + sgRingRx = alloc.allocateBlock(requestedRingBdSizeMemory); if (not card->mapMemoryBlock(sgRingRx)) throw RuntimeError("Memory not accessible by DMA"); @@ -127,7 +127,7 @@ void Dma::setupScatterGatherRingTx() // Allocate and map space for BD ring in host RAM auto &alloc = villas::HostRam::getAllocator(); - sgRingTx = alloc.allocateBlock(requestedRingBdSize * sizeof(uint16_t) * XAXIDMA_BD_NUM_WORDS); + sgRingTx = alloc.allocateBlock(requestedRingBdSizeMemory); if (not card->mapMemoryBlock(sgRingTx)) throw RuntimeError("Memory not accessible by DMA"); @@ -256,8 +256,6 @@ bool Dma::read(const MemoryBlock &mem, size_t len) if (buf == nullptr) throw RuntimeError("Buffer was null"); - logger->debug("Read from stream and write to address {:p}", buf); - return hasScatterGather() ? readScatterGather(buf, len) : readSimple(buf, len); } @@ -396,43 +394,27 @@ Dma::readCompleteScatterGather() auto rxRing = XAxiDma_GetRxRing(&xDma); int ret = XST_FAILURE; size_t bytesRead = 0; + static size_t errcnt = 32; + + //auto intrNum = + irqs[s2mmInterrupt].irqController->waitForInterrupt(irqs[s2mmInterrupt].num); // Wait until the data has been received by the RX channel. if ((processedBds = XAxiDma_BdRingFromHw(rxRing, readCoalesce, &bd)) < readCoalesce) { - if (processedBds != 0) { - //Ignore partial batches - logger->warn("Ignoring partial batch of {} BDs.", processedBds); - ret = XAxiDma_BdRingFree(rxRing, processedBds, bd); - if (ret != XST_SUCCESS) - throw RuntimeError("Failed to free {} RX BDs {}", processedBds, ret); + logger->warn("Got partial batch of {}/{} BDs.", processedBds, readCoalesce); + if(errcnt-- == 0) { + throw RuntimeError("too many partial batches"); } - //auto intrNum = - irqs[s2mmInterrupt].irqController->waitForInterrupt(irqs[s2mmInterrupt].num); - //If we got a partial batch on the first call, we have to receive up to readCoalesce*2 - //to make sure we get a full batch of readCoalesce messages - processedBds = XAxiDma_BdRingFromHw(rxRing, readCoalesce*2, &bd); } - if(processedBds < readCoalesce) { - // We got less than we expected. We already tried two times so let's give up. - throw RuntimeError("Read only {} BDs, expected {}.", processedBds, readCoalesce); - } else if(processedBds > readCoalesce) { - // If the first try was a partial batch, we receive two batches on the second try - // We ignore the first batch and only process the second one - while (processedBds > readCoalesce) { - bd = (XAxiDma_Bd *) XAxiDma_BdRingNext(rxRing, bd); - processedBds--; - } - ret = XAxiDma_BdRingFree(rxRing, processedBds-readCoalesce, bd); - if (ret != XST_SUCCESS) - throw RuntimeError("Failed to free {} RX BDs {}", processedBds, ret); - } - // At this point we have exactly readCoalesce BDs. // Acknowledge the interrupt. Has no effect if no interrupt has occured. auto irqStatus = XAxiDma_BdRingGetIrq(rxRing); XAxiDma_BdRingAckIrq(rxRing, irqStatus); + if (processedBds == 0) + return 0; + if (bd == nullptr) throw RuntimeError("Bd was null."); diff --git a/fpga/src/villas-fpga-ctrl.cpp b/fpga/src/villas-fpga-ctrl.cpp index a15112bb1..b6de02200 100644 --- a/fpga/src/villas-fpga-ctrl.cpp +++ b/fpga/src/villas-fpga-ctrl.cpp @@ -53,37 +53,38 @@ void readFromDmaToStdOut(std::shared_ptr dma) auto &mm = MemoryManager::get(); mm.getGraph().dump("graph.dot"); + + size_t cur = 0, next = 1; + std::ios::sync_with_stdio(false); + size_t samplecnt = 0; + static const char outputfmt[] = "%05zd: %7f\n"; + static const size_t outputfmtSize = 16; + char outputbuf[16][outputfmtSize] = {0}; + size_t bytesRead; + // Setup read transfer dma->read(*block[0], block[0]->getSize()); - size_t cur = 0, next = 1; + while (true) { + //logger->debug("Read from stream and write to address {:p}", *block[next]); dma->read(*block[next], block[next]->getSize()); - auto bytesRead = dma->readComplete(); - // Setup read transfer - - //auto valuesRead = bytesRead / sizeof(int32_t); - //logger->info("Read {} bytes", bytesRead); - - //for (size_t i = 0; i < valuesRead; i++) - // std::cerr << std::hex << mem[i] << ";"; - //std::cerr << std::endl; + bytesRead = dma->readComplete(); for (size_t i = 0; i*4 < bytesRead; i++) { int32_t ival = mem[cur][i]; float fval = *((float*)(&ival)); // cppcheck-suppress invalidPointerCast //std::cerr << std::hex << ival << ","; - std::cerr << fval << std::endl; - /*int64_t ival = (int64_t)(mem[1] & 0xFFFF) << 48 | - (int64_t)(mem[1] & 0xFFFF0000) << 16 | - (int64_t)(mem[0] & 0xFFFF) << 16 | - (int64_t)(mem[0] & 0xFFFF0000) >> 16; - double dval = *((double*)(&ival)); - std::cerr << std::hex << ival << "," << dval << std::endl; - bytesRead -= 8;*/ - //logger->info("Read value: {}", dval); + //std::cout << samplecnt++ << ": " << fval << '\n'; + if (std::snprintf(outputbuf[i], outputfmtSize+1, outputfmt, (samplecnt++%100000), fval) > (int)outputfmtSize) { + throw RuntimeError("Output buffer too small"); + } } + for (size_t i = 0; i < sizeof(outputbuf)/sizeof(outputbuf[0])-bytesRead/4; i++) { + outputbuf[i][0] = '\0'; + } + std::cout << *outputbuf << std::flush; cur = next; - next = (next + 1) % (sizeof(mem)/sizeof(mem[0])); + next = (next + 1) % (sizeof(mem) / sizeof(mem[0])); } } From ce1e8e28ce047d9f77f8e82b5755949a454607e7 Mon Sep 17 00:00:00 2001 From: Niklas Eiling Date: Mon, 9 Jan 2023 14:12:32 +0100 Subject: [PATCH 06/15] move formatting of printing to stdout to separate class and make in configurable Signed-off-by: Niklas Eiling --- fpga/include/villas/fpga/utils.hpp | 81 ++++++++++++++++++++++++++++++ fpga/src/villas-fpga-ctrl.cpp | 29 ++++------- 2 files changed, 92 insertions(+), 18 deletions(-) diff --git a/fpga/include/villas/fpga/utils.hpp b/fpga/include/villas/fpga/utils.hpp index fae15afde..ecbcacc2b 100644 --- a/fpga/include/villas/fpga/utils.hpp +++ b/fpga/include/villas/fpga/utils.hpp @@ -62,6 +62,87 @@ protected: bool dmaLoopback; }; +class BufferedSampleFormatter { +public: + virtual void format(float value) = 0; + virtual void output(std::ostream& out) + { + out << buf.data() << std::flush; + clearBuf(); + } + virtual void clearBuf() + { + for (size_t i = 0; i < bufSamples && buf[i*bufSampleSize] != '\0'; i++) { + buf[i*bufSampleSize] = '\0'; + } + currentBufLoc = 0; + } +protected: + std::vector buf; + const size_t bufSamples; + const size_t bufSampleSize; + size_t currentBufLoc; + + BufferedSampleFormatter(const size_t bufSamples, const size_t bufSampleSize) : + buf(bufSamples*bufSampleSize+1), // Leave room for a final `\0' + bufSamples(bufSamples), + bufSampleSize(bufSampleSize), + currentBufLoc(0) {}; + BufferedSampleFormatter() = delete; + BufferedSampleFormatter(const BufferedSampleFormatter&) = delete; + virtual char* nextBufPos() + { + return &buf[(currentBufLoc++)*bufSampleSize]; + } +}; + +class BufferedSampleFormatterShort : public BufferedSampleFormatter { +public: + BufferedSampleFormatterShort(size_t bufSizeInSamples) : + BufferedSampleFormatter(bufSizeInSamples, formatStringSize) {}; + + virtual void format(float value) override + { + if (std::snprintf(nextBufPos(), formatStringSize+1, formatString, value) > (int)formatStringSize) { + throw RuntimeError("Output buffer too small"); + } + } + +protected: + static constexpr char formatString[] = "%7f\n"; + static constexpr size_t formatStringSize = 9; +}; + +class BufferedSampleFormatterLong : public BufferedSampleFormatter { +public: + BufferedSampleFormatterLong(size_t bufSizeInSamples) : + BufferedSampleFormatter(bufSizeInSamples, formatStringSize), + sampleCnt(0) {}; + + virtual void format(float value) override + { + if (std::snprintf(nextBufPos(), formatStringSize+1, formatString, sampleCnt, value) > (int)formatStringSize) { + throw RuntimeError("Output buffer too small"); + } + sampleCnt = (sampleCnt+1)%100000; + } + +protected: + static constexpr char formatString[] = "%05zd: %7f\n"; + static constexpr size_t formatStringSize = 16; + size_t sampleCnt; +}; + +std::unique_ptr getBufferedSampleFormatter(const std::string &format, size_t bufSizeInSamples) +{ + if (format == "long") { + return std::make_unique(bufSizeInSamples); + } else if (format == "short") { + return std::make_unique(bufSizeInSamples); + } else { + throw RuntimeError("Unknown output format '{}'", format); + } +} } /* namespace fpga */ } /* namespace villas */ diff --git a/fpga/src/villas-fpga-ctrl.cpp b/fpga/src/villas-fpga-ctrl.cpp index b6de02200..2d73f7c4f 100644 --- a/fpga/src/villas-fpga-ctrl.cpp +++ b/fpga/src/villas-fpga-ctrl.cpp @@ -36,7 +36,8 @@ static std::shared_ptr pciDevices; static auto logger = villas::logging.get("ctrl"); -void readFromDmaToStdOut(std::shared_ptr dma) +void readFromDmaToStdOut(std::shared_ptr dma, + std::unique_ptr formatter) { auto &alloc = villas::HostRam::getAllocator(); @@ -56,33 +57,24 @@ void readFromDmaToStdOut(std::shared_ptr dma) size_t cur = 0, next = 1; std::ios::sync_with_stdio(false); - size_t samplecnt = 0; - static const char outputfmt[] = "%05zd: %7f\n"; - static const size_t outputfmtSize = 16; - char outputbuf[16][outputfmtSize] = {0}; size_t bytesRead; // Setup read transfer dma->read(*block[0], block[0]->getSize()); while (true) { - //logger->debug("Read from stream and write to address {:p}", *block[next]); + logger->trace("Read from stream and write to address {}:{:p}", block[next]->getAddrSpaceId(), block[next]->getOffset()); + // We could use the number of interrupts to determine if we missed a chunk of data dma->read(*block[next], block[next]->getSize()); bytesRead = dma->readComplete(); for (size_t i = 0; i*4 < bytesRead; i++) { int32_t ival = mem[cur][i]; float fval = *((float*)(&ival)); // cppcheck-suppress invalidPointerCast - //std::cerr << std::hex << ival << ","; - //std::cout << samplecnt++ << ": " << fval << '\n'; - if (std::snprintf(outputbuf[i], outputfmtSize+1, outputfmt, (samplecnt++%100000), fval) > (int)outputfmtSize) { - throw RuntimeError("Output buffer too small"); - } + formatter->format(fval); } - for (size_t i = 0; i < sizeof(outputbuf)/sizeof(outputbuf[0])-bytesRead/4; i++) { - outputbuf[i][0] = '\0'; - } - std::cout << *outputbuf << std::flush; + formatter->output(std::cout); + cur = next; next = (next + 1) % (sizeof(mem) / sizeof(mem[0])); } @@ -104,6 +96,8 @@ int main(int argc, char* argv[]) app.add_option("-x,--connect", connectStr, "Connect a FPGA port with another or stdin/stdout"); bool noDma = false; app.add_flag("--no-dma", noDma, "Do not setup DMA, only setup FPGA and Crossbar links"); + std::string outputFormat = "short"; + app.add_option("--output-format", outputFormat, "Output format (short, long)"); app.parse(argc, argv); @@ -117,8 +111,6 @@ int main(int argc, char* argv[]) return 1; } - - auto card = fpga::setupFpgaCard(configFile, fpgaName); std::vector> aurora_channels; @@ -149,7 +141,8 @@ int main(int argc, char* argv[]) parsedConnectString.configCrossBar(dma, aurora_channels); if (!noDma) { - readFromDmaToStdOut(std::move(dma)); + auto formatter = fpga::getBufferedSampleFormatter(outputFormat, 16); + readFromDmaToStdOut(std::move(dma), std::move(formatter)); } } catch (const RuntimeError &e) { logger->error("Error: {}", e.what()); From 8ff0370d3679f85eed097756606ac729c0c241e8 Mon Sep 17 00:00:00 2001 From: Niklas Eiling Date: Mon, 9 Jan 2023 14:23:18 +0100 Subject: [PATCH 07/15] fix license of utils.hpp Signed-off-by: Niklas Eiling --- fpga/include/villas/fpga/utils.hpp | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/fpga/include/villas/fpga/utils.hpp b/fpga/include/villas/fpga/utils.hpp index ecbcacc2b..85a0031b4 100644 --- a/fpga/include/villas/fpga/utils.hpp +++ b/fpga/include/villas/fpga/utils.hpp @@ -1,24 +1,7 @@ /** Helper function for directly using VILLASfpga outside of VILLASnode - * - * @file - * @author Niklas Eiling - * @copyright 2022, Steffen Vogel, Niklas Eiling - * @license GNU General Public License (version 3) - * - * VILLASfpga - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . + * Author: Niklas Eiling + * SPDX-FileCopyrightText: 2022 Niklas Eiling + * SPDX-License-Identifier: Apache-2.0 *********************************************************************************/ #pragma once From ab39f57405b46fbbc193167862f620bc784ba5a5 Mon Sep 17 00:00:00 2001 From: Niklas Eiling Date: Tue, 10 Jan 2023 17:05:47 +0100 Subject: [PATCH 08/15] add more configuration options to villas-fpga-ctrl Signed-off-by: Niklas Eiling --- fpga/common | 2 +- fpga/lib/utils.cpp | 2 +- fpga/src/villas-fpga-ctrl.cpp | 22 +++++++++++++++------- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/fpga/common b/fpga/common index 729b877a4..90e0c3df7 160000 --- a/fpga/common +++ b/fpga/common @@ -1 +1 @@ -Subproject commit 729b877a405b3bd80205fa1c54bfedbf2f030dc2 +Subproject commit 90e0c3df70200f0eccbb3b145393f81e31e15ebb diff --git a/fpga/lib/utils.cpp b/fpga/lib/utils.cpp index 6fdede233..470210aa2 100644 --- a/fpga/lib/utils.cpp +++ b/fpga/lib/utils.cpp @@ -105,7 +105,7 @@ int fpga::ConnectString::portStringToInt(std::string &str) const } -// parses a string lik "1->2" or "1<->stdout" and configures the crossbar +// parses a string like "1->2" or "1<->stdout" and configures the crossbar accordingly void fpga::ConnectString::configCrossBar(std::shared_ptr dma, std::vector>& aurora_channels) const { diff --git a/fpga/src/villas-fpga-ctrl.cpp b/fpga/src/villas-fpga-ctrl.cpp index 2d73f7c4f..86b2d2981 100644 --- a/fpga/src/villas-fpga-ctrl.cpp +++ b/fpga/src/villas-fpga-ctrl.cpp @@ -51,9 +51,6 @@ void readFromDmaToStdOut(std::shared_ptr dma, dma->makeAccesibleFromVA(b); } - auto &mm = MemoryManager::get(); - mm.getGraph().dump("graph.dot"); - size_t cur = 0, next = 1; std::ios::sync_with_stdio(false); @@ -98,7 +95,10 @@ int main(int argc, char* argv[]) app.add_flag("--no-dma", noDma, "Do not setup DMA, only setup FPGA and Crossbar links"); std::string outputFormat = "short"; app.add_option("--output-format", outputFormat, "Output format (short, long)"); - + bool dumpGraph = false; + app.add_flag("--dump-graph", dumpGraph, "Dumps the graph of memory regions into \"graph.dot\""); + bool dumpAuroraChannels = true; + app.add_flag("--dump-aurora", dumpAuroraChannels, "Dumps the detected Aurora channels."); app.parse(argc, argv); // Logging setup @@ -133,16 +133,24 @@ int main(int argc, char* argv[]) return 1; } - for (auto aurora : aurora_channels) - aurora->dump(); + if (dumpGraph) { + auto &mm = MemoryManager::get(); + mm.getGraph().dump("graph.dot"); + } + if (dumpAuroraChannels) { + for (auto aurora : aurora_channels) + aurora->dump(); + } // Configure Crossbar switch const fpga::ConnectString parsedConnectString(connectStr); parsedConnectString.configCrossBar(dma, aurora_channels); - if (!noDma) { + if (!noDma && parsedConnectString.isDstStdout()) { auto formatter = fpga::getBufferedSampleFormatter(outputFormat, 16); readFromDmaToStdOut(std::move(dma), std::move(formatter)); + } else if (!noDma && parsedConnectString.isSrcStdin()) { + } } catch (const RuntimeError &e) { logger->error("Error: {}", e.what()); From 590cef10d074bc7606db3866ab2817f199a96f6e Mon Sep 17 00:00:00 2001 From: Niklas Eiling Date: Wed, 11 Jan 2023 15:08:55 +0100 Subject: [PATCH 09/15] add check for missed interrupts when handling reads introduce new struct Completion that is returned by Dma::readCompletion and Dma::writeCompletion Signed-off-by: Niklas Eiling --- fpga/include/villas/fpga/ips/dma.hpp | 42 ++++++++-------- fpga/include/villas/fpga/utils.hpp | 7 ++- fpga/lib/ips/dma.cpp | 73 ++++++++++++++-------------- fpga/src/villas-fpga-ctrl.cpp | 12 +++-- fpga/tests/unit/rtds.cpp | 2 +- 5 files changed, 74 insertions(+), 62 deletions(-) diff --git a/fpga/include/villas/fpga/ips/dma.hpp b/fpga/include/villas/fpga/ips/dma.hpp index ed29786f2..5740db13d 100644 --- a/fpga/include/villas/fpga/ips/dma.hpp +++ b/fpga/include/villas/fpga/ips/dma.hpp @@ -38,12 +38,19 @@ public: // Stream to memory-mapped (S2MM) bool read(const MemoryBlock &mem, size_t len); - size_t writeComplete() + struct Completion { + Completion() : bytes(0), bds(0), interrupts(0) { } + size_t bytes; // Number of bytes transferred + size_t bds; // Number of buffer descriptors used (only for scatter-gather) + size_t interrupts; // Number of interrupts received since last call (only if interrupts enabled) + }; + + Completion writeComplete() { return hasScatterGather() ? writeCompleteScatterGather() : writeCompleteSimple(); } - size_t readComplete() + Completion readComplete() { return hasScatterGather() ? readCompleteScatterGather() : readCompleteSimple(); } @@ -69,22 +76,6 @@ public: return getMasterPort(mm2sPort); } -private: - bool writeScatterGather(const void* buf, size_t len); - bool readScatterGather(void* buf, size_t len); - size_t writeCompleteScatterGather(); - size_t readCompleteScatterGather(); - - bool writeSimple(const void* buf, size_t len); - bool readSimple(void* buf, size_t len); - size_t writeCompleteSimple(); - size_t readCompleteSimple(); - - void setupScatterGather(); - void setupScatterGatherRingRx(); - void setupScatterGatherRingTx(); - -public: static constexpr const char* s2mmPort = "S2MM"; static constexpr const char* mm2sPort = "MM2S"; @@ -92,8 +83,21 @@ public: virtual void dump() override; - private: + bool writeScatterGather(const void* buf, size_t len); + bool readScatterGather(void* buf, size_t len); + Completion writeCompleteScatterGather(); + Completion readCompleteScatterGather(); + + bool writeSimple(const void* buf, size_t len); + bool readSimple(void* buf, size_t len); + Completion writeCompleteSimple(); + Completion readCompleteSimple(); + + void setupScatterGather(); + void setupScatterGatherRingRx(); + void setupScatterGatherRingTx(); + static constexpr char registerMemory[] = "Reg"; static constexpr char mm2sInterrupt[] = "mm2s_introut"; diff --git a/fpga/include/villas/fpga/utils.hpp b/fpga/include/villas/fpga/utils.hpp index 85a0031b4..a6e4a3139 100644 --- a/fpga/include/villas/fpga/utils.hpp +++ b/fpga/include/villas/fpga/utils.hpp @@ -107,7 +107,7 @@ public: if (std::snprintf(nextBufPos(), formatStringSize+1, formatString, sampleCnt, value) > (int)formatStringSize) { throw RuntimeError("Output buffer too small"); } - sampleCnt = (sampleCnt+1)%100000; + sampleCnt = (sampleCnt+1) % 100000; } protected: @@ -116,7 +116,10 @@ protected: size_t sampleCnt; }; -std::unique_ptr getBufferedSampleFormatter(const std::string &format, size_t bufSizeInSamples) + +std::unique_ptr getBufferedSampleFormatter( + const std::string &format, + size_t bufSizeInSamples) { if (format == "long") { return std::make_unique(bufSizeInSamples); diff --git a/fpga/lib/ips/dma.cpp b/fpga/lib/ips/dma.cpp index 92aae09ae..b17d24901 100644 --- a/fpga/lib/ips/dma.cpp +++ b/fpga/lib/ips/dma.cpp @@ -210,10 +210,10 @@ bool Dma::memcpy(const MemoryBlock &src, const MemoryBlock &dst, size_t len) if (this->write(src, len) == 0) return false; - if (not this->writeComplete()) + if (not this->writeComplete().bds) return false; - if (not this->readComplete()) + if (not this->readComplete().bds) return false; return true; @@ -344,20 +344,18 @@ bool Dma::readScatterGather(void *buf, size_t len) return true; } -size_t -Dma::writeCompleteScatterGather() +Dma::Completion Dma::writeCompleteScatterGather() { + Completion c; XAxiDma_Bd *bd = nullptr, *curBd; - size_t processedBds = 0; auto txRing = XAxiDma_GetTxRing(&xDma); int ret = XST_FAILURE; - size_t bytesWritten = 0; - if ((processedBds = XAxiDma_BdRingFromHw(txRing, 1, &bd)) == 0) + if ((c.bds = XAxiDma_BdRingFromHw(txRing, 1, &bd)) == 0) { - /*auto intrNum = */irqs[mm2sInterrupt].irqController->waitForInterrupt(irqs[mm2sInterrupt].num); + c.interrupts = irqs[mm2sInterrupt].irqController->waitForInterrupt(irqs[mm2sInterrupt].num); //logger->info("Got {} interrupts (id: {}) from write channel", intrNum, irqs[mm2sInterrupt].num); - processedBds = XAxiDma_BdRingFromHw(txRing, 1, &bd); + c.bds = XAxiDma_BdRingFromHw(txRing, 1, &bd); } // Acknowledge the interrupt @@ -368,41 +366,38 @@ Dma::writeCompleteScatterGather() throw RuntimeError("Bd was null."); curBd = bd; - for (size_t i = 0; i < processedBds; i++) { + for (size_t i = 0; i < c.bds; i++) { ret = XAxiDma_BdGetSts(curBd); if ((ret & XAXIDMA_BD_STS_ALL_ERR_MASK) || (!(ret & XAXIDMA_BD_STS_COMPLETE_MASK))) { throw RuntimeError("Bd Status register shows error: {}", ret); break; } - bytesWritten += XAxiDma_BdGetLength(bd, txRing->MaxTransferLen); + c.bytes += XAxiDma_BdGetLength(bd, txRing->MaxTransferLen); curBd = (XAxiDma_Bd *) XAxiDma_BdRingNext(txRing, curBd); } - ret = XAxiDma_BdRingFree(txRing, processedBds, bd); + ret = XAxiDma_BdRingFree(txRing, c.bds, bd); if (ret != XST_SUCCESS) - throw RuntimeError("Failed to free {} TX BDs {}", processedBds, ret); + throw RuntimeError("Failed to free {} TX BDs {}", c.bds, ret); - return bytesWritten; + return c; } -size_t -Dma::readCompleteScatterGather() +Dma::Completion Dma::readCompleteScatterGather() { + Completion c; XAxiDma_Bd *bd = nullptr, *curBd; - size_t processedBds = 0; auto rxRing = XAxiDma_GetRxRing(&xDma); int ret = XST_FAILURE; - size_t bytesRead = 0; static size_t errcnt = 32; - //auto intrNum = - irqs[s2mmInterrupt].irqController->waitForInterrupt(irqs[s2mmInterrupt].num); + c.interrupts = irqs[s2mmInterrupt].irqController->waitForInterrupt(irqs[s2mmInterrupt].num); // Wait until the data has been received by the RX channel. - if ((processedBds = XAxiDma_BdRingFromHw(rxRing, readCoalesce, &bd)) < readCoalesce) + if ((c.bds = XAxiDma_BdRingFromHw(rxRing, readCoalesce, &bd)) < readCoalesce) { - logger->warn("Got partial batch of {}/{} BDs.", processedBds, readCoalesce); + logger->warn("Got partial batch of {}/{} BDs.", c.bds, readCoalesce); if(errcnt-- == 0) { throw RuntimeError("too many partial batches"); } @@ -412,30 +407,32 @@ Dma::readCompleteScatterGather() auto irqStatus = XAxiDma_BdRingGetIrq(rxRing); XAxiDma_BdRingAckIrq(rxRing, irqStatus); - if (processedBds == 0) - return 0; + if (c.bds == 0) { + c.bytes = 0; + return c; + } if (bd == nullptr) throw RuntimeError("Bd was null."); curBd = bd; - for (size_t i = 0; i < processedBds; i++) { + for (size_t i = 0; i < c.bds; i++) { ret = XAxiDma_BdGetSts(curBd); if ((ret & XAXIDMA_BD_STS_ALL_ERR_MASK) || (!(ret & XAXIDMA_BD_STS_COMPLETE_MASK))) { throw RuntimeError("Bd Status register shows error: {}", ret); break; } - bytesRead += XAxiDma_BdGetActualLength(bd, rxRing->MaxTransferLen); + c.bytes += XAxiDma_BdGetActualLength(bd, rxRing->MaxTransferLen); curBd = (XAxiDma_Bd *) XAxiDma_BdRingNext(rxRing, curBd); } // Free all processed RX BDs for future transmission. - ret = XAxiDma_BdRingFree(rxRing, processedBds, bd); + ret = XAxiDma_BdRingFree(rxRing, c.bds, bd); if (ret != XST_SUCCESS) - throw RuntimeError("Failed to free {} TX BDs {}.", processedBds, ret); + throw RuntimeError("Failed to free {} TX BDs {}.", c.bds, ret); - return bytesRead; + return c; } bool Dma::writeSimple(const void *buf, size_t len) @@ -518,32 +515,34 @@ bool Dma::readSimple(void *buf, size_t len) return true; } -size_t -Dma::writeCompleteSimple() +Dma::Completion Dma::writeCompleteSimple() { + Completion c; while (!(XAxiDma_IntrGetIrq(&xDma, XAXIDMA_DMA_TO_DEVICE) & XAXIDMA_IRQ_IOC_MASK)) - irqs[mm2sInterrupt].irqController->waitForInterrupt(irqs[mm2sInterrupt]); + c.interrupts = irqs[mm2sInterrupt].irqController->waitForInterrupt(irqs[mm2sInterrupt]); XAxiDma_IntrAckIrq(&xDma, XAXIDMA_IRQ_IOC_MASK, XAXIDMA_DMA_TO_DEVICE); const XAxiDma_BdRing *ring = XAxiDma_GetTxRing(&xDma); const size_t bytesWritten = XAxiDma_ReadReg(ring->ChanBase, XAXIDMA_BUFFLEN_OFFSET); - return bytesWritten; + c.bytes = bytesWritten; + return c; } -size_t -Dma::readCompleteSimple() +Dma::Completion Dma::readCompleteSimple() { + Completion c; while (!(XAxiDma_IntrGetIrq(&xDma, XAXIDMA_DEVICE_TO_DMA) & XAXIDMA_IRQ_IOC_MASK)) - irqs[s2mmInterrupt].irqController->waitForInterrupt(irqs[s2mmInterrupt]); + c.interrupts = irqs[s2mmInterrupt].irqController->waitForInterrupt(irqs[s2mmInterrupt]); XAxiDma_IntrAckIrq(&xDma, XAXIDMA_IRQ_IOC_MASK, XAXIDMA_DEVICE_TO_DMA); const XAxiDma_BdRing *ring = XAxiDma_GetRxRing(&xDma); const size_t bytesRead = XAxiDma_ReadReg(ring->ChanBase, XAXIDMA_BUFFLEN_OFFSET); - return bytesRead; + c.bytes = bytesRead; + return c; } void Dma::makeAccesibleFromVA(std::shared_ptr mem) diff --git a/fpga/src/villas-fpga-ctrl.cpp b/fpga/src/villas-fpga-ctrl.cpp index 86b2d2981..07fcf7dc8 100644 --- a/fpga/src/villas-fpga-ctrl.cpp +++ b/fpga/src/villas-fpga-ctrl.cpp @@ -54,7 +54,6 @@ void readFromDmaToStdOut(std::shared_ptr dma, size_t cur = 0, next = 1; std::ios::sync_with_stdio(false); - size_t bytesRead; // Setup read transfer dma->read(*block[0], block[0]->getSize()); @@ -63,9 +62,16 @@ void readFromDmaToStdOut(std::shared_ptr dma, logger->trace("Read from stream and write to address {}:{:p}", block[next]->getAddrSpaceId(), block[next]->getOffset()); // We could use the number of interrupts to determine if we missed a chunk of data dma->read(*block[next], block[next]->getSize()); - bytesRead = dma->readComplete(); + auto c = dma->readComplete(); - for (size_t i = 0; i*4 < bytesRead; i++) { + if (c.interrupts > 1) { + logger->warn("Missed {} interrupts", c.interrupts - 1); + } + + logger->trace("bytes: {}, intrs: {}, bds: {}", + c.bytes, c.interrupts, c.bds); + + for (size_t i = 0; i*4 < c.bytes; i++) { int32_t ival = mem[cur][i]; float fval = *((float*)(&ival)); // cppcheck-suppress invalidPointerCast formatter->format(fval); diff --git a/fpga/tests/unit/rtds.cpp b/fpga/tests/unit/rtds.cpp index 8002c7dbb..ca16d91d2 100644 --- a/fpga/tests/unit/rtds.cpp +++ b/fpga/tests/unit/rtds.cpp @@ -81,7 +81,7 @@ Test(fpga, rtds, .description = "RTDS") "Failed to initiate DMA read"); // logger->info("Wait read"); - const size_t bytesRead = dma->readComplete(); + const size_t bytesRead = dma->readComplete().bytes; cr_assert(bytesRead > 0, "Failed to complete DMA read"); From e6f035cd3119b0a701c7f3867bfc4df17ac22ae6 Mon Sep 17 00:00:00 2001 From: Niklas Eiling Date: Thu, 12 Jan 2023 12:11:28 +0100 Subject: [PATCH 10/15] add basic thread-safety to ips/dma Signed-off-by: Niklas Eiling --- fpga/include/villas/fpga/ips/dma.hpp | 1 + fpga/lib/ips/dma.cpp | 127 +++++++++++++++++++++------ fpga/src/villas-fpga-ctrl.cpp | 1 - 3 files changed, 101 insertions(+), 28 deletions(-) diff --git a/fpga/include/villas/fpga/ips/dma.hpp b/fpga/include/villas/fpga/ips/dma.hpp index 5740db13d..6e788e1d3 100644 --- a/fpga/include/villas/fpga/ips/dma.hpp +++ b/fpga/include/villas/fpga/ips/dma.hpp @@ -119,6 +119,7 @@ private: XAxiDma xDma; XAxiDma_Config xConfig; + std::mutex hwLock; bool configDone = false; // use polling to wait for DMA completion or interrupts via efds diff --git a/fpga/lib/ips/dma.cpp b/fpga/lib/ips/dma.cpp index b17d24901..378145f47 100644 --- a/fpga/lib/ips/dma.cpp +++ b/fpga/lib/ips/dma.cpp @@ -33,6 +33,7 @@ bool Dma::init() xConfig.BaseAddr = getBaseAddr(registerMemory); + hwLock.lock(); if (XAxiDma_CfgInitialize(&xDma, &xConfig) != XST_SUCCESS) { logger->error("Cannot initialize Xilinx DMA driver"); return false; @@ -45,6 +46,7 @@ bool Dma::init() else logger->debug("DMA selftest passed"); + hwLock.unlock(); // Map buffer descriptors if (hasScatterGather()) { if (actualRingBdSize < 2*readCoalesce || actualRingBdSize < 2*writeCoalesce) { @@ -54,8 +56,6 @@ bool Dma::init() setupScatterGather(); } - - irqs[mm2sInterrupt].irqController->enableInterrupt(irqs[mm2sInterrupt], polling); irqs[s2mmInterrupt].irqController->enableInterrupt(irqs[s2mmInterrupt], polling); @@ -72,6 +72,7 @@ void Dma::setupScatterGatherRingRx() { int ret; + hwLock.lock(); auto *rxRingPtr = XAxiDma_GetRxRing(&xDma); // Disable all RX interrupts before RxBD space setup @@ -111,12 +112,15 @@ void Dma::setupScatterGatherRingRx() ret = XAxiDma_BdRingStart(rxRingPtr); if (ret != XST_SUCCESS) throw RuntimeError("Failed to start TX ring: {}", ret); + + hwLock.unlock(); } void Dma::setupScatterGatherRingTx() { int ret; + hwLock.lock(); auto *txRingPtr = XAxiDma_GetTxRing(&xDma); // Disable all TX interrupts before TxBD space setup @@ -156,6 +160,7 @@ void Dma::setupScatterGatherRingTx() ret = XAxiDma_BdRingStart(txRingPtr); if (ret != XST_SUCCESS) throw RuntimeError("Failed to start TX ring: {}", ret); + hwLock.unlock(); } bool Dma::reset() @@ -164,8 +169,10 @@ bool Dma::reset() return true; } + hwLock.lock(); XAxiDma_IntrDisable(&xDma, XAXIDMA_IRQ_ALL_MASK, XAXIDMA_DMA_TO_DEVICE); XAxiDma_IntrDisable(&xDma, XAXIDMA_IRQ_ALL_MASK, XAXIDMA_DEVICE_TO_DMA); + XAxiDma_Reset(&xDma); @@ -180,6 +187,7 @@ bool Dma::reset() timeout--; } + hwLock.unlock(); logger->error("DMA reset timed out"); return false; @@ -265,23 +273,31 @@ bool Dma::writeScatterGather(const void *buf, size_t len) // buf is address from view of DMA controller int ret = XST_FAILURE; - + hwLock.lock(); auto *txRing = XAxiDma_GetTxRing(&xDma); - if (txRing == nullptr) + if (txRing == nullptr) { + hwLock.unlock(); throw RuntimeError("TxRing was null."); + } XAxiDma_Bd *bd; ret = XAxiDma_BdRingAlloc(txRing, 1, &bd); - if (ret != XST_SUCCESS) + if (ret != XST_SUCCESS) { + hwLock.unlock(); throw RuntimeError("BdRingAlloc returned {}.", ret); + } ret = XAxiDma_BdSetBufAddr(bd, (uintptr_t) buf); - if (ret != XST_SUCCESS) + if (ret != XST_SUCCESS) { + hwLock.unlock(); throw RuntimeError("Setting BdBufAddr to {} returned {}.", buf, ret); + } ret = XAxiDma_BdSetLength(bd, len, txRing->MaxTransferLen); - if (ret != XST_SUCCESS) + if (ret != XST_SUCCESS) { + hwLock.unlock(); throw RuntimeError("Setting BdBufLength to {} returned {}.", len, ret); + } // We have a single descriptor so it is both start and end of the list XAxiDma_BdSetCtrl(bd, XAXIDMA_BD_CTRL_TXEOF_MASK | XAXIDMA_BD_CTRL_TXSOF_MASK); @@ -292,8 +308,12 @@ bool Dma::writeScatterGather(const void *buf, size_t len) // Give control of BD to HW. We should not access it until transfer is finished. // Failure could also indicate that EOF is not set on last Bd ret = XAxiDma_BdRingToHw(txRing, 1, bd); - if (ret != XST_SUCCESS) + if (ret != XST_SUCCESS) { + hwLock.unlock(); throw RuntimeError("Enqueuing Bd and giving control to HW failed {}", ret); + } + + hwLock.unlock(); return true; } @@ -305,25 +325,34 @@ bool Dma::readScatterGather(void *buf, size_t len) if (len < readCoalesce*readMsgSize) throw RuntimeError("Read size is smaller than readCoalesce*msgSize. Cannot setup BDs."); + hwLock.lock(); auto *rxRing = XAxiDma_GetRxRing(&xDma); - if (rxRing == nullptr) + if (rxRing == nullptr) { + hwLock.unlock(); throw RuntimeError("RxRing was null."); + } XAxiDma_Bd *bd; ret = XAxiDma_BdRingAlloc(rxRing, readCoalesce, &bd); - if (ret != XST_SUCCESS) + if (ret != XST_SUCCESS) { + hwLock.unlock(); throw RuntimeError("Failed to alloc BD in RX ring: {}", ret); + } auto curBd = bd; char* curBuf = (char*)buf; for (size_t i = 0; i < readCoalesce; i++) { ret = XAxiDma_BdSetBufAddr(curBd, (uintptr_t) curBuf); - if (ret != XST_SUCCESS) + if (ret != XST_SUCCESS) { + hwLock.unlock(); throw RuntimeError("Failed to set buffer address {:x} on BD {:x}: {}", (uintptr_t) buf, (uintptr_t) bd, ret); + } ret = XAxiDma_BdSetLength(curBd, readMsgSize, rxRing->MaxTransferLen); - if (ret != XST_SUCCESS) + if (ret != XST_SUCCESS) { + hwLock.unlock(); throw RuntimeError("Rx set length {} on BD {:x} failed {}", len, (uintptr_t) bd, ret); + } // Receive BDs do not need to set anything for the control @@ -338,9 +367,12 @@ bool Dma::readScatterGather(void *buf, size_t len) } ret = XAxiDma_BdRingToHw(rxRing, readCoalesce, bd); - if (ret != XST_SUCCESS) + if (ret != XST_SUCCESS) { + hwLock.unlock(); throw RuntimeError("Failed to submit BD to RX ring: {}", ret); + } + hwLock.unlock(); return true; } @@ -350,25 +382,40 @@ Dma::Completion Dma::writeCompleteScatterGather() XAxiDma_Bd *bd = nullptr, *curBd; auto txRing = XAxiDma_GetTxRing(&xDma); int ret = XST_FAILURE; + static size_t errcnt = 32; - if ((c.bds = XAxiDma_BdRingFromHw(txRing, 1, &bd)) == 0) + c.interrupts = irqs[mm2sInterrupt].irqController->waitForInterrupt(irqs[mm2sInterrupt].num); + + hwLock.lock(); + if ((c.bds = XAxiDma_BdRingFromHw(txRing, writeCoalesce, &bd)) < writeCoalesce) { - c.interrupts = irqs[mm2sInterrupt].irqController->waitForInterrupt(irqs[mm2sInterrupt].num); - //logger->info("Got {} interrupts (id: {}) from write channel", intrNum, irqs[mm2sInterrupt].num); - c.bds = XAxiDma_BdRingFromHw(txRing, 1, &bd); + logger->warn("Send partial batch of {}/{} BDs.", c.bds, writeCoalesce); + if(errcnt-- == 0) { + hwLock.unlock(); + throw RuntimeError("too many partial batches"); + } } // Acknowledge the interrupt auto irqStatus = XAxiDma_BdRingGetIrq(txRing); XAxiDma_BdRingAckIrq(txRing, irqStatus); - if (bd == nullptr) + if (c.bds == 0) { + c.bytes = 0; + hwLock.unlock(); + return c; + } + + if (bd == nullptr) { + hwLock.unlock(); throw RuntimeError("Bd was null."); + } curBd = bd; for (size_t i = 0; i < c.bds; i++) { ret = XAxiDma_BdGetSts(curBd); if ((ret & XAXIDMA_BD_STS_ALL_ERR_MASK) || (!(ret & XAXIDMA_BD_STS_COMPLETE_MASK))) { + hwLock.unlock(); throw RuntimeError("Bd Status register shows error: {}", ret); break; } @@ -378,9 +425,12 @@ Dma::Completion Dma::writeCompleteScatterGather() } ret = XAxiDma_BdRingFree(txRing, c.bds, bd); - if (ret != XST_SUCCESS) + if (ret != XST_SUCCESS) { + hwLock.unlock(); throw RuntimeError("Failed to free {} TX BDs {}", c.bds, ret); + } + hwLock.unlock(); return c; } @@ -394,11 +444,13 @@ Dma::Completion Dma::readCompleteScatterGather() c.interrupts = irqs[s2mmInterrupt].irqController->waitForInterrupt(irqs[s2mmInterrupt].num); + hwLock.lock(); // Wait until the data has been received by the RX channel. if ((c.bds = XAxiDma_BdRingFromHw(rxRing, readCoalesce, &bd)) < readCoalesce) { logger->warn("Got partial batch of {}/{} BDs.", c.bds, readCoalesce); if(errcnt-- == 0) { + hwLock.unlock(); throw RuntimeError("too many partial batches"); } } @@ -409,16 +461,20 @@ Dma::Completion Dma::readCompleteScatterGather() if (c.bds == 0) { c.bytes = 0; + hwLock.unlock(); return c; } - if (bd == nullptr) + if (bd == nullptr) { + hwLock.unlock(); throw RuntimeError("Bd was null."); + } curBd = bd; for (size_t i = 0; i < c.bds; i++) { ret = XAxiDma_BdGetSts(curBd); if ((ret & XAXIDMA_BD_STS_ALL_ERR_MASK) || (!(ret & XAXIDMA_BD_STS_COMPLETE_MASK))) { + hwLock.unlock(); throw RuntimeError("Bd Status register shows error: {}", ret); break; } @@ -429,14 +485,18 @@ Dma::Completion Dma::readCompleteScatterGather() // Free all processed RX BDs for future transmission. ret = XAxiDma_BdRingFree(rxRing, c.bds, bd); - if (ret != XST_SUCCESS) + if (ret != XST_SUCCESS) { + hwLock.unlock(); throw RuntimeError("Failed to free {} TX BDs {}.", c.bds, ret); + } + hwLock.unlock(); return c; } bool Dma::writeSimple(const void *buf, size_t len) { + hwLock.lock(); XAxiDma_BdRing *ring = XAxiDma_GetTxRing(&xDma); if (not ring->HasDRE) { @@ -444,8 +504,10 @@ bool Dma::writeSimple(const void *buf, size_t len) ? XAXIDMA_MICROMODE_MIN_BUF_ALIGN : ring->DataWidth - 1; - if (reinterpret_cast(buf) & mask) + if (reinterpret_cast(buf) & mask) { + hwLock.unlock(); return false; + } } const bool dmaChannelHalted = @@ -454,8 +516,10 @@ bool Dma::writeSimple(const void *buf, size_t len) const bool dmaToDeviceBusy = XAxiDma_Busy(&xDma, XAXIDMA_DMA_TO_DEVICE); // If the engine is doing a transfer, cannot submit - if (not dmaChannelHalted and dmaToDeviceBusy) + if (not dmaChannelHalted and dmaToDeviceBusy) { + hwLock.unlock(); return false; + } // Set lower 32 bit of source address XAxiDma_WriteReg(ring->ChanBase, XAXIDMA_SRCADDR_OFFSET, @@ -473,12 +537,13 @@ bool Dma::writeSimple(const void *buf, size_t len) // Set tail descriptor pointer XAxiDma_WriteReg(ring->ChanBase, XAXIDMA_BUFFLEN_OFFSET, len); - + hwLock.unlock(); return true; } bool Dma::readSimple(void *buf, size_t len) { + hwLock.lock(); XAxiDma_BdRing *ring = XAxiDma_GetRxRing(&xDma); if (not ring->HasDRE) { @@ -486,16 +551,20 @@ bool Dma::readSimple(void *buf, size_t len) ? XAXIDMA_MICROMODE_MIN_BUF_ALIGN : ring->DataWidth - 1; - if (reinterpret_cast(buf) & mask) + if (reinterpret_cast(buf) & mask) { + hwLock.unlock(); return false; + } } const bool dmaChannelHalted = XAxiDma_ReadReg(ring->ChanBase, XAXIDMA_SR_OFFSET) & XAXIDMA_HALTED_MASK; const bool deviceToDmaBusy = XAxiDma_Busy(&xDma, XAXIDMA_DEVICE_TO_DMA); // If the engine is doing a transfer, cannot submit - if (not dmaChannelHalted and deviceToDmaBusy) + if (not dmaChannelHalted and deviceToDmaBusy) { + hwLock.unlock(); return false; + } // Set lower 32 bit of destination address XAxiDma_WriteReg(ring->ChanBase, XAXIDMA_DESTADDR_OFFSET, LOWER_32_BITS(reinterpret_cast(buf))); @@ -512,6 +581,7 @@ bool Dma::readSimple(void *buf, size_t len) // Set tail descriptor pointer XAxiDma_WriteReg(ring->ChanBase, XAXIDMA_BUFFLEN_OFFSET, len); + hwLock.unlock(); return true; } @@ -521,11 +591,12 @@ Dma::Completion Dma::writeCompleteSimple() while (!(XAxiDma_IntrGetIrq(&xDma, XAXIDMA_DMA_TO_DEVICE) & XAXIDMA_IRQ_IOC_MASK)) c.interrupts = irqs[mm2sInterrupt].irqController->waitForInterrupt(irqs[mm2sInterrupt]); + hwLock.lock(); XAxiDma_IntrAckIrq(&xDma, XAXIDMA_IRQ_IOC_MASK, XAXIDMA_DMA_TO_DEVICE); const XAxiDma_BdRing *ring = XAxiDma_GetTxRing(&xDma); const size_t bytesWritten = XAxiDma_ReadReg(ring->ChanBase, XAXIDMA_BUFFLEN_OFFSET); - + hwLock.unlock(); c.bytes = bytesWritten; return c; } @@ -536,10 +607,12 @@ Dma::Completion Dma::readCompleteSimple() while (!(XAxiDma_IntrGetIrq(&xDma, XAXIDMA_DEVICE_TO_DMA) & XAXIDMA_IRQ_IOC_MASK)) c.interrupts = irqs[s2mmInterrupt].irqController->waitForInterrupt(irqs[s2mmInterrupt]); + hwLock.lock(); XAxiDma_IntrAckIrq(&xDma, XAXIDMA_IRQ_IOC_MASK, XAXIDMA_DEVICE_TO_DMA); const XAxiDma_BdRing *ring = XAxiDma_GetRxRing(&xDma); const size_t bytesRead = XAxiDma_ReadReg(ring->ChanBase, XAXIDMA_BUFFLEN_OFFSET); + hwLock.unlock(); c.bytes = bytesRead; return c; diff --git a/fpga/src/villas-fpga-ctrl.cpp b/fpga/src/villas-fpga-ctrl.cpp index 07fcf7dc8..6e8ec20c4 100644 --- a/fpga/src/villas-fpga-ctrl.cpp +++ b/fpga/src/villas-fpga-ctrl.cpp @@ -51,7 +51,6 @@ void readFromDmaToStdOut(std::shared_ptr dma, dma->makeAccesibleFromVA(b); } - size_t cur = 0, next = 1; std::ios::sync_with_stdio(false); From 62cab0c4dcac36869d3f2596d483ce7061a06eca Mon Sep 17 00:00:00 2001 From: Niklas Eiling Date: Wed, 18 Jan 2023 15:20:57 +0100 Subject: [PATCH 11/15] clean up README.md add project description and related projects (MIOB and DINO) Signed-off-by: Niklas Eiling --- fpga/README.md | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/fpga/README.md b/fpga/README.md index 1fb558ba8..9b774f6d6 100644 --- a/fpga/README.md +++ b/fpga/README.md @@ -2,8 +2,8 @@ [![build status](https://git.rwth-aachen.de/acs/public/villas/fpga/fpga/badges/master/pipeline.svg)](https://git.rwth-aachen.de/acs/public/villas/fpga/fpga/-/pipelines/) - -**TODO:** Write project description +VILLASfpga provides a flexbible, real-time capable interconnect between FPGAs and Linux, e.g., to connect simulators and devices for hardware-in-the loop simulations. VILLASfpga can guarantee fixed latencies in the nanosecond range. +VILLASfpga supports Xilinx FPGAs connected to a Linux system via PCI-Express or via a platform bus as found on MPSoC devices. ## Documentation @@ -19,7 +19,9 @@ User documentation is available here: - Steffen Vogel - Daniel Krebs [Institute for Automation of Complex Power Systems (ACS)](http://www.acs.eonerc.rwth-aachen.de) -[EON Energy Research Center (EONERC)](http://www.eonerc.rwth-aachen.de) [RWTH University Aachen, Germany](http://www.rwth-aachen.de) From 1e3294d14c068a4c9622987f532f8f435c4aaabd Mon Sep 17 00:00:00 2001 From: Niklas Eiling Date: Wed, 18 Jan 2023 15:21:50 +0100 Subject: [PATCH 12/15] start readFromDmaToStdOut in separate thread Signed-off-by: Niklas Eiling --- fpga/src/villas-fpga-ctrl.cpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/fpga/src/villas-fpga-ctrl.cpp b/fpga/src/villas-fpga-ctrl.cpp index 6e8ec20c4..206d829e2 100644 --- a/fpga/src/villas-fpga-ctrl.cpp +++ b/fpga/src/villas-fpga-ctrl.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -151,12 +152,20 @@ int main(int argc, char* argv[]) const fpga::ConnectString parsedConnectString(connectStr); parsedConnectString.configCrossBar(dma, aurora_channels); + std::unique_ptr stdInThread = nullptr; if (!noDma && parsedConnectString.isDstStdout()) { auto formatter = fpga::getBufferedSampleFormatter(outputFormat, 16); - readFromDmaToStdOut(std::move(dma), std::move(formatter)); - } else if (!noDma && parsedConnectString.isSrcStdin()) { + // We copy the dma shared ptr but move the fomatter unqiue ptr as we don't need it + // in this thread anymore + stdInThread = std::make_unique(readFromDmaToStdOut, dma, std::move(formatter)); + } + if (!noDma && parsedConnectString.isSrcStdin()) { } + + if (stdInThread) { + stdInThread->join(); + } } catch (const RuntimeError &e) { logger->error("Error: {}", e.what()); return -1; From 87968ab73e0d7d59a16f9b801b032fa477f44849 Mon Sep 17 00:00:00 2001 From: Niklas Eiling Date: Mon, 30 Jan 2023 11:35:21 +0100 Subject: [PATCH 13/15] enable reading from stdin to DMA in villas-fpga-ctrl Signed-off-by: Niklas Eiling --- fpga/src/villas-fpga-ctrl.cpp | 51 ++++++++++++++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/fpga/src/villas-fpga-ctrl.cpp b/fpga/src/villas-fpga-ctrl.cpp index 206d829e2..def5b2e9a 100644 --- a/fpga/src/villas-fpga-ctrl.cpp +++ b/fpga/src/villas-fpga-ctrl.cpp @@ -36,6 +36,55 @@ using namespace villas; static std::shared_ptr pciDevices; static auto logger = villas::logging.get("ctrl"); +void writeToDmaFromStdIn(std::shared_ptr dma) +{ + auto &alloc = villas::HostRam::getAllocator(); + + const std::shared_ptr block[] = { + alloc.allocateBlock(0x200 * sizeof(uint32_t)), + alloc.allocateBlock(0x200 * sizeof(uint32_t)) + }; + villas::MemoryAccessor mem[] = {*block[0], *block[1]}; + + for (auto b : block) { + dma->makeAccesibleFromVA(b); + } + + size_t cur = 0, next = 1; + std::ios::sync_with_stdio(false); + std::string line; + bool firstXfer = true; + + while(true) { + // Read values from stdin + + std::getline(std::cin, line); + auto values = villas::utils::tokenize(line, ";"); + + size_t i = 0; + for (auto &value: values) { + if (value.empty()) continue; + + const float number = std::stof(value); + mem[cur][i++] = number; + } + + // Initiate write transfer + bool state = dma->write(*block[cur], i * sizeof(float)); + if (!state) + logger->error("Failed to write to device"); + + if (!firstXfer) { + auto bytesWritten = dma->writeComplete(); + logger->debug("Wrote {} bytes", bytesWritten.bytes); + } else { + firstXfer = false; + } + + cur = next; + next = (next + 1) % (sizeof(mem) / sizeof(mem[0])); + } +} void readFromDmaToStdOut(std::shared_ptr dma, std::unique_ptr formatter) @@ -160,7 +209,7 @@ int main(int argc, char* argv[]) stdInThread = std::make_unique(readFromDmaToStdOut, dma, std::move(formatter)); } if (!noDma && parsedConnectString.isSrcStdin()) { - + writeToDmaFromStdIn(dma); } if (stdInThread) { From 10ecf6c9a64447359822c086bb9d5a63ea79f84b Mon Sep 17 00:00:00 2001 From: Niklas Eiling Date: Mon, 30 Jan 2023 16:18:42 +0100 Subject: [PATCH 14/15] fix memoryBlocksMapped being defined in both Card and PcieCard Signed-off-by: Niklas Eiling --- fpga/include/villas/fpga/pcie_card.hpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fpga/include/villas/fpga/pcie_card.hpp b/fpga/include/villas/fpga/pcie_card.hpp index f7c16c805..8cc89d119 100644 --- a/fpga/include/villas/fpga/pcie_card.hpp +++ b/fpga/include/villas/fpga/pcie_card.hpp @@ -71,10 +71,6 @@ public: bool mapMemoryBlock(const MemoryBlock &block); bool unmapMemoryBlock(const MemoryBlock &block); -private: - // Cache a set of already mapped memory blocks - std::set memoryBlocksMapped; - public: // TODO: make this private bool doReset; // Reset VILLASfpga during startup? int affinity; // Affinity for MSI interrupts From c80e5c083ddf15d6ad17970f5244807d1adbb795 Mon Sep 17 00:00:00 2001 From: Niklas Eiling Date: Mon, 30 Jan 2023 16:26:11 +0100 Subject: [PATCH 15/15] remove map and umapmemoryblock from PcieCard Signed-off-by: Niklas Eiling --- fpga/lib/pcie_card.cpp | 63 +----------------------------------------- 1 file changed, 1 insertion(+), 62 deletions(-) diff --git a/fpga/lib/pcie_card.cpp b/fpga/lib/pcie_card.cpp index c75cc38f6..07b256fa3 100644 --- a/fpga/lib/pcie_card.cpp +++ b/fpga/lib/pcie_card.cpp @@ -152,7 +152,7 @@ std::list> PCIeCardFactory::make(json_t *json, std::sh PCIeCard::~PCIeCard() { - + } std::shared_ptr PCIeCard::lookupIp(const std::string &name) const @@ -188,67 +188,6 @@ std::shared_ptr PCIeCard::lookupIp(const ip::IpIdentifier &id) const return nullptr; } -bool PCIeCard::unmapMemoryBlock(const MemoryBlock &block) -{ - if (memoryBlocksMapped.find(block.getAddrSpaceId()) == memoryBlocksMapped.end()) { - throw std::runtime_error("Block " + std::to_string(block.getAddrSpaceId()) + " is not mapped but was requested to be unmapped."); - } - - auto &mm = MemoryManager::get(); - - auto translation = mm.getTranslation(addrSpaceIdDeviceToHost, block.getAddrSpaceId()); - - const uintptr_t iova = translation.getLocalAddr(0); - const size_t size = translation.getSize(); - - logger->debug("Unmap block {} at IOVA {:#x} of size {:#x}", - block.getAddrSpaceId(), iova, size); - vfioContainer->memoryUnmap(iova, size); - - memoryBlocksMapped.erase(block.getAddrSpaceId()); - - return true; -} - -bool PCIeCard::mapMemoryBlock(const MemoryBlock &block) -{ - if (not vfioContainer->isIommuEnabled()) { - logger->warn("VFIO mapping not supported without IOMMU"); - return false; - } - - auto &mm = MemoryManager::get(); - const auto &addrSpaceId = block.getAddrSpaceId(); - - if (memoryBlocksMapped.find(addrSpaceId) != memoryBlocksMapped.end()) - // Block already mapped - return true; - else - logger->debug("Create VFIO mapping for {}", addrSpaceId); - - auto translationFromProcess = mm.getTranslationFromProcess(addrSpaceId); - uintptr_t processBaseAddr = translationFromProcess.getLocalAddr(0); - uintptr_t iovaAddr = vfioContainer->memoryMap(processBaseAddr, - UINTPTR_MAX, - block.getSize()); - - if (iovaAddr == UINTPTR_MAX) { - logger->error("Cannot map memory at {:#x} of size {:#x}", - processBaseAddr, block.getSize()); - return false; - } - - mm.createMapping(iovaAddr, 0, block.getSize(), - "VFIO-D2H", - this->addrSpaceIdDeviceToHost, - addrSpaceId); - - // Remember that this block has already been mapped for later - memoryBlocksMapped.insert(addrSpaceId); - - return true; -} - bool PCIeCard::init() { logger = getLogger();