mirror of
https://git.rwth-aachen.de/acs/public/villas/node/
synced 2025-03-09 00:00:00 +01:00
Merge pull request #76 from VILLASframework/dma-ingress
Device to Host DMA
This commit is contained in:
commit
8fbf0f0669
14 changed files with 574 additions and 356 deletions
2
fpga/.vscode/launch.json
vendored
2
fpga/.vscode/launch.json
vendored
|
@ -10,7 +10,7 @@
|
|||
"request": "launch",
|
||||
"program": "${workspaceFolder}/build/src/villas-fpga-ctrl",
|
||||
"args": [
|
||||
"-c", "${workspaceFolder}/etc/fpgas.json", "--connect", "\"2<->stdout\"", "--no-dma"
|
||||
"-c", "${workspaceFolder}/etc/fpgas.json", "--connect", "\"2<->stdout\""
|
||||
],
|
||||
"stopAtEntry": false,
|
||||
"cwd": "${workspaceFolder}",
|
||||
|
|
|
@ -2,8 +2,8 @@
|
|||
|
||||
[](https://git.rwth-aachen.de/acs/public/villas/fpga/fpga/-/pipelines/)
|
||||
|
||||
|
||||
**TODO:** Write project description
|
||||
VILLASfpga provides a flexible, real-time capable interconnect between FPGAs and Linux, e.g., to connect simulators and devices for hardware-in-the-loop simulations. VILLASfpga can guarantee fixed latencies in the nanosecond range.
|
||||
VILLASfpga supports Xilinx FPGAs connected to a Linux system via PCI-Express or via a platform bus as found on MPSoC devices.
|
||||
|
||||
## Documentation
|
||||
|
||||
|
@ -19,7 +19,9 @@ User documentation is available here: <https://villas.fein-aachen.org/doc/fpga.h
|
|||
|
||||
This project is released under the terms of the [Apache 2.0](LICENSE) license:
|
||||
|
||||
SPDX-FileCopyrightText: 2017 Institute for Automation of Complex Power Systems, EONERC
|
||||
SPDX-FileCopyrightText: 2022-2023 Niklas Eiling
|
||||
SPDX-FileCopyrightText: 2018-2023 Steffen Vogel
|
||||
SPDX-FileCopyrightText: 2018 Daniel Krebs
|
||||
SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
We kindly ask all academic publications employing components of VILLASframework to cite one of the following papers:
|
||||
|
@ -27,16 +29,16 @@ We kindly ask all academic publications employing components of VILLASframework
|
|||
- A. Monti et al., "[A Global Real-Time Superlab: Enabling High Penetration of Power Electronics in the Electric Grid](https://ieeexplore.ieee.org/document/8458285/)," in IEEE Power Electronics Magazine, vol. 5, no. 3, pp. 35-44, Sept. 2018.
|
||||
- S. Vogel, M. Mirz, L. Razik and A. Monti, "[An open solution for next-generation real-time power system simulation](http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8245739&isnumber=8244404)," 2017 IEEE Conference on Energy Internet and Energy System Integration (EI2), Beijing, 2017, pp. 1-6.
|
||||
|
||||
For other licensing options please consult [Prof. Antonello Monti](mailto:amonti@eonerc.rwth-aachen.de).
|
||||
## Related Projects
|
||||
|
||||
- [MIOB](https://github.com/RWTH-ACS/miob)
|
||||
- [DINO](https://github.com/RWTH-ACS/dino)
|
||||
|
||||
## Contact
|
||||
|
||||
[](http://www.acs.eonerc.rwth-aachen.de)
|
||||
|
||||
- Niklas Eiling <niklas.eiling@eonerc.rwth-aachen.de>
|
||||
- Steffen Vogel <post@steffenvogel.de>
|
||||
- Daniel Krebs <dkrebs@eonerc.rwth-aachen.de>
|
||||
|
||||
[Institute for Automation of Complex Power Systems (ACS)](http://www.acs.eonerc.rwth-aachen.de)
|
||||
[EON Energy Research Center (EONERC)](http://www.eonerc.rwth-aachen.de)
|
||||
[RWTH University Aachen, Germany](http://www.rwth-aachen.de)
|
||||
|
|
|
@ -1 +1 @@
|
|||
Subproject commit 729b877a405b3bd80205fa1c54bfedbf2f030dc2
|
||||
Subproject commit 90e0c3df70200f0eccbb3b145393f81e31e15ebb
|
|
@ -36,17 +36,17 @@ public:
|
|||
|
||||
virtual ~Card();
|
||||
|
||||
bool mapMemoryBlock(const std::shared_ptr<MemoryBlock> block);
|
||||
bool unmapMemoryBlock(const MemoryBlock &block);
|
||||
std::shared_ptr<kernel::vfio::Container> vfioContainer;
|
||||
|
||||
std::shared_ptr<ip::Core> lookupIp(const std::string &name) const;
|
||||
std::shared_ptr<ip::Core> lookupIp(const Vlnv &vlnv) const;
|
||||
std::shared_ptr<ip::Core> lookupIp(const ip::IpIdentifier &id) const;
|
||||
|
||||
bool mapMemoryBlock(const MemoryBlock &block);
|
||||
bool unmapMemoryBlock(const MemoryBlock &block);
|
||||
std::shared_ptr<kernel::vfio::Container> vfioContainer;
|
||||
|
||||
protected:
|
||||
// Cache a set of already mapped memory blocks
|
||||
std::set<MemoryManager::AddressSpaceId> memoryBlocksMapped;
|
||||
// Keep a map of already mapped memory blocks
|
||||
std::map<MemoryManager::AddressSpaceId, std::shared_ptr<MemoryBlock>> memoryBlocksMapped;
|
||||
|
||||
Logger logger;
|
||||
};
|
||||
|
|
|
@ -38,19 +38,26 @@ public:
|
|||
// Stream to memory-mapped (S2MM)
|
||||
bool read(const MemoryBlock &mem, size_t len);
|
||||
|
||||
size_t writeComplete()
|
||||
// Result reported by writeComplete() / readComplete().
// A default-constructed Completion has all counters set to zero.
struct Completion {
	Completion() = default;

	size_t bytes = 0;      // Number of bytes transferred
	size_t bds = 0;        // Number of buffer descriptors used (only for scatter-gather)
	size_t interrupts = 0; // Number of interrupts received since last call (only if interrupts enabled)
};
|
||||
|
||||
Completion writeComplete()
|
||||
{
|
||||
return hasScatterGather() ? writeCompleteScatterGather() : writeCompleteSimple();
|
||||
}
|
||||
|
||||
size_t readComplete()
|
||||
Completion readComplete()
|
||||
{
|
||||
return hasScatterGather() ? readCompleteScatterGather() : readCompleteSimple();
|
||||
}
|
||||
|
||||
bool memcpy(const MemoryBlock &src, const MemoryBlock &dst, size_t len);
|
||||
|
||||
void makeAccesibleFromVA(const MemoryBlock &mem);
|
||||
void makeAccesibleFromVA(std::shared_ptr<MemoryBlock> mem);
|
||||
bool makeInaccesibleFromVA(const MemoryBlock &mem);
|
||||
|
||||
inline
|
||||
|
@ -69,22 +76,6 @@ public:
|
|||
return getMasterPort(mm2sPort);
|
||||
}
|
||||
|
||||
private:
|
||||
bool writeScatterGather(const void* buf, size_t len);
|
||||
bool readScatterGather(void* buf, size_t len);
|
||||
size_t writeCompleteScatterGather();
|
||||
size_t readCompleteScatterGather();
|
||||
|
||||
bool writeSimple(const void* buf, size_t len);
|
||||
bool readSimple(void* buf, size_t len);
|
||||
size_t writeCompleteSimple();
|
||||
size_t readCompleteSimple();
|
||||
|
||||
void setupScatterGather();
|
||||
void setupScatterGatherRingRx();
|
||||
void setupScatterGatherRingTx();
|
||||
|
||||
public:
|
||||
static constexpr const char* s2mmPort = "S2MM";
|
||||
static constexpr const char* mm2sPort = "MM2S";
|
||||
|
||||
|
@ -92,8 +83,21 @@ public:
|
|||
|
||||
virtual
|
||||
void dump() override;
|
||||
|
||||
private:
|
||||
bool writeScatterGather(const void* buf, size_t len);
|
||||
bool readScatterGather(void* buf, size_t len);
|
||||
Completion writeCompleteScatterGather();
|
||||
Completion readCompleteScatterGather();
|
||||
|
||||
bool writeSimple(const void* buf, size_t len);
|
||||
bool readSimple(void* buf, size_t len);
|
||||
Completion writeCompleteSimple();
|
||||
Completion readCompleteSimple();
|
||||
|
||||
void setupScatterGather();
|
||||
void setupScatterGatherRingRx();
|
||||
void setupScatterGatherRingTx();
|
||||
|
||||
static constexpr char registerMemory[] = "Reg";
|
||||
|
||||
static constexpr char mm2sInterrupt[] = "mm2s_introut";
|
||||
|
@ -115,6 +119,7 @@ private:
|
|||
XAxiDma xDma;
|
||||
XAxiDma_Config xConfig;
|
||||
|
||||
std::mutex hwLock;
|
||||
|
||||
bool configDone = false;
|
||||
// use polling to wait for DMA completion or interrupts via efds
|
||||
|
@ -133,9 +138,10 @@ private:
|
|||
// When using SG: ringBdSize is the maximum number of BDs usable in the ring
|
||||
// Depending on alignment, the actual number of BDs usable can be smaller
|
||||
static constexpr size_t requestedRingBdSize = 2048;
|
||||
uint32_t actualRingBdSize = XAxiDma_BdRingCntCalc(XAXIDMA_BD_MINIMUM_ALIGNMENT, requestedRingBdSize);
|
||||
MemoryBlock::UniquePtr sgRingTx;
|
||||
MemoryBlock::UniquePtr sgRingRx;
|
||||
static constexpr size_t requestedRingBdSizeMemory = requestedRingBdSize * sizeof(XAxiDma_Bd);
|
||||
uint32_t actualRingBdSize = XAxiDma_BdRingCntCalc(XAXIDMA_BD_MINIMUM_ALIGNMENT, requestedRingBdSizeMemory);
|
||||
std::shared_ptr<MemoryBlock> sgRingTx;
|
||||
std::shared_ptr<MemoryBlock> sgRingRx;
|
||||
};
|
||||
|
||||
class DmaFactory : NodeFactory {
|
||||
|
|
|
@ -71,10 +71,6 @@ public:
|
|||
bool mapMemoryBlock(const MemoryBlock &block);
|
||||
bool unmapMemoryBlock(const MemoryBlock &block);
|
||||
|
||||
private:
|
||||
// Cache a set of already mapped memory blocks
|
||||
std::set<MemoryManager::AddressSpaceId> memoryBlocksMapped;
|
||||
|
||||
public: // TODO: make this private
|
||||
bool doReset; // Reset VILLASfpga during startup?
|
||||
int affinity; // Affinity for MSI interrupts
|
||||
|
|
|
@ -1,7 +1,5 @@
|
|||
/** Helper function for directly using VILLASfpga outside of VILLASnode
|
||||
*
|
||||
* Author: Niklas Eiling <niklas.eiling@eonerc.rwth-aachen.de>
|
||||
* SPDX-FileCopyrightText: 2022 Steffen Vogel <post@steffenvogel.de>
|
||||
* Author: Niklas Eiling <niklas.eiling@rwth-aachen.de>
|
||||
* SPDX-FileCopyrightText: 2022 Niklas Eiling <niklas.eiling@eonerc.rwth-aachen.de>
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*********************************************************************************/
|
||||
|
@ -17,11 +15,120 @@ namespace fpga {
|
|||
std::shared_ptr<fpga::PCIeCard>
|
||||
setupFpgaCard(const std::string &configFile, const std::string &fpgaName);
|
||||
|
||||
void configCrossBarUsingConnectString(std::string connectString,
|
||||
std::shared_ptr<villas::fpga::ip::Dma> dma,
|
||||
std::vector<std::shared_ptr<fpga::ip::AuroraXilinx>>& aurora_channels);
|
||||
|
||||
void setupColorHandling();
|
||||
|
||||
class ConnectString {
|
||||
public:
|
||||
ConnectString(std::string& connectString, int maxPortNum = 7);
|
||||
|
||||
void parseString(std::string& connectString);
|
||||
int portStringToInt(std::string &str) const;
|
||||
|
||||
void configCrossBar(std::shared_ptr<villas::fpga::ip::Dma> dma,
|
||||
std::vector<std::shared_ptr<fpga::ip::AuroraXilinx>>& aurora_channels) const;
|
||||
|
||||
bool isBidirectional() const { return bidirectional; };
|
||||
bool isDmaLoopback() const { return dmaLoopback; };
|
||||
bool isSrcStdin() const { return srcIsStdin; };
|
||||
bool isDstStdout() const { return dstIsStdout; };
|
||||
int getSrcAsInt() const { return srcAsInt; };
|
||||
int getDstAsInt() const { return dstAsInt; };
|
||||
protected:
|
||||
villas::Logger log;
|
||||
int maxPortNum;
|
||||
bool bidirectional;
|
||||
bool invert;
|
||||
int srcAsInt;
|
||||
int dstAsInt;
|
||||
bool srcIsStdin;
|
||||
bool dstIsStdout;
|
||||
bool dmaLoopback;
|
||||
};
|
||||
|
||||
// Abstract base for formatters that render float samples as text into a
// pre-allocated buffer and write the whole buffer to a stream in one go.
//
// The buffer consists of bufSamples slots of bufSampleSize bytes each, plus
// one extra byte for a final '\0'. output() prints the buffer as a single C
// string, so an implementation of format() has to fill its slot completely
// for the following slots to be printed as well.
class BufferedSampleFormatter {
public:
	// Derived formatters are owned and destroyed through
	// std::unique_ptr<BufferedSampleFormatter>, so the destructor has to be
	// virtual for that deletion to be well-defined.
	virtual ~BufferedSampleFormatter() = default;

	// Render one sample into the next free slot (see nextBufPos()).
	virtual void format(float value) = 0;

	// Write all buffered text to out, flush the stream and reset the buffer.
	virtual void output(std::ostream& out)
	{
		out << buf.data() << std::flush;
		clearBuf();
	}

	// Mark every used slot as empty by zeroing its first byte and rewind to
	// the first slot. Stops at the first slot that is already empty.
	virtual void clearBuf()
	{
		for (size_t i = 0; i < bufSamples && buf[i*bufSampleSize] != '\0'; i++) {
			buf[i*bufSampleSize] = '\0';
		}
		currentBufLoc = 0;
	}

protected:
	std::vector<char> buf;
	const size_t bufSamples;     // Number of slots in buf
	const size_t bufSampleSize;  // Size of one slot in bytes (without '\0')
	size_t currentBufLoc;        // Index of the next free slot

	BufferedSampleFormatter(const size_t bufSamples, const size_t bufSampleSize) :
		buf(bufSamples*bufSampleSize+1), // Leave room for a final `\0'
		bufSamples(bufSamples),
		bufSampleSize(bufSampleSize),
		currentBufLoc(0) {};

	BufferedSampleFormatter() = delete;
	BufferedSampleFormatter(const BufferedSampleFormatter&) = delete;

	// Return the start of the next free slot and advance to the following one.
	// The caller may write up to bufSampleSize bytes plus a terminating '\0'.
	// Throws std::out_of_range (from std::vector::at) if all slots are in use,
	// i.e. if format() is called more than bufSamples times without an
	// intervening output() / clearBuf().
	virtual char* nextBufPos()
	{
		// Probe the last byte the caller may touch (the slot's terminating
		// '\0') so that an exhausted buffer raises an exception instead of
		// handing out a pointer that leads to an out-of-bounds write.
		static_cast<void>(buf.at((currentBufLoc + 1) * bufSampleSize));

		return &buf[(currentBufLoc++)*bufSampleSize];
	}
};
|
||||
|
||||
class BufferedSampleFormatterShort : public BufferedSampleFormatter {
|
||||
public:
|
||||
BufferedSampleFormatterShort(size_t bufSizeInSamples) :
|
||||
BufferedSampleFormatter(bufSizeInSamples, formatStringSize) {};
|
||||
|
||||
virtual void format(float value) override
|
||||
{
|
||||
if (std::snprintf(nextBufPos(), formatStringSize+1, formatString, value) > (int)formatStringSize) {
|
||||
throw RuntimeError("Output buffer too small");
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
static constexpr char formatString[] = "%7f\n";
|
||||
static constexpr size_t formatStringSize = 9;
|
||||
};
|
||||
|
||||
class BufferedSampleFormatterLong : public BufferedSampleFormatter {
|
||||
public:
|
||||
BufferedSampleFormatterLong(size_t bufSizeInSamples) :
|
||||
BufferedSampleFormatter(bufSizeInSamples, formatStringSize),
|
||||
sampleCnt(0) {};
|
||||
|
||||
virtual void format(float value) override
|
||||
{
|
||||
if (std::snprintf(nextBufPos(), formatStringSize+1, formatString, sampleCnt, value) > (int)formatStringSize) {
|
||||
throw RuntimeError("Output buffer too small");
|
||||
}
|
||||
sampleCnt = (sampleCnt+1) % 100000;
|
||||
}
|
||||
|
||||
protected:
|
||||
static constexpr char formatString[] = "%05zd: %7f\n";
|
||||
static constexpr size_t formatStringSize = 16;
|
||||
size_t sampleCnt;
|
||||
};
|
||||
|
||||
|
||||
std::unique_ptr<BufferedSampleFormatter> getBufferedSampleFormatter(
|
||||
const std::string &format,
|
||||
size_t bufSizeInSamples)
|
||||
{
|
||||
if (format == "long") {
|
||||
return std::make_unique<BufferedSampleFormatterLong>(bufSizeInSamples);
|
||||
} else if (format == "short") {
|
||||
return std::make_unique<BufferedSampleFormatterShort>(bufSizeInSamples);
|
||||
} else {
|
||||
throw RuntimeError("Unknown output format '{}'", format);
|
||||
}
|
||||
}
|
||||
|
||||
} /* namespace fpga */
|
||||
} /* namespace villas */
|
||||
|
|
|
@ -22,13 +22,13 @@ Card::~Card()
|
|||
|
||||
// Unmap all memory blocks
|
||||
for (auto &mappedMemoryBlock : memoryBlocksMapped) {
|
||||
auto translation = mm.getTranslation(addrSpaceIdDeviceToHost, mappedMemoryBlock);
|
||||
auto translation = mm.getTranslation(addrSpaceIdDeviceToHost, mappedMemoryBlock.first);
|
||||
|
||||
const uintptr_t iova = translation.getLocalAddr(0);
|
||||
const size_t size = translation.getSize();
|
||||
|
||||
logger->debug("Unmap block {} at IOVA {:#x} of size {:#x}",
|
||||
mappedMemoryBlock, iova, size);
|
||||
mappedMemoryBlock.first, iova, size);
|
||||
vfioContainer->memoryUnmap(iova, size);
|
||||
}
|
||||
}
|
||||
|
@ -57,82 +57,73 @@ std::shared_ptr<ip::Core> Card::lookupIp(const Vlnv &vlnv) const
|
|||
|
||||
std::shared_ptr<ip::Core> Card::lookupIp(const ip::IpIdentifier &id) const
|
||||
{
|
||||
for(auto &ip : ips) {
|
||||
if(*ip == id) {
|
||||
return ip;
|
||||
}
|
||||
}
|
||||
for (auto &ip : ips) {
|
||||
if (*ip == id) {
|
||||
return ip;
|
||||
}
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
bool Card::unmapMemoryBlock(const MemoryBlock &block)
|
||||
bool Card::unmapMemoryBlock(const MemoryBlock& block)
|
||||
{
|
||||
if(memoryBlocksMapped.find(block.getAddrSpaceId())
|
||||
== memoryBlocksMapped.end()) {
|
||||
throw std::runtime_error(
|
||||
"Block " + std::to_string(block.getAddrSpaceId())
|
||||
+ " is not mapped but was requested to be unmapped.");
|
||||
}
|
||||
if (memoryBlocksMapped.find(block.getAddrSpaceId()) == memoryBlocksMapped.end()) {
|
||||
throw std::runtime_error("Block " + std::to_string(block.getAddrSpaceId()) + " is not mapped but was requested to be unmapped.");
|
||||
}
|
||||
|
||||
auto &mm = MemoryManager::get();
|
||||
auto &mm = MemoryManager::get();
|
||||
|
||||
auto translation = mm.getTranslation(addrSpaceIdDeviceToHost,
|
||||
block.getAddrSpaceId());
|
||||
auto translation = mm.getTranslation(addrSpaceIdDeviceToHost, block.getAddrSpaceId());
|
||||
|
||||
const uintptr_t iova = translation.getLocalAddr(0);
|
||||
const size_t size = translation.getSize();
|
||||
const uintptr_t iova = translation.getLocalAddr(0);
|
||||
const size_t size = translation.getSize();
|
||||
|
||||
logger->debug("Unmap block {} at IOVA {:#x} of size {:#x}",
|
||||
block.getAddrSpaceId(),
|
||||
iova,
|
||||
size);
|
||||
vfioContainer->memoryUnmap(iova, size);
|
||||
logger->debug("Unmap block {} at IOVA {:#x} of size {:#x}",
|
||||
block.getAddrSpaceId(), iova, size);
|
||||
vfioContainer->memoryUnmap(iova, size);
|
||||
|
||||
memoryBlocksMapped.erase(block.getAddrSpaceId());
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Card::mapMemoryBlock(const MemoryBlock &block)
|
||||
{
|
||||
if(not vfioContainer->isIommuEnabled()) {
|
||||
logger->warn("VFIO mapping not supported without IOMMU");
|
||||
return false;
|
||||
}
|
||||
|
||||
auto &mm = MemoryManager::get();
|
||||
const auto &addrSpaceId = block.getAddrSpaceId();
|
||||
|
||||
if(memoryBlocksMapped.find(addrSpaceId) != memoryBlocksMapped.end())
|
||||
// Block already mapped
|
||||
return true;
|
||||
else
|
||||
logger->debug("Create VFIO mapping for {}", addrSpaceId);
|
||||
|
||||
auto translationFromProcess
|
||||
= mm.getTranslationFromProcess(addrSpaceId);
|
||||
uintptr_t processBaseAddr = translationFromProcess.getLocalAddr(0);
|
||||
uintptr_t iovaAddr = vfioContainer->memoryMap(processBaseAddr,
|
||||
UINTPTR_MAX,
|
||||
block.getSize());
|
||||
|
||||
if(iovaAddr == UINTPTR_MAX) {
|
||||
logger->error("Cannot map memory at {:#x} of size {:#x}",
|
||||
processBaseAddr,
|
||||
block.getSize());
|
||||
return false;
|
||||
}
|
||||
|
||||
mm.createMapping(iovaAddr,
|
||||
0,
|
||||
block.getSize(),
|
||||
"VFIO-D2H",
|
||||
this->addrSpaceIdDeviceToHost,
|
||||
addrSpaceId);
|
||||
|
||||
// Remember that this block has already been mapped for later
|
||||
memoryBlocksMapped.insert(addrSpaceId);
|
||||
memoryBlocksMapped.erase(block.getAddrSpaceId());
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool Card::mapMemoryBlock(const std::shared_ptr<MemoryBlock> block)
|
||||
{
|
||||
if (not vfioContainer->isIommuEnabled()) {
|
||||
logger->warn("VFIO mapping not supported without IOMMU");
|
||||
return false;
|
||||
}
|
||||
|
||||
auto &mm = MemoryManager::get();
|
||||
const auto &addrSpaceId = block->getAddrSpaceId();
|
||||
|
||||
if (memoryBlocksMapped.find(addrSpaceId) != memoryBlocksMapped.end())
|
||||
// Block already mapped
|
||||
return true;
|
||||
else
|
||||
logger->debug("Create VFIO mapping for {}", addrSpaceId);
|
||||
|
||||
auto translationFromProcess = mm.getTranslationFromProcess(addrSpaceId);
|
||||
uintptr_t processBaseAddr = translationFromProcess.getLocalAddr(0);
|
||||
uintptr_t iovaAddr = vfioContainer->memoryMap(processBaseAddr,
|
||||
UINTPTR_MAX,
|
||||
block->getSize());
|
||||
|
||||
if (iovaAddr == UINTPTR_MAX) {
|
||||
logger->error("Cannot map memory at {:#x} of size {:#x}",
|
||||
processBaseAddr, block->getSize());
|
||||
return false;
|
||||
}
|
||||
|
||||
mm.createMapping(iovaAddr, 0, block->getSize(),
|
||||
"VFIO-D2H",
|
||||
this->addrSpaceIdDeviceToHost,
|
||||
addrSpaceId);
|
||||
|
||||
// Remember that this block has already been mapped for later
|
||||
memoryBlocksMapped.insert({addrSpaceId, block});
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -33,6 +33,7 @@ bool Dma::init()
|
|||
|
||||
xConfig.BaseAddr = getBaseAddr(registerMemory);
|
||||
|
||||
hwLock.lock();
|
||||
if (XAxiDma_CfgInitialize(&xDma, &xConfig) != XST_SUCCESS) {
|
||||
logger->error("Cannot initialize Xilinx DMA driver");
|
||||
return false;
|
||||
|
@ -45,6 +46,7 @@ bool Dma::init()
|
|||
else
|
||||
logger->debug("DMA selftest passed");
|
||||
|
||||
hwLock.unlock();
|
||||
// Map buffer descriptors
|
||||
if (hasScatterGather()) {
|
||||
if (actualRingBdSize < 2*readCoalesce || actualRingBdSize < 2*writeCoalesce) {
|
||||
|
@ -54,8 +56,6 @@ bool Dma::init()
|
|||
setupScatterGather();
|
||||
}
|
||||
|
||||
|
||||
|
||||
irqs[mm2sInterrupt].irqController->enableInterrupt(irqs[mm2sInterrupt], polling);
|
||||
irqs[s2mmInterrupt].irqController->enableInterrupt(irqs[s2mmInterrupt], polling);
|
||||
|
||||
|
@ -72,6 +72,7 @@ void Dma::setupScatterGatherRingRx()
|
|||
{
|
||||
int ret;
|
||||
|
||||
hwLock.lock();
|
||||
auto *rxRingPtr = XAxiDma_GetRxRing(&xDma);
|
||||
|
||||
// Disable all RX interrupts before RxBD space setup
|
||||
|
@ -82,9 +83,9 @@ void Dma::setupScatterGatherRingRx()
|
|||
|
||||
// Allocate and map space for BD ring in host RAM
|
||||
auto &alloc = villas::HostRam::getAllocator();
|
||||
sgRingRx = alloc.allocateBlock(requestedRingBdSize * sizeof(uint16_t) * XAXIDMA_BD_NUM_WORDS);
|
||||
sgRingRx = alloc.allocateBlock(requestedRingBdSizeMemory);
|
||||
|
||||
if (not card->mapMemoryBlock(*sgRingRx))
|
||||
if (not card->mapMemoryBlock(sgRingRx))
|
||||
throw RuntimeError("Memory not accessible by DMA");
|
||||
|
||||
auto &mm = MemoryManager::get();
|
||||
|
@ -111,12 +112,15 @@ void Dma::setupScatterGatherRingRx()
|
|||
ret = XAxiDma_BdRingStart(rxRingPtr);
|
||||
if (ret != XST_SUCCESS)
|
||||
throw RuntimeError("Failed to start TX ring: {}", ret);
|
||||
|
||||
hwLock.unlock();
|
||||
}
|
||||
|
||||
void Dma::setupScatterGatherRingTx()
|
||||
{
|
||||
int ret;
|
||||
|
||||
hwLock.lock();
|
||||
auto *txRingPtr = XAxiDma_GetTxRing(&xDma);
|
||||
|
||||
// Disable all TX interrupts before TxBD space setup
|
||||
|
@ -127,9 +131,9 @@ void Dma::setupScatterGatherRingTx()
|
|||
|
||||
// Allocate and map space for BD ring in host RAM
|
||||
auto &alloc = villas::HostRam::getAllocator();
|
||||
sgRingTx = alloc.allocateBlock(requestedRingBdSize * sizeof(uint16_t) * XAXIDMA_BD_NUM_WORDS);
|
||||
sgRingTx = alloc.allocateBlock(requestedRingBdSizeMemory);
|
||||
|
||||
if (not card->mapMemoryBlock(*sgRingTx))
|
||||
if (not card->mapMemoryBlock(sgRingTx))
|
||||
throw RuntimeError("Memory not accessible by DMA");
|
||||
|
||||
auto &mm = MemoryManager::get();
|
||||
|
@ -156,6 +160,7 @@ void Dma::setupScatterGatherRingTx()
|
|||
ret = XAxiDma_BdRingStart(txRingPtr);
|
||||
if (ret != XST_SUCCESS)
|
||||
throw RuntimeError("Failed to start TX ring: {}", ret);
|
||||
hwLock.unlock();
|
||||
}
|
||||
|
||||
bool Dma::reset()
|
||||
|
@ -164,8 +169,10 @@ bool Dma::reset()
|
|||
return true;
|
||||
}
|
||||
|
||||
hwLock.lock();
|
||||
XAxiDma_IntrDisable(&xDma, XAXIDMA_IRQ_ALL_MASK, XAXIDMA_DMA_TO_DEVICE);
|
||||
XAxiDma_IntrDisable(&xDma, XAXIDMA_IRQ_ALL_MASK, XAXIDMA_DEVICE_TO_DMA);
|
||||
|
||||
XAxiDma_Reset(&xDma);
|
||||
|
||||
|
||||
|
@ -180,6 +187,7 @@ bool Dma::reset()
|
|||
|
||||
timeout--;
|
||||
}
|
||||
hwLock.unlock();
|
||||
logger->error("DMA reset timed out");
|
||||
|
||||
return false;
|
||||
|
@ -210,10 +218,10 @@ bool Dma::memcpy(const MemoryBlock &src, const MemoryBlock &dst, size_t len)
|
|||
if (this->write(src, len) == 0)
|
||||
return false;
|
||||
|
||||
if (not this->writeComplete())
|
||||
if (not this->writeComplete().bds)
|
||||
return false;
|
||||
|
||||
if (not this->readComplete())
|
||||
if (not this->readComplete().bds)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
|
@ -256,8 +264,6 @@ bool Dma::read(const MemoryBlock &mem, size_t len)
|
|||
if (buf == nullptr)
|
||||
throw RuntimeError("Buffer was null");
|
||||
|
||||
logger->debug("Read from stream and write to address {:p}", buf);
|
||||
|
||||
return hasScatterGather() ? readScatterGather(buf, len) : readSimple(buf, len);
|
||||
}
|
||||
|
||||
|
@ -267,23 +273,31 @@ bool Dma::writeScatterGather(const void *buf, size_t len)
|
|||
// buf is address from view of DMA controller
|
||||
|
||||
int ret = XST_FAILURE;
|
||||
|
||||
hwLock.lock();
|
||||
auto *txRing = XAxiDma_GetTxRing(&xDma);
|
||||
if (txRing == nullptr)
|
||||
if (txRing == nullptr) {
|
||||
hwLock.unlock();
|
||||
throw RuntimeError("TxRing was null.");
|
||||
}
|
||||
|
||||
XAxiDma_Bd *bd;
|
||||
ret = XAxiDma_BdRingAlloc(txRing, 1, &bd);
|
||||
if (ret != XST_SUCCESS)
|
||||
if (ret != XST_SUCCESS) {
|
||||
hwLock.unlock();
|
||||
throw RuntimeError("BdRingAlloc returned {}.", ret);
|
||||
}
|
||||
|
||||
ret = XAxiDma_BdSetBufAddr(bd, (uintptr_t) buf);
|
||||
if (ret != XST_SUCCESS)
|
||||
if (ret != XST_SUCCESS) {
|
||||
hwLock.unlock();
|
||||
throw RuntimeError("Setting BdBufAddr to {} returned {}.", buf, ret);
|
||||
}
|
||||
|
||||
ret = XAxiDma_BdSetLength(bd, len, txRing->MaxTransferLen);
|
||||
if (ret != XST_SUCCESS)
|
||||
if (ret != XST_SUCCESS) {
|
||||
hwLock.unlock();
|
||||
throw RuntimeError("Setting BdBufLength to {} returned {}.", len, ret);
|
||||
}
|
||||
|
||||
// We have a single descriptor so it is both start and end of the list
|
||||
XAxiDma_BdSetCtrl(bd, XAXIDMA_BD_CTRL_TXEOF_MASK | XAXIDMA_BD_CTRL_TXSOF_MASK);
|
||||
|
@ -294,8 +308,12 @@ bool Dma::writeScatterGather(const void *buf, size_t len)
|
|||
// Give control of BD to HW. We should not access it until transfer is finished.
|
||||
// Failure could also indicate that EOF is not set on last Bd
|
||||
ret = XAxiDma_BdRingToHw(txRing, 1, bd);
|
||||
if (ret != XST_SUCCESS)
|
||||
if (ret != XST_SUCCESS) {
|
||||
hwLock.unlock();
|
||||
throw RuntimeError("Enqueuing Bd and giving control to HW failed {}", ret);
|
||||
}
|
||||
|
||||
hwLock.unlock();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -307,25 +325,34 @@ bool Dma::readScatterGather(void *buf, size_t len)
|
|||
if (len < readCoalesce*readMsgSize)
|
||||
throw RuntimeError("Read size is smaller than readCoalesce*msgSize. Cannot setup BDs.");
|
||||
|
||||
hwLock.lock();
|
||||
auto *rxRing = XAxiDma_GetRxRing(&xDma);
|
||||
if (rxRing == nullptr)
|
||||
if (rxRing == nullptr) {
|
||||
hwLock.unlock();
|
||||
throw RuntimeError("RxRing was null.");
|
||||
}
|
||||
|
||||
XAxiDma_Bd *bd;
|
||||
ret = XAxiDma_BdRingAlloc(rxRing, readCoalesce, &bd);
|
||||
if (ret != XST_SUCCESS)
|
||||
if (ret != XST_SUCCESS) {
|
||||
hwLock.unlock();
|
||||
throw RuntimeError("Failed to alloc BD in RX ring: {}", ret);
|
||||
}
|
||||
|
||||
auto curBd = bd;
|
||||
char* curBuf = (char*)buf;
|
||||
for (size_t i = 0; i < readCoalesce; i++) {
|
||||
ret = XAxiDma_BdSetBufAddr(curBd, (uintptr_t) curBuf);
|
||||
if (ret != XST_SUCCESS)
|
||||
if (ret != XST_SUCCESS) {
|
||||
hwLock.unlock();
|
||||
throw RuntimeError("Failed to set buffer address {:x} on BD {:x}: {}", (uintptr_t) buf, (uintptr_t) bd, ret);
|
||||
}
|
||||
|
||||
ret = XAxiDma_BdSetLength(curBd, readMsgSize, rxRing->MaxTransferLen);
|
||||
if (ret != XST_SUCCESS)
|
||||
if (ret != XST_SUCCESS) {
|
||||
hwLock.unlock();
|
||||
throw RuntimeError("Rx set length {} on BD {:x} failed {}", len, (uintptr_t) bd, ret);
|
||||
}
|
||||
|
||||
|
||||
// Receive BDs do not need to set anything for the control
|
||||
|
@ -340,124 +367,136 @@ bool Dma::readScatterGather(void *buf, size_t len)
|
|||
}
|
||||
|
||||
ret = XAxiDma_BdRingToHw(rxRing, readCoalesce, bd);
|
||||
if (ret != XST_SUCCESS)
|
||||
if (ret != XST_SUCCESS) {
|
||||
hwLock.unlock();
|
||||
throw RuntimeError("Failed to submit BD to RX ring: {}", ret);
|
||||
}
|
||||
|
||||
hwLock.unlock();
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t
|
||||
Dma::writeCompleteScatterGather()
|
||||
Dma::Completion Dma::writeCompleteScatterGather()
|
||||
{
|
||||
Completion c;
|
||||
XAxiDma_Bd *bd = nullptr, *curBd;
|
||||
size_t processedBds = 0;
|
||||
auto txRing = XAxiDma_GetTxRing(&xDma);
|
||||
int ret = XST_FAILURE;
|
||||
size_t bytesWritten = 0;
|
||||
static size_t errcnt = 32;
|
||||
|
||||
if ((processedBds = XAxiDma_BdRingFromHw(txRing, 1, &bd)) == 0)
|
||||
c.interrupts = irqs[mm2sInterrupt].irqController->waitForInterrupt(irqs[mm2sInterrupt].num);
|
||||
|
||||
hwLock.lock();
|
||||
if ((c.bds = XAxiDma_BdRingFromHw(txRing, writeCoalesce, &bd)) < writeCoalesce)
|
||||
{
|
||||
/*auto intrNum = */irqs[mm2sInterrupt].irqController->waitForInterrupt(irqs[mm2sInterrupt].num);
|
||||
//logger->info("Got {} interrupts (id: {}) from write channel", intrNum, irqs[mm2sInterrupt].num);
|
||||
processedBds = XAxiDma_BdRingFromHw(txRing, 1, &bd);
|
||||
logger->warn("Send partial batch of {}/{} BDs.", c.bds, writeCoalesce);
|
||||
if(errcnt-- == 0) {
|
||||
hwLock.unlock();
|
||||
throw RuntimeError("too many partial batches");
|
||||
}
|
||||
}
|
||||
|
||||
// Acknowledge the interrupt
|
||||
auto irqStatus = XAxiDma_BdRingGetIrq(txRing);
|
||||
XAxiDma_BdRingAckIrq(txRing, irqStatus);
|
||||
|
||||
if (bd == nullptr)
|
||||
if (c.bds == 0) {
|
||||
c.bytes = 0;
|
||||
hwLock.unlock();
|
||||
return c;
|
||||
}
|
||||
|
||||
if (bd == nullptr) {
|
||||
hwLock.unlock();
|
||||
throw RuntimeError("Bd was null.");
|
||||
}
|
||||
|
||||
curBd = bd;
|
||||
for (size_t i = 0; i < processedBds; i++) {
|
||||
for (size_t i = 0; i < c.bds; i++) {
|
||||
ret = XAxiDma_BdGetSts(curBd);
|
||||
if ((ret & XAXIDMA_BD_STS_ALL_ERR_MASK) || (!(ret & XAXIDMA_BD_STS_COMPLETE_MASK))) {
|
||||
hwLock.unlock();
|
||||
throw RuntimeError("Bd Status register shows error: {}", ret);
|
||||
break;
|
||||
}
|
||||
|
||||
bytesWritten += XAxiDma_BdGetLength(bd, txRing->MaxTransferLen);
|
||||
c.bytes += XAxiDma_BdGetLength(bd, txRing->MaxTransferLen);
|
||||
curBd = (XAxiDma_Bd *) XAxiDma_BdRingNext(txRing, curBd);
|
||||
}
|
||||
|
||||
ret = XAxiDma_BdRingFree(txRing, processedBds, bd);
|
||||
if (ret != XST_SUCCESS)
|
||||
throw RuntimeError("Failed to free {} TX BDs {}", processedBds, ret);
|
||||
ret = XAxiDma_BdRingFree(txRing, c.bds, bd);
|
||||
if (ret != XST_SUCCESS) {
|
||||
hwLock.unlock();
|
||||
throw RuntimeError("Failed to free {} TX BDs {}", c.bds, ret);
|
||||
}
|
||||
|
||||
return bytesWritten;
|
||||
hwLock.unlock();
|
||||
return c;
|
||||
}
|
||||
|
||||
size_t
|
||||
Dma::readCompleteScatterGather()
|
||||
Dma::Completion Dma::readCompleteScatterGather()
|
||||
{
|
||||
Completion c;
|
||||
XAxiDma_Bd *bd = nullptr, *curBd;
|
||||
size_t processedBds = 0;
|
||||
auto rxRing = XAxiDma_GetRxRing(&xDma);
|
||||
int ret = XST_FAILURE;
|
||||
size_t bytesRead = 0;
|
||||
static size_t errcnt = 32;
|
||||
|
||||
c.interrupts = irqs[s2mmInterrupt].irqController->waitForInterrupt(irqs[s2mmInterrupt].num);
|
||||
|
||||
hwLock.lock();
|
||||
// Wait until the data has been received by the RX channel.
|
||||
if ((processedBds = XAxiDma_BdRingFromHw(rxRing, readCoalesce, &bd)) < readCoalesce)
|
||||
if ((c.bds = XAxiDma_BdRingFromHw(rxRing, readCoalesce, &bd)) < readCoalesce)
|
||||
{
|
||||
if (processedBds != 0) {
|
||||
//Ignore partial batches
|
||||
logger->warn("Ignoring partial batch of {} BDs.", processedBds);
|
||||
ret = XAxiDma_BdRingFree(rxRing, processedBds, bd);
|
||||
if (ret != XST_SUCCESS)
|
||||
throw RuntimeError("Failed to free {} RX BDs {}", processedBds, ret);
|
||||
logger->warn("Got partial batch of {}/{} BDs.", c.bds, readCoalesce);
|
||||
if(errcnt-- == 0) {
|
||||
hwLock.unlock();
|
||||
throw RuntimeError("too many partial batches");
|
||||
}
|
||||
//auto intrNum =
|
||||
irqs[s2mmInterrupt].irqController->waitForInterrupt(irqs[s2mmInterrupt].num);
|
||||
//If we got a partial batch on the first call, we have to receive up to readCoalesce*2
|
||||
//to make sure we get a full batch of readCoalesce messages
|
||||
processedBds = XAxiDma_BdRingFromHw(rxRing, readCoalesce*2, &bd);
|
||||
}
|
||||
if(processedBds < readCoalesce) {
|
||||
// We got less than we expected. We already tried two times so let's give up.
|
||||
throw RuntimeError("Read only {} BDs, expected {}.", processedBds, readCoalesce);
|
||||
} else if(processedBds > readCoalesce) {
|
||||
// If the first try was a partial batch, we receive two batches on the second try
|
||||
// We ignore the first batch and only process the second one
|
||||
while (processedBds > readCoalesce) {
|
||||
bd = (XAxiDma_Bd *) XAxiDma_BdRingNext(rxRing, bd);
|
||||
processedBds--;
|
||||
}
|
||||
ret = XAxiDma_BdRingFree(rxRing, processedBds-readCoalesce, bd);
|
||||
if (ret != XST_SUCCESS)
|
||||
throw RuntimeError("Failed to free {} RX BDs {}", processedBds, ret);
|
||||
}
|
||||
// At this point we have exactly readCoalesce BDs.
|
||||
|
||||
// Acknowledge the interrupt. Has no effect if no interrupt has occured.
|
||||
auto irqStatus = XAxiDma_BdRingGetIrq(rxRing);
|
||||
XAxiDma_BdRingAckIrq(rxRing, irqStatus);
|
||||
|
||||
if (bd == nullptr)
|
||||
if (c.bds == 0) {
|
||||
c.bytes = 0;
|
||||
hwLock.unlock();
|
||||
return c;
|
||||
}
|
||||
|
||||
if (bd == nullptr) {
|
||||
hwLock.unlock();
|
||||
throw RuntimeError("Bd was null.");
|
||||
}
|
||||
|
||||
curBd = bd;
|
||||
for (size_t i = 0; i < processedBds; i++) {
|
||||
for (size_t i = 0; i < c.bds; i++) {
|
||||
ret = XAxiDma_BdGetSts(curBd);
|
||||
if ((ret & XAXIDMA_BD_STS_ALL_ERR_MASK) || (!(ret & XAXIDMA_BD_STS_COMPLETE_MASK))) {
|
||||
hwLock.unlock();
|
||||
throw RuntimeError("Bd Status register shows error: {}", ret);
|
||||
break;
|
||||
}
|
||||
|
||||
bytesRead += XAxiDma_BdGetActualLength(bd, rxRing->MaxTransferLen);
|
||||
c.bytes += XAxiDma_BdGetActualLength(bd, rxRing->MaxTransferLen);
|
||||
curBd = (XAxiDma_Bd *) XAxiDma_BdRingNext(rxRing, curBd);
|
||||
}
|
||||
|
||||
// Free all processed RX BDs for future transmission.
|
||||
ret = XAxiDma_BdRingFree(rxRing, processedBds, bd);
|
||||
if (ret != XST_SUCCESS)
|
||||
throw RuntimeError("Failed to free {} TX BDs {}.", processedBds, ret);
|
||||
ret = XAxiDma_BdRingFree(rxRing, c.bds, bd);
|
||||
if (ret != XST_SUCCESS) {
|
||||
hwLock.unlock();
|
||||
throw RuntimeError("Failed to free {} TX BDs {}.", c.bds, ret);
|
||||
}
|
||||
|
||||
return bytesRead;
|
||||
hwLock.unlock();
|
||||
return c;
|
||||
}
|
||||
|
||||
bool Dma::writeSimple(const void *buf, size_t len)
|
||||
{
|
||||
hwLock.lock();
|
||||
XAxiDma_BdRing *ring = XAxiDma_GetTxRing(&xDma);
|
||||
|
||||
if (not ring->HasDRE) {
|
||||
|
@ -465,8 +504,10 @@ bool Dma::writeSimple(const void *buf, size_t len)
|
|||
? XAXIDMA_MICROMODE_MIN_BUF_ALIGN
|
||||
: ring->DataWidth - 1;
|
||||
|
||||
if (reinterpret_cast<uintptr_t>(buf) & mask)
|
||||
if (reinterpret_cast<uintptr_t>(buf) & mask) {
|
||||
hwLock.unlock();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
const bool dmaChannelHalted =
|
||||
|
@ -475,8 +516,10 @@ bool Dma::writeSimple(const void *buf, size_t len)
|
|||
const bool dmaToDeviceBusy = XAxiDma_Busy(&xDma, XAXIDMA_DMA_TO_DEVICE);
|
||||
|
||||
// If the engine is doing a transfer, cannot submit
|
||||
if (not dmaChannelHalted and dmaToDeviceBusy)
|
||||
if (not dmaChannelHalted and dmaToDeviceBusy) {
|
||||
hwLock.unlock();
|
||||
return false;
|
||||
}
|
||||
|
||||
// Set lower 32 bit of source address
|
||||
XAxiDma_WriteReg(ring->ChanBase, XAXIDMA_SRCADDR_OFFSET,
|
||||
|
@ -494,12 +537,13 @@ bool Dma::writeSimple(const void *buf, size_t len)
|
|||
|
||||
// Set tail descriptor pointer
|
||||
XAxiDma_WriteReg(ring->ChanBase, XAXIDMA_BUFFLEN_OFFSET, len);
|
||||
|
||||
hwLock.unlock();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Dma::readSimple(void *buf, size_t len)
|
||||
{
|
||||
hwLock.lock();
|
||||
XAxiDma_BdRing *ring = XAxiDma_GetRxRing(&xDma);
|
||||
|
||||
if (not ring->HasDRE) {
|
||||
|
@ -507,16 +551,20 @@ bool Dma::readSimple(void *buf, size_t len)
|
|||
? XAXIDMA_MICROMODE_MIN_BUF_ALIGN
|
||||
: ring->DataWidth - 1;
|
||||
|
||||
if (reinterpret_cast<uintptr_t>(buf) & mask)
|
||||
if (reinterpret_cast<uintptr_t>(buf) & mask) {
|
||||
hwLock.unlock();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
const bool dmaChannelHalted = XAxiDma_ReadReg(ring->ChanBase, XAXIDMA_SR_OFFSET) & XAXIDMA_HALTED_MASK;
|
||||
const bool deviceToDmaBusy = XAxiDma_Busy(&xDma, XAXIDMA_DEVICE_TO_DMA);
|
||||
|
||||
// If the engine is doing a transfer, cannot submit
|
||||
if (not dmaChannelHalted and deviceToDmaBusy)
|
||||
if (not dmaChannelHalted and deviceToDmaBusy) {
|
||||
hwLock.unlock();
|
||||
return false;
|
||||
}
|
||||
|
||||
// Set lower 32 bit of destination address
|
||||
XAxiDma_WriteReg(ring->ChanBase, XAXIDMA_DESTADDR_OFFSET, LOWER_32_BITS(reinterpret_cast<uintptr_t>(buf)));
|
||||
|
@ -533,42 +581,48 @@ bool Dma::readSimple(void *buf, size_t len)
|
|||
// Set tail descriptor pointer
|
||||
XAxiDma_WriteReg(ring->ChanBase, XAXIDMA_BUFFLEN_OFFSET, len);
|
||||
|
||||
hwLock.unlock();
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t
|
||||
Dma::writeCompleteSimple()
|
||||
Dma::Completion Dma::writeCompleteSimple()
|
||||
{
|
||||
Completion c;
|
||||
while (!(XAxiDma_IntrGetIrq(&xDma, XAXIDMA_DMA_TO_DEVICE) & XAXIDMA_IRQ_IOC_MASK))
|
||||
irqs[mm2sInterrupt].irqController->waitForInterrupt(irqs[mm2sInterrupt]);
|
||||
c.interrupts = irqs[mm2sInterrupt].irqController->waitForInterrupt(irqs[mm2sInterrupt]);
|
||||
|
||||
hwLock.lock();
|
||||
XAxiDma_IntrAckIrq(&xDma, XAXIDMA_IRQ_IOC_MASK, XAXIDMA_DMA_TO_DEVICE);
|
||||
|
||||
const XAxiDma_BdRing *ring = XAxiDma_GetTxRing(&xDma);
|
||||
const size_t bytesWritten = XAxiDma_ReadReg(ring->ChanBase, XAXIDMA_BUFFLEN_OFFSET);
|
||||
|
||||
return bytesWritten;
|
||||
hwLock.unlock();
|
||||
c.bytes = bytesWritten;
|
||||
return c;
|
||||
}
|
||||
|
||||
size_t
|
||||
Dma::readCompleteSimple()
|
||||
Dma::Completion Dma::readCompleteSimple()
|
||||
{
|
||||
Completion c;
|
||||
while (!(XAxiDma_IntrGetIrq(&xDma, XAXIDMA_DEVICE_TO_DMA) & XAXIDMA_IRQ_IOC_MASK))
|
||||
irqs[s2mmInterrupt].irqController->waitForInterrupt(irqs[s2mmInterrupt]);
|
||||
c.interrupts = irqs[s2mmInterrupt].irqController->waitForInterrupt(irqs[s2mmInterrupt]);
|
||||
|
||||
hwLock.lock();
|
||||
XAxiDma_IntrAckIrq(&xDma, XAXIDMA_IRQ_IOC_MASK, XAXIDMA_DEVICE_TO_DMA);
|
||||
|
||||
const XAxiDma_BdRing *ring = XAxiDma_GetRxRing(&xDma);
|
||||
const size_t bytesRead = XAxiDma_ReadReg(ring->ChanBase, XAXIDMA_BUFFLEN_OFFSET);
|
||||
hwLock.unlock();
|
||||
|
||||
return bytesRead;
|
||||
c.bytes = bytesRead;
|
||||
return c;
|
||||
}
|
||||
|
||||
void Dma::makeAccesibleFromVA(const MemoryBlock &mem)
|
||||
void Dma::makeAccesibleFromVA(std::shared_ptr<MemoryBlock> mem)
|
||||
{
|
||||
// Only symmetric mapping supported currently
|
||||
if (isMemoryBlockAccesible(mem, s2mmInterface) and
|
||||
isMemoryBlockAccesible(mem, mm2sInterface))
|
||||
if (isMemoryBlockAccesible(*mem, s2mmInterface) and
|
||||
isMemoryBlockAccesible(*mem, mm2sInterface))
|
||||
return;
|
||||
|
||||
// Try mapping via FPGA-card (VFIO)
|
||||
|
@ -576,8 +630,8 @@ void Dma::makeAccesibleFromVA(const MemoryBlock &mem)
|
|||
throw RuntimeError("Memory not accessible by DMA");
|
||||
|
||||
// Sanity-check if mapping worked, this shouldn't be necessary
|
||||
if (not isMemoryBlockAccesible(mem, s2mmInterface) or
|
||||
not isMemoryBlockAccesible(mem, mm2sInterface))
|
||||
if (not isMemoryBlockAccesible(*mem, s2mmInterface) or
|
||||
not isMemoryBlockAccesible(*mem, mm2sInterface))
|
||||
throw RuntimeError("Mapping memory via card didn't work, but reported success?!");
|
||||
}
|
||||
|
||||
|
|
|
@ -152,7 +152,7 @@ std::list<std::shared_ptr<PCIeCard>> PCIeCardFactory::make(json_t *json, std::sh
|
|||
|
||||
// Destructor: performs no explicit work in its body.
PCIeCard::~PCIeCard()
{
}
|
||||
|
||||
std::shared_ptr<ip::Core> PCIeCard::lookupIp(const std::string &name) const
|
||||
|
@ -188,67 +188,6 @@ std::shared_ptr<ip::Core> PCIeCard::lookupIp(const ip::IpIdentifier &id) const
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
bool PCIeCard::unmapMemoryBlock(const MemoryBlock &block)
|
||||
{
|
||||
if (memoryBlocksMapped.find(block.getAddrSpaceId()) == memoryBlocksMapped.end()) {
|
||||
throw std::runtime_error("Block " + std::to_string(block.getAddrSpaceId()) + " is not mapped but was requested to be unmapped.");
|
||||
}
|
||||
|
||||
auto &mm = MemoryManager::get();
|
||||
|
||||
auto translation = mm.getTranslation(addrSpaceIdDeviceToHost, block.getAddrSpaceId());
|
||||
|
||||
const uintptr_t iova = translation.getLocalAddr(0);
|
||||
const size_t size = translation.getSize();
|
||||
|
||||
logger->debug("Unmap block {} at IOVA {:#x} of size {:#x}",
|
||||
block.getAddrSpaceId(), iova, size);
|
||||
vfioContainer->memoryUnmap(iova, size);
|
||||
|
||||
memoryBlocksMapped.erase(block.getAddrSpaceId());
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool PCIeCard::mapMemoryBlock(const MemoryBlock &block)
|
||||
{
|
||||
if (not vfioContainer->isIommuEnabled()) {
|
||||
logger->warn("VFIO mapping not supported without IOMMU");
|
||||
return false;
|
||||
}
|
||||
|
||||
auto &mm = MemoryManager::get();
|
||||
const auto &addrSpaceId = block.getAddrSpaceId();
|
||||
|
||||
if (memoryBlocksMapped.find(addrSpaceId) != memoryBlocksMapped.end())
|
||||
// Block already mapped
|
||||
return true;
|
||||
else
|
||||
logger->debug("Create VFIO mapping for {}", addrSpaceId);
|
||||
|
||||
auto translationFromProcess = mm.getTranslationFromProcess(addrSpaceId);
|
||||
uintptr_t processBaseAddr = translationFromProcess.getLocalAddr(0);
|
||||
uintptr_t iovaAddr = vfioContainer->memoryMap(processBaseAddr,
|
||||
UINTPTR_MAX,
|
||||
block.getSize());
|
||||
|
||||
if (iovaAddr == UINTPTR_MAX) {
|
||||
logger->error("Cannot map memory at {:#x} of size {:#x}",
|
||||
processBaseAddr, block.getSize());
|
||||
return false;
|
||||
}
|
||||
|
||||
mm.createMapping(iovaAddr, 0, block.getSize(),
|
||||
"VFIO-D2H",
|
||||
this->addrSpaceIdDeviceToHost,
|
||||
addrSpaceId);
|
||||
|
||||
// Remember that this block has already been mapped for later
|
||||
memoryBlocksMapped.insert(addrSpaceId);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool PCIeCard::init()
|
||||
{
|
||||
logger = getLogger();
|
||||
|
|
|
@ -35,45 +35,42 @@ using namespace villas;
|
|||
static std::shared_ptr<kernel::pci::DeviceList> pciDevices;
|
||||
static auto logger = villas::logging.get("streamer");
|
||||
|
||||
const std::shared_ptr<villas::fpga::ip::Node> portStringToStreamVertex(std::string &str,
|
||||
std::shared_ptr<villas::fpga::ip::Dma> dma,
|
||||
std::vector<std::shared_ptr<fpga::ip::AuroraXilinx>>& aurora_channels)
|
||||
fpga::ConnectString::ConnectString(std::string& connectString, int maxPortNum) :
|
||||
log(villas::logging.get("ConnectString")),
|
||||
maxPortNum(maxPortNum),
|
||||
bidirectional(false),
|
||||
invert(false),
|
||||
srcAsInt(-1),
|
||||
dstAsInt(-1),
|
||||
srcIsStdin(false),
|
||||
dstIsStdout(false),
|
||||
dmaLoopback(false)
|
||||
{
|
||||
if (str == "stdin" || str == "stdout") {
|
||||
return dma;
|
||||
} else {
|
||||
int port = std::stoi(str);
|
||||
|
||||
if (port > 7 || port < 0)
|
||||
throw std::runtime_error("Invalid port number");
|
||||
|
||||
return aurora_channels[port];
|
||||
}
|
||||
parseString(connectString);
|
||||
}
|
||||
// parses a string like "1->2" or "1<->stdout" and configures the crossbar
|
||||
void fpga::configCrossBarUsingConnectString(std::string connectString,
|
||||
std::shared_ptr<villas::fpga::ip::Dma> dma,
|
||||
std::vector<std::shared_ptr<fpga::ip::AuroraXilinx>>& aurora_channels)
|
||||
{
|
||||
bool bidirectional = false;
|
||||
bool invert = false;
|
||||
|
||||
void fpga::ConnectString::parseString(std::string& connectString)
|
||||
{
|
||||
if (connectString.empty())
|
||||
return;
|
||||
|
||||
if (connectString == "loopback") {
|
||||
logger->info("Connecting loopback");
|
||||
// is this working?
|
||||
dma->connectLoopback();
|
||||
srcIsStdin = true;
|
||||
dstIsStdout = true;
|
||||
bidirectional = true;
|
||||
dmaLoopback = true;
|
||||
return;
|
||||
}
|
||||
|
||||
static std::regex re("([0-9]+)([<\\->]+)([0-9]+|stdin|stdout)");
|
||||
static const std::regex regex("([0-9]+)([<\\->]+)([0-9]+|stdin|stdout)");
|
||||
std::smatch match;
|
||||
|
||||
if (!std::regex_match(connectString, match, re)) {
|
||||
if (!std::regex_match(connectString, match, regex) || match.size() != 4) {
|
||||
logger->error("Invalid connect string: {}", connectString);
|
||||
throw std::runtime_error("Invalid connect string");
|
||||
}
|
||||
|
||||
if (match[2] == "<->") {
|
||||
bidirectional = true;
|
||||
} else if(match[2] == "<-") {
|
||||
|
@ -82,11 +79,61 @@ void fpga::configCrossBarUsingConnectString(std::string connectString,
|
|||
|
||||
std::string srcStr = (invert ? match[3] : match[1]);
|
||||
std::string dstStr = (invert ? match[1] : match[3]);
|
||||
logger->info("Connect string {}: Connecting {} to {}, {}directional",
|
||||
connectString, srcStr, dstStr,
|
||||
|
||||
srcAsInt = portStringToInt(srcStr);
|
||||
dstAsInt = portStringToInt(dstStr);
|
||||
if (srcAsInt == -1) {
|
||||
srcIsStdin = true;
|
||||
}
|
||||
if (dstAsInt == -1) {
|
||||
dstIsStdout = true;
|
||||
}
|
||||
}
|
||||
|
||||
int fpga::ConnectString::portStringToInt(std::string &str) const
|
||||
{
|
||||
if (str == "stdin" || str == "stdout") {
|
||||
return -1;
|
||||
} else {
|
||||
const int port = std::stoi(str);
|
||||
|
||||
if (port > maxPortNum || port < 0)
|
||||
throw std::runtime_error("Invalid port number");
|
||||
|
||||
return port;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// parses a string like "1->2" or "1<->stdout" and configures the crossbar accordingly
|
||||
void fpga::ConnectString::configCrossBar(std::shared_ptr<villas::fpga::ip::Dma> dma,
|
||||
std::vector<std::shared_ptr<fpga::ip::AuroraXilinx>>& aurora_channels) const
|
||||
{
|
||||
if (dmaLoopback) {
|
||||
log->info("Configuring DMA loopback");
|
||||
dma->connectLoopback();
|
||||
return;
|
||||
}
|
||||
|
||||
log->info("Connecting {} to {}, {}directional",
|
||||
(srcAsInt==-1 ? "stdin" : std::to_string(srcAsInt)),
|
||||
(dstAsInt==-1 ? "stdout" : std::to_string(dstAsInt)),
|
||||
(bidirectional ? "bi" : "uni"));
|
||||
auto src = portStringToStreamVertex(srcStr, dma, aurora_channels);
|
||||
auto dest = portStringToStreamVertex(dstStr, dma, aurora_channels);
|
||||
|
||||
std::shared_ptr<fpga::ip::Node> src;
|
||||
std::shared_ptr<fpga::ip::Node> dest;
|
||||
if (srcIsStdin) {
|
||||
src = dma;
|
||||
} else {
|
||||
src = aurora_channels[srcAsInt];
|
||||
}
|
||||
|
||||
if (dstIsStdout) {
|
||||
dest = dma;
|
||||
} else {
|
||||
dest = aurora_channels[dstAsInt];
|
||||
}
|
||||
|
||||
src->connect(src->getDefaultMasterPort(), dest->getDefaultSlavePort());
|
||||
if (bidirectional) {
|
||||
dest->connect(dest->getDefaultMasterPort(), src->getDefaultSlavePort());
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
#include <string>
|
||||
#include <algorithm>
|
||||
#include <jansson.h>
|
||||
#include <thread>
|
||||
|
||||
#include <CLI11.hpp>
|
||||
#include <rang.hpp>
|
||||
|
@ -33,9 +34,103 @@
|
|||
using namespace villas;
|
||||
|
||||
static std::shared_ptr<kernel::pci::DeviceList> pciDevices;
|
||||
static auto logger = villas::logging.get("cat");
|
||||
static auto logger = villas::logging.get("ctrl");
|
||||
|
||||
// Read lines of ';'-separated numbers from stdin and send every line as one
// DMA write transfer.
//
// Two host memory blocks are used alternately: while the transfer of one
// block is still in flight, the next line is parsed into the other block.
// The completion of a transfer is therefore awaited one iteration late.
//
// The function returns when stdin reaches end-of-file or enters an error
// state. Exceptions thrown by std::stof for malformed numbers propagate to
// the caller.
void writeToDmaFromStdIn(std::shared_ptr<villas::fpga::ip::Dma> dma)
{
	auto &alloc = villas::HostRam::getAllocator();

	// Capacity of each buffer in 32-bit values
	constexpr size_t maxValues = 0x200;

	const std::shared_ptr<villas::MemoryBlock> block[] = {
		alloc.allocateBlock(maxValues * sizeof(uint32_t)),
		alloc.allocateBlock(maxValues * sizeof(uint32_t))
	};
	villas::MemoryAccessor<int32_t> mem[] = {*block[0], *block[1]};

	for (auto b : block) {
		dma->makeAccesibleFromVA(b);
	}

	size_t cur = 0, next = 1;
	std::ios::sync_with_stdio(false);
	std::string line;
	bool firstXfer = true;
	bool lastWriteAccepted = false;

	// Stop on EOF or stream error. Without this check the loop would spin
	// forever on an exhausted stdin, submitting zero-length transfers.
	while (std::getline(std::cin, line)) {
		auto values = villas::utils::tokenize(line, ";");

		size_t i = 0;
		for (auto &value: values) {
			if (value.empty()) continue;

			// Never write past the end of the allocated block
			if (i >= maxValues) {
				logger->warn("Line contains more than {} values, ignoring the rest", maxValues);
				break;
			}

			const float number = std::stof(value);
			// NOTE(review): the accessor is typed int32_t, so this assignment
			// converts the float to an integer value instead of storing its
			// bit pattern, while the transfer length below is computed with
			// sizeof(float) -- confirm which representation the device expects.
			mem[cur][i++] = number;
		}

		// Initiate write transfer
		lastWriteAccepted = dma->write(*block[cur], i * sizeof(float));
		if (!lastWriteAccepted)
			logger->error("Failed to write to device");

		// Wait for the transfer that was submitted in the previous iteration
		if (!firstXfer) {
			auto bytesWritten = dma->writeComplete();
			logger->debug("Wrote {} bytes", bytesWritten.bytes);
		} else {
			firstXfer = false;
		}

		cur = next;
		next = (next + 1) % (sizeof(mem) / sizeof(mem[0]));
	}

	// The last accepted transfer has not been waited for inside the loop.
	// Complete it before the buffers go out of scope.
	if (lastWriteAccepted) {
		auto bytesWritten = dma->writeComplete();
		logger->debug("Wrote {} bytes", bytesWritten.bytes);
	}
}
|
||||
|
||||
// Continuously receive data via DMA and print it to stdout.
//
// Two host memory blocks are used alternately: a read into the next block is
// queued before the completion of the previous read is awaited. Every 32-bit
// word of the completed transfer is reinterpreted as a float, handed to
// `formatter` and finally written to std::cout.
//
// The loop has no exit condition; the function only ends if something inside
// it throws.
void readFromDmaToStdOut(std::shared_ptr<villas::fpga::ip::Dma> dma,
	std::unique_ptr<fpga::BufferedSampleFormatter> formatter)
{
	auto &alloc = villas::HostRam::getAllocator();

	const std::shared_ptr<villas::MemoryBlock> block[] = {
		alloc.allocateBlock(0x200 * sizeof(uint32_t)),
		alloc.allocateBlock(0x200 * sizeof(uint32_t))
	};
	villas::MemoryAccessor<int32_t> mem[] = {*block[0], *block[1]};
	const size_t bufferCount = sizeof(mem) / sizeof(mem[0]);

	for (auto buffer : block)
		dma->makeAccesibleFromVA(buffer);

	size_t cur = 0;
	size_t next = 1;
	std::ios::sync_with_stdio(false);

	// Queue the very first read so that one transfer is always in flight
	dma->read(*block[0], block[0]->getSize());

	for (;;) {
		logger->trace("Read from stream and write to address {}:{:p}", block[next]->getAddrSpaceId(), block[next]->getOffset());

		// Queue the following read, then wait for a completion
		dma->read(*block[next], block[next]->getSize());
		auto completion = dma->readComplete();

		// We could use the number of interrupts to determine if we missed a chunk of data
		if (completion.interrupts > 1)
			logger->warn("Missed {} interrupts", completion.interrupts - 1);

		logger->trace("bytes: {}, intrs: {}, bds: {}",
		              completion.bytes, completion.interrupts, completion.bds);

		// Each sample occupies 4 bytes of the received data
		size_t word = 0;
		while (word * 4 < completion.bytes) {
			int32_t raw = mem[cur][word];
			float sample = *((float*)(&raw)); // cppcheck-suppress invalidPointerCast
			formatter->format(sample);
			word++;
		}
		formatter->output(std::cout);

		// Advance to the other buffer
		cur = next;
		next = (next + 1) % bufferCount;
	}
}
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
|
@ -53,7 +148,12 @@ int main(int argc, char* argv[])
|
|||
app.add_option("-x,--connect", connectStr, "Connect a FPGA port with another or stdin/stdout");
|
||||
bool noDma = false;
|
||||
app.add_flag("--no-dma", noDma, "Do not setup DMA, only setup FPGA and Crossbar links");
|
||||
|
||||
std::string outputFormat = "short";
|
||||
app.add_option("--output-format", outputFormat, "Output format (short, long)");
|
||||
bool dumpGraph = false;
|
||||
app.add_flag("--dump-graph", dumpGraph, "Dumps the graph of memory regions into \"graph.dot\"");
|
||||
bool dumpAuroraChannels = true;
|
||||
app.add_flag("--dump-aurora", dumpAuroraChannels, "Dumps the detected Aurora channels.");
|
||||
app.parse(argc, argv);
|
||||
|
||||
// Logging setup
|
||||
|
@ -66,13 +166,6 @@ int main(int argc, char* argv[])
|
|||
return 1;
|
||||
}
|
||||
|
||||
//FIXME: This must be called before card is initialized, because the card destructor
|
||||
// still accesses the allocated memory. This order ensures that the allocator
|
||||
// is destroyed AFTER the card.
|
||||
auto &alloc = villas::HostRam::getAllocator();
|
||||
villas::MemoryAccessor<int32_t> mem[] = {alloc.allocate<int32_t>(0x200), alloc.allocate<int32_t>(0x200)};
|
||||
const villas::MemoryBlock block[] = {mem[0].getMemoryBlock(), mem[1].getMemoryBlock()};
|
||||
|
||||
auto card = fpga::setupFpgaCard(configFile, fpgaName);
|
||||
|
||||
std::vector<std::shared_ptr<fpga::ip::AuroraXilinx>> aurora_channels;
|
||||
|
@ -95,52 +188,32 @@ int main(int argc, char* argv[])
|
|||
return 1;
|
||||
}
|
||||
|
||||
for (auto aurora : aurora_channels)
|
||||
aurora->dump();
|
||||
|
||||
// Configure Crossbar switch
|
||||
fpga::configCrossBarUsingConnectString(connectStr, dma, aurora_channels);
|
||||
|
||||
if (!noDma) {
|
||||
for (auto b : block) {
|
||||
dma->makeAccesibleFromVA(b);
|
||||
}
|
||||
|
||||
if (dumpGraph) {
|
||||
auto &mm = MemoryManager::get();
|
||||
mm.getGraph().dump("graph.dot");
|
||||
}
|
||||
|
||||
// Setup read transfer
|
||||
dma->read(block[0], block[0].getSize());
|
||||
size_t cur = 0, next = 1;
|
||||
while (true) {
|
||||
dma->read(block[next], block[next].getSize());
|
||||
auto bytesRead = dma->readComplete();
|
||||
// Setup read transfer
|
||||
if (dumpAuroraChannels) {
|
||||
for (auto aurora : aurora_channels)
|
||||
aurora->dump();
|
||||
}
|
||||
// Configure Crossbar switch
|
||||
const fpga::ConnectString parsedConnectString(connectStr);
|
||||
parsedConnectString.configCrossBar(dma, aurora_channels);
|
||||
|
||||
//auto valuesRead = bytesRead / sizeof(int32_t);
|
||||
//logger->info("Read {} bytes", bytesRead);
|
||||
std::unique_ptr<std::thread> stdInThread = nullptr;
|
||||
if (!noDma && parsedConnectString.isDstStdout()) {
|
||||
auto formatter = fpga::getBufferedSampleFormatter(outputFormat, 16);
|
||||
// We copy the dma shared ptr but move the formatter unique ptr as we don't need it
|
||||
// in this thread anymore
|
||||
stdInThread = std::make_unique<std::thread>(readFromDmaToStdOut, dma, std::move(formatter));
|
||||
}
|
||||
if (!noDma && parsedConnectString.isSrcStdin()) {
|
||||
writeToDmaFromStdIn(dma);
|
||||
}
|
||||
|
||||
//for (size_t i = 0; i < valuesRead; i++)
|
||||
// std::cerr << std::hex << mem[i] << ";";
|
||||
//std::cerr << std::endl;
|
||||
|
||||
for (size_t i = 0; i*4 < bytesRead; i++) {
|
||||
int32_t ival = mem[cur][i];
|
||||
float fval = *((float*)(&ival)); // cppcheck-suppress invalidPointerCast
|
||||
//std::cerr << std::hex << ival << ",";
|
||||
std::cerr << fval << std::endl;
|
||||
/*int64_t ival = (int64_t)(mem[1] & 0xFFFF) << 48 |
|
||||
(int64_t)(mem[1] & 0xFFFF0000) << 16 |
|
||||
(int64_t)(mem[0] & 0xFFFF) << 16 |
|
||||
(int64_t)(mem[0] & 0xFFFF0000) >> 16;
|
||||
double dval = *((double*)(&ival));
|
||||
std::cerr << std::hex << ival << "," << dval << std::endl;
|
||||
bytesRead -= 8;*/
|
||||
//logger->info("Read value: {}", dval);
|
||||
}
|
||||
cur = next;
|
||||
next = (next + 1) % (sizeof(mem)/sizeof(mem[0]));
|
||||
}
|
||||
if (stdInThread) {
|
||||
stdInThread->join();
|
||||
}
|
||||
} catch (const RuntimeError &e) {
|
||||
logger->error("Error: {}", e.what());
|
||||
|
|
|
@ -63,12 +63,15 @@ Test(fpga, dma, .description = "DMA")
|
|||
auto dst = bram->getAllocator().allocate<char>(len);
|
||||
#else
|
||||
// ... only works with IOMMU enabled currently
|
||||
auto src = HostRam::getAllocator().allocate<char>(len);
|
||||
auto dst = HostRam::getAllocator().allocate<char>(len);
|
||||
auto &alloc = villas::HostRam::getAllocator();
|
||||
const std::shared_ptr<villas::MemoryBlock> srcBlock = alloc.allocateBlock(len);
|
||||
const std::shared_ptr<villas::MemoryBlock> dstBlock = alloc.allocateBlock(len);
|
||||
villas::MemoryAccessor<char> src(*srcBlock);
|
||||
villas::MemoryAccessor<char> dst(*dstBlock);
|
||||
#endif
|
||||
// Make sure memory is accessible for DMA
|
||||
dma->makeAccesibleFromVA(src.getMemoryBlock());
|
||||
dma->makeAccesibleFromVA(dst.getMemoryBlock());
|
||||
dma->makeAccesibleFromVA(srcBlock);
|
||||
dma->makeAccesibleFromVA(dstBlock);
|
||||
|
||||
// Get new random data
|
||||
const size_t lenRandom = utils::readRandom(&src, len);
|
||||
|
|
|
@ -81,7 +81,7 @@ Test(fpga, rtds, .description = "RTDS")
|
|||
"Failed to initiate DMA read");
|
||||
|
||||
// logger->info("Wait read");
|
||||
const size_t bytesRead = dma->readComplete();
|
||||
const size_t bytesRead = dma->readComplete().bytes;
|
||||
cr_assert(bytesRead > 0,
|
||||
"Failed to complete DMA read");
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue