From 498af9fd1c5b23e084382adfa51dd95677527a14 Mon Sep 17 00:00:00 2001 From: Niklas Eiling Date: Fri, 6 Jan 2023 17:21:47 +0100 Subject: [PATCH] ips/dma: make read correctly wait on interrupts Modify villas-fpga-ctrl to fit the new behavior of Dma. Makes reading from DMA work even when we are too slow and only receive partial batches of BDs. Signed-off-by: Niklas Eiling --- fpga/include/villas/fpga/ips/dma.hpp | 3 +- fpga/lib/ips/dma.cpp | 42 ++++++++-------------------- fpga/src/villas-fpga-ctrl.cpp | 41 ++++++++++++++------------- 3 files changed, 35 insertions(+), 51 deletions(-) diff --git a/fpga/include/villas/fpga/ips/dma.hpp b/fpga/include/villas/fpga/ips/dma.hpp index 5f4f21a0f..ed29786f2 100644 --- a/fpga/include/villas/fpga/ips/dma.hpp +++ b/fpga/include/villas/fpga/ips/dma.hpp @@ -133,7 +133,8 @@ private: // When using SG: ringBdSize is the maximum number of BDs usable in the ring // Depending on alignment, the actual number of BDs usable can be smaller static constexpr size_t requestedRingBdSize = 2048; - uint32_t actualRingBdSize = XAxiDma_BdRingCntCalc(XAXIDMA_BD_MINIMUM_ALIGNMENT, requestedRingBdSize); + static constexpr size_t requestedRingBdSizeMemory = requestedRingBdSize * sizeof(XAxiDma_Bd); + uint32_t actualRingBdSize = XAxiDma_BdRingCntCalc(XAXIDMA_BD_MINIMUM_ALIGNMENT, requestedRingBdSizeMemory); std::shared_ptr sgRingTx; std::shared_ptr sgRingRx; }; diff --git a/fpga/lib/ips/dma.cpp b/fpga/lib/ips/dma.cpp index c129b6f82..92aae09ae 100644 --- a/fpga/lib/ips/dma.cpp +++ b/fpga/lib/ips/dma.cpp @@ -82,7 +82,7 @@ void Dma::setupScatterGatherRingRx() // Allocate and map space for BD ring in host RAM auto &alloc = villas::HostRam::getAllocator(); - sgRingRx = alloc.allocateBlock(requestedRingBdSize * sizeof(uint16_t) * XAXIDMA_BD_NUM_WORDS); + sgRingRx = alloc.allocateBlock(requestedRingBdSizeMemory); if (not card->mapMemoryBlock(sgRingRx)) throw RuntimeError("Memory not accessible by DMA"); @@ -127,7 +127,7 @@ void Dma::setupScatterGatherRingTx() // Allocate and map space for BD ring in host RAM auto &alloc = villas::HostRam::getAllocator(); - sgRingTx = alloc.allocateBlock(requestedRingBdSize * sizeof(uint16_t) * XAXIDMA_BD_NUM_WORDS); + sgRingTx = alloc.allocateBlock(requestedRingBdSizeMemory); if (not card->mapMemoryBlock(sgRingTx)) throw RuntimeError("Memory not accessible by DMA"); @@ -256,8 +256,6 @@ bool Dma::read(const MemoryBlock &mem, size_t len) if (buf == nullptr) throw RuntimeError("Buffer was null"); - logger->debug("Read from stream and write to address {:p}", buf); - return hasScatterGather() ? readScatterGather(buf, len) : readSimple(buf, len); } @@ -396,43 +394,27 @@ Dma::readCompleteScatterGather() auto rxRing = XAxiDma_GetRxRing(&xDma); int ret = XST_FAILURE; size_t bytesRead = 0; + static size_t errcnt = 32; + + //auto intrNum = + irqs[s2mmInterrupt].irqController->waitForInterrupt(irqs[s2mmInterrupt].num); // Wait until the data has been received by the RX channel. if ((processedBds = XAxiDma_BdRingFromHw(rxRing, readCoalesce, &bd)) < readCoalesce) { - if (processedBds != 0) { - //Ignore partial batches - logger->warn("Ignoring partial batch of {} BDs.", processedBds); - ret = XAxiDma_BdRingFree(rxRing, processedBds, bd); - if (ret != XST_SUCCESS) - throw RuntimeError("Failed to free {} RX BDs {}", processedBds, ret); + logger->warn("Got partial batch of {}/{} BDs.", processedBds, readCoalesce); + if(errcnt-- == 0) { + throw RuntimeError("too many partial batches"); } - //auto intrNum = - irqs[s2mmInterrupt].irqController->waitForInterrupt(irqs[s2mmInterrupt].num); - //If we got a partial batch on the first call, we have to receive up to readCoalesce*2 - //to make sure we get a full batch of readCoalesce messages - processedBds = XAxiDma_BdRingFromHw(rxRing, readCoalesce*2, &bd); } - if(processedBds < readCoalesce) { - // We got less than we expected. We already tried two times so let's give up. - throw RuntimeError("Read only {} BDs, expected {}.", processedBds, readCoalesce); - } else if(processedBds > readCoalesce) { - // If the first try was a partial batch, we receive two batches on the second try - // We ignore the first batch and only process the second one - while (processedBds > readCoalesce) { - bd = (XAxiDma_Bd *) XAxiDma_BdRingNext(rxRing, bd); - processedBds--; - } - ret = XAxiDma_BdRingFree(rxRing, processedBds-readCoalesce, bd); - if (ret != XST_SUCCESS) - throw RuntimeError("Failed to free {} RX BDs {}", processedBds, ret); - } - // At this point we have exactly readCoalesce BDs. // Acknowledge the interrupt. Has no effect if no interrupt has occured. auto irqStatus = XAxiDma_BdRingGetIrq(rxRing); XAxiDma_BdRingAckIrq(rxRing, irqStatus); + if (processedBds == 0) + return 0; + if (bd == nullptr) throw RuntimeError("Bd was null."); diff --git a/fpga/src/villas-fpga-ctrl.cpp b/fpga/src/villas-fpga-ctrl.cpp index a15112bb1..b6de02200 100644 --- a/fpga/src/villas-fpga-ctrl.cpp +++ b/fpga/src/villas-fpga-ctrl.cpp @@ -53,37 +53,38 @@ void readFromDmaToStdOut(std::shared_ptr dma) auto &mm = MemoryManager::get(); mm.getGraph().dump("graph.dot"); + + size_t cur = 0, next = 1; + std::ios::sync_with_stdio(false); + size_t samplecnt = 0; + static const char outputfmt[] = "%05zd: %7f\n"; + static const size_t outputfmtSize = 16; + char outputbuf[16][outputfmtSize] = {0}; + size_t bytesRead; + // Setup read transfer dma->read(*block[0], block[0]->getSize()); - size_t cur = 0, next = 1; + while (true) { + //logger->debug("Read from stream and write to address {:p}", *block[next]); dma->read(*block[next], block[next]->getSize()); - auto bytesRead = dma->readComplete(); - // Setup read transfer - - //auto valuesRead = bytesRead / sizeof(int32_t); - //logger->info("Read {} bytes", bytesRead); - - //for (size_t i = 0; i < valuesRead; i++) - // std::cerr << std::hex << mem[i] << ";"; - //std::cerr << std::endl; + bytesRead = dma->readComplete(); for (size_t i = 0; i*4 < bytesRead; i++) { int32_t ival = mem[cur][i]; float fval = *((float*)(&ival)); // cppcheck-suppress invalidPointerCast //std::cerr << std::hex << ival << ","; - std::cerr << fval << std::endl; - /*int64_t ival = (int64_t)(mem[1] & 0xFFFF) << 48 | - (int64_t)(mem[1] & 0xFFFF0000) << 16 | - (int64_t)(mem[0] & 0xFFFF) << 16 | - (int64_t)(mem[0] & 0xFFFF0000) >> 16; - double dval = *((double*)(&ival)); - std::cerr << std::hex << ival << "," << dval << std::endl; - bytesRead -= 8;*/ - //logger->info("Read value: {}", dval); + //std::cout << samplecnt++ << ": " << fval << '\n'; + if (std::snprintf(outputbuf[i], outputfmtSize+1, outputfmt, (samplecnt++%100000), fval) > (int)outputfmtSize) { + throw RuntimeError("Output buffer too small"); + } } + for (size_t i = 0; i < sizeof(outputbuf)/sizeof(outputbuf[0])-bytesRead/4; i++) { + outputbuf[i][0] = '\0'; + } + std::cout << *outputbuf << std::flush; cur = next; - next = (next + 1) % (sizeof(mem)/sizeof(mem[0])); + next = (next + 1) % (sizeof(mem) / sizeof(mem[0])); } }