From e6f035cd3119b0a701c7f3867bfc4df17ac22ae6 Mon Sep 17 00:00:00 2001 From: Niklas Eiling Date: Thu, 12 Jan 2023 12:11:28 +0100 Subject: [PATCH] add basic thread-safety to ips/dma Signed-off-by: Niklas Eiling --- fpga/include/villas/fpga/ips/dma.hpp | 1 + fpga/lib/ips/dma.cpp | 127 +++++++++++++++++++++------ fpga/src/villas-fpga-ctrl.cpp | 1 - 3 files changed, 101 insertions(+), 28 deletions(-) diff --git a/fpga/include/villas/fpga/ips/dma.hpp b/fpga/include/villas/fpga/ips/dma.hpp index 5740db13d..6e788e1d3 100644 --- a/fpga/include/villas/fpga/ips/dma.hpp +++ b/fpga/include/villas/fpga/ips/dma.hpp @@ -119,6 +119,7 @@ private: XAxiDma xDma; XAxiDma_Config xConfig; + std::mutex hwLock; bool configDone = false; // use polling to wait for DMA completion or interrupts via efds diff --git a/fpga/lib/ips/dma.cpp b/fpga/lib/ips/dma.cpp index b17d24901..378145f47 100644 --- a/fpga/lib/ips/dma.cpp +++ b/fpga/lib/ips/dma.cpp @@ -33,6 +33,7 @@ bool Dma::init() xConfig.BaseAddr = getBaseAddr(registerMemory); + hwLock.lock(); if (XAxiDma_CfgInitialize(&xDma, &xConfig) != XST_SUCCESS) { logger->error("Cannot initialize Xilinx DMA driver"); return false; @@ -45,6 +46,7 @@ bool Dma::init() else logger->debug("DMA selftest passed"); + hwLock.unlock(); // Map buffer descriptors if (hasScatterGather()) { if (actualRingBdSize < 2*readCoalesce || actualRingBdSize < 2*writeCoalesce) { @@ -54,8 +56,6 @@ bool Dma::init() setupScatterGather(); } - - irqs[mm2sInterrupt].irqController->enableInterrupt(irqs[mm2sInterrupt], polling); irqs[s2mmInterrupt].irqController->enableInterrupt(irqs[s2mmInterrupt], polling); @@ -72,6 +72,7 @@ void Dma::setupScatterGatherRingRx() { int ret; + hwLock.lock(); auto *rxRingPtr = XAxiDma_GetRxRing(&xDma); // Disable all RX interrupts before RxBD space setup @@ -111,12 +112,15 @@ void Dma::setupScatterGatherRingRx() ret = XAxiDma_BdRingStart(rxRingPtr); if (ret != XST_SUCCESS) throw RuntimeError("Failed to start TX ring: {}", ret); + + hwLock.unlock(); } 
void Dma::setupScatterGatherRingTx() { int ret; + hwLock.lock(); auto *txRingPtr = XAxiDma_GetTxRing(&xDma); // Disable all TX interrupts before TxBD space setup @@ -156,6 +160,7 @@ void Dma::setupScatterGatherRingTx() ret = XAxiDma_BdRingStart(txRingPtr); if (ret != XST_SUCCESS) throw RuntimeError("Failed to start TX ring: {}", ret); + hwLock.unlock(); } bool Dma::reset() @@ -164,8 +169,10 @@ bool Dma::reset() return true; } + hwLock.lock(); XAxiDma_IntrDisable(&xDma, XAXIDMA_IRQ_ALL_MASK, XAXIDMA_DMA_TO_DEVICE); XAxiDma_IntrDisable(&xDma, XAXIDMA_IRQ_ALL_MASK, XAXIDMA_DEVICE_TO_DMA); + XAxiDma_Reset(&xDma); @@ -180,6 +187,7 @@ bool Dma::reset() timeout--; } + hwLock.unlock(); logger->error("DMA reset timed out"); return false; @@ -265,23 +273,31 @@ bool Dma::writeScatterGather(const void *buf, size_t len) // buf is address from view of DMA controller int ret = XST_FAILURE; - + hwLock.lock(); auto *txRing = XAxiDma_GetTxRing(&xDma); - if (txRing == nullptr) + if (txRing == nullptr) { + hwLock.unlock(); throw RuntimeError("TxRing was null."); + } XAxiDma_Bd *bd; ret = XAxiDma_BdRingAlloc(txRing, 1, &bd); - if (ret != XST_SUCCESS) + if (ret != XST_SUCCESS) { + hwLock.unlock(); throw RuntimeError("BdRingAlloc returned {}.", ret); + } ret = XAxiDma_BdSetBufAddr(bd, (uintptr_t) buf); - if (ret != XST_SUCCESS) + if (ret != XST_SUCCESS) { + hwLock.unlock(); throw RuntimeError("Setting BdBufAddr to {} returned {}.", buf, ret); + } ret = XAxiDma_BdSetLength(bd, len, txRing->MaxTransferLen); - if (ret != XST_SUCCESS) + if (ret != XST_SUCCESS) { + hwLock.unlock(); throw RuntimeError("Setting BdBufLength to {} returned {}.", len, ret); + } // We have a single descriptor so it is both start and end of the list XAxiDma_BdSetCtrl(bd, XAXIDMA_BD_CTRL_TXEOF_MASK | XAXIDMA_BD_CTRL_TXSOF_MASK); @@ -292,8 +308,12 @@ bool Dma::writeScatterGather(const void *buf, size_t len) // Give control of BD to HW. We should not access it until transfer is finished. 
// Failure could also indicate that EOF is not set on last Bd ret = XAxiDma_BdRingToHw(txRing, 1, bd); - if (ret != XST_SUCCESS) + if (ret != XST_SUCCESS) { + hwLock.unlock(); throw RuntimeError("Enqueuing Bd and giving control to HW failed {}", ret); + } + + hwLock.unlock(); return true; } @@ -305,25 +325,34 @@ bool Dma::readScatterGather(void *buf, size_t len) if (len < readCoalesce*readMsgSize) throw RuntimeError("Read size is smaller than readCoalesce*msgSize. Cannot setup BDs."); + hwLock.lock(); auto *rxRing = XAxiDma_GetRxRing(&xDma); - if (rxRing == nullptr) + if (rxRing == nullptr) { + hwLock.unlock(); throw RuntimeError("RxRing was null."); + } XAxiDma_Bd *bd; ret = XAxiDma_BdRingAlloc(rxRing, readCoalesce, &bd); - if (ret != XST_SUCCESS) + if (ret != XST_SUCCESS) { + hwLock.unlock(); throw RuntimeError("Failed to alloc BD in RX ring: {}", ret); + } auto curBd = bd; char* curBuf = (char*)buf; for (size_t i = 0; i < readCoalesce; i++) { ret = XAxiDma_BdSetBufAddr(curBd, (uintptr_t) curBuf); - if (ret != XST_SUCCESS) + if (ret != XST_SUCCESS) { + hwLock.unlock(); throw RuntimeError("Failed to set buffer address {:x} on BD {:x}: {}", (uintptr_t) buf, (uintptr_t) bd, ret); + } ret = XAxiDma_BdSetLength(curBd, readMsgSize, rxRing->MaxTransferLen); - if (ret != XST_SUCCESS) + if (ret != XST_SUCCESS) { + hwLock.unlock(); throw RuntimeError("Rx set length {} on BD {:x} failed {}", len, (uintptr_t) bd, ret); + } // Receive BDs do not need to set anything for the control @@ -338,9 +367,12 @@ bool Dma::readScatterGather(void *buf, size_t len) } ret = XAxiDma_BdRingToHw(rxRing, readCoalesce, bd); - if (ret != XST_SUCCESS) + if (ret != XST_SUCCESS) { + hwLock.unlock(); throw RuntimeError("Failed to submit BD to RX ring: {}", ret); + } + hwLock.unlock(); return true; } @@ -350,25 +382,40 @@ Dma::Completion Dma::writeCompleteScatterGather() XAxiDma_Bd *bd = nullptr, *curBd; auto txRing = XAxiDma_GetTxRing(&xDma); int ret = XST_FAILURE; + static size_t errcnt = 32; - if 
((c.bds = XAxiDma_BdRingFromHw(txRing, 1, &bd)) == 0) + c.interrupts = irqs[mm2sInterrupt].irqController->waitForInterrupt(irqs[mm2sInterrupt].num); + + hwLock.lock(); + if ((c.bds = XAxiDma_BdRingFromHw(txRing, writeCoalesce, &bd)) < writeCoalesce) { - c.interrupts = irqs[mm2sInterrupt].irqController->waitForInterrupt(irqs[mm2sInterrupt].num); - //logger->info("Got {} interrupts (id: {}) from write channel", intrNum, irqs[mm2sInterrupt].num); - c.bds = XAxiDma_BdRingFromHw(txRing, 1, &bd); + logger->warn("Sent partial batch of {}/{} BDs.", c.bds, writeCoalesce); + if(errcnt-- == 0) { + hwLock.unlock(); + throw RuntimeError("too many partial batches"); + } } // Acknowledge the interrupt auto irqStatus = XAxiDma_BdRingGetIrq(txRing); XAxiDma_BdRingAckIrq(txRing, irqStatus); - if (bd == nullptr) + if (c.bds == 0) { + c.bytes = 0; + hwLock.unlock(); + return c; + } + + if (bd == nullptr) { + hwLock.unlock(); throw RuntimeError("Bd was null."); + } curBd = bd; for (size_t i = 0; i < c.bds; i++) { ret = XAxiDma_BdGetSts(curBd); if ((ret & XAXIDMA_BD_STS_ALL_ERR_MASK) || (!(ret & XAXIDMA_BD_STS_COMPLETE_MASK))) { + hwLock.unlock(); throw RuntimeError("Bd Status register shows error: {}", ret); break; } @@ -378,9 +425,12 @@ Dma::Completion Dma::writeCompleteScatterGather() } ret = XAxiDma_BdRingFree(txRing, c.bds, bd); - if (ret != XST_SUCCESS) + if (ret != XST_SUCCESS) { + hwLock.unlock(); throw RuntimeError("Failed to free {} TX BDs {}", c.bds, ret); + } + + hwLock.unlock(); return c; } @@ -394,11 +444,13 @@ Dma::Completion Dma::readCompleteScatterGather() c.interrupts = irqs[s2mmInterrupt].irqController->waitForInterrupt(irqs[s2mmInterrupt].num); + hwLock.lock(); // Wait until the data has been received by the RX channel.
if ((c.bds = XAxiDma_BdRingFromHw(rxRing, readCoalesce, &bd)) < readCoalesce) { logger->warn("Got partial batch of {}/{} BDs.", c.bds, readCoalesce); if(errcnt-- == 0) { + hwLock.unlock(); throw RuntimeError("too many partial batches"); } } @@ -409,16 +461,20 @@ Dma::Completion Dma::readCompleteScatterGather() if (c.bds == 0) { c.bytes = 0; + hwLock.unlock(); return c; } - if (bd == nullptr) + if (bd == nullptr) { + hwLock.unlock(); throw RuntimeError("Bd was null."); + } curBd = bd; for (size_t i = 0; i < c.bds; i++) { ret = XAxiDma_BdGetSts(curBd); if ((ret & XAXIDMA_BD_STS_ALL_ERR_MASK) || (!(ret & XAXIDMA_BD_STS_COMPLETE_MASK))) { + hwLock.unlock(); throw RuntimeError("Bd Status register shows error: {}", ret); break; } @@ -429,14 +485,18 @@ Dma::Completion Dma::readCompleteScatterGather() // Free all processed RX BDs for future transmission. ret = XAxiDma_BdRingFree(rxRing, c.bds, bd); - if (ret != XST_SUCCESS) + if (ret != XST_SUCCESS) { + hwLock.unlock(); throw RuntimeError("Failed to free {} TX BDs {}.", c.bds, ret); + } + hwLock.unlock(); return c; } bool Dma::writeSimple(const void *buf, size_t len) { + hwLock.lock(); XAxiDma_BdRing *ring = XAxiDma_GetTxRing(&xDma); if (not ring->HasDRE) { @@ -444,8 +504,10 @@ bool Dma::writeSimple(const void *buf, size_t len) ? 
XAXIDMA_MICROMODE_MIN_BUF_ALIGN : ring->DataWidth - 1; - if (reinterpret_cast(buf) & mask) + if (reinterpret_cast(buf) & mask) { + hwLock.unlock(); return false; + } } const bool dmaChannelHalted = @@ -454,8 +516,10 @@ bool Dma::writeSimple(const void *buf, size_t len) const bool dmaToDeviceBusy = XAxiDma_Busy(&xDma, XAXIDMA_DMA_TO_DEVICE); // If the engine is doing a transfer, cannot submit - if (not dmaChannelHalted and dmaToDeviceBusy) + if (not dmaChannelHalted and dmaToDeviceBusy) { + hwLock.unlock(); return false; + } // Set lower 32 bit of source address XAxiDma_WriteReg(ring->ChanBase, XAXIDMA_SRCADDR_OFFSET, @@ -473,12 +537,13 @@ bool Dma::writeSimple(const void *buf, size_t len) // Set tail descriptor pointer XAxiDma_WriteReg(ring->ChanBase, XAXIDMA_BUFFLEN_OFFSET, len); - + hwLock.unlock(); return true; } bool Dma::readSimple(void *buf, size_t len) { + hwLock.lock(); XAxiDma_BdRing *ring = XAxiDma_GetRxRing(&xDma); if (not ring->HasDRE) { @@ -486,16 +551,20 @@ bool Dma::readSimple(void *buf, size_t len) ? 
XAXIDMA_MICROMODE_MIN_BUF_ALIGN : ring->DataWidth - 1; - if (reinterpret_cast(buf) & mask) + if (reinterpret_cast(buf) & mask) { + hwLock.unlock(); return false; + } } const bool dmaChannelHalted = XAxiDma_ReadReg(ring->ChanBase, XAXIDMA_SR_OFFSET) & XAXIDMA_HALTED_MASK; const bool deviceToDmaBusy = XAxiDma_Busy(&xDma, XAXIDMA_DEVICE_TO_DMA); // If the engine is doing a transfer, cannot submit - if (not dmaChannelHalted and deviceToDmaBusy) + if (not dmaChannelHalted and deviceToDmaBusy) { + hwLock.unlock(); return false; + } // Set lower 32 bit of destination address XAxiDma_WriteReg(ring->ChanBase, XAXIDMA_DESTADDR_OFFSET, LOWER_32_BITS(reinterpret_cast(buf))); @@ -512,6 +581,7 @@ bool Dma::readSimple(void *buf, size_t len) // Set tail descriptor pointer XAxiDma_WriteReg(ring->ChanBase, XAXIDMA_BUFFLEN_OFFSET, len); + hwLock.unlock(); return true; } @@ -521,11 +591,12 @@ Dma::Completion Dma::writeCompleteSimple() while (!(XAxiDma_IntrGetIrq(&xDma, XAXIDMA_DMA_TO_DEVICE) & XAXIDMA_IRQ_IOC_MASK)) c.interrupts = irqs[mm2sInterrupt].irqController->waitForInterrupt(irqs[mm2sInterrupt]); + hwLock.lock(); XAxiDma_IntrAckIrq(&xDma, XAXIDMA_IRQ_IOC_MASK, XAXIDMA_DMA_TO_DEVICE); const XAxiDma_BdRing *ring = XAxiDma_GetTxRing(&xDma); const size_t bytesWritten = XAxiDma_ReadReg(ring->ChanBase, XAXIDMA_BUFFLEN_OFFSET); - + hwLock.unlock(); c.bytes = bytesWritten; return c; } @@ -536,10 +607,12 @@ Dma::Completion Dma::readCompleteSimple() while (!(XAxiDma_IntrGetIrq(&xDma, XAXIDMA_DEVICE_TO_DMA) & XAXIDMA_IRQ_IOC_MASK)) c.interrupts = irqs[s2mmInterrupt].irqController->waitForInterrupt(irqs[s2mmInterrupt]); + hwLock.lock(); XAxiDma_IntrAckIrq(&xDma, XAXIDMA_IRQ_IOC_MASK, XAXIDMA_DEVICE_TO_DMA); const XAxiDma_BdRing *ring = XAxiDma_GetRxRing(&xDma); const size_t bytesRead = XAxiDma_ReadReg(ring->ChanBase, XAXIDMA_BUFFLEN_OFFSET); + hwLock.unlock(); c.bytes = bytesRead; return c; diff --git a/fpga/src/villas-fpga-ctrl.cpp b/fpga/src/villas-fpga-ctrl.cpp index 
07fcf7dc8..6e8ec20c4 100644 --- a/fpga/src/villas-fpga-ctrl.cpp +++ b/fpga/src/villas-fpga-ctrl.cpp @@ -51,7 +51,6 @@ void readFromDmaToStdOut(std::shared_ptr dma, dma->makeAccesibleFromVA(b); } - size_t cur = 0, next = 1; std::ios::sync_with_stdio(false);