diff --git a/fpga/lib/ips/dma.cpp b/fpga/lib/ips/dma.cpp index 9c22a7e01..3442fdc18 100644 --- a/fpga/lib/ips/dma.cpp +++ b/fpga/lib/ips/dma.cpp @@ -57,11 +57,12 @@ bool Dma::init() { setupScatterGather(); } - irqs[mm2sInterrupt].irqController->enableInterrupt(irqs[mm2sInterrupt], - polling); - irqs[s2mmInterrupt].irqController->enableInterrupt(irqs[s2mmInterrupt], - polling); - + if (!polling) { + irqs[mm2sInterrupt].irqController->enableInterrupt(irqs[mm2sInterrupt], + polling); + irqs[s2mmInterrupt].irqController->enableInterrupt(irqs[s2mmInterrupt], + polling); + } return true; } @@ -341,9 +342,10 @@ bool Dma::writeScatterGather(const void *buf, size_t len) { bool Dma::readScatterGather(void *buf, size_t len) { int ret = XST_FAILURE; - if (len < readCoalesce * readMsgSize) + if (len < readCoalesce * readMsgSize) { throw RuntimeError( "Read size is smaller than readCoalesce*msgSize. Cannot setup BDs."); + } hwLock.lock(); auto *rxRing = XAxiDma_GetRxRing(&xDma); @@ -404,10 +406,29 @@ Dma::Completion Dma::writeCompleteScatterGather() { int ret = XST_FAILURE; static size_t errcnt = 32; - c.interrupts = irqs[mm2sInterrupt].irqController->waitForInterrupt( - irqs[mm2sInterrupt].num); + uint32_t irqStatus = 0; + if (polling) { + hwLock.lock(); + XAxiDma_Bd *CurBdPtr = txRing->HwHead; + volatile uint32_t BdSts; + // Poll BD status to avoid accessing PCIe address space + do { + BdSts = XAxiDma_ReadReg((UINTPTR)CurBdPtr, XAXIDMA_BD_STS_OFFSET); + } while (!(BdSts & XAXIDMA_BD_STS_COMPLETE_MASK)); + // At this point, we know that the transmission is complete, but we haven't accessed the + // PCIe address space, yet. The subsequent DMA Controller management can be done in a + // separate thread to keep latencies in this thread extremly low. We know that we have + // received one BD. + do { + // This takes 1.5 us + irqStatus = XAxiDma_BdRingGetIrq(txRing); + } while (!(irqStatus & XAXIDMA_IRQ_IOC_MASK)); + } else { + c.interrupts = irqs[mm2sInterrupt].irqController->waitForInterrupt( + irqs[mm2sInterrupt].num); + hwLock.lock(); + } - hwLock.lock(); if ((c.bds = XAxiDma_BdRingFromHw(txRing, writeCoalesce, &bd)) < writeCoalesce) { logger->warn("Send partial batch of {}/{} BDs.", c.bds, writeCoalesce); @@ -418,7 +439,9 @@ Dma::Completion Dma::writeCompleteScatterGather() { } // Acknowledge the interrupt - auto irqStatus = XAxiDma_BdRingGetIrq(txRing); + if (!polling) { + irqStatus = XAxiDma_BdRingGetIrq(txRing); + } XAxiDma_BdRingAckIrq(txRing, irqStatus); if (c.bds == 0) { @@ -462,8 +485,30 @@ Dma::Completion Dma::readCompleteScatterGather() { int ret = XST_FAILURE; static size_t errcnt = 32; - ssize_t intrs = irqs[s2mmInterrupt].irqController->waitForInterrupt( - irqs[s2mmInterrupt].num); + ssize_t intrs = 0; + uint32_t irqStatus = 0; + if (polling) { + hwLock.lock(); + XAxiDma_Bd *CurBdPtr = rxRing->HwHead; + volatile uint32_t BdSts; + // Poll BD status to avoid accessing PCIe address space + do { + BdSts = XAxiDma_ReadReg((UINTPTR)CurBdPtr, XAXIDMA_BD_STS_OFFSET); + } while (!(BdSts & XAXIDMA_BD_STS_COMPLETE_MASK)); + // At this point, we know that the transmission is complete, but we haven't accessed the + // PCIe address space, yet. The subsequent DMA Controller management can be done in a + // separate thread to keep latencies in this thread extremly low. We know that we have + // received one BD. + do { + // This takes 1.5 us + irqStatus = XAxiDma_BdRingGetIrq(rxRing); + } while (!(irqStatus & XAXIDMA_IRQ_IOC_MASK)); + intrs = 1; + } else { + intrs = irqs[s2mmInterrupt].irqController->waitForInterrupt( + irqs[s2mmInterrupt].num); + hwLock.lock(); + } if (intrs < 0) { logger->warn("Interrupt error or timeout: {}", intrs); @@ -471,14 +516,17 @@ Dma::Completion Dma::readCompleteScatterGather() { // Free all RX BDs for future transmission. int bds = XAxiDma_BdRingFromHw(rxRing, XAXIDMA_ALL_BDS, &bd); XAxiDma_BdRingFree(rxRing, bds, bd); + hwLock.unlock(); c.interrupts = 0; return c; } else { + hwLock.unlock(); c.interrupts = intrs; } - hwLock.lock(); - auto irqStatus = XAxiDma_BdRingGetIrq(rxRing); + if (!polling) { + irqStatus = XAxiDma_BdRingGetIrq(rxRing); + } XAxiDma_BdRingAckIrq(rxRing, irqStatus); if (!(irqStatus & XAXIDMA_IRQ_IOC_MASK)) { logger->error("Expected IOC interrupt but IRQ status is: {:#x}", irqStatus); diff --git a/lib/nodes/fpga.cpp b/lib/nodes/fpga.cpp index dabdc3314..e2c5bfa2e 100644 --- a/lib/nodes/fpga.cpp +++ b/lib/nodes/fpga.cpp @@ -169,12 +169,12 @@ int FpgaNode::check() { return 0; } int FpgaNode::start() { // enque first read - dma->read(*(blockRx[0]), blockRx[0]->getSize()); + // dma->read(*(blockRx[0]), blockRx[0]->getSize()); return Node::start(); } int FpgaNode::_read(Sample *smps[], unsigned cnt) { - static size_t cur = 0, next = 1; + static size_t cur = 0, next = 0; unsigned read; Sample *smp = smps[0]; @@ -199,8 +199,8 @@ int FpgaNode::_read(Sample *smps[], unsigned cnt) { smp->flags = (int)SampleFlags::HAS_DATA; smp->signals = in.signals; - cur = next; - next = (next + 1) % (sizeof(blockRx) / sizeof(blockRx[0])); + //cur = next; + //next = (next + 1) % (sizeof(blockRx) / sizeof(blockRx[0])); return 1; }