// Mirror of https://git.rwth-aachen.de/acs/public/villas/node/
// Synced: 2025-03-09 00:00:00 +01:00
/* DMA driver
|
|
*
|
|
* Author: Niklas Eiling <niklas.eiling@eonerc.rwth-aachen.de>
|
|
* Author: Daniel Krebs <github@daniel-krebs.net>
|
|
* SPDX-FileCopyrightText: 2018-2024 Institute for Automation of Complex Power Systems, RWTH Aachen University
|
|
* SPDX-License-Identifier: Apache-2.0
|
|
*/
|
|
|
|
#include <sstream>
|
|
#include <string>
|
|
#include <sys/types.h>
|
|
|
|
#include <villas/fpga/card.hpp>
|
|
#include <villas/fpga/ips/dma.hpp>
|
|
#include <villas/fpga/ips/intc.hpp>
|
|
#include <villas/memory.hpp>
|
|
|
|
#include <xilinx/xaxidma.h>
|
|
#include <xilinx/xaxidma_bd.h>
|
|
#include <xilinx/xaxidma_hw.h>
|
|
|
|
// Max. size of a DMA transfer in simple mode
|
|
#define FPGA_DMA_BOUNDARY 0x1000
|
|
|
|
using namespace villas::fpga::ip;
|
|
|
|
bool Dma::init() {
|
|
// Check if configJson has been called
|
|
if (!configDone)
|
|
throw RuntimeError("DMA device not configured");
|
|
|
|
if (hasScatterGather())
|
|
logger->info("Scatter-Gather support: {}", hasScatterGather());
|
|
|
|
xConfig.BaseAddr = getBaseAddr(registerMemory);
|
|
|
|
hwLock.lock();
|
|
if (XAxiDma_CfgInitialize(&xDma, &xConfig) != XST_SUCCESS) {
|
|
logger->error("Cannot initialize Xilinx DMA driver");
|
|
return false;
|
|
}
|
|
|
|
if (XAxiDma_Selftest(&xDma) != XST_SUCCESS) {
|
|
logger->error("DMA selftest failed");
|
|
return false;
|
|
} else {
|
|
logger->debug("DMA selftest passed");
|
|
}
|
|
|
|
hwLock.unlock();
|
|
// Map buffer descriptors
|
|
if (hasScatterGather()) {
|
|
if (actualRingBdSize < readCoalesce || actualRingBdSize < writeCoalesce) {
|
|
throw RuntimeError(
|
|
"Ring buffer size is too small for coalesce value {} < {}",
|
|
actualRingBdSize, std::max(readCoalesce, writeCoalesce));
|
|
}
|
|
setupScatterGather();
|
|
}
|
|
|
|
if (!polling) {
|
|
irqs[mm2sInterrupt].irqController->enableInterrupt(irqs[mm2sInterrupt],
|
|
polling);
|
|
irqs[s2mmInterrupt].irqController->enableInterrupt(irqs[s2mmInterrupt],
|
|
polling);
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// Allocate host RAM for both BD rings (RX in the first half, TX in the
// second half of one block), map it so the DMA engine can reach it, and
// initialize both rings with their DMA-physical and process-virtual base
// addresses. Throws RuntimeError if the block cannot be mapped.
void Dma::setupScatterGather() {
  // Allocate and map space for BD ring in host RAM
  auto &alloc = villas::HostRam::getAllocator();
  sgRing = alloc.allocateBlock(2 * requestedRingBdSizeMemory);

  if (not card->mapMemoryBlock(sgRing))
    throw RuntimeError("Memory not accessible by DMA");

  auto &mm = MemoryManager::get();
  // Address of the ring as seen by the DMA's SG master interface
  auto trans = mm.getTranslation(busMasterInterfaces[sgInterface],
                                 sgRing->getAddrSpaceId());

  auto physAddr = reinterpret_cast<uintptr_t>(trans.getLocalAddr(0));
  // Same memory as seen from this process, for CPU-side BD manipulation
  auto virtAddr = reinterpret_cast<uintptr_t>(
      mm.getTranslationFromProcess(sgRing->getAddrSpaceId()).getLocalAddr(0));
  setupScatterGatherRingRx(physAddr, virtAddr);

  // The TX ring occupies the second half of the allocation
  setupScatterGatherRingTx(physAddr + requestedRingBdSizeMemory,
                           virtAddr + requestedRingBdSizeMemory);
}
|
|
|
|
void Dma::setupScatterGatherRingRx(uintptr_t physAddr, uintptr_t virtAddr) {
|
|
int ret;
|
|
|
|
hwLock.lock();
|
|
auto *rxRingPtr = XAxiDma_GetRxRing(&xDma);
|
|
|
|
// Disable all RX interrupts before RxBD space setup
|
|
XAxiDma_BdRingIntDisable(rxRingPtr, XAXIDMA_IRQ_ALL_MASK);
|
|
|
|
// Set delay and coalescing
|
|
XAxiDma_BdRingSetCoalesce(rxRingPtr, readCoalesce, delay);
|
|
|
|
// Setup Rx BD space
|
|
ret = XAxiDma_BdRingCreate(rxRingPtr, physAddr, virtAddr,
|
|
XAXIDMA_BD_MINIMUM_ALIGNMENT, actualRingBdSize);
|
|
if (ret != XST_SUCCESS)
|
|
throw RuntimeError("Failed to create RX ring: {}", ret);
|
|
|
|
// Setup an all-zero BD as the template for the Rx channel.
|
|
XAxiDma_Bd bdTemplate;
|
|
XAxiDma_BdClear(&bdTemplate);
|
|
|
|
ret = XAxiDma_BdRingClone(rxRingPtr, &bdTemplate);
|
|
if (ret != XST_SUCCESS)
|
|
throw RuntimeError("Failed to clone BD template: {}", ret);
|
|
|
|
if (cyclic) {
|
|
// Enable Cyclic DMA mode
|
|
XAxiDma_BdRingEnableCyclicDMA(rxRingPtr);
|
|
XAxiDma_SelectCyclicMode(&xDma, XAXIDMA_DEVICE_TO_DMA, 1);
|
|
}
|
|
// Enable completion interrupt
|
|
if (!polling) {
|
|
XAxiDma_IntrEnable(&xDma, XAXIDMA_IRQ_IOC_MASK, XAXIDMA_DEVICE_TO_DMA);
|
|
}
|
|
// Start the RX channel
|
|
ret = XAxiDma_BdRingStart(rxRingPtr);
|
|
if (ret != XST_SUCCESS)
|
|
throw RuntimeError("Failed to start TX ring: {}", ret);
|
|
|
|
hwLock.unlock();
|
|
}
|
|
|
|
// Initialize and start the TX (MM2S) BD ring located at physAddr/virtAddr.
// Mirrors setupScatterGatherRingRx() but for the transmit direction; no
// cyclic mode is configured here. Throws RuntimeError on driver failure.
void Dma::setupScatterGatherRingTx(uintptr_t physAddr, uintptr_t virtAddr) {
  int ret;

  hwLock.lock();
  auto *txRingPtr = XAxiDma_GetTxRing(&xDma);

  // Disable all TX interrupts before TxBD space setup
  XAxiDma_BdRingIntDisable(txRingPtr, XAXIDMA_IRQ_ALL_MASK);

  // Set TX delay and coalesce
  XAxiDma_BdRingSetCoalesce(txRingPtr, writeCoalesce, delay);

  // Setup TxBD space
  ret = XAxiDma_BdRingCreate(txRingPtr, physAddr, virtAddr,
                             XAXIDMA_BD_MINIMUM_ALIGNMENT, actualRingBdSize);
  if (ret != XST_SUCCESS)
    throw RuntimeError("Failed to create TX BD ring: {}", ret);

  // We create an all-zero BD as the template.
  XAxiDma_Bd BdTemplate;
  XAxiDma_BdClear(&BdTemplate);

  ret = XAxiDma_BdRingClone(txRingPtr, &BdTemplate);
  if (ret != XST_SUCCESS)
    throw RuntimeError("Failed to clone TX ring BD: {}", ret);

  // Enable completion interrupt
  if (!polling) {
    XAxiDma_IntrEnable(&xDma, XAXIDMA_IRQ_IOC_MASK, XAXIDMA_DMA_TO_DEVICE);
  }

  // Start the TX channel
  ret = XAxiDma_BdRingStart(txRingPtr);
  if (ret != XST_SUCCESS)
    throw RuntimeError("Failed to start TX ring: {}", ret);

  hwLock.unlock();
}
|
|
|
|
bool Dma::reset() {
|
|
if (xDma.RegBase == 0) {
|
|
return true;
|
|
}
|
|
|
|
hwLock.lock();
|
|
XAxiDma_IntrDisable(&xDma, XAXIDMA_IRQ_ALL_MASK, XAXIDMA_DMA_TO_DEVICE);
|
|
XAxiDma_IntrDisable(&xDma, XAXIDMA_IRQ_ALL_MASK, XAXIDMA_DEVICE_TO_DMA);
|
|
|
|
XAxiDma_Reset(&xDma);
|
|
|
|
// Value taken from libxil implementation
|
|
int timeout = 500;
|
|
|
|
while (timeout > 0) {
|
|
if (XAxiDma_ResetIsDone(&xDma)) {
|
|
logger->info("DMA has been reset.");
|
|
return true;
|
|
}
|
|
|
|
timeout--;
|
|
}
|
|
hwLock.unlock();
|
|
logger->error("DMA reset timed out");
|
|
|
|
return false;
|
|
}
|
|
|
|
// Tear down the DMA: free the per-ring cyclic BD arrays that the upstream
// Xilinx driver leaks, unmap the SG ring memory and reset the engine.
Dma::~Dma() {
  // Unmap memory in our ownership, MemoryBlock gets deleted and removed from
  // graph by this destructor as well.
  if (hasScatterGather()) {
    // Fix memory leak in upstream Xilinx code
    auto txRingPtr = XAxiDma_GetTxRing(&xDma);
    auto rxRingPtr = XAxiDma_GetRxRing(&xDma);
    if (txRingPtr) {
      // CyclicBd is malloc'd by the Xilinx driver but never freed by it
      free(txRingPtr->CyclicBd);
      txRingPtr->CyclicBd = nullptr;
    }
    if (rxRingPtr) {
      free(rxRingPtr->CyclicBd);
      rxRingPtr->CyclicBd = nullptr;
    }
    // Unmap SG memory Blocks
    if (sgRing) {
      card->unmapMemoryBlock(*sgRing);
    }
  }
  // Quiesce the hardware; safe even if init() never completed
  Dma::reset();
}
|
|
|
|
bool Dma::memcpy(const MemoryBlock &src, const MemoryBlock &dst, size_t len) {
|
|
if (len == 0)
|
|
return true;
|
|
|
|
if (not connectLoopback())
|
|
return false;
|
|
|
|
if (this->read(dst, len) == 0)
|
|
return false;
|
|
|
|
if (this->write(src, len) == 0)
|
|
return false;
|
|
|
|
if (not this->writeComplete().bds)
|
|
return false;
|
|
|
|
if (not this->readComplete().bds)
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
bool Dma::write(const MemoryBlock &mem, size_t len) {
|
|
if (len == 0)
|
|
return true;
|
|
|
|
if (len > FPGA_DMA_BOUNDARY)
|
|
return false;
|
|
|
|
auto &mm = MemoryManager::get();
|
|
|
|
// User has to make sure that memory is accessible, otherwise this will throw
|
|
auto trans = mm.getTranslation(busMasterInterfaces[mm2sInterface],
|
|
mem.getAddrSpaceId());
|
|
const void *buf = reinterpret_cast<void *>(trans.getLocalAddr(0));
|
|
if (buf == nullptr)
|
|
throw RuntimeError("Buffer was null");
|
|
|
|
logger->trace("Write to stream from address {:p}", buf);
|
|
|
|
return hasScatterGather() ? writeScatterGather(buf, len)
|
|
: writeSimple(buf, len);
|
|
}
|
|
|
|
bool Dma::read(const MemoryBlock &mem, size_t len) {
|
|
if (len == 0)
|
|
return true;
|
|
|
|
if (len > FPGA_DMA_BOUNDARY)
|
|
return false;
|
|
|
|
auto &mm = MemoryManager::get();
|
|
|
|
// User has to make sure that memory is accessible, otherwise this will throw
|
|
auto trans = mm.getTranslation(busMasterInterfaces[s2mmInterface],
|
|
mem.getAddrSpaceId());
|
|
void *buf = reinterpret_cast<void *>(trans.getLocalAddr(0));
|
|
if (buf == nullptr)
|
|
throw RuntimeError("Buffer was null");
|
|
|
|
return hasScatterGather() ? readScatterGather(buf, len)
|
|
: readSimple(buf, len);
|
|
}
|
|
|
|
// Reuse existing single BD bypassing BdRingFree, Alloc, ToHw
|
|
bool Dma::writeScatterGatherFast() {
|
|
hwLock.lock();
|
|
auto *txRing = XAxiDma_GetTxRing(&xDma);
|
|
if (txRing == nullptr) {
|
|
hwLock.unlock();
|
|
throw RuntimeError("RxRing was null.");
|
|
}
|
|
XAxiDma_Bd *CurBdPtr = txRing->HwHead;
|
|
|
|
// Clear the bit we are polling on in complete
|
|
uint32_t BdSts = XAxiDma_ReadReg((UINTPTR)CurBdPtr, XAXIDMA_BD_STS_OFFSET);
|
|
BdSts &= ~XAXIDMA_BD_STS_COMPLETE_MASK;
|
|
XAxiDma_BdWrite(CurBdPtr, XAXIDMA_BD_STS_OFFSET, BdSts);
|
|
|
|
uintptr_t tdesc = ((uintptr_t)txRing->HwTail +
|
|
(txRing->FirstBdPhysAddr - txRing->FirstBdAddr)) &
|
|
XAXIDMA_DESC_LSB_MASK;
|
|
XAxiDma_WriteReg(txRing->ChanBase, XAXIDMA_TDESC_OFFSET, tdesc);
|
|
|
|
hwLock.unlock();
|
|
return true;
|
|
}
|
|
|
|
bool Dma::writeScatterGatherPrepare(const MemoryBlock &mem, size_t len) {
|
|
|
|
auto &mm = MemoryManager::get();
|
|
|
|
// User has to make sure that memory is accessible, otherwise this will throw
|
|
auto trans = mm.getTranslation(busMasterInterfaces[mm2sInterface],
|
|
mem.getAddrSpaceId());
|
|
void *buf = reinterpret_cast<void *>(trans.getLocalAddr(0));
|
|
if (buf == nullptr) {
|
|
throw RuntimeError("Buffer was null");
|
|
}
|
|
hwLock.lock();
|
|
|
|
auto bd = writeScatterGatherSetupBd(buf, len);
|
|
|
|
hwLock.unlock();
|
|
|
|
return bd != nullptr;
|
|
}
|
|
|
|
// Allocate and fill a single TX BD for `buf`/`len`.
// Returns nullptr for len == 0 or len > FPGA_DMA_BOUNDARY; throws on any
// driver failure. Caller must hold hwLock; on throw, this function
// releases hwLock before propagating (callers rely on that).
XAxiDma_Bd *Dma::writeScatterGatherSetupBd(const void *buf, size_t len) {
  uint32_t ret = XST_FAILURE;
  if (len == 0)
    return nullptr;

  if (len > FPGA_DMA_BOUNDARY)
    return nullptr;

  auto *txRing = XAxiDma_GetTxRing(&xDma);
  if (txRing == nullptr) {
    hwLock.unlock();
    throw RuntimeError("TxRing was null.");
  }

  XAxiDma_Bd *bd;
  ret = XAxiDma_BdRingAlloc(txRing, 1, &bd);
  if (ret != XST_SUCCESS) {
    hwLock.unlock();
    throw RuntimeError("Write: BdRingAlloc returned {}.", ret);
  }

  // `buf` is already the address as seen by the DMA controller
  ret = XAxiDma_BdSetBufAddr(bd, (uintptr_t)buf);
  if (ret != XST_SUCCESS) {
    hwLock.unlock();
    throw RuntimeError("Setting BdBufAddr to {} returned {}.", buf, ret);
  }

  ret = XAxiDma_BdSetLength(bd, len, txRing->MaxTransferLen);
  if (ret != XST_SUCCESS) {
    hwLock.unlock();
    throw RuntimeError("Setting BdBufLength to {} returned {}.", len, ret);
  }

  // We have a single descriptor so it is both start and end of the list
  XAxiDma_BdSetCtrl(bd,
                    XAXIDMA_BD_CTRL_TXEOF_MASK | XAXIDMA_BD_CTRL_TXSOF_MASK);

  // TODO: Check if we really need this
  XAxiDma_BdSetId(bd, (uintptr_t)buf);
  return bd;
}
|
|
|
|
// Write a single message
|
|
bool Dma::writeScatterGather(const void *buf, size_t len) {
|
|
// buf is address from view of DMA controller
|
|
|
|
int ret = XST_FAILURE;
|
|
hwLock.lock();
|
|
auto *txRing = XAxiDma_GetTxRing(&xDma);
|
|
if (txRing == nullptr) {
|
|
hwLock.unlock();
|
|
throw RuntimeError("TxRing was null.");
|
|
}
|
|
|
|
XAxiDma_Bd *bd = writeScatterGatherSetupBd(buf, len);
|
|
// Give control of BD to HW. We should not access it until transfer is finished.
|
|
// Failure could also indicate that EOF is not set on last Bd
|
|
ret = XAxiDma_BdRingToHw(txRing, 1, bd);
|
|
if (ret != XST_SUCCESS) {
|
|
hwLock.unlock();
|
|
throw RuntimeError("Enqueuing Bd and giving control to HW failed {}", ret);
|
|
}
|
|
|
|
hwLock.unlock();
|
|
|
|
return true;
|
|
}
|
|
|
|
// Reuse existing single BD bypassing BdRingFree, Alloc, ToHw.
// Fast-path re-arm of the RX channel: clears the COMPLETE bit of the BD
// at HwHead and rewrites the tail-descriptor register so the previously
// prepared BD can receive again without ring bookkeeping.
// Requires readScatterGatherPrepare() to have set up the BD beforehand.
bool Dma::readScatterGatherFast() {
  hwLock.lock();
  auto *rxRing = XAxiDma_GetRxRing(&xDma);
  if (rxRing == nullptr) {
    hwLock.unlock();
    throw RuntimeError("RxRing was null.");
  }
  XAxiDma_Bd *CurBdPtr = rxRing->HwHead;
  // Poll BD status to avoid accessing PCIe address space
  uint32_t BdSts = XAxiDma_ReadReg((UINTPTR)CurBdPtr, XAXIDMA_BD_STS_OFFSET);

  // Clear the bit we are polling on in complete
  BdSts &= ~XAXIDMA_BD_STS_COMPLETE_MASK;
  XAxiDma_BdWrite(CurBdPtr, XAXIDMA_BD_STS_OFFSET, BdSts);

  // Translate the tail BD's virtual address to its DMA-physical address;
  // ChanBase already points at the RX channel's register block.
  uintptr_t tdesc = ((uintptr_t)rxRing->HwTail +
                     (rxRing->FirstBdPhysAddr - rxRing->FirstBdAddr)) &
                    XAXIDMA_DESC_LSB_MASK;
  XAxiDma_WriteReg(rxRing->ChanBase, XAXIDMA_TDESC_OFFSET, tdesc);

  hwLock.unlock();
  return true;
}
|
|
|
|
bool Dma::readScatterGatherPrepare(const MemoryBlock &mem, size_t len) {
|
|
|
|
auto &mm = MemoryManager::get();
|
|
|
|
// User has to make sure that memory is accessible, otherwise this will throw
|
|
auto trans = mm.getTranslation(busMasterInterfaces[s2mmInterface],
|
|
mem.getAddrSpaceId());
|
|
void *buf = reinterpret_cast<void *>(trans.getLocalAddr(0));
|
|
if (buf == nullptr) {
|
|
throw RuntimeError("Buffer was null");
|
|
}
|
|
hwLock.lock();
|
|
|
|
auto bd = readScatterGatherSetupBd(buf, len);
|
|
|
|
hwLock.unlock();
|
|
|
|
return bd != nullptr;
|
|
}
|
|
|
|
// Allocate and fill `readCoalesce` RX BDs, each receiving `readMsgSize`
// bytes into consecutive slices of `buf`. Returns the first BD of the
// chain, or nullptr for len == 0 or len > FPGA_DMA_BOUNDARY; throws on
// driver failure. Caller must hold hwLock; on throw, this function
// releases hwLock before propagating (callers rely on that).
XAxiDma_Bd *Dma::readScatterGatherSetupBd(void *buf, size_t len) {
  uint32_t ret = XST_FAILURE;
  if (len == 0)
    return nullptr;

  if (len > FPGA_DMA_BOUNDARY)
    return nullptr;

  auto *rxRing = XAxiDma_GetRxRing(&xDma);
  if (rxRing == nullptr) {
    hwLock.unlock();
    throw RuntimeError("RxRing was null.");
  }

  XAxiDma_Bd *bd;
  ret = XAxiDma_BdRingAlloc(rxRing, readCoalesce, &bd);
  if (ret != XST_SUCCESS) {
    hwLock.unlock();
    throw RuntimeError("Failed to alloc BD in RX ring: {}", ret);
  }

  auto curBd = bd;
  char *curBuf = (char *)buf;
  for (size_t i = 0; i < readCoalesce; i++) {
    ret = XAxiDma_BdSetBufAddr(curBd, (uintptr_t)curBuf);
    if (ret != XST_SUCCESS) {
      hwLock.unlock();
      throw RuntimeError("Failed to set buffer address {:x} on BD {:x}: {}",
                         (uintptr_t)buf, (uintptr_t)bd, ret);
    }

    ret = XAxiDma_BdSetLength(curBd, readMsgSize, rxRing->MaxTransferLen);
    if (ret != XST_SUCCESS) {
      hwLock.unlock();
      // Fix: message previously reported `len` although the length being
      // set is readMsgSize
      throw RuntimeError("Rx set length {} on BD {:x} failed {}", readMsgSize,
                         (uintptr_t)bd, ret);
    }

    // Receive BDs do not need to set anything for the control
    // The hardware will set the SOF/EOF bits per stream status
    XAxiDma_BdSetCtrl(curBd, 0);

    // TODO: Check if we really need this
    XAxiDma_BdSetId(curBd, (uintptr_t)buf);

    curBuf += readMsgSize;
    curBd = (XAxiDma_Bd *)XAxiDma_BdRingNext(rxRing, curBd);
  }
  return bd;
}
|
|
|
|
bool Dma::readScatterGather(void *buf, size_t len) {
|
|
uint32_t ret = XST_FAILURE;
|
|
|
|
if (len < readCoalesce * readMsgSize) {
|
|
throw RuntimeError(
|
|
"Read size is smaller than readCoalesce*msgSize. Cannot setup BDs.");
|
|
}
|
|
|
|
hwLock.lock();
|
|
auto *rxRing = XAxiDma_GetRxRing(&xDma);
|
|
if (rxRing == nullptr) {
|
|
hwLock.unlock();
|
|
throw RuntimeError("RxRing was null.");
|
|
}
|
|
|
|
XAxiDma_Bd *bd = readScatterGatherSetupBd(buf, len);
|
|
|
|
ret = XAxiDma_BdRingToHw(rxRing, readCoalesce, bd);
|
|
if (ret != XST_SUCCESS) {
|
|
hwLock.unlock();
|
|
throw RuntimeError("Failed to submit BD to RX ring: {}", ret);
|
|
}
|
|
|
|
hwLock.unlock();
|
|
return true;
|
|
}
|
|
|
|
// Busy-wait until the TX BD at HwHead has its COMPLETE bit set, then
// return the number of bytes actually transferred by that BD.
// `lock` controls whether this function takes hwLock itself; pass false
// when the caller already holds it.
size_t Dma::writeScatterGatherPoll(bool lock) {
  if (lock) {
    hwLock.lock();
  }
  auto txRing = XAxiDma_GetTxRing(&xDma);
  XAxiDma_Bd *CurBdPtr = txRing->HwHead;
  // volatile so the re-read in the loop is not optimized away
  volatile uint32_t BdSts;
  // Poll BD status to avoid accessing PCIe address space
  do {
    BdSts = XAxiDma_ReadReg((UINTPTR)CurBdPtr, XAXIDMA_BD_STS_OFFSET);
  } while (!(BdSts & XAXIDMA_BD_STS_COMPLETE_MASK));
  // At this point, we know that the transmission is complete, but we haven't accessed the
  // PCIe address space, yet. We know that we have received at least one BD.
  if (lock) {
    hwLock.unlock();
  }
  return XAxiDma_BdGetActualLength(CurBdPtr, XAXIDMA_MCHAN_MAX_TRANSFER_LEN);
}
|
|
|
|
// Wait for an in-flight TX (MM2S) batch to finish and reclaim its BDs.
// In polling mode, busy-waits on the BD status; otherwise blocks on the
// MM2S interrupt. Returns the number of completed BDs, bytes sent and
// interrupts observed. Throws after too many consecutive partial batches
// or if any BD reports an error status.
Dma::Completion Dma::writeCompleteScatterGather() {
  Completion c;
  XAxiDma_Bd *bd = nullptr, *curBd;
  auto txRing = XAxiDma_GetTxRing(&xDma);
  uint32_t ret = XST_FAILURE;
  static size_t errcnt = 32;

  uint32_t irqStatus = 0;
  if (polling) {
    hwLock.lock();
    writeScatterGatherPoll(false);
  } else {
    c.interrupts = irqs[mm2sInterrupt].irqController->waitForInterrupt(
        irqs[mm2sInterrupt].num);
    hwLock.lock();
  }

  if ((c.bds = XAxiDma_BdRingFromHw(txRing, writeCoalesce, &bd)) <
      writeCoalesce) {
    logger->warn("Send partial batch of {}/{} BDs.", c.bds, writeCoalesce);
    if (errcnt-- == 0) {
      hwLock.unlock();
      throw RuntimeError("too many partial batches");
    }
  } else {
    // Fix: reset the error budget after a full batch, matching
    // readCompleteScatterGather(). Previously 32 partial batches over the
    // object's whole lifetime would throw even with successes in between.
    errcnt = 32;
  }

  // Acknowledge the interrupt
  if (!polling) {
    irqStatus = XAxiDma_BdRingGetIrq(txRing);
    XAxiDma_BdRingAckIrq(txRing, irqStatus);
  }

  if (c.bds == 0) {
    c.bytes = 0;
    hwLock.unlock();
    return c;
  }

  if (bd == nullptr) {
    hwLock.unlock();
    throw RuntimeError("Bd was null.");
  }

  // Walk the completed BDs: check status and accumulate transferred bytes
  curBd = bd;
  for (size_t i = 0; i < c.bds; i++) {
    ret = XAxiDma_BdGetSts(curBd);
    if ((ret & XAXIDMA_BD_STS_ALL_ERR_MASK) ||
        (!(ret & XAXIDMA_BD_STS_COMPLETE_MASK))) {
      hwLock.unlock();
      throw RuntimeError("Write: Bd Status register shows error: {:#x}", ret);
    }

    c.bytes += XAxiDma_BdGetLength(bd, txRing->MaxTransferLen);
    curBd = (XAxiDma_Bd *)XAxiDma_BdRingNext(txRing, curBd);
  }

  // Return the BDs to the free pool for future transmissions
  ret = XAxiDma_BdRingFree(txRing, c.bds, bd);
  if (ret != XST_SUCCESS) {
    hwLock.unlock();
    throw RuntimeError("Failed to free {} TX BDs {}", c.bds, ret);
  }

  hwLock.unlock();
  return c;
}
|
|
|
|
// Busy-wait until the RX BD at HwHead has its COMPLETE bit set, then
// return the number of bytes actually received into that BD.
// `lock` controls whether this function takes hwLock itself; pass false
// when the caller already holds it.
size_t Dma::readScatterGatherPoll(bool lock) {
  if (lock) {
    hwLock.lock();
  }
  auto rxRing = XAxiDma_GetRxRing(&xDma);
  XAxiDma_Bd *CurBdPtr = rxRing->HwHead;
  // volatile so the re-read in the loop is not optimized away
  volatile uint32_t BdSts;
  // Poll BD status to avoid accessing PCIe address space
  do {
    BdSts = XAxiDma_ReadReg((UINTPTR)CurBdPtr, XAXIDMA_BD_STS_OFFSET);
  } while (!(BdSts & XAXIDMA_BD_STS_COMPLETE_MASK));
  // At this point, we know that the transmission is complete, but we haven't accessed the
  // PCIe address space, yet. We know that we have received at least one BD.
  if (lock) {
    hwLock.unlock();
  }
  return XAxiDma_BdGetActualLength(CurBdPtr, XAXIDMA_MCHAN_MAX_TRANSFER_LEN);
}
|
|
|
|
// Wait for an in-flight RX (S2MM) batch to finish and reclaim its BDs.
// In polling mode, busy-waits on the BD status; otherwise blocks on the
// S2MM interrupt. Returns the number of completed BDs, bytes received and
// interrupts observed. On interrupt error/timeout all hardware-owned BDs
// are reclaimed and an empty Completion is returned. Throws after too
// many consecutive partial batches or on a BD error status.
Dma::Completion Dma::readCompleteScatterGather() {
  Completion c;
  XAxiDma_Bd *bd = nullptr, *curBd;
  auto rxRing = XAxiDma_GetRxRing(&xDma);
  int ret = XST_FAILURE;
  static size_t errcnt = 32;

  ssize_t intrs = 0;
  uint32_t irqStatus = 0;
  if (polling) {
    hwLock.lock();
    readScatterGatherPoll(false);
    intrs = 1;
  } else {
    intrs = irqs[s2mmInterrupt].irqController->waitForInterrupt(
        irqs[s2mmInterrupt].num);
    hwLock.lock();
  }

  if (intrs < 0) {
    logger->warn("Interrupt error or timeout: {}", intrs);

    // Free all RX BDs for future transmission.
    int bds = XAxiDma_BdRingFromHw(rxRing, XAXIDMA_ALL_BDS, &bd);
    XAxiDma_BdRingFree(rxRing, bds, bd);
    hwLock.unlock();

    c.interrupts = 0;
    return c;
  } else {
    c.interrupts = intrs;
  }
  if (!polling) {
    irqStatus = XAxiDma_BdRingGetIrq(rxRing);
    XAxiDma_BdRingAckIrq(rxRing, irqStatus);
    if (!(irqStatus & XAXIDMA_IRQ_IOC_MASK)) {
      logger->error("Expected IOC interrupt but IRQ status is: {:#x}",
                    irqStatus);
      hwLock.unlock(); // Fix: previously returned while still holding hwLock
      return c;
    }
  }
  // Wait until the data has been received by the RX channel.
  if ((c.bds = XAxiDma_BdRingFromHw(rxRing, readCoalesce, &bd)) <
      readCoalesce) {
    logger->warn("Got partial batch of {}/{} BDs.", c.bds, readCoalesce);
    if (errcnt-- == 0) {
      hwLock.unlock();
      throw RuntimeError("too many partial batches");
    }
  } else {
    errcnt = 32;
  }

  if (c.bds == 0) {
    c.bytes = 0;
    hwLock.unlock();
    return c;
  }

  if (bd == nullptr) {
    hwLock.unlock();
    throw RuntimeError("Bd was null.");
  }

  // Walk the completed BDs: check status and accumulate received bytes
  curBd = bd;
  for (size_t i = 0; i < c.bds; i++) {
    ret = XAxiDma_BdGetSts(curBd);
    if ((ret & XAXIDMA_BD_STS_ALL_ERR_MASK) ||
        (!(ret & XAXIDMA_BD_STS_COMPLETE_MASK))) {
      hwLock.unlock();
      throw RuntimeError("Read: Bd Status register shows error: {}", ret);
    }

    c.bytes += XAxiDma_BdGetActualLength(bd, rxRing->MaxTransferLen);
    curBd = (XAxiDma_Bd *)XAxiDma_BdRingNext(rxRing, curBd);
  }

  // Free all processed RX BDs for future transmission.
  ret = XAxiDma_BdRingFree(rxRing, c.bds, bd);
  if (ret != XST_SUCCESS) {
    hwLock.unlock();
    // Fix: message previously said "TX BDs" although these are RX BDs
    throw RuntimeError("Failed to free {} RX BDs {}.", c.bds, ret);
  }

  hwLock.unlock();
  return c;
}
|
|
|
|
// Submit a simple-mode (non-SG) TX transfer of `len` bytes from `buf`.
// Returns false if the buffer violates the core's alignment requirement
// (no Data Realignment Engine) or the channel is busy; true once the
// transfer has been started. Completion is observed via writeCompleteSimple().
bool Dma::writeSimple(const void *buf, size_t len) {
  hwLock.lock();
  XAxiDma_BdRing *ring = XAxiDma_GetTxRing(&xDma);

  // Without DRE the source address must be aligned to the bus width
  // (or the micro-DMA minimum alignment in micro mode)
  if (not ring->HasDRE) {
    const uint32_t mask = xDma.MicroDmaMode ? XAXIDMA_MICROMODE_MIN_BUF_ALIGN
                                            : ring->DataWidth - 1;

    if (reinterpret_cast<uintptr_t>(buf) & mask) {
      hwLock.unlock();
      return false;
    }
  }

  const bool dmaChannelHalted =
      XAxiDma_ReadReg(ring->ChanBase, XAXIDMA_SR_OFFSET) & XAXIDMA_HALTED_MASK;

  const bool dmaToDeviceBusy = XAxiDma_Busy(&xDma, XAXIDMA_DMA_TO_DEVICE);

  // If the engine is doing a transfer, cannot submit
  if (not dmaChannelHalted and dmaToDeviceBusy) {
    hwLock.unlock();
    return false;
  }

  // Set lower 32 bit of source address
  XAxiDma_WriteReg(ring->ChanBase, XAXIDMA_SRCADDR_OFFSET,
                   LOWER_32_BITS(reinterpret_cast<uintptr_t>(buf)));

  // If necessary, set upper 32 bit of source address
  if (xDma.AddrWidth > 32)
    XAxiDma_WriteReg(ring->ChanBase, XAXIDMA_SRCADDR_MSB_OFFSET,
                     UPPER_32_BITS(reinterpret_cast<uintptr_t>(buf)));

  // Start DMA channel
  auto channelControl = XAxiDma_ReadReg(ring->ChanBase, XAXIDMA_CR_OFFSET);
  channelControl |= XAXIDMA_CR_RUNSTOP_MASK;
  XAxiDma_WriteReg(ring->ChanBase, XAXIDMA_CR_OFFSET, channelControl);

  // Writing the buffer-length register kicks off the transfer in simple mode
  XAxiDma_WriteReg(ring->ChanBase, XAXIDMA_BUFFLEN_OFFSET, len);
  hwLock.unlock();
  return true;
}
|
|
|
|
// Submit a simple-mode (non-SG) RX transfer of up to `len` bytes into `buf`.
// Returns false if the buffer violates the core's alignment requirement
// (no Data Realignment Engine) or the channel is busy; true once the
// transfer has been started. Completion is observed via readCompleteSimple().
bool Dma::readSimple(void *buf, size_t len) {
  hwLock.lock();
  XAxiDma_BdRing *ring = XAxiDma_GetRxRing(&xDma);

  // Without DRE the destination address must be aligned to the bus width
  // (or the micro-DMA minimum alignment in micro mode)
  if (not ring->HasDRE) {
    const uint32_t mask = xDma.MicroDmaMode ? XAXIDMA_MICROMODE_MIN_BUF_ALIGN
                                            : ring->DataWidth - 1;

    if (reinterpret_cast<uintptr_t>(buf) & mask) {
      hwLock.unlock();
      return false;
    }
  }

  const bool dmaChannelHalted =
      XAxiDma_ReadReg(ring->ChanBase, XAXIDMA_SR_OFFSET) & XAXIDMA_HALTED_MASK;
  const bool deviceToDmaBusy = XAxiDma_Busy(&xDma, XAXIDMA_DEVICE_TO_DMA);

  // If the engine is doing a transfer, cannot submit
  if (not dmaChannelHalted and deviceToDmaBusy) {
    hwLock.unlock();
    return false;
  }

  // Set lower 32 bit of destination address
  XAxiDma_WriteReg(ring->ChanBase, XAXIDMA_DESTADDR_OFFSET,
                   LOWER_32_BITS(reinterpret_cast<uintptr_t>(buf)));

  // If necessary, set upper 32 bit of destination address
  if (xDma.AddrWidth > 32)
    XAxiDma_WriteReg(ring->ChanBase, XAXIDMA_DESTADDR_MSB_OFFSET,
                     UPPER_32_BITS(reinterpret_cast<uintptr_t>(buf)));

  // Start DMA channel
  auto channelControl = XAxiDma_ReadReg(ring->ChanBase, XAXIDMA_CR_OFFSET);
  channelControl |= XAXIDMA_CR_RUNSTOP_MASK;
  XAxiDma_WriteReg(ring->ChanBase, XAXIDMA_CR_OFFSET, channelControl);

  // Writing the buffer-length register kicks off the transfer in simple mode
  XAxiDma_WriteReg(ring->ChanBase, XAXIDMA_BUFFLEN_OFFSET, len);

  hwLock.unlock();
  return true;
}
|
|
|
|
// Block until the simple-mode TX transfer completes (IOC bit set),
// acknowledge the interrupt and return the number of bytes transferred
// as reported by the buffer-length register.
Dma::Completion Dma::writeCompleteSimple() {
  Completion c;
  // Wait on the interrupt controller until the hardware flags completion
  while (!(XAxiDma_IntrGetIrq(&xDma, XAXIDMA_DMA_TO_DEVICE) &
           XAXIDMA_IRQ_IOC_MASK))
    c.interrupts = irqs[mm2sInterrupt].irqController->waitForInterrupt(
        irqs[mm2sInterrupt]);

  hwLock.lock();
  XAxiDma_IntrAckIrq(&xDma, XAXIDMA_IRQ_IOC_MASK, XAXIDMA_DMA_TO_DEVICE);

  // In simple mode the length register holds the transferred byte count
  const XAxiDma_BdRing *ring = XAxiDma_GetTxRing(&xDma);
  const size_t bytesWritten =
      XAxiDma_ReadReg(ring->ChanBase, XAXIDMA_BUFFLEN_OFFSET);
  hwLock.unlock();
  c.bytes = bytesWritten;
  return c;
}
|
|
|
|
// Block until the simple-mode RX transfer completes (IOC bit set),
// acknowledge the interrupt and return the number of bytes received
// as reported by the buffer-length register.
Dma::Completion Dma::readCompleteSimple() {
  Completion c;
  // Wait on the interrupt controller until the hardware flags completion
  while (!(XAxiDma_IntrGetIrq(&xDma, XAXIDMA_DEVICE_TO_DMA) &
           XAXIDMA_IRQ_IOC_MASK))
    c.interrupts = irqs[s2mmInterrupt].irqController->waitForInterrupt(
        irqs[s2mmInterrupt]);

  hwLock.lock();
  XAxiDma_IntrAckIrq(&xDma, XAXIDMA_IRQ_IOC_MASK, XAXIDMA_DEVICE_TO_DMA);

  // In simple mode the length register holds the received byte count
  const XAxiDma_BdRing *ring = XAxiDma_GetRxRing(&xDma);
  const size_t bytesRead =
      XAxiDma_ReadReg(ring->ChanBase, XAXIDMA_BUFFLEN_OFFSET);
  hwLock.unlock();

  c.bytes = bytesRead;
  return c;
}
|
|
|
|
void Dma::makeAccesibleFromVA(std::shared_ptr<MemoryBlock> mem) {
|
|
// Only symmetric mapping supported currently
|
|
if (isMemoryBlockAccesible(*mem, s2mmInterface) and
|
|
isMemoryBlockAccesible(*mem, mm2sInterface))
|
|
return;
|
|
|
|
// Try mapping via FPGA-card (VFIO)
|
|
if (not card->mapMemoryBlock(mem))
|
|
throw RuntimeError("Memory not accessible by DMA");
|
|
|
|
// Sanity-check if mapping worked, this shouldn't be neccessary
|
|
if (not isMemoryBlockAccesible(*mem, s2mmInterface) or
|
|
not isMemoryBlockAccesible(*mem, mm2sInterface))
|
|
throw RuntimeError(
|
|
"Mapping memory via card didn't work, but reported success?!");
|
|
}
|
|
|
|
bool Dma::isMemoryBlockAccesible(const MemoryBlock &mem,
|
|
const std::string &interface) {
|
|
auto &mm = MemoryManager::get();
|
|
|
|
try {
|
|
mm.findPath(getMasterAddrSpaceByInterface(interface), mem.getAddrSpaceId());
|
|
} catch (const std::out_of_range &) {
|
|
return false; // Not (yet) accessible
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
// Log the raw control/status registers of both channels for debugging.
// S2MM_LENGTH is only meaningful (and only printed) in simple mode.
void Dma::dump() {
  Core::dump();

  logger->info(
      "S2MM_DMACR: {:x}",
      XAxiDma_ReadReg(xDma.RegBase, XAXIDMA_RX_OFFSET + XAXIDMA_CR_OFFSET));
  logger->info(
      "S2MM_DMASR: {:x}",
      XAxiDma_ReadReg(xDma.RegBase, XAXIDMA_RX_OFFSET + XAXIDMA_SR_OFFSET));

  if (!hasScatterGather())
    logger->info("S2MM_LENGTH: {:x}",
                 XAxiDma_ReadReg(xDma.RegBase,
                                 XAXIDMA_RX_OFFSET + XAXIDMA_BUFFLEN_OFFSET));

  logger->info(
      "MM2S_DMACR: {:x}",
      XAxiDma_ReadReg(xDma.RegBase, XAXIDMA_TX_OFFSET + XAXIDMA_CR_OFFSET));
  logger->info(
      "MM2S_DMASR: {:x}",
      XAxiDma_ReadReg(xDma.RegBase, XAXIDMA_TX_OFFSET + XAXIDMA_SR_OFFSET));
}
|
|
|
|
// Populate the Dma instance's XAxiDma_Config from the IP's JSON
// "parameters" object (Vivado synthesis parameters). All keys are
// optional; sensible defaults are set first. Marks the instance as
// configured so Dma::init() will accept it.
// Throws ConfigError if the JSON does not match the expected schema.
void DmaFactory::parse(Core &ip, json_t *cfg) {
  NodeFactory::parse(ip, cfg);

  auto &dma = dynamic_cast<Dma &>(ip);

  // Polling mode is inherited from the card-wide setting
  dma.polling = dma.card->polling;
  // Sensible default configuration
  dma.xConfig.HasStsCntrlStrm = 0;
  dma.xConfig.HasMm2S = 1;
  dma.xConfig.HasMm2SDRE = 0;
  dma.xConfig.Mm2SDataWidth = 32;
  dma.xConfig.HasS2Mm = 1;
  dma.xConfig.HasS2MmDRE = 0;
  dma.xConfig.HasSg = 1;
  dma.xConfig.S2MmDataWidth = 32;
  dma.xConfig.Mm2sNumChannels = 1;
  dma.xConfig.S2MmNumChannels = 1;
  dma.xConfig.Mm2SBurstSize = 16;
  dma.xConfig.S2MmBurstSize = 16;
  dma.xConfig.MicroDmaMode = 0;
  dma.xConfig.AddrWidth = 32;
  dma.xConfig.SgLengthWidth = 14;

  // Every "s?: i" entry below is an optional integer; the key/pointer
  // pairs map Vivado parameter names onto the XAxiDma_Config fields.
  json_error_t err;
  int ret = json_unpack_ex(
      cfg, &err, 0,
      "{ s: { s?: i, s?: i, s?: i, s?: i, s?: i, s?: i, s?: i, s?: i, s?: i, "
      "s?: i, s?: i, s?: i, s?: i } }",
      "parameters", "c_sg_include_stscntrl_strm", &dma.xConfig.HasStsCntrlStrm,
      "c_include_mm2s", &dma.xConfig.HasMm2S, "c_include_mm2s_dre",
      &dma.xConfig.HasMm2SDRE, "c_m_axi_mm2s_data_width",
      &dma.xConfig.Mm2SDataWidth, "c_include_s2mm", &dma.xConfig.HasS2Mm,
      "c_include_s2mm_dre", &dma.xConfig.HasS2MmDRE, "c_m_axi_s2mm_data_width",
      &dma.xConfig.S2MmDataWidth, "c_include_sg", &dma.xConfig.HasSg,
      "c_num_mm2s_channels", &dma.xConfig.Mm2sNumChannels,
      "c_num_s2mm_channels", &dma.xConfig.S2MmNumChannels, "c_micro_dma",
      &dma.xConfig.MicroDmaMode, "c_addr_width", &dma.xConfig.AddrWidth,
      "c_sg_length_width", &dma.xConfig.SgLengthWidth);
  if (ret != 0)
    throw ConfigError(cfg, err, "", "Failed to parse DMA configuration");

  dma.configDone = true;
}
|
|
|
|
// Global instance: registers the DMA IP factory at static-initialization time
static DmaFactory f;