1
0
Fork 0
mirror of https://git.rwth-aachen.de/acs/public/villas/node/ synced 2025-03-09 00:00:00 +01:00

fpga: optimize sg descriptor rings

We now use a single memory block for both SG descriptor rings. This is
required so that the SG interface can benefit from a read cache.

Signed-off-by: Niklas Eiling <niklas.eiling@eonerc.rwth-aachen.de>
This commit is contained in:
Niklas Eiling 2024-03-20 14:57:16 +01:00 committed by pipeacosta
parent 5e1750e885
commit 69b5425c0c
2 changed files with 38 additions and 47 deletions

View file

@ -89,8 +89,8 @@ private:
Completion readCompleteSimple();
void setupScatterGather();
void setupScatterGatherRingRx();
void setupScatterGatherRingTx();
void setupScatterGatherRingRx(uintptr_t physAddr, uintptr_t virtAddr);
void setupScatterGatherRingTx(uintptr_t physAddr, uintptr_t virtAddr);
static constexpr char registerMemory[] = "Reg";
@ -115,6 +115,7 @@ private:
bool configDone = false;
// use polling to wait for DMA completion or interrupts via efds
bool polling = false;
bool cyclic = false;
// Timeout after which the DMA controller issues an interrupt if no data has been received
// Delay is 125 x <delay> x (clock period of SG clock). SG clock is 100 MHz by default.
int delay = 0;
@ -128,13 +129,12 @@ private:
// When using SG: ringBdSize is the maximum number of BDs usable in the ring
// Depending on alignment, the actual number of BDs usable can be smaller
static constexpr size_t requestedRingBdSize = 2048;
static constexpr size_t requestedRingBdSize = 1;
static constexpr size_t requestedRingBdSizeMemory =
requestedRingBdSize * sizeof(XAxiDma_Bd);
uint32_t actualRingBdSize = XAxiDma_BdRingCntCalc(
XAXIDMA_BD_MINIMUM_ALIGNMENT, requestedRingBdSizeMemory);
std::shared_ptr<MemoryBlock> sgRingTx;
std::shared_ptr<MemoryBlock> sgRingRx;
uint32_t actualRingBdSize = 1; //XAxiDma_BdRingCntCalc(
//XAXIDMA_BD_MINIMUM_ALIGNMENT, requestedRingBdSizeMemory);
std::shared_ptr<MemoryBlock> sgRing;
};
class DmaFactory : NodeFactory {

View file

@ -9,6 +9,7 @@
#include <sstream>
#include <string>
#include <sys/types.h>
#include <xilinx/xaxidma.h>
#include <villas/memory.hpp>
@ -48,10 +49,9 @@ bool Dma::init() {
hwLock.unlock();
// Map buffer descriptors
if (hasScatterGather()) {
if (actualRingBdSize < 2 * readCoalesce ||
actualRingBdSize < 2 * writeCoalesce) {
if (actualRingBdSize < readCoalesce || actualRingBdSize < writeCoalesce) {
throw RuntimeError(
"Ring buffer size is too small for coalesce value {} < 2*{}",
"Ring buffer size is too small for coalesce value {} < {}",
actualRingBdSize, std::max(readCoalesce, writeCoalesce));
}
setupScatterGather();
@ -67,11 +67,27 @@ bool Dma::init() {
}
void Dma::setupScatterGather() {
setupScatterGatherRingRx();
setupScatterGatherRingTx();
// Allocate and map space for BD ring in host RAM
auto &alloc = villas::HostRam::getAllocator();
sgRing = alloc.allocateBlock(2 * requestedRingBdSizeMemory);
if (not card->mapMemoryBlock(sgRing))
throw RuntimeError("Memory not accessible by DMA");
auto &mm = MemoryManager::get();
auto trans = mm.getTranslation(busMasterInterfaces[sgInterface],
sgRing->getAddrSpaceId());
auto physAddr = reinterpret_cast<uintptr_t>(trans.getLocalAddr(0));
auto virtAddr = reinterpret_cast<uintptr_t>(
mm.getTranslationFromProcess(sgRing->getAddrSpaceId()).getLocalAddr(0));
setupScatterGatherRingRx(physAddr, virtAddr);
setupScatterGatherRingTx(physAddr + requestedRingBdSizeMemory,
virtAddr + requestedRingBdSizeMemory);
}
void Dma::setupScatterGatherRingRx() {
void Dma::setupScatterGatherRingRx(uintptr_t physAddr, uintptr_t virtAddr) {
int ret;
hwLock.lock();
@ -83,20 +99,6 @@ void Dma::setupScatterGatherRingRx() {
// Set delay and coalescing
XAxiDma_BdRingSetCoalesce(rxRingPtr, readCoalesce, delay);
// Allocate and map space for BD ring in host RAM
auto &alloc = villas::HostRam::getAllocator();
sgRingRx = alloc.allocateBlock(requestedRingBdSizeMemory);
if (not card->mapMemoryBlock(sgRingRx))
throw RuntimeError("Memory not accessible by DMA");
auto &mm = MemoryManager::get();
auto trans = mm.getTranslation(busMasterInterfaces[sgInterface],
sgRingRx->getAddrSpaceId());
auto physAddr = reinterpret_cast<uintptr_t>(trans.getLocalAddr(0));
auto virtAddr = reinterpret_cast<uintptr_t>(
mm.getTranslationFromProcess(sgRingRx->getAddrSpaceId()).getLocalAddr(0));
// Setup Rx BD space
ret = XAxiDma_BdRingCreate(rxRingPtr, physAddr, virtAddr,
XAXIDMA_BD_MINIMUM_ALIGNMENT, actualRingBdSize);
@ -111,6 +113,11 @@ void Dma::setupScatterGatherRingRx() {
if (ret != XST_SUCCESS)
throw RuntimeError("Failed to clone BD template: {}", ret);
if (cyclic) {
/* Enable Cyclic DMA mode */
XAxiDma_BdRingEnableCyclicDMA(rxRingPtr);
XAxiDma_SelectCyclicMode(&xDma, XAXIDMA_DEVICE_TO_DMA, 1);
}
// Enable completion interrupt
XAxiDma_IntrEnable(&xDma, XAXIDMA_IRQ_IOC_MASK, XAXIDMA_DEVICE_TO_DMA);
// Start the RX channel
@ -121,7 +128,7 @@ void Dma::setupScatterGatherRingRx() {
hwLock.unlock();
}
void Dma::setupScatterGatherRingTx() {
void Dma::setupScatterGatherRingTx(uintptr_t physAddr, uintptr_t virtAddr) {
int ret;
hwLock.lock();
@ -133,20 +140,6 @@ void Dma::setupScatterGatherRingTx() {
// Set TX delay and coalesce
XAxiDma_BdRingSetCoalesce(txRingPtr, writeCoalesce, delay);
// Allocate and map space for BD ring in host RAM
auto &alloc = villas::HostRam::getAllocator();
sgRingTx = alloc.allocateBlock(requestedRingBdSizeMemory);
if (not card->mapMemoryBlock(sgRingTx))
throw RuntimeError("Memory not accessible by DMA");
auto &mm = MemoryManager::get();
auto trans = mm.getTranslation(busMasterInterfaces[sgInterface],
sgRingTx->getAddrSpaceId());
auto physAddr = reinterpret_cast<uintptr_t>(trans.getLocalAddr(0));
auto virtAddr = reinterpret_cast<uintptr_t>(
mm.getTranslationFromProcess(sgRingTx->getAddrSpaceId()).getLocalAddr(0));
// Setup TxBD space
ret = XAxiDma_BdRingCreate(txRingPtr, physAddr, virtAddr,
XAXIDMA_BD_MINIMUM_ALIGNMENT, actualRingBdSize);
@ -163,6 +156,7 @@ void Dma::setupScatterGatherRingTx() {
// Enable completion interrupt
XAxiDma_IntrEnable(&xDma, XAXIDMA_IRQ_IOC_MASK, XAXIDMA_DMA_TO_DEVICE);
// Start the TX channel
ret = XAxiDma_BdRingStart(txRingPtr);
if (ret != XST_SUCCESS)
@ -214,11 +208,8 @@ Dma::~Dma() {
rxRingPtr->CyclicBd = nullptr;
}
// Unmap SG memory blocks
if (sgRingTx) {
card->unmapMemoryBlock(*sgRingTx);
}
if (sgRingRx) {
card->unmapMemoryBlock(*sgRingRx);
if (sgRing) {
card->unmapMemoryBlock(*sgRing);
}
}
Dma::reset();