mirror of
https://git.rwth-aachen.de/acs/public/villas/node/
synced 2025-03-09 00:00:00 +01:00
wip GPU RTT
This commit is contained in:
parent
f7781d47af
commit
d853d5e0d3
10 changed files with 148 additions and 9 deletions
|
@ -71,8 +71,8 @@ public:
|
|||
const std::string& port,
|
||||
bool isMaster)
|
||||
{
|
||||
for(auto& [vertexId, vertex] : vertices) {
|
||||
(void) vertexId;
|
||||
for(auto& vertexEntry : vertices) {
|
||||
auto& vertex = vertexEntry.second;
|
||||
if(vertex->nodeName == node and vertex->portName == port and vertex->isMaster == isMaster)
|
||||
return vertex;
|
||||
}
|
||||
|
|
|
@ -28,10 +28,14 @@ public:
|
|||
getDefaultMasterPort() const
|
||||
{ return getMasterPort(rtdsOutputStreamPort); }
|
||||
|
||||
MemoryBlock
|
||||
getRegisterMemory() const
|
||||
{ return MemoryBlock(0, 1 << 10, getAddressSpaceId(registerMemory)); }
|
||||
|
||||
private:
|
||||
bool updateStatus();
|
||||
|
||||
private:
|
||||
public:
|
||||
static constexpr const char* rtdsOutputStreamPort = "rtds_output";
|
||||
|
||||
struct StatusControlRegister { uint32_t
|
||||
|
|
|
@ -92,7 +92,7 @@ protected:
|
|||
{ return { registerMemory }; }
|
||||
|
||||
|
||||
private:
|
||||
public:
|
||||
/* Register definitions */
|
||||
|
||||
static constexpr uintptr_t registerControlAddr = 0x00;
|
||||
|
|
|
@ -11,6 +11,19 @@ namespace villas {
|
|||
namespace fpga {
|
||||
namespace ip {
|
||||
|
||||
union ControlRegister {
|
||||
uint32_t value;
|
||||
struct { uint32_t
|
||||
ap_start : 1,
|
||||
ap_done : 1,
|
||||
ap_idle : 1,
|
||||
ap_ready : 1,
|
||||
_res1 : 3,
|
||||
auto_restart : 1,
|
||||
_res2 : 24;
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
class Rtds2Gpu : public IpNode, public Hls
|
||||
{
|
||||
|
|
|
@ -351,6 +351,13 @@ void Gpu::memcpyKernel(const MemoryBlock& src, const MemoryBlock& dst, size_t si
|
|||
cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
MemoryTranslation
|
||||
Gpu::translate(const MemoryBlock& dst)
|
||||
{
|
||||
auto& mm = MemoryManager::get();
|
||||
return mm.getTranslation(masterPciEAddrSpaceId, dst.getAddrSpaceId());
|
||||
}
|
||||
|
||||
|
||||
std::unique_ptr<villas::MemoryBlock, villas::MemoryBlock::deallocator_fn>
|
||||
GpuAllocator::allocateBlock(size_t size)
|
||||
|
|
|
@ -36,6 +36,9 @@ public:
|
|||
|
||||
void memcpyKernel(const MemoryBlock& src, const MemoryBlock& dst, size_t size);
|
||||
|
||||
MemoryTranslation
|
||||
translate(const MemoryBlock& dst);
|
||||
|
||||
private:
|
||||
bool registerIoMemory(const MemoryBlock& mem);
|
||||
bool registerHostMemory(const MemoryBlock& mem);
|
||||
|
|
|
@ -22,11 +22,10 @@ bool Rtds2Gpu::init()
|
|||
status.value = 0;
|
||||
started = false;
|
||||
|
||||
maxFrameSize = getMaxFrameSize();
|
||||
// maxFrameSize = getMaxFrameSize();
|
||||
maxFrameSize = 16;
|
||||
logger->info("Max. frame size supported: {}", maxFrameSize);
|
||||
|
||||
// maxFrameSize = 16;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -754,6 +754,7 @@ VfioGroup::attach(VfioContainer& container, int groupIndex)
|
|||
<< (container.isIommuEnabled() ? "" : "noiommu-")
|
||||
<< groupIndex;
|
||||
|
||||
logger->debug("path: {}", groupPath.str().c_str());
|
||||
group->fd = open(groupPath.str().c_str(), O_RDWR);
|
||||
if (group->fd < 0) {
|
||||
logger->error("Failed to open VFIO group {}", group->index);
|
||||
|
|
|
@ -11,7 +11,8 @@ set(SOURCES
|
|||
)
|
||||
|
||||
if(CMAKE_CUDA_COMPILER)
|
||||
list(APPEND SOURCES gpu.cpp)
|
||||
enable_language(CUDA)
|
||||
list(APPEND SOURCES gpu.cpp gpu_kernels.cu)
|
||||
endif()
|
||||
|
||||
add_executable(unit-tests ${SOURCES})
|
||||
|
|
|
@ -23,6 +23,8 @@
|
|||
|
||||
#include <criterion/criterion.h>
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include <villas/log.hpp>
|
||||
#include <villas/memory.hpp>
|
||||
#include <villas/fpga/card.hpp>
|
||||
|
@ -32,6 +34,7 @@
|
|||
#include <villas/fpga/ips/switch.hpp>
|
||||
#include <villas/fpga/ips/dma.hpp>
|
||||
#include <villas/fpga/ips/rtds.hpp>
|
||||
#include <villas/gpu.hpp>
|
||||
|
||||
#include "global.hpp"
|
||||
|
||||
|
@ -206,7 +209,7 @@ Test(fpga, rtds2gpu_rtt_cpu, .description = "Rtds2Gpu RTT via CPU")
|
|||
cr_assert(gpu2rtds->connect(rtds));
|
||||
|
||||
|
||||
for(size_t i = 1; i <= 10000; i++) {
|
||||
for(size_t i = 1; i <= 10000; ) {
|
||||
rtds2gpu->doorbellReset(*doorbell);
|
||||
rtds2gpu->startOnce(dmaRam.getMemoryBlock(), SAMPLE_COUNT, DATA_OFFSET * 4, DOORBELL_OFFSET * 4);
|
||||
|
||||
|
@ -233,6 +236,114 @@ Test(fpga, rtds2gpu_rtt_cpu, .description = "Rtds2Gpu RTT via CPU")
|
|||
}
|
||||
}
|
||||
|
||||
logger->info(TXT_GREEN("Passed"));
|
||||
}
|
||||
}
|
||||
|
||||
void gpu_rtds_rtt_start(volatile uint32_t* dataIn, volatile reg_doorbell_t* doorbellIn,
|
||||
volatile uint32_t* dataOut, volatile villas::fpga::ip::ControlRegister* controlRegister);
|
||||
|
||||
void gpu_rtds_rtt_stop();
|
||||
|
||||
Test(fpga, rtds2gpu_rtt_gpu, .description = "Rtds2Gpu RTT via GPU")
|
||||
{
|
||||
auto logger = loggerGetOrCreate("unittest:rtds2gpu");
|
||||
|
||||
/* Collect neccessary IPs */
|
||||
|
||||
auto gpu2rtds = dynamic_cast<villas::fpga::ip::Gpu2Rtds*>(
|
||||
state.cards.front()->lookupIp(villas::fpga::Vlnv("acs.eonerc.rwth-aachen.de:hls:gpu2rtds:")));
|
||||
|
||||
auto rtds2gpu = dynamic_cast<villas::fpga::ip::Rtds2Gpu*>(
|
||||
state.cards.front()->lookupIp(villas::fpga::Vlnv("acs.eonerc.rwth-aachen.de:hls:rtds2gpu:")));
|
||||
|
||||
cr_assert_not_null(gpu2rtds, "No Gpu2Rtds IP found");
|
||||
cr_assert_not_null(rtds2gpu, "No Rtds2Gpu IP not found");
|
||||
|
||||
villas::Plugin* plugin = villas::Plugin::lookup(villas::Plugin::Type::Gpu, "");
|
||||
auto gpuPlugin = dynamic_cast<villas::gpu::GpuFactory*>(plugin);
|
||||
cr_assert_not_null(gpuPlugin, "No GPU plugin found");
|
||||
|
||||
auto gpus = gpuPlugin->make();
|
||||
cr_assert(gpus.size() > 0, "No GPUs found");
|
||||
|
||||
// just get first cpu
|
||||
auto& gpu = gpus.front();
|
||||
|
||||
// allocate memory on GPU and make accessible by to PCIe/FPGA
|
||||
auto gpuRam = gpu->getAllocator().allocate<uint32_t>(SAMPLE_COUNT + 1);
|
||||
cr_assert(gpu->makeAccessibleToPCIeAndVA(gpuRam.getMemoryBlock()));
|
||||
|
||||
// make Gpu2Rtds IP register memory on FPGA accessible to GPU
|
||||
cr_assert(gpu->makeAccessibleFromPCIeOrHostRam(gpu2rtds->getRegisterMemory()));
|
||||
|
||||
auto tr = gpu->translate(gpuRam.getMemoryBlock());
|
||||
|
||||
auto dataIn = reinterpret_cast<uint32_t*>(tr.getLocalAddr(DATA_OFFSET * sizeof(uint32_t)));
|
||||
auto doorbellIn = reinterpret_cast<reg_doorbell_t*>(tr.getLocalAddr(DOORBELL_OFFSET * sizeof(uint32_t)));
|
||||
|
||||
|
||||
auto gpu2rtdsRegisters = gpu->translate(gpu2rtds->getRegisterMemory());
|
||||
|
||||
auto frameRegister = reinterpret_cast<uint32_t*>(gpu2rtdsRegisters.getLocalAddr(gpu2rtds->registerFrameOffset));
|
||||
auto controlRegister = reinterpret_cast<villas::fpga::ip::ControlRegister*>(gpu2rtdsRegisters.getLocalAddr(gpu2rtds->registerControlAddr));
|
||||
|
||||
// auto doorbellInCpu = reinterpret_cast<reg_doorbell_t*>(&gpuRam[DOORBELL_OFFSET]);
|
||||
|
||||
for(auto& ip : state.cards.front()->ips) {
|
||||
if(*ip != villas::fpga::Vlnv("acs.eonerc.rwth-aachen.de:user:rtds_axis:"))
|
||||
continue;
|
||||
|
||||
auto& rtds = dynamic_cast<villas::fpga::ip::Rtds&>(*ip);
|
||||
logger->info("Testing {}", rtds);
|
||||
|
||||
|
||||
// TEST: rtds loopback via switch, this should always work and have RTT=1
|
||||
//cr_assert(rtds.connect(rtds));
|
||||
//logger->info("loopback");
|
||||
//while(1);
|
||||
|
||||
cr_assert(rtds.connect(*rtds2gpu));
|
||||
cr_assert(gpu2rtds->connect(rtds));
|
||||
|
||||
// launch once so they are configured
|
||||
cr_assert(rtds2gpu->startOnce(gpuRam.getMemoryBlock(), SAMPLE_COUNT, DATA_OFFSET * 4, DOORBELL_OFFSET * 4));
|
||||
cr_assert(gpu2rtds->startOnce(SAMPLE_COUNT));
|
||||
|
||||
rtds2gpu->setAutoRestart(true);
|
||||
rtds2gpu->start();
|
||||
|
||||
logger->info("GPU RTT RTDS");
|
||||
|
||||
std::string dummy;
|
||||
|
||||
// logger->info("Press enter to proceed");
|
||||
// std::cin >> dummy;
|
||||
|
||||
gpu_rtds_rtt_start(dataIn, doorbellIn, frameRegister, controlRegister);
|
||||
|
||||
// while(1) {
|
||||
// cr_assert(rtds2gpu->startOnce(gpuRam.getMemoryBlock(), SAMPLE_COUNT, DATA_OFFSET * 4, DOORBELL_OFFSET * 4));
|
||||
// }
|
||||
|
||||
// for(int i = 0; i < 10000; i++) {
|
||||
// while(not doorbellInCpu->is_valid);
|
||||
// logger->debug("received data");
|
||||
// }
|
||||
|
||||
// logger->info("Press enter to cancel");
|
||||
// std::cin >> dummy;
|
||||
|
||||
while(1) {
|
||||
sleep(1);
|
||||
// logger->debug("Current sequence number: {}", doorbellInCpu->seq_nr);
|
||||
logger->debug("Still running");
|
||||
}
|
||||
|
||||
gpu_rtds_rtt_stop();
|
||||
|
||||
|
||||
|
||||
logger->info(TXT_GREEN("Passed"));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue