From d853d5e0d3accacf99dfab768501cde9d0932c78 Mon Sep 17 00:00:00 2001 From: Daniel Krebs Date: Wed, 6 Jun 2018 09:55:14 +0200 Subject: [PATCH] wip GPU RTT --- fpga/include/villas/fpga/ip_node.hpp | 4 +- fpga/include/villas/fpga/ips/gpu2rtds.hpp | 6 +- fpga/include/villas/fpga/ips/hls.hpp | 2 +- fpga/include/villas/fpga/ips/rtds2gpu.hpp | 13 +++ fpga/lib/gpu/gpu.cpp | 7 ++ fpga/lib/gpu/include/villas/gpu.hpp | 3 + fpga/lib/ips/rtds2gpu/rtds2gpu.cpp | 5 +- fpga/lib/kernel/vfio.cpp | 1 + fpga/tests/CMakeLists.txt | 3 +- fpga/tests/rtds2gpu.cpp | 113 +++++++++++++++++++++- 10 files changed, 148 insertions(+), 9 deletions(-) diff --git a/fpga/include/villas/fpga/ip_node.hpp b/fpga/include/villas/fpga/ip_node.hpp index 1257529c2..a89e9842f 100644 --- a/fpga/include/villas/fpga/ip_node.hpp +++ b/fpga/include/villas/fpga/ip_node.hpp @@ -71,8 +71,8 @@ public: const std::string& port, bool isMaster) { - for(auto& [vertexId, vertex] : vertices) { - (void) vertexId; + for(auto& vertexEntry : vertices) { + auto& vertex = vertexEntry.second; if(vertex->nodeName == node and vertex->portName == port and vertex->isMaster == isMaster) return vertex; } diff --git a/fpga/include/villas/fpga/ips/gpu2rtds.hpp b/fpga/include/villas/fpga/ips/gpu2rtds.hpp index b19702bef..1aab1a2aa 100644 --- a/fpga/include/villas/fpga/ips/gpu2rtds.hpp +++ b/fpga/include/villas/fpga/ips/gpu2rtds.hpp @@ -28,10 +28,14 @@ public: getDefaultMasterPort() const { return getMasterPort(rtdsOutputStreamPort); } + MemoryBlock + getRegisterMemory() const + { return MemoryBlock(0, 1 << 10, getAddressSpaceId(registerMemory)); } + private: bool updateStatus(); -private: +public: static constexpr const char* rtdsOutputStreamPort = "rtds_output"; struct StatusControlRegister { uint32_t diff --git a/fpga/include/villas/fpga/ips/hls.hpp b/fpga/include/villas/fpga/ips/hls.hpp index 685af9050..1184fdfd9 100644 --- a/fpga/include/villas/fpga/ips/hls.hpp +++ b/fpga/include/villas/fpga/ips/hls.hpp @@ -92,7 +92,7 @@ protected: { return { registerMemory }; } -private: +public: /* Register definitions */ static constexpr uintptr_t registerControlAddr = 0x00; diff --git a/fpga/include/villas/fpga/ips/rtds2gpu.hpp b/fpga/include/villas/fpga/ips/rtds2gpu.hpp index 0b35848cf..b956805c3 100644 --- a/fpga/include/villas/fpga/ips/rtds2gpu.hpp +++ b/fpga/include/villas/fpga/ips/rtds2gpu.hpp @@ -11,6 +11,19 @@ namespace villas { namespace fpga { namespace ip { +union ControlRegister { + uint32_t value; + struct { uint32_t + ap_start : 1, + ap_done : 1, + ap_idle : 1, + ap_ready : 1, + _res1 : 3, + auto_restart : 1, + _res2 : 24; + }; +}; + class Rtds2Gpu : public IpNode, public Hls { diff --git a/fpga/lib/gpu/gpu.cpp b/fpga/lib/gpu/gpu.cpp index e8f7d58ec..ffb7b7ad3 100644 --- a/fpga/lib/gpu/gpu.cpp +++ b/fpga/lib/gpu/gpu.cpp @@ -351,6 +351,13 @@ void Gpu::memcpyKernel(const MemoryBlock& src, const MemoryBlock& dst, size_t si cudaDeviceSynchronize(); } +MemoryTranslation +Gpu::translate(const MemoryBlock& dst) +{ + auto& mm = MemoryManager::get(); + return mm.getTranslation(masterPciEAddrSpaceId, dst.getAddrSpaceId()); +} + std::unique_ptr GpuAllocator::allocateBlock(size_t size) diff --git a/fpga/lib/gpu/include/villas/gpu.hpp b/fpga/lib/gpu/include/villas/gpu.hpp index 88b316815..00f1464ca 100644 --- a/fpga/lib/gpu/include/villas/gpu.hpp +++ b/fpga/lib/gpu/include/villas/gpu.hpp @@ -36,6 +36,9 @@ public: void memcpyKernel(const MemoryBlock& src, const MemoryBlock& dst, size_t size); + MemoryTranslation + translate(const MemoryBlock& dst); + private: bool registerIoMemory(const MemoryBlock& mem); bool registerHostMemory(const MemoryBlock& mem); diff --git a/fpga/lib/ips/rtds2gpu/rtds2gpu.cpp b/fpga/lib/ips/rtds2gpu/rtds2gpu.cpp index 1fa271764..e89574efa 100644 --- a/fpga/lib/ips/rtds2gpu/rtds2gpu.cpp +++ b/fpga/lib/ips/rtds2gpu/rtds2gpu.cpp @@ -22,11 +22,10 @@ bool Rtds2Gpu::init() status.value = 0; started = false; - maxFrameSize = getMaxFrameSize(); +// maxFrameSize = getMaxFrameSize(); + maxFrameSize = 16; logger->info("Max. frame size supported: {}", maxFrameSize); -// maxFrameSize = 16; - return true; } diff --git a/fpga/lib/kernel/vfio.cpp b/fpga/lib/kernel/vfio.cpp index b9639c1de..b04d172ce 100644 --- a/fpga/lib/kernel/vfio.cpp +++ b/fpga/lib/kernel/vfio.cpp @@ -754,6 +754,7 @@ VfioGroup::attach(VfioContainer& container, int groupIndex) << (container.isIommuEnabled() ? "" : "noiommu-") << groupIndex; + logger->debug("path: {}", groupPath.str().c_str()); group->fd = open(groupPath.str().c_str(), O_RDWR); if (group->fd < 0) { logger->error("Failed to open VFIO group {}", group->index); diff --git a/fpga/tests/CMakeLists.txt b/fpga/tests/CMakeLists.txt index 6eb34491a..36aacf045 100644 --- a/fpga/tests/CMakeLists.txt +++ b/fpga/tests/CMakeLists.txt @@ -11,7 +11,8 @@ set(SOURCES ) if(CMAKE_CUDA_COMPILER) - list(APPEND SOURCES gpu.cpp) + enable_language(CUDA) + list(APPEND SOURCES gpu.cpp gpu_kernels.cu) endif() add_executable(unit-tests ${SOURCES}) diff --git a/fpga/tests/rtds2gpu.cpp b/fpga/tests/rtds2gpu.cpp index a453572d9..cc3def8ce 100644 --- a/fpga/tests/rtds2gpu.cpp +++ b/fpga/tests/rtds2gpu.cpp @@ -23,6 +23,8 @@ #include +#include + #include #include #include @@ -32,6 +34,7 @@ #include #include #include +#include #include "global.hpp" @@ -206,7 +209,7 @@ Test(fpga, rtds2gpu_rtt_cpu, .description = "Rtds2Gpu RTT via CPU") cr_assert(gpu2rtds->connect(rtds)); - for(size_t i = 1; i <= 10000; i++) { + for(size_t i = 1; i <= 10000; ) { rtds2gpu->doorbellReset(*doorbell); rtds2gpu->startOnce(dmaRam.getMemoryBlock(), SAMPLE_COUNT, DATA_OFFSET * 4, DOORBELL_OFFSET * 4); @@ -233,6 +236,114 @@ Test(fpga, rtds2gpu_rtt_cpu, .description = "Rtds2Gpu RTT via CPU") } } + logger->info(TXT_GREEN("Passed")); + } +} + +void gpu_rtds_rtt_start(volatile uint32_t* dataIn, volatile reg_doorbell_t* doorbellIn, + volatile uint32_t* dataOut, volatile villas::fpga::ip::ControlRegister* controlRegister); + +void gpu_rtds_rtt_stop(); + +Test(fpga, rtds2gpu_rtt_gpu, .description = "Rtds2Gpu RTT via GPU") +{ + auto logger = loggerGetOrCreate("unittest:rtds2gpu"); + + /* Collect neccessary IPs */ + + auto gpu2rtds = dynamic_cast( + state.cards.front()->lookupIp(villas::fpga::Vlnv("acs.eonerc.rwth-aachen.de:hls:gpu2rtds:"))); + + auto rtds2gpu = dynamic_cast( + state.cards.front()->lookupIp(villas::fpga::Vlnv("acs.eonerc.rwth-aachen.de:hls:rtds2gpu:"))); + + cr_assert_not_null(gpu2rtds, "No Gpu2Rtds IP found"); + cr_assert_not_null(rtds2gpu, "No Rtds2Gpu IP not found"); + + villas::Plugin* plugin = villas::Plugin::lookup(villas::Plugin::Type::Gpu, ""); + auto gpuPlugin = dynamic_cast(plugin); + cr_assert_not_null(gpuPlugin, "No GPU plugin found"); + + auto gpus = gpuPlugin->make(); + cr_assert(gpus.size() > 0, "No GPUs found"); + + // just get first cpu + auto& gpu = gpus.front(); + + // allocate memory on GPU and make accessible by to PCIe/FPGA + auto gpuRam = gpu->getAllocator().allocate(SAMPLE_COUNT + 1); + cr_assert(gpu->makeAccessibleToPCIeAndVA(gpuRam.getMemoryBlock())); + + // make Gpu2Rtds IP register memory on FPGA accessible to GPU + cr_assert(gpu->makeAccessibleFromPCIeOrHostRam(gpu2rtds->getRegisterMemory())); + + auto tr = gpu->translate(gpuRam.getMemoryBlock()); + + auto dataIn = reinterpret_cast(tr.getLocalAddr(DATA_OFFSET * sizeof(uint32_t))); + auto doorbellIn = reinterpret_cast(tr.getLocalAddr(DOORBELL_OFFSET * sizeof(uint32_t))); + + + auto gpu2rtdsRegisters = gpu->translate(gpu2rtds->getRegisterMemory()); + + auto frameRegister = reinterpret_cast(gpu2rtdsRegisters.getLocalAddr(gpu2rtds->registerFrameOffset)); + auto controlRegister = reinterpret_cast(gpu2rtdsRegisters.getLocalAddr(gpu2rtds->registerControlAddr)); + +// auto doorbellInCpu = reinterpret_cast(&gpuRam[DOORBELL_OFFSET]); + + for(auto& ip : state.cards.front()->ips) { + if(*ip != villas::fpga::Vlnv("acs.eonerc.rwth-aachen.de:user:rtds_axis:")) + continue; + + auto& rtds = dynamic_cast(*ip); + logger->info("Testing {}", rtds); + + + // TEST: rtds loopback via switch, this should always work and have RTT=1 + //cr_assert(rtds.connect(rtds)); + //logger->info("loopback"); + //while(1); + + cr_assert(rtds.connect(*rtds2gpu)); + cr_assert(gpu2rtds->connect(rtds)); + + // launch once so they are configured + cr_assert(rtds2gpu->startOnce(gpuRam.getMemoryBlock(), SAMPLE_COUNT, DATA_OFFSET * 4, DOORBELL_OFFSET * 4)); + cr_assert(gpu2rtds->startOnce(SAMPLE_COUNT)); + + rtds2gpu->setAutoRestart(true); + rtds2gpu->start(); + + logger->info("GPU RTT RTDS"); + + std::string dummy; + +// logger->info("Press enter to proceed"); +// std::cin >> dummy; + + gpu_rtds_rtt_start(dataIn, doorbellIn, frameRegister, controlRegister); + +// while(1) { +// cr_assert(rtds2gpu->startOnce(gpuRam.getMemoryBlock(), SAMPLE_COUNT, DATA_OFFSET * 4, DOORBELL_OFFSET * 4)); +// } + +// for(int i = 0; i < 10000; i++) { +// while(not doorbellInCpu->is_valid); +// logger->debug("received data"); +// } + +// logger->info("Press enter to cancel"); +// std::cin >> dummy; + + while(1) { + sleep(1); +// logger->debug("Current sequence number: {}", doorbellInCpu->seq_nr); + logger->debug("Still running"); + } + + gpu_rtds_rtt_stop(); + + + logger->info(TXT_GREEN("Passed")); } }