From f413712b86f37623fe396f3cc61b48756ee355ff Mon Sep 17 00:00:00 2001 From: Daniel Krebs Date: Wed, 30 May 2018 17:24:51 +0200 Subject: [PATCH] gpu2rtds: unit test working --- fpga/include/villas/fpga/ips/gpu2rtds.hpp | 79 ++++++++++ fpga/include/villas/fpga/ips/rtds2gpu.hpp | 9 +- .../villas/fpga/ips/rtds2gpu/xgpu2rtds_hw.h | 53 +++++++ fpga/lib/CMakeLists.txt | 1 + fpga/lib/ips/rtds2gpu/gpu2rtds.cpp | 142 ++++++++++++++++++ fpga/lib/ips/rtds2gpu/rtds2gpu.cpp | 2 + fpga/tests/rtds2gpu.cpp | 72 ++++++++- 7 files changed, 348 insertions(+), 10 deletions(-) create mode 100644 fpga/include/villas/fpga/ips/gpu2rtds.hpp create mode 100644 fpga/include/villas/fpga/ips/rtds2gpu/xgpu2rtds_hw.h create mode 100644 fpga/lib/ips/rtds2gpu/gpu2rtds.cpp diff --git a/fpga/include/villas/fpga/ips/gpu2rtds.hpp b/fpga/include/villas/fpga/ips/gpu2rtds.hpp new file mode 100644 index 000000000..6e9a14b40 --- /dev/null +++ b/fpga/include/villas/fpga/ips/gpu2rtds.hpp @@ -0,0 +1,79 @@ +#pragma once + +#include +#include +#include + +#include +#include + +namespace villas { +namespace fpga { +namespace ip { + + +class Gpu2Rtds : public IpNode, public Hls +{ +public: + friend class Gpu2RtdsFactory; + + bool init(); + + void dump(spdlog::level::level_enum logLevel = spdlog::level::info); + bool startOnce(size_t frameSize); + + size_t getMaxFrameSize(); + +// void dumpDoorbell(uint32_t doorbellRegister) const; + +private: + bool updateStatus(); + +private: + struct StatusControlRegister { uint32_t + status_ap_vld : 1, + _res : 31; + }; + + using StatusRegister = axilite_reg_status_t; + + static constexpr uintptr_t registerStatusOffset = XGPU2RTDS_CTRL_ADDR_STATUS_DATA; + static constexpr uintptr_t registerStatusCtrlOffset = XGPU2RTDS_CTRL_ADDR_STATUS_CTRL; + static constexpr uintptr_t registerFrameSizeOffset = XGPU2RTDS_CTRL_ADDR_FRAME_SIZE_DATA; + static constexpr uintptr_t registerFrameOffset = XGPU2RTDS_CTRL_ADDR_FRAME_BASE; + static constexpr uintptr_t registerFrameLength = XGPU2RTDS_CTRL_DEPTH_FRAME; + +public: + StatusRegister* registerStatus; + StatusControlRegister* registerStatusCtrl; + uint32_t* registerFrameSize; + uint32_t* registerFrames; + + size_t maxFrameSize; + + bool started; +}; + + +class Gpu2RtdsFactory : public IpNodeFactory { +public: + Gpu2RtdsFactory(); + + IpCore* create() + { return new Gpu2Rtds; } + + std::string + getName() const + { return "Gpu2Rtds"; } + + std::string + getDescription() const + { return "HLS Gpu2Rtds IP"; } + + Vlnv getCompatibleVlnv() const + { return {"acs.eonerc.rwth-aachen.de:hls:gpu2rtds:"}; } +}; + +} // namespace ip +} // namespace fpga +} // namespace villas diff --git a/fpga/include/villas/fpga/ips/rtds2gpu.hpp b/fpga/include/villas/fpga/ips/rtds2gpu.hpp index 8a64d44a3..bc0e2b94b 100644 --- a/fpga/include/villas/fpga/ips/rtds2gpu.hpp +++ b/fpga/include/villas/fpga/ips/rtds2gpu.hpp @@ -27,17 +27,18 @@ public: void dumpDoorbell(uint32_t doorbellRegister) const; + static constexpr const char* registerMemory = "Reg"; + std::list getMemoryBlocks() const + { return { registerMemory }; } + + private: bool updateStatus(); private: - static constexpr const char* registerMemory = "Reg"; static constexpr const char* axiInterface = "m_axi_axi_mm"; static constexpr const char* streamInterface = "rtds_input"; - std::list getMemoryBlocks() const - { return { registerMemory }; } - XRtds2gpu xInstance; axilite_reg_status_t status; diff --git a/fpga/include/villas/fpga/ips/rtds2gpu/xgpu2rtds_hw.h b/fpga/include/villas/fpga/ips/rtds2gpu/xgpu2rtds_hw.h new file mode 100644 index 000000000..8ea61f0f1 --- /dev/null +++ b/fpga/include/villas/fpga/ips/rtds2gpu/xgpu2rtds_hw.h @@ -0,0 +1,53 @@ +// ============================================================== +// File generated by Vivado(TM) HLS - High-Level Synthesis from C, C++ and SystemC +// Version: 2017.3 +// Copyright (C) 1986-2017 Xilinx, Inc. All Rights Reserved. +// +// ============================================================== + +// CTRL +// 0x00 : Control signals +// bit 0 - ap_start (Read/Write/COH) +// bit 1 - ap_done (Read/COR) +// bit 2 - ap_idle (Read) +// bit 3 - ap_ready (Read) +// bit 7 - auto_restart (Read/Write) +// others - reserved +// 0x04 : Global Interrupt Enable Register +// bit 0 - Global Interrupt Enable (Read/Write) +// others - reserved +// 0x08 : IP Interrupt Enable Register (Read/Write) +// bit 0 - Channel 0 (ap_done) +// bit 1 - Channel 1 (ap_ready) +// others - reserved +// 0x0c : IP Interrupt Status Register (Read/TOW) +// bit 0 - Channel 0 (ap_done) +// bit 1 - Channel 1 (ap_ready) +// others - reserved +// 0x10 : Data signal of frame_size +// bit 31~0 - frame_size[31:0] (Read/Write) +// 0x14 : reserved +// 0x80 : Data signal of status +// bit 31~0 - status[31:0] (Read) +// 0x84 : Control signal of status +// bit 0 - status_ap_vld (Read/COR) +// others - reserved +// 0x40 ~ +// 0x7f : Memory 'frame' (16 * 32b) +// Word n : bit [31:0] - frame[n] +// (SC = Self Clear, COR = Clear on Read, TOW = Toggle on Write, COH = Clear on Handshake) + +#define XGPU2RTDS_CTRL_ADDR_AP_CTRL 0x00 +#define XGPU2RTDS_CTRL_ADDR_GIE 0x04 +#define XGPU2RTDS_CTRL_ADDR_IER 0x08 +#define XGPU2RTDS_CTRL_ADDR_ISR 0x0c +#define XGPU2RTDS_CTRL_ADDR_FRAME_SIZE_DATA 0x10 +#define XGPU2RTDS_CTRL_BITS_FRAME_SIZE_DATA 32 +#define XGPU2RTDS_CTRL_ADDR_STATUS_DATA 0x80 +#define XGPU2RTDS_CTRL_BITS_STATUS_DATA 32 +#define XGPU2RTDS_CTRL_ADDR_STATUS_CTRL 0x84 +#define XGPU2RTDS_CTRL_ADDR_FRAME_BASE 0x40 +#define XGPU2RTDS_CTRL_ADDR_FRAME_HIGH 0x7f +#define XGPU2RTDS_CTRL_WIDTH_FRAME 32 +#define XGPU2RTDS_CTRL_DEPTH_FRAME 16 + diff --git a/fpga/lib/CMakeLists.txt b/fpga/lib/CMakeLists.txt index 7159a63a9..4d0d555e8 100644 --- a/fpga/lib/CMakeLists.txt +++ b/fpga/lib/CMakeLists.txt @@ -17,6 +17,7 @@ set(SOURCES ips/rtds2gpu/rtds2gpu.cpp ips/rtds2gpu/xrtds2gpu.c + ips/rtds2gpu/gpu2rtds.cpp kernel/kernel.c kernel/pci.c diff --git a/fpga/lib/ips/rtds2gpu/gpu2rtds.cpp b/fpga/lib/ips/rtds2gpu/gpu2rtds.cpp new file mode 100644 index 000000000..563d9da63 --- /dev/null +++ b/fpga/lib/ips/rtds2gpu/gpu2rtds.cpp @@ -0,0 +1,142 @@ +#include +#include + +#include +#include + +#include "log.hpp" + +namespace villas { +namespace fpga { +namespace ip { + +static Gpu2RtdsFactory factory; + +bool Gpu2Rtds::init() +{ + Hls::init(); + + auto& registers = addressTranslations.at(registerMemory); + + registerStatus = reinterpret_cast(registers.getLocalAddr(registerStatusOffset)); + registerStatusCtrl = reinterpret_cast(registers.getLocalAddr(registerStatusCtrlOffset)); + registerFrameSize = reinterpret_cast(registers.getLocalAddr(registerFrameSizeOffset)); + registerFrames = reinterpret_cast(registers.getLocalAddr(registerFrameOffset)); + + maxFrameSize = getMaxFrameSize(); + logger->info("Max. frame size supported: {}", maxFrameSize); + + return true; +} + +bool +Gpu2Rtds::startOnce(size_t frameSize) +{ + *registerFrameSize = frameSize; + + start(); + + return true; +} + +void Gpu2Rtds::dump(spdlog::level::level_enum logLevel) +{ + const auto frame_size = *registerFrameSize; + auto status = *registerStatus; + + logger->log(logLevel, "Gpu2Rtds registers:"); + logger->log(logLevel, " Frame size (words): {:#x}", frame_size); + logger->log(logLevel, " Status: {:#x}", status.value); + logger->log(logLevel, " Running: {}", (status.is_running ? "yes" : "no")); + logger->log(logLevel, " Frame too short: {}", (status.frame_too_short ? "yes" : "no")); + logger->log(logLevel, " Frame too long: {}", (status.frame_too_long ? "yes" : "no")); + logger->log(logLevel, " Frame size invalid: {}", (status.invalid_frame_size ? "yes" : "no")); + logger->log(logLevel, " Last count: {}", status.last_count); + logger->log(logLevel, " Last seq. number: {}", status.last_seq_nr); + logger->log(logLevel, " Max. frame size: {}", status.max_frame_size); +} + +//bool Gpu2Rtds::startOnce(const MemoryBlock& mem, size_t frameSize, size_t dataOffset, size_t doorbellOffset) +//{ +// auto& mm = MemoryManager::get(); + +// if(frameSize > maxFrameSize) { +// logger->error("Requested frame size of {} exceeds max. frame size of {}", +// frameSize, maxFrameSize); +// return false; +// } + +// auto translationFromIp = mm.getTranslation( +// getMasterAddrSpaceByInterface(axiInterface), +// mem.getAddrSpaceId()); + +// // set address of memory block in HLS IP +// XGpu2Rtds_Set_baseaddr(&xInstance, translationFromIp.getLocalAddr(0)); + +// XGpu2Rtds_Set_doorbell_offset(&xInstance, doorbellOffset); +// XGpu2Rtds_Set_data_offset(&xInstance, dataOffset); +// XGpu2Rtds_Set_frame_size(&xInstance, frameSize); + +// // prepare memory with all zeroes +// auto translationFromProcess = mm.getTranslationFromProcess(mem.getAddrSpaceId()); +// auto memory = reinterpret_cast(translationFromProcess.getLocalAddr(0)); +// memset(memory, 0, mem.getSize()); + +// // start IP +// return start(); +//} + + + + + +//bool +//Gpu2Rtds::updateStatus() +//{ +// if(not XGpu2Rtds_Get_status_vld(&xInstance)) +// return false; + +// status.value = XGpu2Rtds_Get_status(&xInstance); + +// return true; +//} + +size_t +Gpu2Rtds::getMaxFrameSize() +{ + *registerFrameSize = 0; + + start(); + while(not isFinished()); + + while(not registerStatusCtrl->status_ap_vld); + + axilite_reg_status_t status = *registerStatus; + +// logger->debug("(*registerStatus).max_frame_size: {}", (*registerStatus).max_frame_size); +// logger->debug("status.max_frame_size: {}", status.max_frame_size); + +// assert(status.max_frame_size == (*registerStatus).max_frame_size); + + return status.max_frame_size; +} + +//void +//Gpu2Rtds::dumpDoorbell(uint32_t doorbellRegister) const +//{ +// auto& doorbell = reinterpret_cast(doorbellRegister); + +// logger->info("Doorbell register: {:#08x}", doorbell.value); +// logger->info(" Valid: {}", (doorbell.is_valid ? "yes" : "no")); +// logger->info(" Count: {}", doorbell.count); +// logger->info(" Seq. number: {}", doorbell.seq_nr); +//} + +Gpu2RtdsFactory::Gpu2RtdsFactory() : + IpNodeFactory(getName()) +{ +} + +} // namespace ip +} // namespace fpga +} // namespace villas diff --git a/fpga/lib/ips/rtds2gpu/rtds2gpu.cpp b/fpga/lib/ips/rtds2gpu/rtds2gpu.cpp index a39d45061..1fa271764 100644 --- a/fpga/lib/ips/rtds2gpu/rtds2gpu.cpp +++ b/fpga/lib/ips/rtds2gpu/rtds2gpu.cpp @@ -25,6 +25,8 @@ bool Rtds2Gpu::init() maxFrameSize = getMaxFrameSize(); logger->info("Max. frame size supported: {}", maxFrameSize); +// maxFrameSize = 16; + return true; } diff --git a/fpga/tests/rtds2gpu.cpp b/fpga/tests/rtds2gpu.cpp index 4c5e301c5..5d751ce99 100644 --- a/fpga/tests/rtds2gpu.cpp +++ b/fpga/tests/rtds2gpu.cpp @@ -28,6 +28,7 @@ #include #include +#include #include #include @@ -35,12 +36,31 @@ static constexpr size_t SAMPLE_SIZE = 4; -static constexpr size_t SAMPLE_COUNT = 16; +static constexpr size_t SAMPLE_COUNT = 8; static constexpr size_t FRAME_SIZE = SAMPLE_COUNT * SAMPLE_SIZE; static constexpr size_t DOORBELL_OFFSET = SAMPLE_COUNT; static constexpr size_t DATA_OFFSET = 0; +static void dumpMem(const uint32_t* addr, size_t len) +{ + const size_t bytesPerLine = 16; + const size_t lines = (len) / bytesPerLine + 1; + const uint8_t* buf = reinterpret_cast(addr); + + size_t bytesRead = 0; + + for(size_t line = 0; line < lines; line++) { + const unsigned base = line * bytesPerLine; + printf("0x%04x: ", base); + + for(size_t i = 0; i < bytesPerLine && bytesRead < len; i++) { + printf("0x%02x ", buf[base + i]); + bytesRead++; + } + puts(""); + } +} Test(fpga, rtds2gpu, .description = "Rtds2Gpu") { @@ -57,18 +77,23 @@ Test(fpga, rtds2gpu, .description = "Rtds2Gpu") auto rtds2gpu = dynamic_cast(*ip); - auto axiSwitchPtr = state.cards.front()->lookupIp(villas::fpga::Vlnv("xilinx.com:ip:axis_switch:")); - auto axiSwitch = dynamic_cast(axiSwitchPtr); - - cr_assert_not_null(axiSwitchPtr); + auto axiSwitch = dynamic_cast( + state.cards.front()->lookupIp(villas::fpga::Vlnv("xilinx.com:ip:axis_switch:"))); auto dma = dynamic_cast( state.cards.front()->lookupIp(villas::fpga::Vlnv("xilinx.com:ip:axi_dma:"))); - rtds2gpu.dump(spdlog::level::debug); + auto gpu2rtds = dynamic_cast( + state.cards.front()->lookupIp(villas::fpga::Vlnv("acs.eonerc.rwth-aachen.de:hls:gpu2rtds:"))); + + cr_assert_not_null(axiSwitch, "No AXI switch IP found"); cr_assert_not_null(dma, "No DMA IP found"); + cr_assert_not_null(gpu2rtds, "No Gpu2Rtds IP found"); + + rtds2gpu.dump(spdlog::level::debug); + gpu2rtds->dump(spdlog::level::debug); /* Allocate and prepare memory */ @@ -76,12 +101,21 @@ Test(fpga, rtds2gpu, .description = "Rtds2Gpu") // allocate space for all samples and doorbell register auto dmaMemSrc = villas::HostDmaRam::getAllocator(0).allocate(SAMPLE_COUNT + 1); auto dmaMemDst = villas::HostDmaRam::getAllocator(0).allocate(SAMPLE_COUNT + 1); + auto dmaMemDst2 = villas::HostDmaRam::getAllocator(0).allocate(SAMPLE_COUNT + 1); + memset(&dmaMemSrc, 0x11, dmaMemSrc.getMemoryBlock().getSize()); memset(&dmaMemDst, 0x55, dmaMemDst.getMemoryBlock().getSize()); + memset(&dmaMemDst2, 0x77, dmaMemDst2.getMemoryBlock().getSize()); const uint32_t* dataSrc = &dmaMemSrc[DATA_OFFSET]; const uint32_t* dataDst = &dmaMemDst[DATA_OFFSET]; + const uint32_t* dataDst2 = &dmaMemDst2[0]; + + dumpMem(dataSrc, dmaMemSrc.getMemoryBlock().getSize()); + dumpMem(dataDst, dmaMemDst.getMemoryBlock().getSize()); + dumpMem(dataDst2, dmaMemDst2.getMemoryBlock().getSize()); + // connect DMA to Rtds2Gpu IP // TODO: this should be done automatically @@ -96,6 +130,7 @@ Test(fpga, rtds2gpu, .description = "Rtds2Gpu") cr_assert(dma->writeComplete(), "DMA failed"); + while(not rtds2gpu.isFinished()); const uint32_t* doorbellDst = &dmaMemDst[DOORBELL_OFFSET]; @@ -104,6 +139,31 @@ Test(fpga, rtds2gpu, .description = "Rtds2Gpu") cr_assert(memcmp(dataSrc, dataDst, FRAME_SIZE) == 0, "Memory not equal"); + (void) dmaMemDst2; + (void) dataDst2; + + for(size_t i = 0; i < SAMPLE_COUNT; i++) { + gpu2rtds->registerFrames[i] = dmaMemDst[i]; + } + cr_assert(axiSwitch->connect(7, 6)); + + cr_assert(dma->read(dmaMemDst2.getMemoryBlock(), FRAME_SIZE), + "Starting DMA S2MM transfer failed"); + + cr_assert(gpu2rtds->startOnce(SAMPLE_COUNT), + "Preparing Gpu2Rtds IP failed"); + + cr_assert(dma->readComplete(), + "DMA failed"); + + while(not rtds2gpu.isFinished()); + + cr_assert(memcmp(dataSrc, dataDst2, FRAME_SIZE) == 0, "Memory not equal"); + + dumpMem(dataSrc, dmaMemSrc.getMemoryBlock().getSize()); + dumpMem(dataDst, dmaMemDst.getMemoryBlock().getSize()); + dumpMem(dataDst2, dmaMemDst2.getMemoryBlock().getSize()); + logger->info(TXT_GREEN("Passed")); } }