diff --git a/fpga/common b/fpga/common index dd7d75d0a..9747c6ead 160000 --- a/fpga/common +++ b/fpga/common @@ -1 +1 @@ -Subproject commit dd7d75d0aab3801d65f9ff757d82f47f705514af +Subproject commit 9747c6ead6dedff943dbf22ce74e40e9b2622514 diff --git a/fpga/etc/fpga.json b/fpga/etc/fpga.json index 5f982e2a8..2beee4407 100644 --- a/fpga/etc/fpga.json +++ b/fpga/etc/fpga.json @@ -33,90 +33,6 @@ "hier_0_axi_dma_axi_dma_0": { "vlnv": "xilinx.com:ip:axi_dma:7.1", "memory-view": { - "M_AXI_SG": { - "bram_0_axi_bram_ctrl_0": { - "Mem0": { - "baseaddr": 0, - "highaddr": 8191, - "size": 8192 - } - }, - "hier_0_axi_dma_axi_dma_1": { - "Reg": { - "baseaddr": 8192, - "highaddr": 12287, - "size": 4096 - } - }, - "hier_0_axi_dma_axi_dma_0": { - "Reg": { - "baseaddr": 12288, - "highaddr": 16383, - "size": 4096 - } - }, - "timer_0_axi_timer_0": { - "Reg": { - "baseaddr": 16384, - "highaddr": 20479, - "size": 4096 - } - }, - "hier_0_axis_interconnect_0_axis_interconnect_0_xbar": { - "Reg": { - "baseaddr": 20480, - "highaddr": 24575, - "size": 4096 - } - }, - "hier_0_axi_fifo_mm_s_0": { - "Mem0": { - "baseaddr": 24576, - "highaddr": 28671, - "size": 4096 - }, - "Mem1": { - "baseaddr": 49152, - "highaddr": 57343, - "size": 8192 - } - }, - "pcie_0_axi_reset_0": { - "Reg": { - "baseaddr": 28672, - "highaddr": 32767, - "size": 4096 - } - }, - "hier_0_rtds_axis_0": { - "reg0": { - "baseaddr": 32768, - "highaddr": 36863, - "size": 4096 - } - }, - "hier_0_hls_dft_0": { - "Reg": { - "baseaddr": 36864, - "highaddr": 40959, - "size": 4096 - } - }, - "pcie_0_axi_pcie_intc_0": { - "Reg": { - "baseaddr": 45056, - "highaddr": 49151, - "size": 4096 - } - }, - "pcie_0_axi_pcie_0": { - "CTL0": { - "baseaddr": 268435456, - "highaddr": 536870911, - "size": 268435456 - } - } - }, "M_AXI_MM2S": { "pcie_0_axi_pcie_0": { "BAR0": { @@ -134,17 +50,115 @@ "size": 4294967296 } } + }, + "M_AXI_SG": { + "hier_0_axi_dma_axi_dma_0": { + "Reg": { + "baseaddr": 4096, + "highaddr": 8191, + "size": 4096 + } + }, + "hier_0_axi_dma_axi_dma_1": { + "Reg": { + "baseaddr": 8192, + "highaddr": 12287, + "size": 4096 + } + }, + "hier_0_axi_fifo_mm_s_0": { + "Mem0": { + "baseaddr": 12288, + "highaddr": 16383, + "size": 4096 + }, + "Mem1": { + "baseaddr": 16384, + "highaddr": 24575, + "size": 8192 + } + }, + "pcie_0_axi_pcie_intc_0": { + "Reg": { + "baseaddr": 24576, + "highaddr": 28671, + "size": 4096 + } + }, + "pcie_0_axi_reset_0": { + "Reg": { + "baseaddr": 28672, + "highaddr": 32767, + "size": 4096 + } + }, + "timer_0_axi_timer_0": { + "Reg": { + "baseaddr": 32768, + "highaddr": 36863, + "size": 4096 + } + }, + "hier_0_hls_dft_0": { + "Reg": { + "baseaddr": 36864, + "highaddr": 40959, + "size": 4096 + } + }, + "hier_0_rtds_axis_0": { + "reg0": { + "baseaddr": 40960, + "highaddr": 45055, + "size": 4096 + } + }, + "hier_0_axis_interconnect_0_axis_interconnect_0_xbar": { + "Reg": { + "baseaddr": 45056, + "highaddr": 49151, + "size": 4096 + } + }, + "bram_0_axi_bram_ctrl_0": { + "Mem0": { + "baseaddr": 49152, + "highaddr": 57343, + "size": 8192 + } + }, + "hier_0_rtds2gpu_0": { + "Reg": { + "baseaddr": 57344, + "highaddr": 61439, + "size": 4096 + } + }, + "hier_0_gpu2rtds_0": { + "Reg": { + "baseaddr": 61440, + "highaddr": 65535, + "size": 4096 + } + }, + "pcie_0_axi_pcie_0": { + "CTL0": { + "baseaddr": 268435456, + "highaddr": 536870911, + "size": 268435456 + } + } } }, "ports": [ { "role": "master", - "target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:1", + "target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:S01_AXIS", "name": "MM2S" }, { "role": "slave", - "target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:1", + "target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:M01_AXIS", "name": "S2MM" } ], @@ -178,12 +192,12 @@ "ports": [ { "role": "master", - "target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:6", + "target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:S06_AXIS", "name": "MM2S" }, { "role": "slave", - "target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:6", + "target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:M06_AXIS", "name": "S2MM" } ], @@ -197,12 +211,12 @@ "ports": [ { "role": "master", - "target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:2", + "target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:S02_AXIS", "name": "STR_TXD" }, { "role": "slave", - "target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:2", + "target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:M02_AXIS", "name": "STR_RXD" } ], @@ -210,43 +224,143 @@ "interrupt": "pcie_0_axi_pcie_intc_0:2" } }, + "hier_0_axis_data_fifo_0": { + "vlnv": "xilinx.com:ip:axis_data_fifo:1.1", + "ports": [ + { + "role": "master", + "target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:S03_AXIS", + "name": "AXIS" + }, + { + "role": "slave", + "target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:M03_AXIS", + "name": "AXIS" + } + ] + }, + "hier_0_axis_data_fifo_1": { + "vlnv": "xilinx.com:ip:axis_data_fifo:1.1", + "ports": [ + { + "role": "master", + "target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:S04_AXIS", + "name": "AXIS" + }, + { + "role": "slave", + "target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:M04_AXIS", + "name": "AXIS" + } + ] + }, "hier_0_axis_interconnect_0_axis_interconnect_0_xbar": { "vlnv": "xilinx.com:ip:axis_switch:1.1", "ports": [ + { + "role": "slave", + "target": "hier_0_rtds_axis_0:m_axis", + "name": "S00_AXIS" + }, { "role": "master", - "target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:3", - "name": "M03_AXIS" + "target": "hier_0_rtds_axis_0:s_axis", + "name": "M00_AXIS" }, { "role": "slave", - "target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:3", + "target": "hier_0_axi_dma_axi_dma_0:MM2S", + "name": "S01_AXIS" + }, + { + "role": "master", + "target": "hier_0_axi_dma_axi_dma_0:S2MM", + "name": "M01_AXIS" + }, + { + "role": "slave", + "target": "hier_0_axi_fifo_mm_s_0:STR_TXD", + "name": "S02_AXIS" + }, + { + "role": "master", + "target": "hier_0_axi_fifo_mm_s_0:STR_RXD", + "name": "M02_AXIS" + }, + { + "role": "slave", + "target": "hier_0_axis_data_fifo_0:AXIS", "name": "S03_AXIS" }, { "role": "master", - "target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:4", + "target": "hier_0_axis_data_fifo_0:AXIS", + "name": "M03_AXIS" + }, + { + "role": "slave", + "target": "hier_0_axis_data_fifo_1:AXIS", + "name": "S04_AXIS" + }, + { + "role": "master", + "target": "hier_0_axis_data_fifo_1:AXIS", "name": "M04_AXIS" }, { "role": "slave", - "target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:4", - "name": "S04_AXIS" + "target": "hier_0_hls_dft_0:output_r", + "name": "S05_AXIS" + }, + { + "role": "master", + "target": "hier_0_hls_dft_0:input_r", + "name": "M05_AXIS" + }, + { + "role": "slave", + "target": "hier_0_axi_dma_axi_dma_1:MM2S", + "name": "S06_AXIS" + }, + { + "role": "master", + "target": "hier_0_axi_dma_axi_dma_1:S2MM", + "name": "M06_AXIS" + }, + { + "role": "slave", + "target": "hier_0_gpu2rtds_0:rtds_output", + "name": "S07_AXIS" + }, + { + "role": "master", + "target": "hier_0_rtds2gpu_0:rtds_input", + "name": "M07_AXIS" } ], - "num_ports": 7 + "num_ports": 8 + }, + "hier_0_gpu2rtds_0": { + "vlnv": "acs.eonerc.rwth-aachen.de:hls:gpu2rtds:1.0", + "ports": [ + { + "role": "master", + "target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:S07_AXIS", + "name": "rtds_output" + } + ] }, "hier_0_hls_dft_0": { "vlnv": "acs.eonerc.rwth-aachen.de:hls:hls_dft:1.1", "ports": [ { "role": "master", - "target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:5", + "target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:S05_AXIS", "name": "output_r" }, { "role": "slave", - "target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:5", + "target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:M05_AXIS", "name": "input_r" } ], @@ -254,17 +368,38 @@ "interrupt": "pcie_0_axi_pcie_intc_0:1" } }, + "hier_0_rtds2gpu_0": { + "vlnv": "acs.eonerc.rwth-aachen.de:hls:rtds2gpu:1.1", + "memory-view": { + "m_axi_axi_mm": { + "pcie_0_axi_pcie_0": { + "BAR0": { + "baseaddr": 0, + "highaddr": 4294967295, + "size": 4294967296 + } + } + } + }, + "ports": [ + { + "role": "slave", + "target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:M07_AXIS", + "name": "rtds_input" + } + ] + }, "hier_0_rtds_axis_0": { "vlnv": "acs.eonerc.rwth-aachen.de:user:rtds_axis:1.0", "ports": [ { "role": "master", - "target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:0", + "target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:S00_AXIS", "name": "m_axis" }, { "role": "slave", - "target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:0", + "target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:M00_AXIS", "name": "s_axis" } ], @@ -278,11 +413,11 @@ "vlnv": "xilinx.com:ip:axi_pcie:2.8", "memory-view": { "M_AXI": { - "bram_0_axi_bram_ctrl_0": { - "Mem0": { - "baseaddr": 0, + "hier_0_axi_dma_axi_dma_0": { + "Reg": { + "baseaddr": 4096, "highaddr": 8191, - "size": 8192 + "size": 4096 } }, "hier_0_axi_dma_axi_dma_1": { @@ -292,37 +427,23 @@ "size": 4096 } }, - "hier_0_axi_dma_axi_dma_0": { - "Reg": { + "hier_0_axi_fifo_mm_s_0": { + "Mem0": { "baseaddr": 12288, "highaddr": 16383, "size": 4096 - } - }, - "timer_0_axi_timer_0": { - "Reg": { + }, + "Mem1": { "baseaddr": 16384, - "highaddr": 20479, - "size": 4096 - } - }, - "hier_0_axis_interconnect_0_axis_interconnect_0_xbar": { - "Reg": { - "baseaddr": 20480, "highaddr": 24575, - "size": 4096 + "size": 8192 } }, - "hier_0_axi_fifo_mm_s_0": { - "Mem0": { + "pcie_0_axi_pcie_intc_0": { + "Reg": { "baseaddr": 24576, "highaddr": 28671, "size": 4096 - }, - "Mem1": { - "baseaddr": 49152, - "highaddr": 57343, - "size": 8192 } }, "pcie_0_axi_reset_0": { @@ -332,8 +453,8 @@ "size": 4096 } }, - "hier_0_rtds_axis_0": { - "reg0": { + "timer_0_axi_timer_0": { + "Reg": { "baseaddr": 32768, "highaddr": 36863, "size": 4096 @@ -346,13 +467,41 @@ "size": 4096 } }, - "pcie_0_axi_pcie_intc_0": { + "hier_0_rtds_axis_0": { + "reg0": { + "baseaddr": 40960, + "highaddr": 45055, + "size": 4096 + } + }, + "hier_0_axis_interconnect_0_axis_interconnect_0_xbar": { "Reg": { "baseaddr": 45056, "highaddr": 49151, "size": 4096 } }, + "bram_0_axi_bram_ctrl_0": { + "Mem0": { + "baseaddr": 49152, + "highaddr": 57343, + "size": 8192 + } + }, + "hier_0_rtds2gpu_0": { + "Reg": { + "baseaddr": 57344, + "highaddr": 61439, + "size": 4096 + } + }, + "hier_0_gpu2rtds_0": { + "Reg": { + "baseaddr": 61440, + "highaddr": 65535, + "size": 4096 + } + }, "pcie_0_axi_pcie_0": { "CTL0": { "baseaddr": 268435456, diff --git a/fpga/include/villas/fpga/ip_node.hpp b/fpga/include/villas/fpga/ip_node.hpp index 427917fc0..1ce9ded0c 100644 --- a/fpga/include/villas/fpga/ip_node.hpp +++ b/fpga/include/villas/fpga/ip_node.hpp @@ -71,8 +71,8 @@ public: const std::string& port, bool isMaster) { - for(auto& [vertexId, vertex] : vertices) { - (void) vertexId; + for(auto& vertexEntry : vertices) { + auto& vertex = vertexEntry.second; if(vertex->nodeName == node and vertex->portName == port and vertex->isMaster == isMaster) return vertex; } @@ -86,7 +86,7 @@ public: }; -class IpNode : public IpCore { +class IpNode : public virtual IpCore { public: friend class IpNodeFactory; diff --git a/fpga/include/villas/fpga/ips/dma.hpp b/fpga/include/villas/fpga/ips/dma.hpp index 963c20cda..12cf0236a 100644 --- a/fpga/include/villas/fpga/ips/dma.hpp +++ b/fpga/include/villas/fpga/ips/dma.hpp @@ -43,7 +43,10 @@ public: bool init(); bool reset(); + // memory-mapped to stream (MM2S) bool write(const MemoryBlock& mem, size_t len); + + // stream to memory-mapped (S2MM) bool read(const MemoryBlock& mem, size_t len); size_t writeComplete() diff --git a/fpga/include/villas/fpga/ips/gpu2rtds.hpp b/fpga/include/villas/fpga/ips/gpu2rtds.hpp new file mode 100644 index 000000000..1aab1a2aa --- /dev/null +++ b/fpga/include/villas/fpga/ips/gpu2rtds.hpp @@ -0,0 +1,87 @@ +#pragma once + +#include +#include +#include + +#include +#include + +namespace villas { +namespace fpga { +namespace ip { + + +class Gpu2Rtds : public IpNode, public Hls +{ +public: + friend class Gpu2RtdsFactory; + + bool init(); + + void dump(spdlog::level::level_enum logLevel = spdlog::level::info); + bool startOnce(size_t frameSize); + + size_t getMaxFrameSize(); + + const StreamVertex& + getDefaultMasterPort() const + { return getMasterPort(rtdsOutputStreamPort); } + + MemoryBlock + getRegisterMemory() const + { return MemoryBlock(0, 1 << 10, getAddressSpaceId(registerMemory)); } + +private: + bool updateStatus(); + +public: + static constexpr const char* rtdsOutputStreamPort = "rtds_output"; + + struct StatusControlRegister { uint32_t + status_ap_vld : 1, + _res : 31; + }; + + using StatusRegister = axilite_reg_status_t; + + static constexpr uintptr_t registerStatusOffset = XGPU2RTDS_CTRL_ADDR_STATUS_DATA; + static constexpr uintptr_t registerStatusCtrlOffset = XGPU2RTDS_CTRL_ADDR_STATUS_CTRL; + static constexpr uintptr_t registerFrameSizeOffset = XGPU2RTDS_CTRL_ADDR_FRAME_SIZE_DATA; + static constexpr uintptr_t registerFrameOffset = XGPU2RTDS_CTRL_ADDR_FRAME_BASE; + static constexpr uintptr_t registerFrameLength = XGPU2RTDS_CTRL_DEPTH_FRAME; + +public: + StatusRegister* registerStatus; + StatusControlRegister* registerStatusCtrl; + uint32_t* registerFrameSize; + uint32_t* registerFrames; + + size_t maxFrameSize; + + bool started; +}; + + +class Gpu2RtdsFactory : public IpNodeFactory { +public: + Gpu2RtdsFactory(); + + IpCore* create() + { return new Gpu2Rtds; } + + std::string + getName() const + { return "Gpu2Rtds"; } + + std::string + getDescription() const + { return "HLS Gpu2Rtds IP"; } + + Vlnv getCompatibleVlnv() const + { return {"acs.eonerc.rwth-aachen.de:hls:gpu2rtds:"}; } +}; + +} // namespace ip +} // namespace fpga +} // namespace villas diff --git a/fpga/include/villas/fpga/ips/hls.hpp b/fpga/include/villas/fpga/ips/hls.hpp new file mode 100644 index 000000000..1184fdfd9 --- /dev/null +++ b/fpga/include/villas/fpga/ips/hls.hpp @@ -0,0 +1,137 @@ +#pragma once + +#include +#include + +namespace villas { +namespace fpga { +namespace ip { + + +class Hls : public virtual IpCore +{ +public: + virtual bool init() + { + auto& registers = addressTranslations.at(registerMemory); + + controlRegister = reinterpret_cast(registers.getLocalAddr(registerControlAddr)); + globalIntRegister = reinterpret_cast(registers.getLocalAddr(registerGlobalIntEnableAddr)); + ipIntEnableRegister = reinterpret_cast(registers.getLocalAddr(registerIntEnableAddr)); + ipIntStatusRegister = reinterpret_cast(registers.getLocalAddr(registerIntStatusAddr)); + + setAutoRestart(false); + setGlobalInterrupt(false); + + return true; + } + + bool start() + { + controlRegister->ap_start = true; + running = true; + + return true; + } + + virtual bool isFinished() + { updateRunningStatus(); return !running; } + + + bool isRunning() + { updateRunningStatus(); return running; } + + + void setAutoRestart(bool enabled) const + { controlRegister->auto_restart = enabled; } + + + void setGlobalInterrupt(bool enabled) const + { globalIntRegister->globalInterruptEnable = enabled; } + + + void setReadyInterrupt(bool enabled) const + { ipIntEnableRegister->ap_ready = enabled; } + + + void setDoneInterrupt(bool enabled) const + { ipIntEnableRegister->ap_done = enabled; } + + + bool isIdleBit() const + { return controlRegister->ap_idle; } + + + bool isReadyBit() const + { return controlRegister->ap_ready; } + + + /// Warning: the corresponding bit is cleared on read of the register, so if + /// not used correctly, this function may never return true. Only use this + /// function if you really know what you are doing! + bool isDoneBit() const + { return controlRegister->ap_done; } + + + bool isAutoRestartBit() const + { return controlRegister->auto_restart; } + +private: + void updateRunningStatus() + { + if(running and isIdleBit()) + running = false; + } + +protected: + /* Memory block handling */ + + static constexpr const char* registerMemory = "Reg"; + + virtual std::list getMemoryBlocks() const + { return { registerMemory }; } + + +public: + /* Register definitions */ + + static constexpr uintptr_t registerControlAddr = 0x00; + static constexpr uintptr_t registerGlobalIntEnableAddr = 0x04; + static constexpr uintptr_t registerIntEnableAddr = 0x08; + static constexpr uintptr_t registerIntStatusAddr = 0x0c; + + union ControlRegister { + uint32_t value; + struct { uint32_t + ap_start : 1, + ap_done : 1, + ap_idle : 1, + ap_ready : 1, + _res1 : 3, + auto_restart : 1, + _res2 : 24; + }; + }; + + struct GlobalIntRegister { uint32_t + globalInterruptEnable : 1, + _res : 31; + }; + + struct IpIntRegister { uint32_t + ap_done : 1, + ap_ready : 1, + _res : 30; + }; +protected: + ControlRegister* controlRegister; + GlobalIntRegister* globalIntRegister; + IpIntRegister* ipIntEnableRegister; + IpIntRegister* ipIntStatusRegister; + + bool running; +}; + +} // namespace ip +} // namespace fpga +} // namespace villas diff --git a/fpga/include/villas/fpga/ips/rtds.hpp b/fpga/include/villas/fpga/ips/rtds.hpp index 35aeef133..4f6d789e0 100644 --- a/fpga/include/villas/fpga/ips/rtds.hpp +++ b/fpga/include/villas/fpga/ips/rtds.hpp @@ -44,6 +44,14 @@ public: std::list getMemoryBlocks() const { return { registerMemory }; } + const StreamVertex& + getDefaultSlavePort() const + { return getSlavePort(slavePort); } + + const StreamVertex& + getDefaultMasterPort() const + { return getMasterPort(masterPort); } + private: static constexpr const char registerMemory[] = "reg0"; static constexpr const char* irqTs = "irq_ts"; diff --git a/fpga/include/villas/fpga/ips/rtds2gpu.hpp b/fpga/include/villas/fpga/ips/rtds2gpu.hpp new file mode 100644 index 000000000..b956805c3 --- /dev/null +++ b/fpga/include/villas/fpga/ips/rtds2gpu.hpp @@ -0,0 +1,96 @@ +#pragma once + +#include +#include +#include + +#include "rtds2gpu/xrtds2gpu.h" +#include "rtds2gpu/register_types.hpp" + +namespace villas { +namespace fpga { +namespace ip { + +union ControlRegister { + uint32_t value; + struct { uint32_t + ap_start : 1, + ap_done : 1, + ap_idle : 1, + ap_ready : 1, + _res1 : 3, + auto_restart : 1, + _res2 : 24; + }; +}; + + +class Rtds2Gpu : public IpNode, public Hls +{ +public: + friend class Rtds2GpuFactory; + + bool init(); + + void dump(spdlog::level::level_enum logLevel = spdlog::level::info); + + bool startOnce(const MemoryBlock& mem, size_t frameSize, size_t dataOffset, size_t doorbellOffset); + + size_t getMaxFrameSize(); + + void dumpDoorbell(uint32_t doorbellRegister) const; + + bool doorbellIsValid(const uint32_t& doorbellRegister) const + { return reinterpret_cast(doorbellRegister).is_valid; } + + void doorbellReset(uint32_t& doorbellRegister) const + { doorbellRegister = 0; } + + static constexpr const char* registerMemory = "Reg"; + + std::list getMemoryBlocks() const + { return { registerMemory }; } + + + const StreamVertex& + getDefaultSlavePort() const + { return getSlavePort(rtdsInputStreamPort); } + +private: + bool updateStatus(); + +private: + static constexpr const char* axiInterface = "m_axi_axi_mm"; + static constexpr const char* rtdsInputStreamPort = "rtds_input"; + + XRtds2gpu xInstance; + + axilite_reg_status_t status; + size_t maxFrameSize; + + bool started; +}; + + +class Rtds2GpuFactory : public IpNodeFactory { +public: + Rtds2GpuFactory(); + + IpCore* create() + { return new Rtds2Gpu; } + + std::string + getName() const + { return "Rtds2Gpu"; } + + std::string + getDescription() const + { return "HLS RTDS2GPU IP"; } + + Vlnv getCompatibleVlnv() const + { return {"acs.eonerc.rwth-aachen.de:hls:rtds2gpu:"}; } +}; + +} // namespace ip +} // namespace fpga +} // namespace villas diff --git a/fpga/include/villas/fpga/ips/rtds2gpu/register_types.hpp b/fpga/include/villas/fpga/ips/rtds2gpu/register_types.hpp new file mode 100644 index 000000000..ce7328417 --- /dev/null +++ b/fpga/include/villas/fpga/ips/rtds2gpu/register_types.hpp @@ -0,0 +1,57 @@ +#ifndef REGISTER_TYPES_H +#define REGISTER_TYPES_H + +#include +#include +#include + +union axilite_reg_status_t { + uint32_t value; + struct { + uint32_t + last_seq_nr : 16, + last_count : 6, + max_frame_size : 6, + invalid_frame_size : 1, + frame_too_short : 1, + frame_too_long : 1, + is_running : 1; + }; +}; + +union reg_doorbell_t { + uint32_t value; + struct { + uint32_t + seq_nr : 16, + count : 6, + is_valid : 1; + }; + + constexpr reg_doorbell_t() : value(0) {} +}; + +template +struct Rtds2GpuMemoryBuffer { + // this type is only for memory interpretation, it makes no sense to create + // an instance so it's forbidden + Rtds2GpuMemoryBuffer() = delete; + + // T can be a more complex type that wraps multiple values + static constexpr size_t rawValueCount = N * (sizeof(T) / 4); + + // As of C++14, offsetof() is not working for non-standard layout types (i.e. + // composed of non-POD members). This might work in C++17 though. + // More info: https://gist.github.com/graphitemaster/494f21190bb2c63c5516 + //static constexpr size_t doorbellOffset = offsetof(Rtds2GpuMemoryBuffer, doorbell); + //static constexpr size_t dataOffset = offsetof(Rtds2GpuMemoryBuffer, data); + + // HACK: This might break horribly, let's just hope C++17 will be there soon + static constexpr size_t dataOffset = 0; + static constexpr size_t doorbellOffset = N * sizeof(Rtds2GpuMemoryBuffer::data); + + T data[N]; + reg_doorbell_t doorbell; +}; + +#endif // REGISTER_TYPES_H diff --git a/fpga/include/villas/fpga/ips/rtds2gpu/xgpu2rtds_hw.h b/fpga/include/villas/fpga/ips/rtds2gpu/xgpu2rtds_hw.h new file mode 100644 index 000000000..8ea61f0f1 --- /dev/null +++ b/fpga/include/villas/fpga/ips/rtds2gpu/xgpu2rtds_hw.h @@ -0,0 +1,53 @@ +// ============================================================== +// File generated by Vivado(TM) HLS - High-Level Synthesis from C, C++ and SystemC +// Version: 2017.3 +// Copyright (C) 1986-2017 Xilinx, Inc. All Rights Reserved. +// +// ============================================================== + +// CTRL +// 0x00 : Control signals +// bit 0 - ap_start (Read/Write/COH) +// bit 1 - ap_done (Read/COR) +// bit 2 - ap_idle (Read) +// bit 3 - ap_ready (Read) +// bit 7 - auto_restart (Read/Write) +// others - reserved +// 0x04 : Global Interrupt Enable Register +// bit 0 - Global Interrupt Enable (Read/Write) +// others - reserved +// 0x08 : IP Interrupt Enable Register (Read/Write) +// bit 0 - Channel 0 (ap_done) +// bit 1 - Channel 1 (ap_ready) +// others - reserved +// 0x0c : IP Interrupt Status Register (Read/TOW) +// bit 0 - Channel 0 (ap_done) +// bit 1 - Channel 1 (ap_ready) +// others - reserved +// 0x10 : Data signal of frame_size +// bit 31~0 - frame_size[31:0] (Read/Write) +// 0x14 : reserved +// 0x80 : Data signal of status +// bit 31~0 - status[31:0] (Read) +// 0x84 : Control signal of status +// bit 0 - status_ap_vld (Read/COR) +// others - reserved +// 0x40 ~ +// 0x7f : Memory 'frame' (16 * 32b) +// Word n : bit [31:0] - frame[n] +// (SC = Self Clear, COR = Clear on Read, TOW = Toggle on Write, COH = Clear on Handshake) + +#define XGPU2RTDS_CTRL_ADDR_AP_CTRL 0x00 +#define XGPU2RTDS_CTRL_ADDR_GIE 0x04 +#define XGPU2RTDS_CTRL_ADDR_IER 0x08 +#define XGPU2RTDS_CTRL_ADDR_ISR 0x0c +#define XGPU2RTDS_CTRL_ADDR_FRAME_SIZE_DATA 0x10 +#define XGPU2RTDS_CTRL_BITS_FRAME_SIZE_DATA 32 +#define XGPU2RTDS_CTRL_ADDR_STATUS_DATA 0x80 +#define XGPU2RTDS_CTRL_BITS_STATUS_DATA 32 +#define XGPU2RTDS_CTRL_ADDR_STATUS_CTRL 0x84 +#define XGPU2RTDS_CTRL_ADDR_FRAME_BASE 0x40 +#define XGPU2RTDS_CTRL_ADDR_FRAME_HIGH 0x7f +#define XGPU2RTDS_CTRL_WIDTH_FRAME 32 +#define XGPU2RTDS_CTRL_DEPTH_FRAME 16 + diff --git a/fpga/include/villas/fpga/ips/rtds2gpu/xrtds2gpu.h b/fpga/include/villas/fpga/ips/rtds2gpu/xrtds2gpu.h new file mode 100644 index 000000000..87cb2b70a --- /dev/null +++ b/fpga/include/villas/fpga/ips/rtds2gpu/xrtds2gpu.h @@ -0,0 +1,113 @@ +// ============================================================== +// File generated by Vivado(TM) HLS - High-Level Synthesis from C, C++ and SystemC +// Version: 2017.3 +// Copyright (C) 1986-2017 Xilinx, Inc. All Rights Reserved. +// +// ============================================================== + +#ifndef XRTDS2GPU_H +#define XRTDS2GPU_H + +#ifdef __cplusplus +extern "C" { +#endif + +/***************************** Include Files *********************************/ +#ifndef __linux__ +#include "xil_types.h" +#include "xil_assert.h" +#include "xstatus.h" +#include "xil_io.h" +#else +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#endif +#include "xrtds2gpu_hw.h" + +/**************************** Type Definitions ******************************/ +#ifdef __linux__ +typedef uint8_t u8; +typedef uint16_t u16; +typedef uint32_t u32; +#else +typedef struct { + u16 DeviceId; + u32 Ctrl_BaseAddress; +} XRtds2gpu_Config; +#endif + +typedef struct { + u32 Ctrl_BaseAddress; + u32 IsReady; +} XRtds2gpu; + +/***************** Macros (Inline Functions) Definitions *********************/ +#ifndef __linux__ +#define XRtds2gpu_WriteReg(BaseAddress, RegOffset, Data) \ + Xil_Out32((BaseAddress) + (RegOffset), (u32)(Data)) +#define XRtds2gpu_ReadReg(BaseAddress, RegOffset) \ + Xil_In32((BaseAddress) + (RegOffset)) +#else +#define XRtds2gpu_WriteReg(BaseAddress, RegOffset, Data) \ + *(volatile u32*)((BaseAddress) + (RegOffset)) = (u32)(Data) +#define XRtds2gpu_ReadReg(BaseAddress, RegOffset) \ + *(volatile u32*)((BaseAddress) + (RegOffset)) + +#define Xil_AssertVoid(expr) assert(expr) +#define Xil_AssertNonvoid(expr) assert(expr) + +#define XST_SUCCESS 0 +#define XST_DEVICE_NOT_FOUND 2 +#define XST_OPEN_DEVICE_FAILED 3 +#define XIL_COMPONENT_IS_READY 1 +#endif + +/************************** Function Prototypes *****************************/ +#ifndef __linux__ +int XRtds2gpu_Initialize(XRtds2gpu *InstancePtr, u16 DeviceId); +XRtds2gpu_Config* XRtds2gpu_LookupConfig(u16 DeviceId); +int XRtds2gpu_CfgInitialize(XRtds2gpu *InstancePtr, XRtds2gpu_Config *ConfigPtr); +#else +int XRtds2gpu_Initialize(XRtds2gpu *InstancePtr, const char* InstanceName); +int XRtds2gpu_Release(XRtds2gpu *InstancePtr); +#endif + +void XRtds2gpu_Start(XRtds2gpu *InstancePtr); +u32 XRtds2gpu_IsDone(XRtds2gpu *InstancePtr); +u32 XRtds2gpu_IsIdle(XRtds2gpu *InstancePtr); +u32 XRtds2gpu_IsReady(XRtds2gpu *InstancePtr); +void XRtds2gpu_EnableAutoRestart(XRtds2gpu *InstancePtr); +void XRtds2gpu_DisableAutoRestart(XRtds2gpu *InstancePtr); + +void XRtds2gpu_Set_baseaddr(XRtds2gpu *InstancePtr, u32 Data); +u32 XRtds2gpu_Get_baseaddr(XRtds2gpu *InstancePtr); +void XRtds2gpu_Set_data_offset(XRtds2gpu *InstancePtr, u32 Data); +u32 XRtds2gpu_Get_data_offset(XRtds2gpu *InstancePtr); +void XRtds2gpu_Set_doorbell_offset(XRtds2gpu *InstancePtr, u32 Data); +u32 XRtds2gpu_Get_doorbell_offset(XRtds2gpu *InstancePtr); +void XRtds2gpu_Set_frame_size(XRtds2gpu *InstancePtr, u32 Data); +u32 XRtds2gpu_Get_frame_size(XRtds2gpu *InstancePtr); +u32 XRtds2gpu_Get_status(XRtds2gpu *InstancePtr); +u32 XRtds2gpu_Get_status_vld(XRtds2gpu *InstancePtr); + +void XRtds2gpu_InterruptGlobalEnable(XRtds2gpu *InstancePtr); +void XRtds2gpu_InterruptGlobalDisable(XRtds2gpu *InstancePtr); +void XRtds2gpu_InterruptEnable(XRtds2gpu *InstancePtr, u32 Mask); +void XRtds2gpu_InterruptDisable(XRtds2gpu *InstancePtr, u32 Mask); +void XRtds2gpu_InterruptClear(XRtds2gpu *InstancePtr, u32 Mask); +u32 XRtds2gpu_InterruptGetEnabled(XRtds2gpu *InstancePtr); +u32 XRtds2gpu_InterruptGetStatus(XRtds2gpu *InstancePtr); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/fpga/include/villas/fpga/ips/rtds2gpu/xrtds2gpu_hw.h b/fpga/include/villas/fpga/ips/rtds2gpu/xrtds2gpu_hw.h new file mode 100644 index 000000000..73bf1bce2 --- /dev/null +++ b/fpga/include/villas/fpga/ips/rtds2gpu/xrtds2gpu_hw.h @@ -0,0 +1,61 @@ +// ============================================================== +// File generated by Vivado(TM) HLS - High-Level Synthesis from C, C++ and SystemC +// Version: 2017.3 +// Copyright (C) 1986-2017 Xilinx, Inc. All Rights Reserved. +// +// ============================================================== + +// CTRL +// 0x00 : Control signals +// bit 0 - ap_start (Read/Write/COH) +// bit 1 - ap_done (Read/COR) +// bit 2 - ap_idle (Read) +// bit 3 - ap_ready (Read) +// bit 7 - auto_restart (Read/Write) +// others - reserved +// 0x04 : Global Interrupt Enable Register +// bit 0 - Global Interrupt Enable (Read/Write) +// others - reserved +// 0x08 : IP Interrupt Enable Register (Read/Write) +// bit 0 - Channel 0 (ap_done) +// bit 1 - Channel 1 (ap_ready) +// others - reserved +// 0x0c : IP Interrupt Status Register (Read/TOW) +// bit 0 - Channel 0 (ap_done) +// bit 1 - Channel 1 (ap_ready) +// others - reserved +// 0x10 : Data signal of baseaddr +// bit 31~0 - baseaddr[31:0] (Read/Write) +// 0x14 : reserved +// 0x18 : Data signal of data_offset +// bit 31~0 - data_offset[31:0] (Read/Write) +// 0x1c : reserved +// 0x20 : Data signal of doorbell_offset +// bit 31~0 - doorbell_offset[31:0] (Read/Write) +// 0x24 : reserved +// 0x28 : Data signal of frame_size +// bit 31~0 - frame_size[31:0] (Read/Write) +// 0x2c : reserved +// 0x30 : Data signal of status +// bit 31~0 - status[31:0] (Read) +// 0x34 : Control signal of status +// bit 0 - status_ap_vld (Read/COR) +// others - reserved +// (SC = Self Clear, COR = Clear on Read, TOW = Toggle on Write, COH = Clear on Handshake) + +#define XRTDS2GPU_CTRL_ADDR_AP_CTRL 0x00 +#define XRTDS2GPU_CTRL_ADDR_GIE 0x04 +#define XRTDS2GPU_CTRL_ADDR_IER 0x08 +#define XRTDS2GPU_CTRL_ADDR_ISR 0x0c +#define XRTDS2GPU_CTRL_ADDR_BASEADDR_DATA 0x10 +#define XRTDS2GPU_CTRL_BITS_BASEADDR_DATA 32 +#define XRTDS2GPU_CTRL_ADDR_DATA_OFFSET_DATA 0x18 +#define XRTDS2GPU_CTRL_BITS_DATA_OFFSET_DATA 32 +#define XRTDS2GPU_CTRL_ADDR_DOORBELL_OFFSET_DATA 0x20 +#define XRTDS2GPU_CTRL_BITS_DOORBELL_OFFSET_DATA 32 +#define XRTDS2GPU_CTRL_ADDR_FRAME_SIZE_DATA 0x28 +#define XRTDS2GPU_CTRL_BITS_FRAME_SIZE_DATA 32 +#define XRTDS2GPU_CTRL_ADDR_STATUS_DATA 0x30 +#define XRTDS2GPU_CTRL_BITS_STATUS_DATA 32 +#define XRTDS2GPU_CTRL_ADDR_STATUS_CTRL 0x34 + diff --git a/fpga/lib/CMakeLists.txt b/fpga/lib/CMakeLists.txt index 4289c5276..b4d3f34b4 100644 --- a/fpga/lib/CMakeLists.txt +++ b/fpga/lib/CMakeLists.txt @@ -34,8 +34,16 @@ set(SOURCES ips/dma.cpp ips/bram.cpp ips/rtds.cpp + + ips/rtds2gpu/rtds2gpu.cpp + ips/rtds2gpu/xrtds2gpu.c + ips/rtds2gpu/gpu2rtds.cpp ) +# we don't have much influence on drivers generated by Xilinx, so ignore warnings +set_source_files_properties(ips/rtds2gpu/xrtds2gpu.c + PROPERTIES COMPILE_FLAGS -Wno-int-to-pointer-cast) + include(FindPkgConfig) pkg_check_modules(JANSSON jansson) diff --git a/fpga/lib/gpu/gpu.cpp b/fpga/lib/gpu/gpu.cpp index fbefd5e36..e45efa7b2 100644 --- a/fpga/lib/gpu/gpu.cpp +++ b/fpga/lib/gpu/gpu.cpp @@ -374,6 +374,13 @@ void Gpu::memcpyKernel(const MemoryBlock& src, const MemoryBlock& dst, size_t si cudaDeviceSynchronize(); } +MemoryTranslation +Gpu::translate(const MemoryBlock& dst) +{ + auto& mm = MemoryManager::get(); + return mm.getTranslation(masterPciEAddrSpaceId, dst.getAddrSpaceId()); +} + std::unique_ptr GpuAllocator::allocateBlock(size_t size) @@ -381,29 +388,53 @@ GpuAllocator::allocateBlock(size_t size) cudaSetDevice(gpu.gpuId); void* addr; - if(cudaSuccess != cudaMalloc(&addr, size)) { - logger->error("cudaMalloc(..., size={}) failed", size); - throw std::bad_alloc(); - } - auto& mm = MemoryManager::get(); - // assemble name for this block - std::stringstream name; - name << std::showbase << std::hex << reinterpret_cast(addr); + // search for an existing chunk that has enough free memory + auto chunk = std::find_if(chunks.begin(), chunks.end(), [&](const auto& chunk) { + return chunk->getAvailableMemory() >= size; + }); - auto blockName = mm.getSlaveAddrSpaceName(getName(), name.str()); - auto blockAddrSpaceId = mm.getOrCreateAddressSpace(blockName); - const auto localAddr = reinterpret_cast(addr); - std::unique_ptr - mem(new MemoryBlock(localAddr, size, blockAddrSpaceId), this->free); + if(chunk != chunks.end()) { + logger->debug("Found existing chunk that can host the requested block"); - insertMemoryBlock(*mem); + return (*chunk)->allocateBlock(size); - gpu.makeAccessibleToPCIeAndVA(*mem); + } else { + // allocate a new chunk - return mem; + // rounded-up multiple of GPU page size + const size_t chunkSize = size - (size & (GpuPageSize - 1)) + GpuPageSize; + logger->debug("Allocate new chunk of {:#x} bytes", chunkSize); + + if(cudaSuccess != cudaMalloc(&addr, chunkSize)) { + logger->error("cudaMalloc(..., size={}) failed", chunkSize); + throw std::bad_alloc(); + } + + // assemble name for this block + std::stringstream name; + name << std::showbase << std::hex << reinterpret_cast(addr); + + auto blockName = mm.getSlaveAddrSpaceName(getName(), name.str()); + auto blockAddrSpaceId = mm.getOrCreateAddressSpace(blockName); + + const auto localAddr = reinterpret_cast(addr); + std::unique_ptr + mem(new MemoryBlock(localAddr, chunkSize, blockAddrSpaceId), this->free); + + insertMemoryBlock(*mem); + + // already make accessible to CPU + gpu.makeAccessibleToPCIeAndVA(*mem); + + // create a new allocator to manage the chunk and push to chunk list + chunks.push_front(std::make_unique(std::move(mem))); + + // call again, this time there's a large enough chunk + return allocateBlock(size); + } } diff --git a/fpga/lib/gpu/include/villas/gpu.hpp b/fpga/lib/gpu/include/villas/gpu.hpp index 6e8dc0a6d..6e130bb30 100644 --- a/fpga/lib/gpu/include/villas/gpu.hpp +++ b/fpga/lib/gpu/include/villas/gpu.hpp @@ -58,6 +58,9 @@ public: void memcpyKernel(const MemoryBlock& src, const MemoryBlock& dst, size_t size); + MemoryTranslation + translate(const MemoryBlock& dst); + private: bool registerIoMemory(const MemoryBlock& mem); bool registerHostMemory(const MemoryBlock& mem); @@ -81,6 +84,8 @@ private: class GpuAllocator : public BaseAllocator { public: + static constexpr size_t GpuPageSize = 64UL << 10; + GpuAllocator(Gpu& gpu); std::string getName() const; @@ -90,6 +95,8 @@ public: private: Gpu& gpu; + // TODO: replace by multimap (key is available memory) + std::list> chunks; }; class GpuFactory : public Plugin { diff --git a/fpga/lib/ip_node.cpp b/fpga/lib/ip_node.cpp index c1dc36a5b..1a9ce5a48 100644 --- a/fpga/lib/ip_node.cpp +++ b/fpga/lib/ip_node.cpp @@ -41,7 +41,7 @@ IpNode::streamGraph; bool IpNodeFactory::configureJson(IpCore& ip, json_t* json_ip) { - auto& ipNode = reinterpret_cast(ip); + auto& ipNode = dynamic_cast(ip); auto logger = getLogger(); json_t* json_ports = json_object_get(json_ip, "ports"); @@ -216,7 +216,7 @@ IpNode::connectLoopback() logger->debug("switch at: {}", portMaster->nodeName); // TODO: verify this is really a switch! - auto axiStreamSwitch = reinterpret_cast( + auto axiStreamSwitch = dynamic_cast( card->lookupIp(portMaster->nodeName)); if(axiStreamSwitch == nullptr) { diff --git a/fpga/lib/ips/bram.cpp b/fpga/lib/ips/bram.cpp index 18a5ea0e2..fc881ff00 100644 --- a/fpga/lib/ips/bram.cpp +++ b/fpga/lib/ips/bram.cpp @@ -31,7 +31,7 @@ static BramFactory factory; bool BramFactory::configureJson(IpCore& ip, json_t* json_ip) { - auto& bram = reinterpret_cast(ip); + auto& bram = dynamic_cast(ip); if(json_unpack(json_ip, "{ s: i }", "size", &bram.size) != 0) { getLogger()->error("Cannot parse 'size'"); diff --git a/fpga/lib/ips/dma.cpp b/fpga/lib/ips/dma.cpp index 3d8f9a3b6..7408461df 100644 --- a/fpga/lib/ips/dma.cpp +++ b/fpga/lib/ips/dma.cpp @@ -165,7 +165,7 @@ Dma::write(const MemoryBlock& mem, size_t len) mem.getAddrSpaceId()); const void* buf = reinterpret_cast(translation.getLocalAddr(0)); - logger->debug("Write to address: {:p}", buf); + logger->debug("Write to stream from address {:p}", buf); return hasScatterGather() ? writeSG(buf, len) : writeSimple(buf, len); } @@ -180,7 +180,7 @@ Dma::read(const MemoryBlock& mem, size_t len) mem.getAddrSpaceId()); void* buf = reinterpret_cast(translation.getLocalAddr(0)); - logger->debug("Read from address: {:p}", buf); + logger->debug("Read from stream and write to address {:p}", buf); return hasScatterGather() ? readSG(buf, len) : readSimple(buf, len); } diff --git a/fpga/lib/ips/pcie.cpp b/fpga/lib/ips/pcie.cpp index d8294469e..ea5675b7d 100644 --- a/fpga/lib/ips/pcie.cpp +++ b/fpga/lib/ips/pcie.cpp @@ -125,7 +125,7 @@ bool AxiPciExpressBridgeFactory::configureJson(IpCore& ip, json_t* json_ip) { auto logger = getLogger(); - auto& pcie = reinterpret_cast(ip); + auto& pcie = dynamic_cast(ip); for(auto barType : std::list{"axi_bars", "pcie_bars"}) { json_t* json_bars = json_object_get(json_ip, barType.c_str()); diff --git a/fpga/lib/ips/rtds2gpu/gpu2rtds.cpp b/fpga/lib/ips/rtds2gpu/gpu2rtds.cpp new file mode 100644 index 000000000..563d9da63 --- /dev/null +++ b/fpga/lib/ips/rtds2gpu/gpu2rtds.cpp @@ -0,0 +1,142 @@ +#include +#include + +#include +#include + +#include "log.hpp" + +namespace villas { +namespace fpga { +namespace ip { + +static Gpu2RtdsFactory factory; + +bool Gpu2Rtds::init() +{ + Hls::init(); + + auto& registers = addressTranslations.at(registerMemory); + + registerStatus = reinterpret_cast(registers.getLocalAddr(registerStatusOffset)); + registerStatusCtrl = reinterpret_cast(registers.getLocalAddr(registerStatusCtrlOffset)); + registerFrameSize = reinterpret_cast(registers.getLocalAddr(registerFrameSizeOffset)); + registerFrames = reinterpret_cast(registers.getLocalAddr(registerFrameOffset)); + + maxFrameSize = getMaxFrameSize(); + logger->info("Max. frame size supported: {}", maxFrameSize); + + return true; +} + +bool +Gpu2Rtds::startOnce(size_t frameSize) +{ + *registerFrameSize = frameSize; + + start(); + + return true; +} + +void Gpu2Rtds::dump(spdlog::level::level_enum logLevel) +{ + const auto frame_size = *registerFrameSize; + auto status = *registerStatus; + + logger->log(logLevel, "Gpu2Rtds registers:"); + logger->log(logLevel, " Frame size (words): {:#x}", frame_size); + logger->log(logLevel, " Status: {:#x}", status.value); + logger->log(logLevel, " Running: {}", (status.is_running ? "yes" : "no")); + logger->log(logLevel, " Frame too short: {}", (status.frame_too_short ? "yes" : "no")); + logger->log(logLevel, " Frame too long: {}", (status.frame_too_long ? "yes" : "no")); + logger->log(logLevel, " Frame size invalid: {}", (status.invalid_frame_size ? "yes" : "no")); + logger->log(logLevel, " Last count: {}", status.last_count); + logger->log(logLevel, " Last seq. number: {}", status.last_seq_nr); + logger->log(logLevel, " Max. frame size: {}", status.max_frame_size); +} + +//bool Gpu2Rtds::startOnce(const MemoryBlock& mem, size_t frameSize, size_t dataOffset, size_t doorbellOffset) +//{ +// auto& mm = MemoryManager::get(); + +// if(frameSize > maxFrameSize) { +// logger->error("Requested frame size of {} exceeds max. frame size of {}", +// frameSize, maxFrameSize); +// return false; +// } + +// auto translationFromIp = mm.getTranslation( +// getMasterAddrSpaceByInterface(axiInterface), +// mem.getAddrSpaceId()); + +// // set address of memory block in HLS IP +// XGpu2Rtds_Set_baseaddr(&xInstance, translationFromIp.getLocalAddr(0)); + +// XGpu2Rtds_Set_doorbell_offset(&xInstance, doorbellOffset); +// XGpu2Rtds_Set_data_offset(&xInstance, dataOffset); +// XGpu2Rtds_Set_frame_size(&xInstance, frameSize); + +// // prepare memory with all zeroes +// auto translationFromProcess = mm.getTranslationFromProcess(mem.getAddrSpaceId()); +// auto memory = reinterpret_cast(translationFromProcess.getLocalAddr(0)); +// memset(memory, 0, mem.getSize()); + +// // start IP +// return start(); +//} + + + + + +//bool +//Gpu2Rtds::updateStatus() +//{ +// if(not XGpu2Rtds_Get_status_vld(&xInstance)) +// return false; + +// status.value = XGpu2Rtds_Get_status(&xInstance); + +// return true; +//} + +size_t +Gpu2Rtds::getMaxFrameSize() +{ + *registerFrameSize = 0; + + start(); + while(not isFinished()); + + while(not registerStatusCtrl->status_ap_vld); + + axilite_reg_status_t status = *registerStatus; + +// logger->debug("(*registerStatus).max_frame_size: {}", (*registerStatus).max_frame_size); +// logger->debug("status.max_frame_size: {}", status.max_frame_size); + +// assert(status.max_frame_size == (*registerStatus).max_frame_size); + + return status.max_frame_size; +} + +//void +//Gpu2Rtds::dumpDoorbell(uint32_t doorbellRegister) const +//{ +// auto& doorbell = reinterpret_cast(doorbellRegister); + +// logger->info("Doorbell register: {:#08x}", doorbell.value); +// logger->info(" Valid: {}", (doorbell.is_valid ? "yes" : "no")); +// logger->info(" Count: {}", doorbell.count); +// logger->info(" Seq. number: {}", doorbell.seq_nr); +//} + +Gpu2RtdsFactory::Gpu2RtdsFactory() : + IpNodeFactory(getName()) +{ +} + +} // namespace ip +} // namespace fpga +} // namespace villas diff --git a/fpga/lib/ips/rtds2gpu/rtds2gpu.cpp b/fpga/lib/ips/rtds2gpu/rtds2gpu.cpp new file mode 100644 index 000000000..e89574efa --- /dev/null +++ b/fpga/lib/ips/rtds2gpu/rtds2gpu.cpp @@ -0,0 +1,131 @@ +#include +#include + +#include +#include + +#include "log.hpp" + +namespace villas { +namespace fpga { +namespace ip { + +static Rtds2GpuFactory factory; + +bool Rtds2Gpu::init() +{ + Hls::init(); + + xInstance.IsReady = XIL_COMPONENT_IS_READY; + xInstance.Ctrl_BaseAddress = getBaseAddr(registerMemory); + + status.value = 0; + started = false; + +// maxFrameSize = getMaxFrameSize(); + maxFrameSize = 16; + logger->info("Max. frame size supported: {}", maxFrameSize); + + return true; +} + + + +void Rtds2Gpu::dump(spdlog::level::level_enum logLevel) +{ + const auto baseaddr = XRtds2gpu_Get_baseaddr(&xInstance); + const auto data_offset = XRtds2gpu_Get_data_offset(&xInstance); + const auto doorbell_offset = XRtds2gpu_Get_doorbell_offset(&xInstance); + const auto frame_size = XRtds2gpu_Get_frame_size(&xInstance); + + logger->log(logLevel, "Rtds2Gpu registers (IP base {:#x}):", xInstance.Ctrl_BaseAddress); + logger->log(logLevel, " Base address (bytes): {:#x}", baseaddr); + logger->log(logLevel, " Doorbell offset (bytes): {:#x}", doorbell_offset); + logger->log(logLevel, " Data offset (bytes): {:#x}", data_offset); + logger->log(logLevel, " Frame size (words): {:#x}", frame_size); + logger->log(logLevel, " Status: {:#x}", status.value); + logger->log(logLevel, " Running: {}", (status.is_running ? "yes" : "no")); + logger->log(logLevel, " Frame too short: {}", (status.frame_too_short ? "yes" : "no")); + logger->log(logLevel, " Frame too long: {}", (status.frame_too_long ? "yes" : "no")); + logger->log(logLevel, " Frame size invalid: {}", (status.invalid_frame_size ? "yes" : "no")); + logger->log(logLevel, " Last count: {}", status.last_count); + logger->log(logLevel, " Last seq. number: {}", status.last_seq_nr); + logger->log(logLevel, " Max. frame size: {}", status.max_frame_size); +} + +bool Rtds2Gpu::startOnce(const MemoryBlock& mem, size_t frameSize, size_t dataOffset, size_t doorbellOffset) +{ + auto& mm = MemoryManager::get(); + + if(frameSize > maxFrameSize) { + logger->error("Requested frame size of {} exceeds max. frame size of {}", + frameSize, maxFrameSize); + return false; + } + + auto translationFromIp = mm.getTranslation( + getMasterAddrSpaceByInterface(axiInterface), + mem.getAddrSpaceId()); + + // set address of memory block in HLS IP + XRtds2gpu_Set_baseaddr(&xInstance, translationFromIp.getLocalAddr(0)); + + XRtds2gpu_Set_doorbell_offset(&xInstance, doorbellOffset); + XRtds2gpu_Set_data_offset(&xInstance, dataOffset); + XRtds2gpu_Set_frame_size(&xInstance, frameSize); + + // prepare memory with all zeroes + auto translationFromProcess = mm.getTranslationFromProcess(mem.getAddrSpaceId()); + auto memory = reinterpret_cast(translationFromProcess.getLocalAddr(0)); + memset(memory, 0, mem.getSize()); + + // start IP + return start(); +} + + + + + +bool +Rtds2Gpu::updateStatus() +{ + if(not XRtds2gpu_Get_status_vld(&xInstance)) + return false; + + status.value = XRtds2gpu_Get_status(&xInstance); + + return true; +} + +size_t +Rtds2Gpu::getMaxFrameSize() +{ + XRtds2gpu_Set_frame_size(&xInstance, 0); + + start(); + while(not isFinished()); + updateStatus(); + + return status.max_frame_size; +} + +void +Rtds2Gpu::dumpDoorbell(uint32_t doorbellRegister) const +{ + auto& doorbell = reinterpret_cast(doorbellRegister); + + logger->info("Doorbell register: {:#08x}", doorbell.value); + logger->info(" Valid: {}", (doorbell.is_valid ? "yes" : "no")); + logger->info(" Count: {}", doorbell.count); + logger->info(" Seq. number: {}", doorbell.seq_nr); +} + +Rtds2GpuFactory::Rtds2GpuFactory() : + IpNodeFactory(getName()) +{ +} + +} // namespace ip +} // namespace fpga +} // namespace villas diff --git a/fpga/lib/ips/rtds2gpu/xrtds2gpu.c b/fpga/lib/ips/rtds2gpu/xrtds2gpu.c new file mode 100644 index 000000000..26a5e4f34 --- /dev/null +++ b/fpga/lib/ips/rtds2gpu/xrtds2gpu.c @@ -0,0 +1,221 @@ +// ============================================================== +// File generated by Vivado(TM) HLS - High-Level Synthesis from C, C++ and SystemC +// Version: 2017.3 +// Copyright (C) 1986-2017 Xilinx, Inc. All Rights Reserved. +// +// ============================================================== + +/***************************** Include Files *********************************/ +#include + +/************************** Function Implementation *************************/ +#ifndef __linux__ +int XRtds2gpu_CfgInitialize(XRtds2gpu *InstancePtr, XRtds2gpu_Config *ConfigPtr) { + Xil_AssertNonvoid(InstancePtr != NULL); + Xil_AssertNonvoid(ConfigPtr != NULL); + + InstancePtr->Ctrl_BaseAddress = ConfigPtr->Ctrl_BaseAddress; + InstancePtr->IsReady = XIL_COMPONENT_IS_READY; + + return XST_SUCCESS; +} +#endif + +void XRtds2gpu_Start(XRtds2gpu *InstancePtr) { + u32 Data; + + Xil_AssertVoid(InstancePtr != NULL); + Xil_AssertVoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY); + + Data = XRtds2gpu_ReadReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_AP_CTRL) & 0x80; + XRtds2gpu_WriteReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_AP_CTRL, Data | 0x01); +} + +u32 XRtds2gpu_IsDone(XRtds2gpu *InstancePtr) { + u32 Data; + + Xil_AssertNonvoid(InstancePtr != NULL); + Xil_AssertNonvoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY); + + Data = XRtds2gpu_ReadReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_AP_CTRL); + return (Data >> 1) & 0x1; +} + +u32 XRtds2gpu_IsIdle(XRtds2gpu *InstancePtr) { + u32 Data; + + Xil_AssertNonvoid(InstancePtr != NULL); + Xil_AssertNonvoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY); + + Data = XRtds2gpu_ReadReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_AP_CTRL); + return (Data >> 2) & 0x1; +} + +u32 XRtds2gpu_IsReady(XRtds2gpu *InstancePtr) { + u32 Data; + + Xil_AssertNonvoid(InstancePtr != NULL); + Xil_AssertNonvoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY); + + Data = XRtds2gpu_ReadReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_AP_CTRL); + // check ap_start to see if the pcore is ready for next input + return !(Data & 0x1); +} + +void XRtds2gpu_EnableAutoRestart(XRtds2gpu *InstancePtr) { + Xil_AssertVoid(InstancePtr != NULL); + Xil_AssertVoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY); + + XRtds2gpu_WriteReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_AP_CTRL, 0x80); +} + +void XRtds2gpu_DisableAutoRestart(XRtds2gpu *InstancePtr) { + Xil_AssertVoid(InstancePtr != NULL); + Xil_AssertVoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY); + + XRtds2gpu_WriteReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_AP_CTRL, 0); +} + +void XRtds2gpu_Set_baseaddr(XRtds2gpu *InstancePtr, u32 Data) { + Xil_AssertVoid(InstancePtr != NULL); + Xil_AssertVoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY); + + XRtds2gpu_WriteReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_BASEADDR_DATA, Data); +} + +u32 XRtds2gpu_Get_baseaddr(XRtds2gpu *InstancePtr) { + u32 Data; + + Xil_AssertNonvoid(InstancePtr != NULL); + Xil_AssertNonvoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY); + + Data = XRtds2gpu_ReadReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_BASEADDR_DATA); + return Data; +} + +void XRtds2gpu_Set_data_offset(XRtds2gpu *InstancePtr, u32 Data) { + Xil_AssertVoid(InstancePtr != NULL); + Xil_AssertVoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY); + + XRtds2gpu_WriteReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_DATA_OFFSET_DATA, Data); +} + +u32 XRtds2gpu_Get_data_offset(XRtds2gpu *InstancePtr) { + u32 Data; + + Xil_AssertNonvoid(InstancePtr != NULL); + Xil_AssertNonvoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY); + + Data = XRtds2gpu_ReadReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_DATA_OFFSET_DATA); + return Data; +} + +void XRtds2gpu_Set_doorbell_offset(XRtds2gpu *InstancePtr, u32 Data) { + Xil_AssertVoid(InstancePtr != NULL); + Xil_AssertVoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY); + + XRtds2gpu_WriteReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_DOORBELL_OFFSET_DATA, Data); +} + +u32 XRtds2gpu_Get_doorbell_offset(XRtds2gpu *InstancePtr) { + u32 Data; + + Xil_AssertNonvoid(InstancePtr != NULL); + Xil_AssertNonvoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY); + + Data = XRtds2gpu_ReadReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_DOORBELL_OFFSET_DATA); + return Data; +} + +void XRtds2gpu_Set_frame_size(XRtds2gpu *InstancePtr, u32 Data) { + Xil_AssertVoid(InstancePtr != NULL); + Xil_AssertVoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY); + + XRtds2gpu_WriteReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_FRAME_SIZE_DATA, Data); +} + +u32 XRtds2gpu_Get_frame_size(XRtds2gpu *InstancePtr) { + u32 Data; + + Xil_AssertNonvoid(InstancePtr != NULL); + Xil_AssertNonvoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY); + + Data = XRtds2gpu_ReadReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_FRAME_SIZE_DATA); + return Data; +} + +u32 XRtds2gpu_Get_status(XRtds2gpu *InstancePtr) { + u32 Data; + + Xil_AssertNonvoid(InstancePtr != NULL); + Xil_AssertNonvoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY); + + Data = XRtds2gpu_ReadReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_STATUS_DATA); + return Data; +} + +u32 XRtds2gpu_Get_status_vld(XRtds2gpu *InstancePtr) { + u32 Data; + + Xil_AssertNonvoid(InstancePtr != NULL); + Xil_AssertNonvoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY); + + Data = XRtds2gpu_ReadReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_STATUS_CTRL); + return Data & 0x1; +} + +void XRtds2gpu_InterruptGlobalEnable(XRtds2gpu *InstancePtr) { + Xil_AssertVoid(InstancePtr != NULL); + Xil_AssertVoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY); + + XRtds2gpu_WriteReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_GIE, 1); +} + +void XRtds2gpu_InterruptGlobalDisable(XRtds2gpu *InstancePtr) { + Xil_AssertVoid(InstancePtr != NULL); + Xil_AssertVoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY); + + XRtds2gpu_WriteReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_GIE, 0); +} + +void XRtds2gpu_InterruptEnable(XRtds2gpu *InstancePtr, u32 Mask) { + u32 Register; + + Xil_AssertVoid(InstancePtr != NULL); + Xil_AssertVoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY); + + Register = XRtds2gpu_ReadReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_IER); + XRtds2gpu_WriteReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_IER, Register | Mask); +} + +void XRtds2gpu_InterruptDisable(XRtds2gpu *InstancePtr, u32 Mask) { + u32 Register; + + Xil_AssertVoid(InstancePtr != NULL); + Xil_AssertVoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY); + + Register = XRtds2gpu_ReadReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_IER); + XRtds2gpu_WriteReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_IER, Register & (~Mask)); +} + +void XRtds2gpu_InterruptClear(XRtds2gpu *InstancePtr, u32 Mask) { + Xil_AssertVoid(InstancePtr != NULL); + Xil_AssertVoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY); + + XRtds2gpu_WriteReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_ISR, Mask); +} + +u32 XRtds2gpu_InterruptGetEnabled(XRtds2gpu *InstancePtr) { + Xil_AssertNonvoid(InstancePtr != NULL); + Xil_AssertNonvoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY); + + return XRtds2gpu_ReadReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_IER); +} + +u32 XRtds2gpu_InterruptGetStatus(XRtds2gpu *InstancePtr) { + Xil_AssertNonvoid(InstancePtr != NULL); + Xil_AssertNonvoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY); + + return XRtds2gpu_ReadReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_ISR); +} + diff --git a/fpga/lib/ips/switch.cpp b/fpga/lib/ips/switch.cpp index d2365382c..2d4fa0a15 100644 --- a/fpga/lib/ips/switch.cpp +++ b/fpga/lib/ips/switch.cpp @@ -143,7 +143,7 @@ AxiStreamSwitchFactory::configureJson(IpCore& ip, json_t* json_ip) auto logger = getLogger(); - auto& axiSwitch = reinterpret_cast(ip); + auto& axiSwitch = dynamic_cast(ip); if(json_unpack(json_ip, "{ s: i }", "num_ports", &axiSwitch.num_ports) != 0) { logger->error("Cannot parse 'num_ports'"); diff --git a/fpga/scripts/hwdef-parse.py b/fpga/scripts/hwdef-parse.py index f999b5c01..e548ac1d6 100755 --- a/fpga/scripts/hwdef-parse.py +++ b/fpga/scripts/hwdef-parse.py @@ -36,7 +36,9 @@ whitelist = [ [ 'xilinx.com', 'ip', 'axi_gpio' ], [ 'xilinx.com', 'ip', 'axi_bram_ctrl' ], [ 'xilinx.com', 'ip', 'axis_data_fifo' ], - [ 'xilinx.com', 'ip', 'axi_pcie' ] + [ 'xilinx.com', 'ip', 'axi_pcie' ], + [ 'xilinx.com', 'hls', 'rtds2gpu' ], + [ 'xilinx.com', 'hls', 'mem' ] ] # List of VLNI ids of AXI4-Stream infrastructure IP cores which do not alter data diff --git a/fpga/tests/CMakeLists.txt b/fpga/tests/CMakeLists.txt index 5db87d33f..1135d90db 100644 --- a/fpga/tests/CMakeLists.txt +++ b/fpga/tests/CMakeLists.txt @@ -27,11 +27,13 @@ set(SOURCES dma.cpp fifo.cpp rtds.cpp + rtds2gpu.cpp timer.cpp ) if(CMAKE_CUDA_COMPILER) - list(APPEND SOURCES gpu.cpp) + enable_language(CUDA) + list(APPEND SOURCES gpu.cpp gpu_kernels.cu) endif() add_executable(unit-tests ${SOURCES}) diff --git a/fpga/tests/dma.cpp b/fpga/tests/dma.cpp index 63207748f..34a42f72b 100644 --- a/fpga/tests/dma.cpp +++ b/fpga/tests/dma.cpp @@ -46,7 +46,7 @@ Test(fpga, dma, .description = "DMA") logger->info("Testing {}", *ip); - auto dma = reinterpret_cast(*ip); + auto dma = dynamic_cast(*ip); if(not dma.loopbackPossible()) { logger->info("Loopback test not possible for {}", *ip); diff --git a/fpga/tests/fifo.cpp b/fpga/tests/fifo.cpp index 5ef8e9653..805641352 100644 --- a/fpga/tests/fifo.cpp +++ b/fpga/tests/fifo.cpp @@ -46,7 +46,7 @@ Test(fpga, fifo, .description = "FIFO") logger->info("Testing {}", *ip); - auto fifo = reinterpret_cast(*ip); + auto fifo = dynamic_cast(*ip); if(not fifo.connectLoopback()) { continue; diff --git a/fpga/tests/gpu.cpp b/fpga/tests/gpu.cpp index 6bb3a7dd8..739da2f80 100644 --- a/fpga/tests/gpu.cpp +++ b/fpga/tests/gpu.cpp @@ -62,7 +62,7 @@ Test(fpga, gpu_dma, .description = "GPU DMA tests") logger->info("Testing {}", *ip); - auto bram = reinterpret_cast(ip.get()); + auto bram = dynamic_cast(ip.get()); cr_assert_not_null(bram, "Couldn't find BRAM"); count++; diff --git a/fpga/tests/gpu_kernels.cu b/fpga/tests/gpu_kernels.cu new file mode 100644 index 000000000..3f5b8b0d4 --- /dev/null +++ b/fpga/tests/gpu_kernels.cu @@ -0,0 +1,75 @@ +#include +#include + +#include +#include + +#include +#include + + + +__global__ void +gpu_rtds_rtt_kernel(volatile uint32_t* dataIn, volatile reg_doorbell_t* doorbellIn, + volatile uint32_t* dataOut, volatile villas::fpga::ip::ControlRegister* controlRegister, + int* run) +{ + printf("[gpu] gpu kernel go\n"); + + printf("dataIn: %p\n", dataIn); + printf("doorbellIn: %p\n", doorbellIn); + printf("dataOut: %p\n", dataOut); + printf("control: %p\n", controlRegister); + printf("run: %p\n", run); + +// *run = reinterpret_cast(malloc(sizeof(bool))); +// **run = true; + + uint32_t last_seq; + while(*run) { + // wait for data +// printf("[gpu] wait for data, last_seq=%u\n", last_seq); + while(not (doorbellIn->is_valid and (last_seq != doorbellIn->seq_nr)) and *run); +// printf("doorbell: 0x%08x\n", doorbellIn->value); + + last_seq = doorbellIn->seq_nr; + +// printf("[gpu] copy data\n"); + for(size_t i = 0; i < doorbellIn->count; i++) { + dataOut[i] = dataIn[i]; + } + + // reset doorbell +// printf("[gpu] reset doorbell\n"); +// doorbellIn->value = 0; + +// printf("[gpu] signal go for gpu2rtds\n"); + controlRegister->ap_start = 1; + } + + printf("kernel done\n"); +} + +static int* run = nullptr; + +void gpu_rtds_rtt_start(volatile uint32_t* dataIn, volatile reg_doorbell_t* doorbellIn, + volatile uint32_t* dataOut, volatile villas::fpga::ip::ControlRegister* controlRegister) +{ + printf("run: %p\n", run); + if(run == nullptr) { + run = (int*)malloc(sizeof(uint32_t)); + cudaHostRegister(run, sizeof(uint32_t), 0); + } + printf("run: %p\n", run); + + + *run = 1; + gpu_rtds_rtt_kernel<<<1, 1>>>(dataIn, doorbellIn, dataOut, controlRegister, run); + printf("[cpu] kernel launched\n"); +} + +void gpu_rtds_rtt_stop() +{ + *run = 0; + cudaDeviceSynchronize(); +} diff --git a/fpga/tests/rtds.cpp b/fpga/tests/rtds.cpp index 0b1067cdd..df88cacdb 100644 --- a/fpga/tests/rtds.cpp +++ b/fpga/tests/rtds.cpp @@ -80,8 +80,12 @@ Test(fpga, rtds, .description = "RTDS") auto dmaMaster = dma->getMasterPort(dma->mm2sPort); auto dmaSlave = dma->getSlavePort(dma->s2mmPort); - rtds->connect(rtdsMaster, dmaSlave); - dma->connect(dmaMaster, rtdsSlave); +// rtds->connect(*rtds); +// logger->info("loopback"); +// while(1); + +// rtds->connect(rtdsMaster, dmaSlave); +// dma->connect(dmaMaster, rtdsSlave); auto mem = villas::HostRam::getAllocator().allocate(0x100 / sizeof(int32_t)); diff --git a/fpga/tests/rtds2gpu.cpp b/fpga/tests/rtds2gpu.cpp new file mode 100644 index 000000000..cc3def8ce --- /dev/null +++ b/fpga/tests/rtds2gpu.cpp @@ -0,0 +1,349 @@ +/** FIFO unit test. + * + * @file + * @author Steffen Vogel + * @copyright 2017, Steffen Vogel + * @license GNU General Public License (version 3) + * + * VILLASfpga + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + *********************************************************************************/ + +#include + +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "global.hpp" + + +static constexpr size_t SAMPLE_SIZE = 4; +static constexpr size_t SAMPLE_COUNT = 1; +static constexpr size_t FRAME_SIZE = SAMPLE_COUNT * SAMPLE_SIZE; + +static constexpr size_t DOORBELL_OFFSET = SAMPLE_COUNT; +static constexpr size_t DATA_OFFSET = 0; + +static void dumpMem(const uint32_t* addr, size_t len) +{ + const size_t bytesPerLine = 16; + const size_t lines = (len) / bytesPerLine + 1; + const uint8_t* buf = reinterpret_cast(addr); + + size_t bytesRead = 0; + + for(size_t line = 0; line < lines; line++) { + const unsigned base = line * bytesPerLine; + printf("0x%04x: ", base); + + for(size_t i = 0; i < bytesPerLine && bytesRead < len; i++) { + printf("0x%02x ", buf[base + i]); + bytesRead++; + } + puts(""); + } +} + +Test(fpga, rtds2gpu_loopback_dma, .description = "Rtds2Gpu") +{ + auto logger = loggerGetOrCreate("unittest:rtds2gpu"); + + for(auto& ip : state.cards.front()->ips) { + if(*ip != villas::fpga::Vlnv("acs.eonerc.rwth-aachen.de:hls:rtds2gpu:")) + continue; + + logger->info("Testing {}", *ip); + + + /* Collect neccessary IPs */ + + auto rtds2gpu = dynamic_cast(*ip); + + auto axiSwitch = dynamic_cast( + state.cards.front()->lookupIp(villas::fpga::Vlnv("xilinx.com:ip:axis_switch:"))); + + auto dma = dynamic_cast( + state.cards.front()->lookupIp(villas::fpga::Vlnv("xilinx.com:ip:axi_dma:"))); + + auto gpu2rtds = dynamic_cast( + state.cards.front()->lookupIp(villas::fpga::Vlnv("acs.eonerc.rwth-aachen.de:hls:gpu2rtds:"))); + + auto rtds = dynamic_cast( + state.cards.front()->lookupIp(villas::fpga::Vlnv("acs.eonerc.rwth-aachen.de:user:rtds_axis:"))); + + + cr_assert_not_null(axiSwitch, "No AXI switch IP found"); + cr_assert_not_null(dma, "No DMA IP found"); + cr_assert_not_null(gpu2rtds, "No Gpu2Rtds IP found"); + cr_assert_not_null(rtds, "RTDS IP not found"); + + rtds2gpu.dump(spdlog::level::debug); + gpu2rtds->dump(spdlog::level::debug); + + + /* Allocate and prepare memory */ + + // allocate space for all samples and doorbell register + auto dmaMemSrc = villas::HostDmaRam::getAllocator(0).allocate(SAMPLE_COUNT + 1); + auto dmaMemDst = villas::HostDmaRam::getAllocator(0).allocate(SAMPLE_COUNT + 1); + auto dmaMemDst2 = villas::HostDmaRam::getAllocator(0).allocate(SAMPLE_COUNT + 1); + + + memset(&dmaMemSrc, 0x11, dmaMemSrc.getMemoryBlock().getSize()); + memset(&dmaMemDst, 0x55, dmaMemDst.getMemoryBlock().getSize()); + memset(&dmaMemDst2, 0x77, dmaMemDst2.getMemoryBlock().getSize()); + + const uint32_t* dataSrc = &dmaMemSrc[DATA_OFFSET]; + const uint32_t* dataDst = &dmaMemDst[DATA_OFFSET]; + const uint32_t* dataDst2 = &dmaMemDst2[0]; + + dumpMem(dataSrc, dmaMemSrc.getMemoryBlock().getSize()); + dumpMem(dataDst, dmaMemDst.getMemoryBlock().getSize()); + dumpMem(dataDst2, dmaMemDst2.getMemoryBlock().getSize()); + + + // connect AXI Stream from DMA to Rtds2Gpu IP + cr_assert(dma->connect(rtds2gpu)); + + cr_assert(rtds2gpu.startOnce(dmaMemDst.getMemoryBlock(), SAMPLE_COUNT, DATA_OFFSET * 4, DOORBELL_OFFSET * 4), + "Preparing Rtds2Gpu IP failed"); + + cr_assert(dma->write(dmaMemSrc.getMemoryBlock(), FRAME_SIZE), + "Starting DMA MM2S transfer failed"); + + cr_assert(dma->writeComplete(), + "DMA failed"); + + while(not rtds2gpu.isFinished()); + + const uint32_t* doorbellDst = &dmaMemDst[DOORBELL_OFFSET]; + rtds2gpu.dump(spdlog::level::info); + rtds2gpu.dumpDoorbell(*doorbellDst); + + cr_assert(memcmp(dataSrc, dataDst, FRAME_SIZE) == 0, "Memory not equal"); + + + for(size_t i = 0; i < SAMPLE_COUNT; i++) { + gpu2rtds->registerFrames[i] = dmaMemDst[i]; + } + + + // connect AXI Stream from Gpu2Rtds IP to DMA + cr_assert(gpu2rtds->connect(*dma)); + + cr_assert(dma->read(dmaMemDst2.getMemoryBlock(), FRAME_SIZE), + "Starting DMA S2MM transfer failed"); + + cr_assert(gpu2rtds->startOnce(SAMPLE_COUNT), + "Preparing Gpu2Rtds IP failed"); + + cr_assert(dma->readComplete(), + "DMA failed"); + + while(not gpu2rtds->isFinished()); + + cr_assert(memcmp(dataSrc, dataDst2, FRAME_SIZE) == 0, "Memory not equal"); + + dumpMem(dataSrc, dmaMemSrc.getMemoryBlock().getSize()); + dumpMem(dataDst, dmaMemDst.getMemoryBlock().getSize()); + dumpMem(dataDst2, dmaMemDst2.getMemoryBlock().getSize()); + + logger->info(TXT_GREEN("Passed")); + } +} + +Test(fpga, rtds2gpu_rtt_cpu, .description = "Rtds2Gpu RTT via CPU") +{ + auto logger = loggerGetOrCreate("unittest:rtds2gpu"); + + /* Collect neccessary IPs */ + + auto gpu2rtds = dynamic_cast( + state.cards.front()->lookupIp(villas::fpga::Vlnv("acs.eonerc.rwth-aachen.de:hls:gpu2rtds:"))); + + auto rtds2gpu = dynamic_cast( + state.cards.front()->lookupIp(villas::fpga::Vlnv("acs.eonerc.rwth-aachen.de:hls:rtds2gpu:"))); + + cr_assert_not_null(gpu2rtds, "No Gpu2Rtds IP found"); + cr_assert_not_null(rtds2gpu, "No Rtds2Gpu IP not found"); + + for(auto& ip : state.cards.front()->ips) { + if(*ip != villas::fpga::Vlnv("acs.eonerc.rwth-aachen.de:user:rtds_axis:")) + continue; + + auto& rtds = dynamic_cast(*ip); + logger->info("Testing {}", rtds); + + auto dmaRam = villas::HostDmaRam::getAllocator().allocate(SAMPLE_COUNT + 1); + uint32_t* data = &dmaRam[DATA_OFFSET]; + uint32_t* doorbell = &dmaRam[DOORBELL_OFFSET]; + + // TEST: rtds loopback via switch, this should always work and have RTT=1 + //cr_assert(rtds.connect(rtds)); + //logger->info("loopback"); + //while(1); + + cr_assert(rtds.connect(*rtds2gpu)); + cr_assert(gpu2rtds->connect(rtds)); + + + for(size_t i = 1; i <= 10000; ) { + rtds2gpu->doorbellReset(*doorbell); + rtds2gpu->startOnce(dmaRam.getMemoryBlock(), SAMPLE_COUNT, DATA_OFFSET * 4, DOORBELL_OFFSET * 4); + + // Wait by polling rtds2gpu IP or ... + // while(not rtds2gpu->isFinished()); + + // Wait by polling (local) doorbell register (= just memory) + while(not rtds2gpu->doorbellIsValid(*doorbell)); + + + // copy samples to gpu2rtds IP + for(size_t i = 0; i < SAMPLE_COUNT; i++) { + gpu2rtds->registerFrames[i] = data[i]; + } + + // Waiting for gpu2rtds is not strictly required + gpu2rtds->startOnce(SAMPLE_COUNT); + //while(not gpu2rtds->isFinished()); + + if(i % 1000 == 0) { + logger->info("Successful iterations {}, data {}", i, data[0]); + rtds2gpu->dump(); + rtds2gpu->dumpDoorbell(data[1]); + } + } + + logger->info(TXT_GREEN("Passed")); + } +} + +void gpu_rtds_rtt_start(volatile uint32_t* dataIn, volatile reg_doorbell_t* doorbellIn, + volatile uint32_t* dataOut, volatile villas::fpga::ip::ControlRegister* controlRegister); + +void gpu_rtds_rtt_stop(); + +Test(fpga, rtds2gpu_rtt_gpu, .description = "Rtds2Gpu RTT via GPU") +{ + auto logger = loggerGetOrCreate("unittest:rtds2gpu"); + + /* Collect neccessary IPs */ + + auto gpu2rtds = dynamic_cast( + state.cards.front()->lookupIp(villas::fpga::Vlnv("acs.eonerc.rwth-aachen.de:hls:gpu2rtds:"))); + + auto rtds2gpu = dynamic_cast( + state.cards.front()->lookupIp(villas::fpga::Vlnv("acs.eonerc.rwth-aachen.de:hls:rtds2gpu:"))); + + cr_assert_not_null(gpu2rtds, "No Gpu2Rtds IP found"); + cr_assert_not_null(rtds2gpu, "No Rtds2Gpu IP not found"); + + villas::Plugin* plugin = villas::Plugin::lookup(villas::Plugin::Type::Gpu, ""); + auto gpuPlugin = dynamic_cast(plugin); + cr_assert_not_null(gpuPlugin, "No GPU plugin found"); + + auto gpus = gpuPlugin->make(); + cr_assert(gpus.size() > 0, "No GPUs found"); + + // just get first cpu + auto& gpu = gpus.front(); + + // allocate memory on GPU and make accessible by to PCIe/FPGA + auto gpuRam = gpu->getAllocator().allocate(SAMPLE_COUNT + 1); + cr_assert(gpu->makeAccessibleToPCIeAndVA(gpuRam.getMemoryBlock())); + + // make Gpu2Rtds IP register memory on FPGA accessible to GPU + cr_assert(gpu->makeAccessibleFromPCIeOrHostRam(gpu2rtds->getRegisterMemory())); + + auto tr = gpu->translate(gpuRam.getMemoryBlock()); + + auto dataIn = reinterpret_cast(tr.getLocalAddr(DATA_OFFSET * sizeof(uint32_t))); + auto doorbellIn = reinterpret_cast(tr.getLocalAddr(DOORBELL_OFFSET * sizeof(uint32_t))); + + + auto gpu2rtdsRegisters = gpu->translate(gpu2rtds->getRegisterMemory()); + + auto frameRegister = reinterpret_cast(gpu2rtdsRegisters.getLocalAddr(gpu2rtds->registerFrameOffset)); + auto controlRegister = reinterpret_cast(gpu2rtdsRegisters.getLocalAddr(gpu2rtds->registerControlAddr)); + +// auto doorbellInCpu = reinterpret_cast(&gpuRam[DOORBELL_OFFSET]); + + for(auto& ip : state.cards.front()->ips) { + if(*ip != villas::fpga::Vlnv("acs.eonerc.rwth-aachen.de:user:rtds_axis:")) + continue; + + auto& rtds = dynamic_cast(*ip); + logger->info("Testing {}", rtds); + + + // TEST: rtds loopback via switch, this should always work and have RTT=1 + //cr_assert(rtds.connect(rtds)); + //logger->info("loopback"); + //while(1); + + cr_assert(rtds.connect(*rtds2gpu)); + cr_assert(gpu2rtds->connect(rtds)); + + // launch once so they are configured + cr_assert(rtds2gpu->startOnce(gpuRam.getMemoryBlock(), SAMPLE_COUNT, DATA_OFFSET * 4, DOORBELL_OFFSET * 4)); + cr_assert(gpu2rtds->startOnce(SAMPLE_COUNT)); + + rtds2gpu->setAutoRestart(true); + rtds2gpu->start(); + + logger->info("GPU RTT RTDS"); + + std::string dummy; + +// logger->info("Press enter to proceed"); +// std::cin >> dummy; + + gpu_rtds_rtt_start(dataIn, doorbellIn, frameRegister, controlRegister); + +// while(1) { +// cr_assert(rtds2gpu->startOnce(gpuRam.getMemoryBlock(), SAMPLE_COUNT, DATA_OFFSET * 4, DOORBELL_OFFSET * 4)); +// } + +// for(int i = 0; i < 10000; i++) { +// while(not doorbellInCpu->is_valid); +// logger->debug("received data"); +// } + +// logger->info("Press enter to cancel"); +// std::cin >> dummy; + + while(1) { + sleep(1); +// logger->debug("Current sequence number: {}", doorbellInCpu->seq_nr); + logger->debug("Still running"); + } + + gpu_rtds_rtt_stop(); + + + + logger->info(TXT_GREEN("Passed")); + } +} diff --git a/fpga/tests/timer.cpp b/fpga/tests/timer.cpp index d5f96a446..dd7ccff02 100644 --- a/fpga/tests/timer.cpp +++ b/fpga/tests/timer.cpp @@ -45,7 +45,7 @@ Test(fpga, timer, .description = "Timer Counter") count++; - auto timer = reinterpret_cast(*ip); + auto timer = dynamic_cast(*ip); logger->info("Test simple waiting"); timer.start(timer.getFrequency() / 10);