mirror of
https://git.rwth-aachen.de/acs/public/villas/node/
synced 2025-03-09 00:00:00 +01:00
Merge branch 'feature/hls-rtds2gpu' into develop
This commit is contained in:
commit
2112038d70
32 changed files with 1900 additions and 164 deletions
|
@ -1 +1 @@
|
|||
Subproject commit dd7d75d0aab3801d65f9ff757d82f47f705514af
|
||||
Subproject commit 9747c6ead6dedff943dbf22ce74e40e9b2622514
|
|
@ -33,90 +33,6 @@
|
|||
"hier_0_axi_dma_axi_dma_0": {
|
||||
"vlnv": "xilinx.com:ip:axi_dma:7.1",
|
||||
"memory-view": {
|
||||
"M_AXI_SG": {
|
||||
"bram_0_axi_bram_ctrl_0": {
|
||||
"Mem0": {
|
||||
"baseaddr": 0,
|
||||
"highaddr": 8191,
|
||||
"size": 8192
|
||||
}
|
||||
},
|
||||
"hier_0_axi_dma_axi_dma_1": {
|
||||
"Reg": {
|
||||
"baseaddr": 8192,
|
||||
"highaddr": 12287,
|
||||
"size": 4096
|
||||
}
|
||||
},
|
||||
"hier_0_axi_dma_axi_dma_0": {
|
||||
"Reg": {
|
||||
"baseaddr": 12288,
|
||||
"highaddr": 16383,
|
||||
"size": 4096
|
||||
}
|
||||
},
|
||||
"timer_0_axi_timer_0": {
|
||||
"Reg": {
|
||||
"baseaddr": 16384,
|
||||
"highaddr": 20479,
|
||||
"size": 4096
|
||||
}
|
||||
},
|
||||
"hier_0_axis_interconnect_0_axis_interconnect_0_xbar": {
|
||||
"Reg": {
|
||||
"baseaddr": 20480,
|
||||
"highaddr": 24575,
|
||||
"size": 4096
|
||||
}
|
||||
},
|
||||
"hier_0_axi_fifo_mm_s_0": {
|
||||
"Mem0": {
|
||||
"baseaddr": 24576,
|
||||
"highaddr": 28671,
|
||||
"size": 4096
|
||||
},
|
||||
"Mem1": {
|
||||
"baseaddr": 49152,
|
||||
"highaddr": 57343,
|
||||
"size": 8192
|
||||
}
|
||||
},
|
||||
"pcie_0_axi_reset_0": {
|
||||
"Reg": {
|
||||
"baseaddr": 28672,
|
||||
"highaddr": 32767,
|
||||
"size": 4096
|
||||
}
|
||||
},
|
||||
"hier_0_rtds_axis_0": {
|
||||
"reg0": {
|
||||
"baseaddr": 32768,
|
||||
"highaddr": 36863,
|
||||
"size": 4096
|
||||
}
|
||||
},
|
||||
"hier_0_hls_dft_0": {
|
||||
"Reg": {
|
||||
"baseaddr": 36864,
|
||||
"highaddr": 40959,
|
||||
"size": 4096
|
||||
}
|
||||
},
|
||||
"pcie_0_axi_pcie_intc_0": {
|
||||
"Reg": {
|
||||
"baseaddr": 45056,
|
||||
"highaddr": 49151,
|
||||
"size": 4096
|
||||
}
|
||||
},
|
||||
"pcie_0_axi_pcie_0": {
|
||||
"CTL0": {
|
||||
"baseaddr": 268435456,
|
||||
"highaddr": 536870911,
|
||||
"size": 268435456
|
||||
}
|
||||
}
|
||||
},
|
||||
"M_AXI_MM2S": {
|
||||
"pcie_0_axi_pcie_0": {
|
||||
"BAR0": {
|
||||
|
@ -134,17 +50,115 @@
|
|||
"size": 4294967296
|
||||
}
|
||||
}
|
||||
},
|
||||
"M_AXI_SG": {
|
||||
"hier_0_axi_dma_axi_dma_0": {
|
||||
"Reg": {
|
||||
"baseaddr": 4096,
|
||||
"highaddr": 8191,
|
||||
"size": 4096
|
||||
}
|
||||
},
|
||||
"hier_0_axi_dma_axi_dma_1": {
|
||||
"Reg": {
|
||||
"baseaddr": 8192,
|
||||
"highaddr": 12287,
|
||||
"size": 4096
|
||||
}
|
||||
},
|
||||
"hier_0_axi_fifo_mm_s_0": {
|
||||
"Mem0": {
|
||||
"baseaddr": 12288,
|
||||
"highaddr": 16383,
|
||||
"size": 4096
|
||||
},
|
||||
"Mem1": {
|
||||
"baseaddr": 16384,
|
||||
"highaddr": 24575,
|
||||
"size": 8192
|
||||
}
|
||||
},
|
||||
"pcie_0_axi_pcie_intc_0": {
|
||||
"Reg": {
|
||||
"baseaddr": 24576,
|
||||
"highaddr": 28671,
|
||||
"size": 4096
|
||||
}
|
||||
},
|
||||
"pcie_0_axi_reset_0": {
|
||||
"Reg": {
|
||||
"baseaddr": 28672,
|
||||
"highaddr": 32767,
|
||||
"size": 4096
|
||||
}
|
||||
},
|
||||
"timer_0_axi_timer_0": {
|
||||
"Reg": {
|
||||
"baseaddr": 32768,
|
||||
"highaddr": 36863,
|
||||
"size": 4096
|
||||
}
|
||||
},
|
||||
"hier_0_hls_dft_0": {
|
||||
"Reg": {
|
||||
"baseaddr": 36864,
|
||||
"highaddr": 40959,
|
||||
"size": 4096
|
||||
}
|
||||
},
|
||||
"hier_0_rtds_axis_0": {
|
||||
"reg0": {
|
||||
"baseaddr": 40960,
|
||||
"highaddr": 45055,
|
||||
"size": 4096
|
||||
}
|
||||
},
|
||||
"hier_0_axis_interconnect_0_axis_interconnect_0_xbar": {
|
||||
"Reg": {
|
||||
"baseaddr": 45056,
|
||||
"highaddr": 49151,
|
||||
"size": 4096
|
||||
}
|
||||
},
|
||||
"bram_0_axi_bram_ctrl_0": {
|
||||
"Mem0": {
|
||||
"baseaddr": 49152,
|
||||
"highaddr": 57343,
|
||||
"size": 8192
|
||||
}
|
||||
},
|
||||
"hier_0_rtds2gpu_0": {
|
||||
"Reg": {
|
||||
"baseaddr": 57344,
|
||||
"highaddr": 61439,
|
||||
"size": 4096
|
||||
}
|
||||
},
|
||||
"hier_0_gpu2rtds_0": {
|
||||
"Reg": {
|
||||
"baseaddr": 61440,
|
||||
"highaddr": 65535,
|
||||
"size": 4096
|
||||
}
|
||||
},
|
||||
"pcie_0_axi_pcie_0": {
|
||||
"CTL0": {
|
||||
"baseaddr": 268435456,
|
||||
"highaddr": 536870911,
|
||||
"size": 268435456
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"ports": [
|
||||
{
|
||||
"role": "master",
|
||||
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:1",
|
||||
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:S01_AXIS",
|
||||
"name": "MM2S"
|
||||
},
|
||||
{
|
||||
"role": "slave",
|
||||
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:1",
|
||||
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:M01_AXIS",
|
||||
"name": "S2MM"
|
||||
}
|
||||
],
|
||||
|
@ -178,12 +192,12 @@
|
|||
"ports": [
|
||||
{
|
||||
"role": "master",
|
||||
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:6",
|
||||
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:S06_AXIS",
|
||||
"name": "MM2S"
|
||||
},
|
||||
{
|
||||
"role": "slave",
|
||||
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:6",
|
||||
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:M06_AXIS",
|
||||
"name": "S2MM"
|
||||
}
|
||||
],
|
||||
|
@ -197,12 +211,12 @@
|
|||
"ports": [
|
||||
{
|
||||
"role": "master",
|
||||
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:2",
|
||||
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:S02_AXIS",
|
||||
"name": "STR_TXD"
|
||||
},
|
||||
{
|
||||
"role": "slave",
|
||||
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:2",
|
||||
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:M02_AXIS",
|
||||
"name": "STR_RXD"
|
||||
}
|
||||
],
|
||||
|
@ -210,43 +224,143 @@
|
|||
"interrupt": "pcie_0_axi_pcie_intc_0:2"
|
||||
}
|
||||
},
|
||||
"hier_0_axis_data_fifo_0": {
|
||||
"vlnv": "xilinx.com:ip:axis_data_fifo:1.1",
|
||||
"ports": [
|
||||
{
|
||||
"role": "master",
|
||||
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:S03_AXIS",
|
||||
"name": "AXIS"
|
||||
},
|
||||
{
|
||||
"role": "slave",
|
||||
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:M03_AXIS",
|
||||
"name": "AXIS"
|
||||
}
|
||||
]
|
||||
},
|
||||
"hier_0_axis_data_fifo_1": {
|
||||
"vlnv": "xilinx.com:ip:axis_data_fifo:1.1",
|
||||
"ports": [
|
||||
{
|
||||
"role": "master",
|
||||
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:S04_AXIS",
|
||||
"name": "AXIS"
|
||||
},
|
||||
{
|
||||
"role": "slave",
|
||||
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:M04_AXIS",
|
||||
"name": "AXIS"
|
||||
}
|
||||
]
|
||||
},
|
||||
"hier_0_axis_interconnect_0_axis_interconnect_0_xbar": {
|
||||
"vlnv": "xilinx.com:ip:axis_switch:1.1",
|
||||
"ports": [
|
||||
{
|
||||
"role": "slave",
|
||||
"target": "hier_0_rtds_axis_0:m_axis",
|
||||
"name": "S00_AXIS"
|
||||
},
|
||||
{
|
||||
"role": "master",
|
||||
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:3",
|
||||
"name": "M03_AXIS"
|
||||
"target": "hier_0_rtds_axis_0:s_axis",
|
||||
"name": "M00_AXIS"
|
||||
},
|
||||
{
|
||||
"role": "slave",
|
||||
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:3",
|
||||
"target": "hier_0_axi_dma_axi_dma_0:MM2S",
|
||||
"name": "S01_AXIS"
|
||||
},
|
||||
{
|
||||
"role": "master",
|
||||
"target": "hier_0_axi_dma_axi_dma_0:S2MM",
|
||||
"name": "M01_AXIS"
|
||||
},
|
||||
{
|
||||
"role": "slave",
|
||||
"target": "hier_0_axi_fifo_mm_s_0:STR_TXD",
|
||||
"name": "S02_AXIS"
|
||||
},
|
||||
{
|
||||
"role": "master",
|
||||
"target": "hier_0_axi_fifo_mm_s_0:STR_RXD",
|
||||
"name": "M02_AXIS"
|
||||
},
|
||||
{
|
||||
"role": "slave",
|
||||
"target": "hier_0_axis_data_fifo_0:AXIS",
|
||||
"name": "S03_AXIS"
|
||||
},
|
||||
{
|
||||
"role": "master",
|
||||
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:4",
|
||||
"target": "hier_0_axis_data_fifo_0:AXIS",
|
||||
"name": "M03_AXIS"
|
||||
},
|
||||
{
|
||||
"role": "slave",
|
||||
"target": "hier_0_axis_data_fifo_1:AXIS",
|
||||
"name": "S04_AXIS"
|
||||
},
|
||||
{
|
||||
"role": "master",
|
||||
"target": "hier_0_axis_data_fifo_1:AXIS",
|
||||
"name": "M04_AXIS"
|
||||
},
|
||||
{
|
||||
"role": "slave",
|
||||
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:4",
|
||||
"name": "S04_AXIS"
|
||||
"target": "hier_0_hls_dft_0:output_r",
|
||||
"name": "S05_AXIS"
|
||||
},
|
||||
{
|
||||
"role": "master",
|
||||
"target": "hier_0_hls_dft_0:input_r",
|
||||
"name": "M05_AXIS"
|
||||
},
|
||||
{
|
||||
"role": "slave",
|
||||
"target": "hier_0_axi_dma_axi_dma_1:MM2S",
|
||||
"name": "S06_AXIS"
|
||||
},
|
||||
{
|
||||
"role": "master",
|
||||
"target": "hier_0_axi_dma_axi_dma_1:S2MM",
|
||||
"name": "M06_AXIS"
|
||||
},
|
||||
{
|
||||
"role": "slave",
|
||||
"target": "hier_0_gpu2rtds_0:rtds_output",
|
||||
"name": "S07_AXIS"
|
||||
},
|
||||
{
|
||||
"role": "master",
|
||||
"target": "hier_0_rtds2gpu_0:rtds_input",
|
||||
"name": "M07_AXIS"
|
||||
}
|
||||
],
|
||||
"num_ports": 7
|
||||
"num_ports": 8
|
||||
},
|
||||
"hier_0_gpu2rtds_0": {
|
||||
"vlnv": "acs.eonerc.rwth-aachen.de:hls:gpu2rtds:1.0",
|
||||
"ports": [
|
||||
{
|
||||
"role": "master",
|
||||
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:S07_AXIS",
|
||||
"name": "rtds_output"
|
||||
}
|
||||
]
|
||||
},
|
||||
"hier_0_hls_dft_0": {
|
||||
"vlnv": "acs.eonerc.rwth-aachen.de:hls:hls_dft:1.1",
|
||||
"ports": [
|
||||
{
|
||||
"role": "master",
|
||||
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:5",
|
||||
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:S05_AXIS",
|
||||
"name": "output_r"
|
||||
},
|
||||
{
|
||||
"role": "slave",
|
||||
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:5",
|
||||
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:M05_AXIS",
|
||||
"name": "input_r"
|
||||
}
|
||||
],
|
||||
|
@ -254,17 +368,38 @@
|
|||
"interrupt": "pcie_0_axi_pcie_intc_0:1"
|
||||
}
|
||||
},
|
||||
"hier_0_rtds2gpu_0": {
|
||||
"vlnv": "acs.eonerc.rwth-aachen.de:hls:rtds2gpu:1.1",
|
||||
"memory-view": {
|
||||
"m_axi_axi_mm": {
|
||||
"pcie_0_axi_pcie_0": {
|
||||
"BAR0": {
|
||||
"baseaddr": 0,
|
||||
"highaddr": 4294967295,
|
||||
"size": 4294967296
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"ports": [
|
||||
{
|
||||
"role": "slave",
|
||||
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:M07_AXIS",
|
||||
"name": "rtds_input"
|
||||
}
|
||||
]
|
||||
},
|
||||
"hier_0_rtds_axis_0": {
|
||||
"vlnv": "acs.eonerc.rwth-aachen.de:user:rtds_axis:1.0",
|
||||
"ports": [
|
||||
{
|
||||
"role": "master",
|
||||
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:0",
|
||||
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:S00_AXIS",
|
||||
"name": "m_axis"
|
||||
},
|
||||
{
|
||||
"role": "slave",
|
||||
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:0",
|
||||
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:M00_AXIS",
|
||||
"name": "s_axis"
|
||||
}
|
||||
],
|
||||
|
@ -278,11 +413,11 @@
|
|||
"vlnv": "xilinx.com:ip:axi_pcie:2.8",
|
||||
"memory-view": {
|
||||
"M_AXI": {
|
||||
"bram_0_axi_bram_ctrl_0": {
|
||||
"Mem0": {
|
||||
"baseaddr": 0,
|
||||
"hier_0_axi_dma_axi_dma_0": {
|
||||
"Reg": {
|
||||
"baseaddr": 4096,
|
||||
"highaddr": 8191,
|
||||
"size": 8192
|
||||
"size": 4096
|
||||
}
|
||||
},
|
||||
"hier_0_axi_dma_axi_dma_1": {
|
||||
|
@ -292,37 +427,23 @@
|
|||
"size": 4096
|
||||
}
|
||||
},
|
||||
"hier_0_axi_dma_axi_dma_0": {
|
||||
"Reg": {
|
||||
"hier_0_axi_fifo_mm_s_0": {
|
||||
"Mem0": {
|
||||
"baseaddr": 12288,
|
||||
"highaddr": 16383,
|
||||
"size": 4096
|
||||
}
|
||||
},
|
||||
"timer_0_axi_timer_0": {
|
||||
"Reg": {
|
||||
},
|
||||
"Mem1": {
|
||||
"baseaddr": 16384,
|
||||
"highaddr": 20479,
|
||||
"size": 4096
|
||||
}
|
||||
},
|
||||
"hier_0_axis_interconnect_0_axis_interconnect_0_xbar": {
|
||||
"Reg": {
|
||||
"baseaddr": 20480,
|
||||
"highaddr": 24575,
|
||||
"size": 4096
|
||||
"size": 8192
|
||||
}
|
||||
},
|
||||
"hier_0_axi_fifo_mm_s_0": {
|
||||
"Mem0": {
|
||||
"pcie_0_axi_pcie_intc_0": {
|
||||
"Reg": {
|
||||
"baseaddr": 24576,
|
||||
"highaddr": 28671,
|
||||
"size": 4096
|
||||
},
|
||||
"Mem1": {
|
||||
"baseaddr": 49152,
|
||||
"highaddr": 57343,
|
||||
"size": 8192
|
||||
}
|
||||
},
|
||||
"pcie_0_axi_reset_0": {
|
||||
|
@ -332,8 +453,8 @@
|
|||
"size": 4096
|
||||
}
|
||||
},
|
||||
"hier_0_rtds_axis_0": {
|
||||
"reg0": {
|
||||
"timer_0_axi_timer_0": {
|
||||
"Reg": {
|
||||
"baseaddr": 32768,
|
||||
"highaddr": 36863,
|
||||
"size": 4096
|
||||
|
@ -346,13 +467,41 @@
|
|||
"size": 4096
|
||||
}
|
||||
},
|
||||
"pcie_0_axi_pcie_intc_0": {
|
||||
"hier_0_rtds_axis_0": {
|
||||
"reg0": {
|
||||
"baseaddr": 40960,
|
||||
"highaddr": 45055,
|
||||
"size": 4096
|
||||
}
|
||||
},
|
||||
"hier_0_axis_interconnect_0_axis_interconnect_0_xbar": {
|
||||
"Reg": {
|
||||
"baseaddr": 45056,
|
||||
"highaddr": 49151,
|
||||
"size": 4096
|
||||
}
|
||||
},
|
||||
"bram_0_axi_bram_ctrl_0": {
|
||||
"Mem0": {
|
||||
"baseaddr": 49152,
|
||||
"highaddr": 57343,
|
||||
"size": 8192
|
||||
}
|
||||
},
|
||||
"hier_0_rtds2gpu_0": {
|
||||
"Reg": {
|
||||
"baseaddr": 57344,
|
||||
"highaddr": 61439,
|
||||
"size": 4096
|
||||
}
|
||||
},
|
||||
"hier_0_gpu2rtds_0": {
|
||||
"Reg": {
|
||||
"baseaddr": 61440,
|
||||
"highaddr": 65535,
|
||||
"size": 4096
|
||||
}
|
||||
},
|
||||
"pcie_0_axi_pcie_0": {
|
||||
"CTL0": {
|
||||
"baseaddr": 268435456,
|
||||
|
|
|
@ -71,8 +71,8 @@ public:
|
|||
const std::string& port,
|
||||
bool isMaster)
|
||||
{
|
||||
for(auto& [vertexId, vertex] : vertices) {
|
||||
(void) vertexId;
|
||||
for(auto& vertexEntry : vertices) {
|
||||
auto& vertex = vertexEntry.second;
|
||||
if(vertex->nodeName == node and vertex->portName == port and vertex->isMaster == isMaster)
|
||||
return vertex;
|
||||
}
|
||||
|
@ -86,7 +86,7 @@ public:
|
|||
};
|
||||
|
||||
|
||||
class IpNode : public IpCore {
|
||||
class IpNode : public virtual IpCore {
|
||||
public:
|
||||
|
||||
friend class IpNodeFactory;
|
||||
|
|
|
@ -43,7 +43,10 @@ public:
|
|||
bool init();
|
||||
bool reset();
|
||||
|
||||
// memory-mapped to stream (MM2S)
|
||||
bool write(const MemoryBlock& mem, size_t len);
|
||||
|
||||
// stream to memory-mapped (S2MM)
|
||||
bool read(const MemoryBlock& mem, size_t len);
|
||||
|
||||
size_t writeComplete()
|
||||
|
|
87
fpga/include/villas/fpga/ips/gpu2rtds.hpp
Normal file
87
fpga/include/villas/fpga/ips/gpu2rtds.hpp
Normal file
|
@ -0,0 +1,87 @@
|
|||
#pragma once
|
||||
|
||||
#include <villas/memory.hpp>
|
||||
#include <villas/fpga/ip_node.hpp>
|
||||
#include <villas/fpga/ips/hls.hpp>
|
||||
|
||||
#include <villas/fpga/ips/rtds2gpu/register_types.hpp>
|
||||
#include <villas/fpga/ips/rtds2gpu/xgpu2rtds_hw.h>
|
||||
|
||||
namespace villas {
|
||||
namespace fpga {
|
||||
namespace ip {
|
||||
|
||||
|
||||
class Gpu2Rtds : public IpNode, public Hls
|
||||
{
|
||||
public:
|
||||
friend class Gpu2RtdsFactory;
|
||||
|
||||
bool init();
|
||||
|
||||
void dump(spdlog::level::level_enum logLevel = spdlog::level::info);
|
||||
bool startOnce(size_t frameSize);
|
||||
|
||||
size_t getMaxFrameSize();
|
||||
|
||||
const StreamVertex&
|
||||
getDefaultMasterPort() const
|
||||
{ return getMasterPort(rtdsOutputStreamPort); }
|
||||
|
||||
MemoryBlock
|
||||
getRegisterMemory() const
|
||||
{ return MemoryBlock(0, 1 << 10, getAddressSpaceId(registerMemory)); }
|
||||
|
||||
private:
|
||||
bool updateStatus();
|
||||
|
||||
public:
|
||||
static constexpr const char* rtdsOutputStreamPort = "rtds_output";
|
||||
|
||||
struct StatusControlRegister { uint32_t
|
||||
status_ap_vld : 1,
|
||||
_res : 31;
|
||||
};
|
||||
|
||||
using StatusRegister = axilite_reg_status_t;
|
||||
|
||||
static constexpr uintptr_t registerStatusOffset = XGPU2RTDS_CTRL_ADDR_STATUS_DATA;
|
||||
static constexpr uintptr_t registerStatusCtrlOffset = XGPU2RTDS_CTRL_ADDR_STATUS_CTRL;
|
||||
static constexpr uintptr_t registerFrameSizeOffset = XGPU2RTDS_CTRL_ADDR_FRAME_SIZE_DATA;
|
||||
static constexpr uintptr_t registerFrameOffset = XGPU2RTDS_CTRL_ADDR_FRAME_BASE;
|
||||
static constexpr uintptr_t registerFrameLength = XGPU2RTDS_CTRL_DEPTH_FRAME;
|
||||
|
||||
public:
|
||||
StatusRegister* registerStatus;
|
||||
StatusControlRegister* registerStatusCtrl;
|
||||
uint32_t* registerFrameSize;
|
||||
uint32_t* registerFrames;
|
||||
|
||||
size_t maxFrameSize;
|
||||
|
||||
bool started;
|
||||
};
|
||||
|
||||
|
||||
class Gpu2RtdsFactory : public IpNodeFactory {
|
||||
public:
|
||||
Gpu2RtdsFactory();
|
||||
|
||||
IpCore* create()
|
||||
{ return new Gpu2Rtds; }
|
||||
|
||||
std::string
|
||||
getName() const
|
||||
{ return "Gpu2Rtds"; }
|
||||
|
||||
std::string
|
||||
getDescription() const
|
||||
{ return "HLS Gpu2Rtds IP"; }
|
||||
|
||||
Vlnv getCompatibleVlnv() const
|
||||
{ return {"acs.eonerc.rwth-aachen.de:hls:gpu2rtds:"}; }
|
||||
};
|
||||
|
||||
} // namespace ip
|
||||
} // namespace fpga
|
||||
} // namespace villas
|
137
fpga/include/villas/fpga/ips/hls.hpp
Normal file
137
fpga/include/villas/fpga/ips/hls.hpp
Normal file
|
@ -0,0 +1,137 @@
|
|||
#pragma once
|
||||
|
||||
#include <villas/memory.hpp>
|
||||
#include <villas/fpga/ip_node.hpp>
|
||||
|
||||
namespace villas {
|
||||
namespace fpga {
|
||||
namespace ip {
|
||||
|
||||
|
||||
class Hls : public virtual IpCore
|
||||
{
|
||||
public:
|
||||
virtual bool init()
|
||||
{
|
||||
auto& registers = addressTranslations.at(registerMemory);
|
||||
|
||||
controlRegister = reinterpret_cast<ControlRegister*>(registers.getLocalAddr(registerControlAddr));
|
||||
globalIntRegister = reinterpret_cast<GlobalIntRegister*>(registers.getLocalAddr(registerGlobalIntEnableAddr));
|
||||
ipIntEnableRegister = reinterpret_cast<IpIntRegister*>(registers.getLocalAddr(registerIntEnableAddr));
|
||||
ipIntStatusRegister = reinterpret_cast<IpIntRegister*>(registers.getLocalAddr(registerIntStatusAddr));
|
||||
|
||||
setAutoRestart(false);
|
||||
setGlobalInterrupt(false);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool start()
|
||||
{
|
||||
controlRegister->ap_start = true;
|
||||
running = true;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual bool isFinished()
|
||||
{ updateRunningStatus(); return !running; }
|
||||
|
||||
|
||||
bool isRunning()
|
||||
{ updateRunningStatus(); return running; }
|
||||
|
||||
|
||||
void setAutoRestart(bool enabled) const
|
||||
{ controlRegister->auto_restart = enabled; }
|
||||
|
||||
|
||||
void setGlobalInterrupt(bool enabled) const
|
||||
{ globalIntRegister->globalInterruptEnable = enabled; }
|
||||
|
||||
|
||||
void setReadyInterrupt(bool enabled) const
|
||||
{ ipIntEnableRegister->ap_ready = enabled; }
|
||||
|
||||
|
||||
void setDoneInterrupt(bool enabled) const
|
||||
{ ipIntEnableRegister->ap_done = enabled; }
|
||||
|
||||
|
||||
bool isIdleBit() const
|
||||
{ return controlRegister->ap_idle; }
|
||||
|
||||
|
||||
bool isReadyBit() const
|
||||
{ return controlRegister->ap_ready; }
|
||||
|
||||
|
||||
/// Warning: the corresponding bit is cleared on read of the register, so if
|
||||
/// not used correctly, this function may never return true. Only use this
|
||||
/// function if you really know what you are doing!
|
||||
bool isDoneBit() const
|
||||
{ return controlRegister->ap_done; }
|
||||
|
||||
|
||||
bool isAutoRestartBit() const
|
||||
{ return controlRegister->auto_restart; }
|
||||
|
||||
private:
|
||||
void updateRunningStatus()
|
||||
{
|
||||
if(running and isIdleBit())
|
||||
running = false;
|
||||
}
|
||||
|
||||
protected:
|
||||
/* Memory block handling */
|
||||
|
||||
static constexpr const char* registerMemory = "Reg";
|
||||
|
||||
virtual std::list<MemoryBlockName> getMemoryBlocks() const
|
||||
{ return { registerMemory }; }
|
||||
|
||||
|
||||
public:
|
||||
/* Register definitions */
|
||||
|
||||
static constexpr uintptr_t registerControlAddr = 0x00;
|
||||
static constexpr uintptr_t registerGlobalIntEnableAddr = 0x04;
|
||||
static constexpr uintptr_t registerIntEnableAddr = 0x08;
|
||||
static constexpr uintptr_t registerIntStatusAddr = 0x0c;
|
||||
|
||||
union ControlRegister {
|
||||
uint32_t value;
|
||||
struct { uint32_t
|
||||
ap_start : 1,
|
||||
ap_done : 1,
|
||||
ap_idle : 1,
|
||||
ap_ready : 1,
|
||||
_res1 : 3,
|
||||
auto_restart : 1,
|
||||
_res2 : 24;
|
||||
};
|
||||
};
|
||||
|
||||
struct GlobalIntRegister { uint32_t
|
||||
globalInterruptEnable : 1,
|
||||
_res : 31;
|
||||
};
|
||||
|
||||
struct IpIntRegister { uint32_t
|
||||
ap_done : 1,
|
||||
ap_ready : 1,
|
||||
_res : 30;
|
||||
};
|
||||
protected:
|
||||
ControlRegister* controlRegister;
|
||||
GlobalIntRegister* globalIntRegister;
|
||||
IpIntRegister* ipIntEnableRegister;
|
||||
IpIntRegister* ipIntStatusRegister;
|
||||
|
||||
bool running;
|
||||
};
|
||||
|
||||
} // namespace ip
|
||||
} // namespace fpga
|
||||
} // namespace villas
|
|
@ -44,6 +44,14 @@ public:
|
|||
std::list<std::string> getMemoryBlocks() const
|
||||
{ return { registerMemory }; }
|
||||
|
||||
const StreamVertex&
|
||||
getDefaultSlavePort() const
|
||||
{ return getSlavePort(slavePort); }
|
||||
|
||||
const StreamVertex&
|
||||
getDefaultMasterPort() const
|
||||
{ return getMasterPort(masterPort); }
|
||||
|
||||
private:
|
||||
static constexpr const char registerMemory[] = "reg0";
|
||||
static constexpr const char* irqTs = "irq_ts";
|
||||
|
|
96
fpga/include/villas/fpga/ips/rtds2gpu.hpp
Normal file
96
fpga/include/villas/fpga/ips/rtds2gpu.hpp
Normal file
|
@ -0,0 +1,96 @@
|
|||
#pragma once
|
||||
|
||||
#include <villas/memory.hpp>
|
||||
#include <villas/fpga/ip_node.hpp>
|
||||
#include <villas/fpga/ips/hls.hpp>
|
||||
|
||||
#include "rtds2gpu/xrtds2gpu.h"
|
||||
#include "rtds2gpu/register_types.hpp"
|
||||
|
||||
namespace villas {
|
||||
namespace fpga {
|
||||
namespace ip {
|
||||
|
||||
union ControlRegister {
|
||||
uint32_t value;
|
||||
struct { uint32_t
|
||||
ap_start : 1,
|
||||
ap_done : 1,
|
||||
ap_idle : 1,
|
||||
ap_ready : 1,
|
||||
_res1 : 3,
|
||||
auto_restart : 1,
|
||||
_res2 : 24;
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
class Rtds2Gpu : public IpNode, public Hls
|
||||
{
|
||||
public:
|
||||
friend class Rtds2GpuFactory;
|
||||
|
||||
bool init();
|
||||
|
||||
void dump(spdlog::level::level_enum logLevel = spdlog::level::info);
|
||||
|
||||
bool startOnce(const MemoryBlock& mem, size_t frameSize, size_t dataOffset, size_t doorbellOffset);
|
||||
|
||||
size_t getMaxFrameSize();
|
||||
|
||||
void dumpDoorbell(uint32_t doorbellRegister) const;
|
||||
|
||||
bool doorbellIsValid(const uint32_t& doorbellRegister) const
|
||||
{ return reinterpret_cast<const reg_doorbell_t&>(doorbellRegister).is_valid; }
|
||||
|
||||
void doorbellReset(uint32_t& doorbellRegister) const
|
||||
{ doorbellRegister = 0; }
|
||||
|
||||
static constexpr const char* registerMemory = "Reg";
|
||||
|
||||
std::list<MemoryBlockName> getMemoryBlocks() const
|
||||
{ return { registerMemory }; }
|
||||
|
||||
|
||||
const StreamVertex&
|
||||
getDefaultSlavePort() const
|
||||
{ return getSlavePort(rtdsInputStreamPort); }
|
||||
|
||||
private:
|
||||
bool updateStatus();
|
||||
|
||||
private:
|
||||
static constexpr const char* axiInterface = "m_axi_axi_mm";
|
||||
static constexpr const char* rtdsInputStreamPort = "rtds_input";
|
||||
|
||||
XRtds2gpu xInstance;
|
||||
|
||||
axilite_reg_status_t status;
|
||||
size_t maxFrameSize;
|
||||
|
||||
bool started;
|
||||
};
|
||||
|
||||
|
||||
class Rtds2GpuFactory : public IpNodeFactory {
|
||||
public:
|
||||
Rtds2GpuFactory();
|
||||
|
||||
IpCore* create()
|
||||
{ return new Rtds2Gpu; }
|
||||
|
||||
std::string
|
||||
getName() const
|
||||
{ return "Rtds2Gpu"; }
|
||||
|
||||
std::string
|
||||
getDescription() const
|
||||
{ return "HLS RTDS2GPU IP"; }
|
||||
|
||||
Vlnv getCompatibleVlnv() const
|
||||
{ return {"acs.eonerc.rwth-aachen.de:hls:rtds2gpu:"}; }
|
||||
};
|
||||
|
||||
} // namespace ip
|
||||
} // namespace fpga
|
||||
} // namespace villas
|
57
fpga/include/villas/fpga/ips/rtds2gpu/register_types.hpp
Normal file
57
fpga/include/villas/fpga/ips/rtds2gpu/register_types.hpp
Normal file
|
@ -0,0 +1,57 @@
|
|||
#ifndef REGISTER_TYPES_H
|
||||
#define REGISTER_TYPES_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
union axilite_reg_status_t {
|
||||
uint32_t value;
|
||||
struct {
|
||||
uint32_t
|
||||
last_seq_nr : 16,
|
||||
last_count : 6,
|
||||
max_frame_size : 6,
|
||||
invalid_frame_size : 1,
|
||||
frame_too_short : 1,
|
||||
frame_too_long : 1,
|
||||
is_running : 1;
|
||||
};
|
||||
};
|
||||
|
||||
union reg_doorbell_t {
|
||||
uint32_t value;
|
||||
struct {
|
||||
uint32_t
|
||||
seq_nr : 16,
|
||||
count : 6,
|
||||
is_valid : 1;
|
||||
};
|
||||
|
||||
constexpr reg_doorbell_t() : value(0) {}
|
||||
};
|
||||
|
||||
template<size_t N, typename T = uint32_t>
|
||||
struct Rtds2GpuMemoryBuffer {
|
||||
// this type is only for memory interpretation, it makes no sense to create
|
||||
// an instance so it's forbidden
|
||||
Rtds2GpuMemoryBuffer() = delete;
|
||||
|
||||
// T can be a more complex type that wraps multiple values
|
||||
static constexpr size_t rawValueCount = N * (sizeof(T) / 4);
|
||||
|
||||
// As of C++14, offsetof() is not working for non-standard layout types (i.e.
|
||||
// composed of non-POD members). This might work in C++17 though.
|
||||
// More info: https://gist.github.com/graphitemaster/494f21190bb2c63c5516
|
||||
//static constexpr size_t doorbellOffset = offsetof(Rtds2GpuMemoryBuffer, doorbell);
|
||||
//static constexpr size_t dataOffset = offsetof(Rtds2GpuMemoryBuffer, data);
|
||||
|
||||
// HACK: This might break horribly, let's just hope C++17 will be there soon
|
||||
static constexpr size_t dataOffset = 0;
|
||||
static constexpr size_t doorbellOffset = N * sizeof(Rtds2GpuMemoryBuffer::data);
|
||||
|
||||
T data[N];
|
||||
reg_doorbell_t doorbell;
|
||||
};
|
||||
|
||||
#endif // REGISTER_TYPES_H
|
53
fpga/include/villas/fpga/ips/rtds2gpu/xgpu2rtds_hw.h
Normal file
53
fpga/include/villas/fpga/ips/rtds2gpu/xgpu2rtds_hw.h
Normal file
|
@ -0,0 +1,53 @@
|
|||
// ==============================================================
|
||||
// File generated by Vivado(TM) HLS - High-Level Synthesis from C, C++ and SystemC
|
||||
// Version: 2017.3
|
||||
// Copyright (C) 1986-2017 Xilinx, Inc. All Rights Reserved.
|
||||
//
|
||||
// ==============================================================
|
||||
|
||||
// CTRL
|
||||
// 0x00 : Control signals
|
||||
// bit 0 - ap_start (Read/Write/COH)
|
||||
// bit 1 - ap_done (Read/COR)
|
||||
// bit 2 - ap_idle (Read)
|
||||
// bit 3 - ap_ready (Read)
|
||||
// bit 7 - auto_restart (Read/Write)
|
||||
// others - reserved
|
||||
// 0x04 : Global Interrupt Enable Register
|
||||
// bit 0 - Global Interrupt Enable (Read/Write)
|
||||
// others - reserved
|
||||
// 0x08 : IP Interrupt Enable Register (Read/Write)
|
||||
// bit 0 - Channel 0 (ap_done)
|
||||
// bit 1 - Channel 1 (ap_ready)
|
||||
// others - reserved
|
||||
// 0x0c : IP Interrupt Status Register (Read/TOW)
|
||||
// bit 0 - Channel 0 (ap_done)
|
||||
// bit 1 - Channel 1 (ap_ready)
|
||||
// others - reserved
|
||||
// 0x10 : Data signal of frame_size
|
||||
// bit 31~0 - frame_size[31:0] (Read/Write)
|
||||
// 0x14 : reserved
|
||||
// 0x80 : Data signal of status
|
||||
// bit 31~0 - status[31:0] (Read)
|
||||
// 0x84 : Control signal of status
|
||||
// bit 0 - status_ap_vld (Read/COR)
|
||||
// others - reserved
|
||||
// 0x40 ~
|
||||
// 0x7f : Memory 'frame' (16 * 32b)
|
||||
// Word n : bit [31:0] - frame[n]
|
||||
// (SC = Self Clear, COR = Clear on Read, TOW = Toggle on Write, COH = Clear on Handshake)
|
||||
|
||||
#define XGPU2RTDS_CTRL_ADDR_AP_CTRL 0x00
|
||||
#define XGPU2RTDS_CTRL_ADDR_GIE 0x04
|
||||
#define XGPU2RTDS_CTRL_ADDR_IER 0x08
|
||||
#define XGPU2RTDS_CTRL_ADDR_ISR 0x0c
|
||||
#define XGPU2RTDS_CTRL_ADDR_FRAME_SIZE_DATA 0x10
|
||||
#define XGPU2RTDS_CTRL_BITS_FRAME_SIZE_DATA 32
|
||||
#define XGPU2RTDS_CTRL_ADDR_STATUS_DATA 0x80
|
||||
#define XGPU2RTDS_CTRL_BITS_STATUS_DATA 32
|
||||
#define XGPU2RTDS_CTRL_ADDR_STATUS_CTRL 0x84
|
||||
#define XGPU2RTDS_CTRL_ADDR_FRAME_BASE 0x40
|
||||
#define XGPU2RTDS_CTRL_ADDR_FRAME_HIGH 0x7f
|
||||
#define XGPU2RTDS_CTRL_WIDTH_FRAME 32
|
||||
#define XGPU2RTDS_CTRL_DEPTH_FRAME 16
|
||||
|
113
fpga/include/villas/fpga/ips/rtds2gpu/xrtds2gpu.h
Normal file
113
fpga/include/villas/fpga/ips/rtds2gpu/xrtds2gpu.h
Normal file
|
@ -0,0 +1,113 @@
|
|||
// ==============================================================
|
||||
// File generated by Vivado(TM) HLS - High-Level Synthesis from C, C++ and SystemC
|
||||
// Version: 2017.3
|
||||
// Copyright (C) 1986-2017 Xilinx, Inc. All Rights Reserved.
|
||||
//
|
||||
// ==============================================================
|
||||
|
||||
#ifndef XRTDS2GPU_H
|
||||
#define XRTDS2GPU_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/***************************** Include Files *********************************/
|
||||
#ifndef __linux__
|
||||
#include "xil_types.h"
|
||||
#include "xil_assert.h"
|
||||
#include "xstatus.h"
|
||||
#include "xil_io.h"
|
||||
#else
|
||||
#include <stdint.h>
|
||||
#include <assert.h>
|
||||
#include <dirent.h>
|
||||
#include <fcntl.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/mman.h>
|
||||
#include <unistd.h>
|
||||
#include <stddef.h>
|
||||
#endif
|
||||
#include "xrtds2gpu_hw.h"
|
||||
|
||||
/**************************** Type Definitions ******************************/
|
||||
#ifdef __linux__
|
||||
typedef uint8_t u8;
|
||||
typedef uint16_t u16;
|
||||
typedef uint32_t u32;
|
||||
#else
|
||||
typedef struct {
|
||||
u16 DeviceId;
|
||||
u32 Ctrl_BaseAddress;
|
||||
} XRtds2gpu_Config;
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
u32 Ctrl_BaseAddress;
|
||||
u32 IsReady;
|
||||
} XRtds2gpu;
|
||||
|
||||
/***************** Macros (Inline Functions) Definitions *********************/
|
||||
#ifndef __linux__
|
||||
#define XRtds2gpu_WriteReg(BaseAddress, RegOffset, Data) \
|
||||
Xil_Out32((BaseAddress) + (RegOffset), (u32)(Data))
|
||||
#define XRtds2gpu_ReadReg(BaseAddress, RegOffset) \
|
||||
Xil_In32((BaseAddress) + (RegOffset))
|
||||
#else
|
||||
#define XRtds2gpu_WriteReg(BaseAddress, RegOffset, Data) \
|
||||
*(volatile u32*)((BaseAddress) + (RegOffset)) = (u32)(Data)
|
||||
#define XRtds2gpu_ReadReg(BaseAddress, RegOffset) \
|
||||
*(volatile u32*)((BaseAddress) + (RegOffset))
|
||||
|
||||
#define Xil_AssertVoid(expr) assert(expr)
|
||||
#define Xil_AssertNonvoid(expr) assert(expr)
|
||||
|
||||
#define XST_SUCCESS 0
|
||||
#define XST_DEVICE_NOT_FOUND 2
|
||||
#define XST_OPEN_DEVICE_FAILED 3
|
||||
#define XIL_COMPONENT_IS_READY 1
|
||||
#endif
|
||||
|
||||
/************************** Function Prototypes *****************************/
|
||||
#ifndef __linux__
|
||||
int XRtds2gpu_Initialize(XRtds2gpu *InstancePtr, u16 DeviceId);
|
||||
XRtds2gpu_Config* XRtds2gpu_LookupConfig(u16 DeviceId);
|
||||
int XRtds2gpu_CfgInitialize(XRtds2gpu *InstancePtr, XRtds2gpu_Config *ConfigPtr);
|
||||
#else
|
||||
int XRtds2gpu_Initialize(XRtds2gpu *InstancePtr, const char* InstanceName);
|
||||
int XRtds2gpu_Release(XRtds2gpu *InstancePtr);
|
||||
#endif
|
||||
|
||||
void XRtds2gpu_Start(XRtds2gpu *InstancePtr);
|
||||
u32 XRtds2gpu_IsDone(XRtds2gpu *InstancePtr);
|
||||
u32 XRtds2gpu_IsIdle(XRtds2gpu *InstancePtr);
|
||||
u32 XRtds2gpu_IsReady(XRtds2gpu *InstancePtr);
|
||||
void XRtds2gpu_EnableAutoRestart(XRtds2gpu *InstancePtr);
|
||||
void XRtds2gpu_DisableAutoRestart(XRtds2gpu *InstancePtr);
|
||||
|
||||
void XRtds2gpu_Set_baseaddr(XRtds2gpu *InstancePtr, u32 Data);
|
||||
u32 XRtds2gpu_Get_baseaddr(XRtds2gpu *InstancePtr);
|
||||
void XRtds2gpu_Set_data_offset(XRtds2gpu *InstancePtr, u32 Data);
|
||||
u32 XRtds2gpu_Get_data_offset(XRtds2gpu *InstancePtr);
|
||||
void XRtds2gpu_Set_doorbell_offset(XRtds2gpu *InstancePtr, u32 Data);
|
||||
u32 XRtds2gpu_Get_doorbell_offset(XRtds2gpu *InstancePtr);
|
||||
void XRtds2gpu_Set_frame_size(XRtds2gpu *InstancePtr, u32 Data);
|
||||
u32 XRtds2gpu_Get_frame_size(XRtds2gpu *InstancePtr);
|
||||
u32 XRtds2gpu_Get_status(XRtds2gpu *InstancePtr);
|
||||
u32 XRtds2gpu_Get_status_vld(XRtds2gpu *InstancePtr);
|
||||
|
||||
void XRtds2gpu_InterruptGlobalEnable(XRtds2gpu *InstancePtr);
|
||||
void XRtds2gpu_InterruptGlobalDisable(XRtds2gpu *InstancePtr);
|
||||
void XRtds2gpu_InterruptEnable(XRtds2gpu *InstancePtr, u32 Mask);
|
||||
void XRtds2gpu_InterruptDisable(XRtds2gpu *InstancePtr, u32 Mask);
|
||||
void XRtds2gpu_InterruptClear(XRtds2gpu *InstancePtr, u32 Mask);
|
||||
u32 XRtds2gpu_InterruptGetEnabled(XRtds2gpu *InstancePtr);
|
||||
u32 XRtds2gpu_InterruptGetStatus(XRtds2gpu *InstancePtr);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
61
fpga/include/villas/fpga/ips/rtds2gpu/xrtds2gpu_hw.h
Normal file
61
fpga/include/villas/fpga/ips/rtds2gpu/xrtds2gpu_hw.h
Normal file
|
@ -0,0 +1,61 @@
|
|||
// ==============================================================
|
||||
// File generated by Vivado(TM) HLS - High-Level Synthesis from C, C++ and SystemC
|
||||
// Version: 2017.3
|
||||
// Copyright (C) 1986-2017 Xilinx, Inc. All Rights Reserved.
|
||||
//
|
||||
// ==============================================================
|
||||
|
||||
// CTRL
|
||||
// 0x00 : Control signals
|
||||
// bit 0 - ap_start (Read/Write/COH)
|
||||
// bit 1 - ap_done (Read/COR)
|
||||
// bit 2 - ap_idle (Read)
|
||||
// bit 3 - ap_ready (Read)
|
||||
// bit 7 - auto_restart (Read/Write)
|
||||
// others - reserved
|
||||
// 0x04 : Global Interrupt Enable Register
|
||||
// bit 0 - Global Interrupt Enable (Read/Write)
|
||||
// others - reserved
|
||||
// 0x08 : IP Interrupt Enable Register (Read/Write)
|
||||
// bit 0 - Channel 0 (ap_done)
|
||||
// bit 1 - Channel 1 (ap_ready)
|
||||
// others - reserved
|
||||
// 0x0c : IP Interrupt Status Register (Read/TOW)
|
||||
// bit 0 - Channel 0 (ap_done)
|
||||
// bit 1 - Channel 1 (ap_ready)
|
||||
// others - reserved
|
||||
// 0x10 : Data signal of baseaddr
|
||||
// bit 31~0 - baseaddr[31:0] (Read/Write)
|
||||
// 0x14 : reserved
|
||||
// 0x18 : Data signal of data_offset
|
||||
// bit 31~0 - data_offset[31:0] (Read/Write)
|
||||
// 0x1c : reserved
|
||||
// 0x20 : Data signal of doorbell_offset
|
||||
// bit 31~0 - doorbell_offset[31:0] (Read/Write)
|
||||
// 0x24 : reserved
|
||||
// 0x28 : Data signal of frame_size
|
||||
// bit 31~0 - frame_size[31:0] (Read/Write)
|
||||
// 0x2c : reserved
|
||||
// 0x30 : Data signal of status
|
||||
// bit 31~0 - status[31:0] (Read)
|
||||
// 0x34 : Control signal of status
|
||||
// bit 0 - status_ap_vld (Read/COR)
|
||||
// others - reserved
|
||||
// (SC = Self Clear, COR = Clear on Read, TOW = Toggle on Write, COH = Clear on Handshake)
|
||||
|
||||
#define XRTDS2GPU_CTRL_ADDR_AP_CTRL 0x00
|
||||
#define XRTDS2GPU_CTRL_ADDR_GIE 0x04
|
||||
#define XRTDS2GPU_CTRL_ADDR_IER 0x08
|
||||
#define XRTDS2GPU_CTRL_ADDR_ISR 0x0c
|
||||
#define XRTDS2GPU_CTRL_ADDR_BASEADDR_DATA 0x10
|
||||
#define XRTDS2GPU_CTRL_BITS_BASEADDR_DATA 32
|
||||
#define XRTDS2GPU_CTRL_ADDR_DATA_OFFSET_DATA 0x18
|
||||
#define XRTDS2GPU_CTRL_BITS_DATA_OFFSET_DATA 32
|
||||
#define XRTDS2GPU_CTRL_ADDR_DOORBELL_OFFSET_DATA 0x20
|
||||
#define XRTDS2GPU_CTRL_BITS_DOORBELL_OFFSET_DATA 32
|
||||
#define XRTDS2GPU_CTRL_ADDR_FRAME_SIZE_DATA 0x28
|
||||
#define XRTDS2GPU_CTRL_BITS_FRAME_SIZE_DATA 32
|
||||
#define XRTDS2GPU_CTRL_ADDR_STATUS_DATA 0x30
|
||||
#define XRTDS2GPU_CTRL_BITS_STATUS_DATA 32
|
||||
#define XRTDS2GPU_CTRL_ADDR_STATUS_CTRL 0x34
|
||||
|
|
@ -34,8 +34,16 @@ set(SOURCES
|
|||
ips/dma.cpp
|
||||
ips/bram.cpp
|
||||
ips/rtds.cpp
|
||||
|
||||
ips/rtds2gpu/rtds2gpu.cpp
|
||||
ips/rtds2gpu/xrtds2gpu.c
|
||||
ips/rtds2gpu/gpu2rtds.cpp
|
||||
)
|
||||
|
||||
# we don't have much influence on drivers generated by Xilinx, so ignore warnings
|
||||
set_source_files_properties(ips/rtds2gpu/xrtds2gpu.c
|
||||
PROPERTIES COMPILE_FLAGS -Wno-int-to-pointer-cast)
|
||||
|
||||
include(FindPkgConfig)
|
||||
|
||||
pkg_check_modules(JANSSON jansson)
|
||||
|
|
|
@ -374,6 +374,13 @@ void Gpu::memcpyKernel(const MemoryBlock& src, const MemoryBlock& dst, size_t si
|
|||
cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
MemoryTranslation
|
||||
Gpu::translate(const MemoryBlock& dst)
|
||||
{
|
||||
auto& mm = MemoryManager::get();
|
||||
return mm.getTranslation(masterPciEAddrSpaceId, dst.getAddrSpaceId());
|
||||
}
|
||||
|
||||
|
||||
std::unique_ptr<villas::MemoryBlock, villas::MemoryBlock::deallocator_fn>
|
||||
GpuAllocator::allocateBlock(size_t size)
|
||||
|
@ -381,29 +388,53 @@ GpuAllocator::allocateBlock(size_t size)
|
|||
cudaSetDevice(gpu.gpuId);
|
||||
|
||||
void* addr;
|
||||
if(cudaSuccess != cudaMalloc(&addr, size)) {
|
||||
logger->error("cudaMalloc(..., size={}) failed", size);
|
||||
throw std::bad_alloc();
|
||||
}
|
||||
|
||||
auto& mm = MemoryManager::get();
|
||||
|
||||
// assemble name for this block
|
||||
std::stringstream name;
|
||||
name << std::showbase << std::hex << reinterpret_cast<uintptr_t>(addr);
|
||||
// search for an existing chunk that has enough free memory
|
||||
auto chunk = std::find_if(chunks.begin(), chunks.end(), [&](const auto& chunk) {
|
||||
return chunk->getAvailableMemory() >= size;
|
||||
});
|
||||
|
||||
auto blockName = mm.getSlaveAddrSpaceName(getName(), name.str());
|
||||
auto blockAddrSpaceId = mm.getOrCreateAddressSpace(blockName);
|
||||
|
||||
const auto localAddr = reinterpret_cast<uintptr_t>(addr);
|
||||
std::unique_ptr<MemoryBlock, MemoryBlock::deallocator_fn>
|
||||
mem(new MemoryBlock(localAddr, size, blockAddrSpaceId), this->free);
|
||||
if(chunk != chunks.end()) {
|
||||
logger->debug("Found existing chunk that can host the requested block");
|
||||
|
||||
insertMemoryBlock(*mem);
|
||||
return (*chunk)->allocateBlock(size);
|
||||
|
||||
gpu.makeAccessibleToPCIeAndVA(*mem);
|
||||
} else {
|
||||
// allocate a new chunk
|
||||
|
||||
return mem;
|
||||
// rounded-up multiple of GPU page size
|
||||
const size_t chunkSize = size - (size & (GpuPageSize - 1)) + GpuPageSize;
|
||||
logger->debug("Allocate new chunk of {:#x} bytes", chunkSize);
|
||||
|
||||
if(cudaSuccess != cudaMalloc(&addr, chunkSize)) {
|
||||
logger->error("cudaMalloc(..., size={}) failed", chunkSize);
|
||||
throw std::bad_alloc();
|
||||
}
|
||||
|
||||
// assemble name for this block
|
||||
std::stringstream name;
|
||||
name << std::showbase << std::hex << reinterpret_cast<uintptr_t>(addr);
|
||||
|
||||
auto blockName = mm.getSlaveAddrSpaceName(getName(), name.str());
|
||||
auto blockAddrSpaceId = mm.getOrCreateAddressSpace(blockName);
|
||||
|
||||
const auto localAddr = reinterpret_cast<uintptr_t>(addr);
|
||||
std::unique_ptr<MemoryBlock, MemoryBlock::deallocator_fn>
|
||||
mem(new MemoryBlock(localAddr, chunkSize, blockAddrSpaceId), this->free);
|
||||
|
||||
insertMemoryBlock(*mem);
|
||||
|
||||
// already make accessible to CPU
|
||||
gpu.makeAccessibleToPCIeAndVA(*mem);
|
||||
|
||||
// create a new allocator to manage the chunk and push to chunk list
|
||||
chunks.push_front(std::make_unique<LinearAllocator>(std::move(mem)));
|
||||
|
||||
// call again, this time there's a large enough chunk
|
||||
return allocateBlock(size);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -58,6 +58,9 @@ public:
|
|||
|
||||
void memcpyKernel(const MemoryBlock& src, const MemoryBlock& dst, size_t size);
|
||||
|
||||
MemoryTranslation
|
||||
translate(const MemoryBlock& dst);
|
||||
|
||||
private:
|
||||
bool registerIoMemory(const MemoryBlock& mem);
|
||||
bool registerHostMemory(const MemoryBlock& mem);
|
||||
|
@ -81,6 +84,8 @@ private:
|
|||
|
||||
class GpuAllocator : public BaseAllocator<GpuAllocator> {
|
||||
public:
|
||||
static constexpr size_t GpuPageSize = 64UL << 10;
|
||||
|
||||
GpuAllocator(Gpu& gpu);
|
||||
|
||||
std::string getName() const;
|
||||
|
@ -90,6 +95,8 @@ public:
|
|||
|
||||
private:
|
||||
Gpu& gpu;
|
||||
// TODO: replace by multimap (key is available memory)
|
||||
std::list<std::unique_ptr<LinearAllocator>> chunks;
|
||||
};
|
||||
|
||||
class GpuFactory : public Plugin {
|
||||
|
|
|
@ -41,7 +41,7 @@ IpNode::streamGraph;
|
|||
bool
|
||||
IpNodeFactory::configureJson(IpCore& ip, json_t* json_ip)
|
||||
{
|
||||
auto& ipNode = reinterpret_cast<IpNode&>(ip);
|
||||
auto& ipNode = dynamic_cast<IpNode&>(ip);
|
||||
auto logger = getLogger();
|
||||
|
||||
json_t* json_ports = json_object_get(json_ip, "ports");
|
||||
|
@ -216,7 +216,7 @@ IpNode::connectLoopback()
|
|||
logger->debug("switch at: {}", portMaster->nodeName);
|
||||
|
||||
// TODO: verify this is really a switch!
|
||||
auto axiStreamSwitch = reinterpret_cast<ip::AxiStreamSwitch*>(
|
||||
auto axiStreamSwitch = dynamic_cast<ip::AxiStreamSwitch*>(
|
||||
card->lookupIp(portMaster->nodeName));
|
||||
|
||||
if(axiStreamSwitch == nullptr) {
|
||||
|
|
|
@ -31,7 +31,7 @@ static BramFactory factory;
|
|||
bool
|
||||
BramFactory::configureJson(IpCore& ip, json_t* json_ip)
|
||||
{
|
||||
auto& bram = reinterpret_cast<Bram&>(ip);
|
||||
auto& bram = dynamic_cast<Bram&>(ip);
|
||||
|
||||
if(json_unpack(json_ip, "{ s: i }", "size", &bram.size) != 0) {
|
||||
getLogger()->error("Cannot parse 'size'");
|
||||
|
|
|
@ -165,7 +165,7 @@ Dma::write(const MemoryBlock& mem, size_t len)
|
|||
mem.getAddrSpaceId());
|
||||
const void* buf = reinterpret_cast<void*>(translation.getLocalAddr(0));
|
||||
|
||||
logger->debug("Write to address: {:p}", buf);
|
||||
logger->debug("Write to stream from address {:p}", buf);
|
||||
return hasScatterGather() ? writeSG(buf, len) : writeSimple(buf, len);
|
||||
}
|
||||
|
||||
|
@ -180,7 +180,7 @@ Dma::read(const MemoryBlock& mem, size_t len)
|
|||
mem.getAddrSpaceId());
|
||||
void* buf = reinterpret_cast<void*>(translation.getLocalAddr(0));
|
||||
|
||||
logger->debug("Read from address: {:p}", buf);
|
||||
logger->debug("Read from stream and write to address {:p}", buf);
|
||||
return hasScatterGather() ? readSG(buf, len) : readSimple(buf, len);
|
||||
}
|
||||
|
||||
|
|
|
@ -125,7 +125,7 @@ bool
|
|||
AxiPciExpressBridgeFactory::configureJson(IpCore& ip, json_t* json_ip)
|
||||
{
|
||||
auto logger = getLogger();
|
||||
auto& pcie = reinterpret_cast<AxiPciExpressBridge&>(ip);
|
||||
auto& pcie = dynamic_cast<AxiPciExpressBridge&>(ip);
|
||||
|
||||
for(auto barType : std::list<std::string>{"axi_bars", "pcie_bars"}) {
|
||||
json_t* json_bars = json_object_get(json_ip, barType.c_str());
|
||||
|
|
142
fpga/lib/ips/rtds2gpu/gpu2rtds.cpp
Normal file
142
fpga/lib/ips/rtds2gpu/gpu2rtds.cpp
Normal file
|
@ -0,0 +1,142 @@
|
|||
#include <unistd.h>
|
||||
#include <cstring>
|
||||
|
||||
#include <villas/memory_manager.hpp>
|
||||
#include <villas/fpga/ips/gpu2rtds.hpp>
|
||||
|
||||
#include "log.hpp"
|
||||
|
||||
namespace villas {
|
||||
namespace fpga {
|
||||
namespace ip {
|
||||
|
||||
static Gpu2RtdsFactory factory;
|
||||
|
||||
bool Gpu2Rtds::init()
|
||||
{
|
||||
Hls::init();
|
||||
|
||||
auto& registers = addressTranslations.at(registerMemory);
|
||||
|
||||
registerStatus = reinterpret_cast<StatusRegister*>(registers.getLocalAddr(registerStatusOffset));
|
||||
registerStatusCtrl = reinterpret_cast<StatusControlRegister*>(registers.getLocalAddr(registerStatusCtrlOffset));
|
||||
registerFrameSize = reinterpret_cast<uint32_t*>(registers.getLocalAddr(registerFrameSizeOffset));
|
||||
registerFrames = reinterpret_cast<uint32_t*>(registers.getLocalAddr(registerFrameOffset));
|
||||
|
||||
maxFrameSize = getMaxFrameSize();
|
||||
logger->info("Max. frame size supported: {}", maxFrameSize);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
Gpu2Rtds::startOnce(size_t frameSize)
|
||||
{
|
||||
*registerFrameSize = frameSize;
|
||||
|
||||
start();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void Gpu2Rtds::dump(spdlog::level::level_enum logLevel)
|
||||
{
|
||||
const auto frame_size = *registerFrameSize;
|
||||
auto status = *registerStatus;
|
||||
|
||||
logger->log(logLevel, "Gpu2Rtds registers:");
|
||||
logger->log(logLevel, " Frame size (words): {:#x}", frame_size);
|
||||
logger->log(logLevel, " Status: {:#x}", status.value);
|
||||
logger->log(logLevel, " Running: {}", (status.is_running ? "yes" : "no"));
|
||||
logger->log(logLevel, " Frame too short: {}", (status.frame_too_short ? "yes" : "no"));
|
||||
logger->log(logLevel, " Frame too long: {}", (status.frame_too_long ? "yes" : "no"));
|
||||
logger->log(logLevel, " Frame size invalid: {}", (status.invalid_frame_size ? "yes" : "no"));
|
||||
logger->log(logLevel, " Last count: {}", status.last_count);
|
||||
logger->log(logLevel, " Last seq. number: {}", status.last_seq_nr);
|
||||
logger->log(logLevel, " Max. frame size: {}", status.max_frame_size);
|
||||
}
|
||||
|
||||
//bool Gpu2Rtds::startOnce(const MemoryBlock& mem, size_t frameSize, size_t dataOffset, size_t doorbellOffset)
|
||||
//{
|
||||
// auto& mm = MemoryManager::get();
|
||||
|
||||
// if(frameSize > maxFrameSize) {
|
||||
// logger->error("Requested frame size of {} exceeds max. frame size of {}",
|
||||
// frameSize, maxFrameSize);
|
||||
// return false;
|
||||
// }
|
||||
|
||||
// auto translationFromIp = mm.getTranslation(
|
||||
// getMasterAddrSpaceByInterface(axiInterface),
|
||||
// mem.getAddrSpaceId());
|
||||
|
||||
// // set address of memory block in HLS IP
|
||||
// XGpu2Rtds_Set_baseaddr(&xInstance, translationFromIp.getLocalAddr(0));
|
||||
|
||||
// XGpu2Rtds_Set_doorbell_offset(&xInstance, doorbellOffset);
|
||||
// XGpu2Rtds_Set_data_offset(&xInstance, dataOffset);
|
||||
// XGpu2Rtds_Set_frame_size(&xInstance, frameSize);
|
||||
|
||||
// // prepare memory with all zeroes
|
||||
// auto translationFromProcess = mm.getTranslationFromProcess(mem.getAddrSpaceId());
|
||||
// auto memory = reinterpret_cast<void*>(translationFromProcess.getLocalAddr(0));
|
||||
// memset(memory, 0, mem.getSize());
|
||||
|
||||
// // start IP
|
||||
// return start();
|
||||
//}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
//bool
|
||||
//Gpu2Rtds::updateStatus()
|
||||
//{
|
||||
// if(not XGpu2Rtds_Get_status_vld(&xInstance))
|
||||
// return false;
|
||||
|
||||
// status.value = XGpu2Rtds_Get_status(&xInstance);
|
||||
|
||||
// return true;
|
||||
//}
|
||||
|
||||
size_t
|
||||
Gpu2Rtds::getMaxFrameSize()
|
||||
{
|
||||
*registerFrameSize = 0;
|
||||
|
||||
start();
|
||||
while(not isFinished());
|
||||
|
||||
while(not registerStatusCtrl->status_ap_vld);
|
||||
|
||||
axilite_reg_status_t status = *registerStatus;
|
||||
|
||||
// logger->debug("(*registerStatus).max_frame_size: {}", (*registerStatus).max_frame_size);
|
||||
// logger->debug("status.max_frame_size: {}", status.max_frame_size);
|
||||
|
||||
// assert(status.max_frame_size == (*registerStatus).max_frame_size);
|
||||
|
||||
return status.max_frame_size;
|
||||
}
|
||||
|
||||
//void
|
||||
//Gpu2Rtds::dumpDoorbell(uint32_t doorbellRegister) const
|
||||
//{
|
||||
// auto& doorbell = reinterpret_cast<reg_doorbell_t&>(doorbellRegister);
|
||||
|
||||
// logger->info("Doorbell register: {:#08x}", doorbell.value);
|
||||
// logger->info(" Valid: {}", (doorbell.is_valid ? "yes" : "no"));
|
||||
// logger->info(" Count: {}", doorbell.count);
|
||||
// logger->info(" Seq. number: {}", doorbell.seq_nr);
|
||||
//}
|
||||
|
||||
Gpu2RtdsFactory::Gpu2RtdsFactory() :
|
||||
IpNodeFactory(getName())
|
||||
{
|
||||
}
|
||||
|
||||
} // namespace ip
|
||||
} // namespace fpga
|
||||
} // namespace villas
|
131
fpga/lib/ips/rtds2gpu/rtds2gpu.cpp
Normal file
131
fpga/lib/ips/rtds2gpu/rtds2gpu.cpp
Normal file
|
@ -0,0 +1,131 @@
|
|||
#include <unistd.h>
|
||||
#include <cstring>
|
||||
|
||||
#include <villas/memory_manager.hpp>
|
||||
#include <villas/fpga/ips/rtds2gpu.hpp>
|
||||
|
||||
#include "log.hpp"
|
||||
|
||||
namespace villas {
|
||||
namespace fpga {
|
||||
namespace ip {
|
||||
|
||||
static Rtds2GpuFactory factory;
|
||||
|
||||
bool Rtds2Gpu::init()
|
||||
{
|
||||
Hls::init();
|
||||
|
||||
xInstance.IsReady = XIL_COMPONENT_IS_READY;
|
||||
xInstance.Ctrl_BaseAddress = getBaseAddr(registerMemory);
|
||||
|
||||
status.value = 0;
|
||||
started = false;
|
||||
|
||||
// maxFrameSize = getMaxFrameSize();
|
||||
maxFrameSize = 16;
|
||||
logger->info("Max. frame size supported: {}", maxFrameSize);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void Rtds2Gpu::dump(spdlog::level::level_enum logLevel)
|
||||
{
|
||||
const auto baseaddr = XRtds2gpu_Get_baseaddr(&xInstance);
|
||||
const auto data_offset = XRtds2gpu_Get_data_offset(&xInstance);
|
||||
const auto doorbell_offset = XRtds2gpu_Get_doorbell_offset(&xInstance);
|
||||
const auto frame_size = XRtds2gpu_Get_frame_size(&xInstance);
|
||||
|
||||
logger->log(logLevel, "Rtds2Gpu registers (IP base {:#x}):", xInstance.Ctrl_BaseAddress);
|
||||
logger->log(logLevel, " Base address (bytes): {:#x}", baseaddr);
|
||||
logger->log(logLevel, " Doorbell offset (bytes): {:#x}", doorbell_offset);
|
||||
logger->log(logLevel, " Data offset (bytes): {:#x}", data_offset);
|
||||
logger->log(logLevel, " Frame size (words): {:#x}", frame_size);
|
||||
logger->log(logLevel, " Status: {:#x}", status.value);
|
||||
logger->log(logLevel, " Running: {}", (status.is_running ? "yes" : "no"));
|
||||
logger->log(logLevel, " Frame too short: {}", (status.frame_too_short ? "yes" : "no"));
|
||||
logger->log(logLevel, " Frame too long: {}", (status.frame_too_long ? "yes" : "no"));
|
||||
logger->log(logLevel, " Frame size invalid: {}", (status.invalid_frame_size ? "yes" : "no"));
|
||||
logger->log(logLevel, " Last count: {}", status.last_count);
|
||||
logger->log(logLevel, " Last seq. number: {}", status.last_seq_nr);
|
||||
logger->log(logLevel, " Max. frame size: {}", status.max_frame_size);
|
||||
}
|
||||
|
||||
bool Rtds2Gpu::startOnce(const MemoryBlock& mem, size_t frameSize, size_t dataOffset, size_t doorbellOffset)
|
||||
{
|
||||
auto& mm = MemoryManager::get();
|
||||
|
||||
if(frameSize > maxFrameSize) {
|
||||
logger->error("Requested frame size of {} exceeds max. frame size of {}",
|
||||
frameSize, maxFrameSize);
|
||||
return false;
|
||||
}
|
||||
|
||||
auto translationFromIp = mm.getTranslation(
|
||||
getMasterAddrSpaceByInterface(axiInterface),
|
||||
mem.getAddrSpaceId());
|
||||
|
||||
// set address of memory block in HLS IP
|
||||
XRtds2gpu_Set_baseaddr(&xInstance, translationFromIp.getLocalAddr(0));
|
||||
|
||||
XRtds2gpu_Set_doorbell_offset(&xInstance, doorbellOffset);
|
||||
XRtds2gpu_Set_data_offset(&xInstance, dataOffset);
|
||||
XRtds2gpu_Set_frame_size(&xInstance, frameSize);
|
||||
|
||||
// prepare memory with all zeroes
|
||||
auto translationFromProcess = mm.getTranslationFromProcess(mem.getAddrSpaceId());
|
||||
auto memory = reinterpret_cast<void*>(translationFromProcess.getLocalAddr(0));
|
||||
memset(memory, 0, mem.getSize());
|
||||
|
||||
// start IP
|
||||
return start();
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
bool
|
||||
Rtds2Gpu::updateStatus()
|
||||
{
|
||||
if(not XRtds2gpu_Get_status_vld(&xInstance))
|
||||
return false;
|
||||
|
||||
status.value = XRtds2gpu_Get_status(&xInstance);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t
|
||||
Rtds2Gpu::getMaxFrameSize()
|
||||
{
|
||||
XRtds2gpu_Set_frame_size(&xInstance, 0);
|
||||
|
||||
start();
|
||||
while(not isFinished());
|
||||
updateStatus();
|
||||
|
||||
return status.max_frame_size;
|
||||
}
|
||||
|
||||
void
|
||||
Rtds2Gpu::dumpDoorbell(uint32_t doorbellRegister) const
|
||||
{
|
||||
auto& doorbell = reinterpret_cast<reg_doorbell_t&>(doorbellRegister);
|
||||
|
||||
logger->info("Doorbell register: {:#08x}", doorbell.value);
|
||||
logger->info(" Valid: {}", (doorbell.is_valid ? "yes" : "no"));
|
||||
logger->info(" Count: {}", doorbell.count);
|
||||
logger->info(" Seq. number: {}", doorbell.seq_nr);
|
||||
}
|
||||
|
||||
Rtds2GpuFactory::Rtds2GpuFactory() :
|
||||
IpNodeFactory(getName())
|
||||
{
|
||||
}
|
||||
|
||||
} // namespace ip
|
||||
} // namespace fpga
|
||||
} // namespace villas
|
221
fpga/lib/ips/rtds2gpu/xrtds2gpu.c
Normal file
221
fpga/lib/ips/rtds2gpu/xrtds2gpu.c
Normal file
|
@ -0,0 +1,221 @@
|
|||
// ==============================================================
|
||||
// File generated by Vivado(TM) HLS - High-Level Synthesis from C, C++ and SystemC
|
||||
// Version: 2017.3
|
||||
// Copyright (C) 1986-2017 Xilinx, Inc. All Rights Reserved.
|
||||
//
|
||||
// ==============================================================
|
||||
|
||||
/***************************** Include Files *********************************/
|
||||
#include <villas/fpga/ips/rtds2gpu/xrtds2gpu.h>
|
||||
|
||||
/************************** Function Implementation *************************/
|
||||
#ifndef __linux__
|
||||
int XRtds2gpu_CfgInitialize(XRtds2gpu *InstancePtr, XRtds2gpu_Config *ConfigPtr) {
|
||||
Xil_AssertNonvoid(InstancePtr != NULL);
|
||||
Xil_AssertNonvoid(ConfigPtr != NULL);
|
||||
|
||||
InstancePtr->Ctrl_BaseAddress = ConfigPtr->Ctrl_BaseAddress;
|
||||
InstancePtr->IsReady = XIL_COMPONENT_IS_READY;
|
||||
|
||||
return XST_SUCCESS;
|
||||
}
|
||||
#endif
|
||||
|
||||
void XRtds2gpu_Start(XRtds2gpu *InstancePtr) {
|
||||
u32 Data;
|
||||
|
||||
Xil_AssertVoid(InstancePtr != NULL);
|
||||
Xil_AssertVoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
|
||||
|
||||
Data = XRtds2gpu_ReadReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_AP_CTRL) & 0x80;
|
||||
XRtds2gpu_WriteReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_AP_CTRL, Data | 0x01);
|
||||
}
|
||||
|
||||
u32 XRtds2gpu_IsDone(XRtds2gpu *InstancePtr) {
|
||||
u32 Data;
|
||||
|
||||
Xil_AssertNonvoid(InstancePtr != NULL);
|
||||
Xil_AssertNonvoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
|
||||
|
||||
Data = XRtds2gpu_ReadReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_AP_CTRL);
|
||||
return (Data >> 1) & 0x1;
|
||||
}
|
||||
|
||||
u32 XRtds2gpu_IsIdle(XRtds2gpu *InstancePtr) {
|
||||
u32 Data;
|
||||
|
||||
Xil_AssertNonvoid(InstancePtr != NULL);
|
||||
Xil_AssertNonvoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
|
||||
|
||||
Data = XRtds2gpu_ReadReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_AP_CTRL);
|
||||
return (Data >> 2) & 0x1;
|
||||
}
|
||||
|
||||
u32 XRtds2gpu_IsReady(XRtds2gpu *InstancePtr) {
|
||||
u32 Data;
|
||||
|
||||
Xil_AssertNonvoid(InstancePtr != NULL);
|
||||
Xil_AssertNonvoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
|
||||
|
||||
Data = XRtds2gpu_ReadReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_AP_CTRL);
|
||||
// check ap_start to see if the pcore is ready for next input
|
||||
return !(Data & 0x1);
|
||||
}
|
||||
|
||||
void XRtds2gpu_EnableAutoRestart(XRtds2gpu *InstancePtr) {
|
||||
Xil_AssertVoid(InstancePtr != NULL);
|
||||
Xil_AssertVoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
|
||||
|
||||
XRtds2gpu_WriteReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_AP_CTRL, 0x80);
|
||||
}
|
||||
|
||||
void XRtds2gpu_DisableAutoRestart(XRtds2gpu *InstancePtr) {
|
||||
Xil_AssertVoid(InstancePtr != NULL);
|
||||
Xil_AssertVoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
|
||||
|
||||
XRtds2gpu_WriteReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_AP_CTRL, 0);
|
||||
}
|
||||
|
||||
void XRtds2gpu_Set_baseaddr(XRtds2gpu *InstancePtr, u32 Data) {
|
||||
Xil_AssertVoid(InstancePtr != NULL);
|
||||
Xil_AssertVoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
|
||||
|
||||
XRtds2gpu_WriteReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_BASEADDR_DATA, Data);
|
||||
}
|
||||
|
||||
u32 XRtds2gpu_Get_baseaddr(XRtds2gpu *InstancePtr) {
|
||||
u32 Data;
|
||||
|
||||
Xil_AssertNonvoid(InstancePtr != NULL);
|
||||
Xil_AssertNonvoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
|
||||
|
||||
Data = XRtds2gpu_ReadReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_BASEADDR_DATA);
|
||||
return Data;
|
||||
}
|
||||
|
||||
void XRtds2gpu_Set_data_offset(XRtds2gpu *InstancePtr, u32 Data) {
|
||||
Xil_AssertVoid(InstancePtr != NULL);
|
||||
Xil_AssertVoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
|
||||
|
||||
XRtds2gpu_WriteReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_DATA_OFFSET_DATA, Data);
|
||||
}
|
||||
|
||||
u32 XRtds2gpu_Get_data_offset(XRtds2gpu *InstancePtr) {
|
||||
u32 Data;
|
||||
|
||||
Xil_AssertNonvoid(InstancePtr != NULL);
|
||||
Xil_AssertNonvoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
|
||||
|
||||
Data = XRtds2gpu_ReadReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_DATA_OFFSET_DATA);
|
||||
return Data;
|
||||
}
|
||||
|
||||
void XRtds2gpu_Set_doorbell_offset(XRtds2gpu *InstancePtr, u32 Data) {
|
||||
Xil_AssertVoid(InstancePtr != NULL);
|
||||
Xil_AssertVoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
|
||||
|
||||
XRtds2gpu_WriteReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_DOORBELL_OFFSET_DATA, Data);
|
||||
}
|
||||
|
||||
u32 XRtds2gpu_Get_doorbell_offset(XRtds2gpu *InstancePtr) {
|
||||
u32 Data;
|
||||
|
||||
Xil_AssertNonvoid(InstancePtr != NULL);
|
||||
Xil_AssertNonvoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
|
||||
|
||||
Data = XRtds2gpu_ReadReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_DOORBELL_OFFSET_DATA);
|
||||
return Data;
|
||||
}
|
||||
|
||||
void XRtds2gpu_Set_frame_size(XRtds2gpu *InstancePtr, u32 Data) {
|
||||
Xil_AssertVoid(InstancePtr != NULL);
|
||||
Xil_AssertVoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
|
||||
|
||||
XRtds2gpu_WriteReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_FRAME_SIZE_DATA, Data);
|
||||
}
|
||||
|
||||
u32 XRtds2gpu_Get_frame_size(XRtds2gpu *InstancePtr) {
|
||||
u32 Data;
|
||||
|
||||
Xil_AssertNonvoid(InstancePtr != NULL);
|
||||
Xil_AssertNonvoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
|
||||
|
||||
Data = XRtds2gpu_ReadReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_FRAME_SIZE_DATA);
|
||||
return Data;
|
||||
}
|
||||
|
||||
u32 XRtds2gpu_Get_status(XRtds2gpu *InstancePtr) {
|
||||
u32 Data;
|
||||
|
||||
Xil_AssertNonvoid(InstancePtr != NULL);
|
||||
Xil_AssertNonvoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
|
||||
|
||||
Data = XRtds2gpu_ReadReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_STATUS_DATA);
|
||||
return Data;
|
||||
}
|
||||
|
||||
u32 XRtds2gpu_Get_status_vld(XRtds2gpu *InstancePtr) {
|
||||
u32 Data;
|
||||
|
||||
Xil_AssertNonvoid(InstancePtr != NULL);
|
||||
Xil_AssertNonvoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
|
||||
|
||||
Data = XRtds2gpu_ReadReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_STATUS_CTRL);
|
||||
return Data & 0x1;
|
||||
}
|
||||
|
||||
void XRtds2gpu_InterruptGlobalEnable(XRtds2gpu *InstancePtr) {
|
||||
Xil_AssertVoid(InstancePtr != NULL);
|
||||
Xil_AssertVoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
|
||||
|
||||
XRtds2gpu_WriteReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_GIE, 1);
|
||||
}
|
||||
|
||||
void XRtds2gpu_InterruptGlobalDisable(XRtds2gpu *InstancePtr) {
|
||||
Xil_AssertVoid(InstancePtr != NULL);
|
||||
Xil_AssertVoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
|
||||
|
||||
XRtds2gpu_WriteReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_GIE, 0);
|
||||
}
|
||||
|
||||
void XRtds2gpu_InterruptEnable(XRtds2gpu *InstancePtr, u32 Mask) {
|
||||
u32 Register;
|
||||
|
||||
Xil_AssertVoid(InstancePtr != NULL);
|
||||
Xil_AssertVoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
|
||||
|
||||
Register = XRtds2gpu_ReadReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_IER);
|
||||
XRtds2gpu_WriteReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_IER, Register | Mask);
|
||||
}
|
||||
|
||||
void XRtds2gpu_InterruptDisable(XRtds2gpu *InstancePtr, u32 Mask) {
|
||||
u32 Register;
|
||||
|
||||
Xil_AssertVoid(InstancePtr != NULL);
|
||||
Xil_AssertVoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
|
||||
|
||||
Register = XRtds2gpu_ReadReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_IER);
|
||||
XRtds2gpu_WriteReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_IER, Register & (~Mask));
|
||||
}
|
||||
|
||||
void XRtds2gpu_InterruptClear(XRtds2gpu *InstancePtr, u32 Mask) {
|
||||
Xil_AssertVoid(InstancePtr != NULL);
|
||||
Xil_AssertVoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
|
||||
|
||||
XRtds2gpu_WriteReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_ISR, Mask);
|
||||
}
|
||||
|
||||
u32 XRtds2gpu_InterruptGetEnabled(XRtds2gpu *InstancePtr) {
|
||||
Xil_AssertNonvoid(InstancePtr != NULL);
|
||||
Xil_AssertNonvoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
|
||||
|
||||
return XRtds2gpu_ReadReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_IER);
|
||||
}
|
||||
|
||||
u32 XRtds2gpu_InterruptGetStatus(XRtds2gpu *InstancePtr) {
|
||||
Xil_AssertNonvoid(InstancePtr != NULL);
|
||||
Xil_AssertNonvoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
|
||||
|
||||
return XRtds2gpu_ReadReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_ISR);
|
||||
}
|
||||
|
|
@ -143,7 +143,7 @@ AxiStreamSwitchFactory::configureJson(IpCore& ip, json_t* json_ip)
|
|||
|
||||
auto logger = getLogger();
|
||||
|
||||
auto& axiSwitch = reinterpret_cast<AxiStreamSwitch&>(ip);
|
||||
auto& axiSwitch = dynamic_cast<AxiStreamSwitch&>(ip);
|
||||
|
||||
if(json_unpack(json_ip, "{ s: i }", "num_ports", &axiSwitch.num_ports) != 0) {
|
||||
logger->error("Cannot parse 'num_ports'");
|
||||
|
|
|
@ -36,7 +36,9 @@ whitelist = [
|
|||
[ 'xilinx.com', 'ip', 'axi_gpio' ],
|
||||
[ 'xilinx.com', 'ip', 'axi_bram_ctrl' ],
|
||||
[ 'xilinx.com', 'ip', 'axis_data_fifo' ],
|
||||
[ 'xilinx.com', 'ip', 'axi_pcie' ]
|
||||
[ 'xilinx.com', 'ip', 'axi_pcie' ],
|
||||
[ 'xilinx.com', 'hls', 'rtds2gpu' ],
|
||||
[ 'xilinx.com', 'hls', 'mem' ]
|
||||
]
|
||||
|
||||
# List of VLNI ids of AXI4-Stream infrastructure IP cores which do not alter data
|
||||
|
|
|
@ -27,11 +27,13 @@ set(SOURCES
|
|||
dma.cpp
|
||||
fifo.cpp
|
||||
rtds.cpp
|
||||
rtds2gpu.cpp
|
||||
timer.cpp
|
||||
)
|
||||
|
||||
if(CMAKE_CUDA_COMPILER)
|
||||
list(APPEND SOURCES gpu.cpp)
|
||||
enable_language(CUDA)
|
||||
list(APPEND SOURCES gpu.cpp gpu_kernels.cu)
|
||||
endif()
|
||||
|
||||
add_executable(unit-tests ${SOURCES})
|
||||
|
|
|
@ -46,7 +46,7 @@ Test(fpga, dma, .description = "DMA")
|
|||
|
||||
logger->info("Testing {}", *ip);
|
||||
|
||||
auto dma = reinterpret_cast<villas::fpga::ip::Dma&>(*ip);
|
||||
auto dma = dynamic_cast<villas::fpga::ip::Dma&>(*ip);
|
||||
|
||||
if(not dma.loopbackPossible()) {
|
||||
logger->info("Loopback test not possible for {}", *ip);
|
||||
|
|
|
@ -46,7 +46,7 @@ Test(fpga, fifo, .description = "FIFO")
|
|||
|
||||
logger->info("Testing {}", *ip);
|
||||
|
||||
auto fifo = reinterpret_cast<villas::fpga::ip::Fifo&>(*ip);
|
||||
auto fifo = dynamic_cast<villas::fpga::ip::Fifo&>(*ip);
|
||||
|
||||
if(not fifo.connectLoopback()) {
|
||||
continue;
|
||||
|
|
|
@ -62,7 +62,7 @@ Test(fpga, gpu_dma, .description = "GPU DMA tests")
|
|||
|
||||
logger->info("Testing {}", *ip);
|
||||
|
||||
auto bram = reinterpret_cast<villas::fpga::ip::Bram*>(ip.get());
|
||||
auto bram = dynamic_cast<villas::fpga::ip::Bram*>(ip.get());
|
||||
cr_assert_not_null(bram, "Couldn't find BRAM");
|
||||
|
||||
count++;
|
||||
|
|
75
fpga/tests/gpu_kernels.cu
Normal file
75
fpga/tests/gpu_kernels.cu
Normal file
|
@ -0,0 +1,75 @@
|
|||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
#include <villas/gpu.hpp>
|
||||
#include <villas/fpga/ips/rtds2gpu.hpp>
|
||||
|
||||
|
||||
|
||||
__global__ void
|
||||
gpu_rtds_rtt_kernel(volatile uint32_t* dataIn, volatile reg_doorbell_t* doorbellIn,
|
||||
volatile uint32_t* dataOut, volatile villas::fpga::ip::ControlRegister* controlRegister,
|
||||
int* run)
|
||||
{
|
||||
printf("[gpu] gpu kernel go\n");
|
||||
|
||||
printf("dataIn: %p\n", dataIn);
|
||||
printf("doorbellIn: %p\n", doorbellIn);
|
||||
printf("dataOut: %p\n", dataOut);
|
||||
printf("control: %p\n", controlRegister);
|
||||
printf("run: %p\n", run);
|
||||
|
||||
// *run = reinterpret_cast<bool*>(malloc(sizeof(bool)));
|
||||
// **run = true;
|
||||
|
||||
uint32_t last_seq;
|
||||
while(*run) {
|
||||
// wait for data
|
||||
// printf("[gpu] wait for data, last_seq=%u\n", last_seq);
|
||||
while(not (doorbellIn->is_valid and (last_seq != doorbellIn->seq_nr)) and *run);
|
||||
// printf("doorbell: 0x%08x\n", doorbellIn->value);
|
||||
|
||||
last_seq = doorbellIn->seq_nr;
|
||||
|
||||
// printf("[gpu] copy data\n");
|
||||
for(size_t i = 0; i < doorbellIn->count; i++) {
|
||||
dataOut[i] = dataIn[i];
|
||||
}
|
||||
|
||||
// reset doorbell
|
||||
// printf("[gpu] reset doorbell\n");
|
||||
// doorbellIn->value = 0;
|
||||
|
||||
// printf("[gpu] signal go for gpu2rtds\n");
|
||||
controlRegister->ap_start = 1;
|
||||
}
|
||||
|
||||
printf("kernel done\n");
|
||||
}
|
||||
|
||||
static int* run = nullptr;
|
||||
|
||||
void gpu_rtds_rtt_start(volatile uint32_t* dataIn, volatile reg_doorbell_t* doorbellIn,
|
||||
volatile uint32_t* dataOut, volatile villas::fpga::ip::ControlRegister* controlRegister)
|
||||
{
|
||||
printf("run: %p\n", run);
|
||||
if(run == nullptr) {
|
||||
run = (int*)malloc(sizeof(uint32_t));
|
||||
cudaHostRegister(run, sizeof(uint32_t), 0);
|
||||
}
|
||||
printf("run: %p\n", run);
|
||||
|
||||
|
||||
*run = 1;
|
||||
gpu_rtds_rtt_kernel<<<1, 1>>>(dataIn, doorbellIn, dataOut, controlRegister, run);
|
||||
printf("[cpu] kernel launched\n");
|
||||
}
|
||||
|
||||
void gpu_rtds_rtt_stop()
|
||||
{
|
||||
*run = 0;
|
||||
cudaDeviceSynchronize();
|
||||
}
|
|
@ -80,8 +80,12 @@ Test(fpga, rtds, .description = "RTDS")
|
|||
auto dmaMaster = dma->getMasterPort(dma->mm2sPort);
|
||||
auto dmaSlave = dma->getSlavePort(dma->s2mmPort);
|
||||
|
||||
rtds->connect(rtdsMaster, dmaSlave);
|
||||
dma->connect(dmaMaster, rtdsSlave);
|
||||
// rtds->connect(*rtds);
|
||||
// logger->info("loopback");
|
||||
// while(1);
|
||||
|
||||
// rtds->connect(rtdsMaster, dmaSlave);
|
||||
// dma->connect(dmaMaster, rtdsSlave);
|
||||
|
||||
auto mem = villas::HostRam::getAllocator().allocate<int32_t>(0x100 / sizeof(int32_t));
|
||||
|
||||
|
|
349
fpga/tests/rtds2gpu.cpp
Normal file
349
fpga/tests/rtds2gpu.cpp
Normal file
|
@ -0,0 +1,349 @@
|
|||
/** FIFO unit test.
|
||||
*
|
||||
* @file
|
||||
* @author Steffen Vogel <stvogel@eonerc.rwth-aachen.de>
|
||||
* @copyright 2017, Steffen Vogel
|
||||
* @license GNU General Public License (version 3)
|
||||
*
|
||||
* VILLASfpga
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*********************************************************************************/
|
||||
|
||||
#include <criterion/criterion.h>
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include <villas/log.hpp>
|
||||
#include <villas/memory.hpp>
|
||||
#include <villas/fpga/card.hpp>
|
||||
|
||||
#include <villas/fpga/ips/rtds2gpu.hpp>
|
||||
#include <villas/fpga/ips/gpu2rtds.hpp>
|
||||
#include <villas/fpga/ips/switch.hpp>
|
||||
#include <villas/fpga/ips/dma.hpp>
|
||||
#include <villas/fpga/ips/rtds.hpp>
|
||||
#include <villas/gpu.hpp>
|
||||
|
||||
#include "global.hpp"
|
||||
|
||||
|
||||
static constexpr size_t SAMPLE_SIZE = 4;
|
||||
static constexpr size_t SAMPLE_COUNT = 1;
|
||||
static constexpr size_t FRAME_SIZE = SAMPLE_COUNT * SAMPLE_SIZE;
|
||||
|
||||
static constexpr size_t DOORBELL_OFFSET = SAMPLE_COUNT;
|
||||
static constexpr size_t DATA_OFFSET = 0;
|
||||
|
||||
static void dumpMem(const uint32_t* addr, size_t len)
|
||||
{
|
||||
const size_t bytesPerLine = 16;
|
||||
const size_t lines = (len) / bytesPerLine + 1;
|
||||
const uint8_t* buf = reinterpret_cast<const uint8_t*>(addr);
|
||||
|
||||
size_t bytesRead = 0;
|
||||
|
||||
for(size_t line = 0; line < lines; line++) {
|
||||
const unsigned base = line * bytesPerLine;
|
||||
printf("0x%04x: ", base);
|
||||
|
||||
for(size_t i = 0; i < bytesPerLine && bytesRead < len; i++) {
|
||||
printf("0x%02x ", buf[base + i]);
|
||||
bytesRead++;
|
||||
}
|
||||
puts("");
|
||||
}
|
||||
}
|
||||
|
||||
Test(fpga, rtds2gpu_loopback_dma, .description = "Rtds2Gpu")
|
||||
{
|
||||
auto logger = loggerGetOrCreate("unittest:rtds2gpu");
|
||||
|
||||
for(auto& ip : state.cards.front()->ips) {
|
||||
if(*ip != villas::fpga::Vlnv("acs.eonerc.rwth-aachen.de:hls:rtds2gpu:"))
|
||||
continue;
|
||||
|
||||
logger->info("Testing {}", *ip);
|
||||
|
||||
|
||||
/* Collect neccessary IPs */
|
||||
|
||||
auto rtds2gpu = dynamic_cast<villas::fpga::ip::Rtds2Gpu&>(*ip);
|
||||
|
||||
auto axiSwitch = dynamic_cast<villas::fpga::ip::AxiStreamSwitch*>(
|
||||
state.cards.front()->lookupIp(villas::fpga::Vlnv("xilinx.com:ip:axis_switch:")));
|
||||
|
||||
auto dma = dynamic_cast<villas::fpga::ip::Dma*>(
|
||||
state.cards.front()->lookupIp(villas::fpga::Vlnv("xilinx.com:ip:axi_dma:")));
|
||||
|
||||
auto gpu2rtds = dynamic_cast<villas::fpga::ip::Gpu2Rtds*>(
|
||||
state.cards.front()->lookupIp(villas::fpga::Vlnv("acs.eonerc.rwth-aachen.de:hls:gpu2rtds:")));
|
||||
|
||||
auto rtds = dynamic_cast<villas::fpga::ip::Rtds*>(
|
||||
state.cards.front()->lookupIp(villas::fpga::Vlnv("acs.eonerc.rwth-aachen.de:user:rtds_axis:")));
|
||||
|
||||
|
||||
cr_assert_not_null(axiSwitch, "No AXI switch IP found");
|
||||
cr_assert_not_null(dma, "No DMA IP found");
|
||||
cr_assert_not_null(gpu2rtds, "No Gpu2Rtds IP found");
|
||||
cr_assert_not_null(rtds, "RTDS IP not found");
|
||||
|
||||
rtds2gpu.dump(spdlog::level::debug);
|
||||
gpu2rtds->dump(spdlog::level::debug);
|
||||
|
||||
|
||||
/* Allocate and prepare memory */
|
||||
|
||||
// allocate space for all samples and doorbell register
|
||||
auto dmaMemSrc = villas::HostDmaRam::getAllocator(0).allocate<uint32_t>(SAMPLE_COUNT + 1);
|
||||
auto dmaMemDst = villas::HostDmaRam::getAllocator(0).allocate<uint32_t>(SAMPLE_COUNT + 1);
|
||||
auto dmaMemDst2 = villas::HostDmaRam::getAllocator(0).allocate<uint32_t>(SAMPLE_COUNT + 1);
|
||||
|
||||
|
||||
memset(&dmaMemSrc, 0x11, dmaMemSrc.getMemoryBlock().getSize());
|
||||
memset(&dmaMemDst, 0x55, dmaMemDst.getMemoryBlock().getSize());
|
||||
memset(&dmaMemDst2, 0x77, dmaMemDst2.getMemoryBlock().getSize());
|
||||
|
||||
const uint32_t* dataSrc = &dmaMemSrc[DATA_OFFSET];
|
||||
const uint32_t* dataDst = &dmaMemDst[DATA_OFFSET];
|
||||
const uint32_t* dataDst2 = &dmaMemDst2[0];
|
||||
|
||||
dumpMem(dataSrc, dmaMemSrc.getMemoryBlock().getSize());
|
||||
dumpMem(dataDst, dmaMemDst.getMemoryBlock().getSize());
|
||||
dumpMem(dataDst2, dmaMemDst2.getMemoryBlock().getSize());
|
||||
|
||||
|
||||
// connect AXI Stream from DMA to Rtds2Gpu IP
|
||||
cr_assert(dma->connect(rtds2gpu));
|
||||
|
||||
cr_assert(rtds2gpu.startOnce(dmaMemDst.getMemoryBlock(), SAMPLE_COUNT, DATA_OFFSET * 4, DOORBELL_OFFSET * 4),
|
||||
"Preparing Rtds2Gpu IP failed");
|
||||
|
||||
cr_assert(dma->write(dmaMemSrc.getMemoryBlock(), FRAME_SIZE),
|
||||
"Starting DMA MM2S transfer failed");
|
||||
|
||||
cr_assert(dma->writeComplete(),
|
||||
"DMA failed");
|
||||
|
||||
while(not rtds2gpu.isFinished());
|
||||
|
||||
const uint32_t* doorbellDst = &dmaMemDst[DOORBELL_OFFSET];
|
||||
rtds2gpu.dump(spdlog::level::info);
|
||||
rtds2gpu.dumpDoorbell(*doorbellDst);
|
||||
|
||||
cr_assert(memcmp(dataSrc, dataDst, FRAME_SIZE) == 0, "Memory not equal");
|
||||
|
||||
|
||||
for(size_t i = 0; i < SAMPLE_COUNT; i++) {
|
||||
gpu2rtds->registerFrames[i] = dmaMemDst[i];
|
||||
}
|
||||
|
||||
|
||||
// connect AXI Stream from Gpu2Rtds IP to DMA
|
||||
cr_assert(gpu2rtds->connect(*dma));
|
||||
|
||||
cr_assert(dma->read(dmaMemDst2.getMemoryBlock(), FRAME_SIZE),
|
||||
"Starting DMA S2MM transfer failed");
|
||||
|
||||
cr_assert(gpu2rtds->startOnce(SAMPLE_COUNT),
|
||||
"Preparing Gpu2Rtds IP failed");
|
||||
|
||||
cr_assert(dma->readComplete(),
|
||||
"DMA failed");
|
||||
|
||||
while(not gpu2rtds->isFinished());
|
||||
|
||||
cr_assert(memcmp(dataSrc, dataDst2, FRAME_SIZE) == 0, "Memory not equal");
|
||||
|
||||
dumpMem(dataSrc, dmaMemSrc.getMemoryBlock().getSize());
|
||||
dumpMem(dataDst, dmaMemDst.getMemoryBlock().getSize());
|
||||
dumpMem(dataDst2, dmaMemDst2.getMemoryBlock().getSize());
|
||||
|
||||
logger->info(TXT_GREEN("Passed"));
|
||||
}
|
||||
}
|
||||
|
||||
Test(fpga, rtds2gpu_rtt_cpu, .description = "Rtds2Gpu RTT via CPU")
|
||||
{
|
||||
auto logger = loggerGetOrCreate("unittest:rtds2gpu");
|
||||
|
||||
/* Collect neccessary IPs */
|
||||
|
||||
auto gpu2rtds = dynamic_cast<villas::fpga::ip::Gpu2Rtds*>(
|
||||
state.cards.front()->lookupIp(villas::fpga::Vlnv("acs.eonerc.rwth-aachen.de:hls:gpu2rtds:")));
|
||||
|
||||
auto rtds2gpu = dynamic_cast<villas::fpga::ip::Rtds2Gpu*>(
|
||||
state.cards.front()->lookupIp(villas::fpga::Vlnv("acs.eonerc.rwth-aachen.de:hls:rtds2gpu:")));
|
||||
|
||||
cr_assert_not_null(gpu2rtds, "No Gpu2Rtds IP found");
|
||||
cr_assert_not_null(rtds2gpu, "No Rtds2Gpu IP not found");
|
||||
|
||||
for(auto& ip : state.cards.front()->ips) {
|
||||
if(*ip != villas::fpga::Vlnv("acs.eonerc.rwth-aachen.de:user:rtds_axis:"))
|
||||
continue;
|
||||
|
||||
auto& rtds = dynamic_cast<villas::fpga::ip::Rtds&>(*ip);
|
||||
logger->info("Testing {}", rtds);
|
||||
|
||||
auto dmaRam = villas::HostDmaRam::getAllocator().allocate<uint32_t>(SAMPLE_COUNT + 1);
|
||||
uint32_t* data = &dmaRam[DATA_OFFSET];
|
||||
uint32_t* doorbell = &dmaRam[DOORBELL_OFFSET];
|
||||
|
||||
// TEST: rtds loopback via switch, this should always work and have RTT=1
|
||||
//cr_assert(rtds.connect(rtds));
|
||||
//logger->info("loopback");
|
||||
//while(1);
|
||||
|
||||
cr_assert(rtds.connect(*rtds2gpu));
|
||||
cr_assert(gpu2rtds->connect(rtds));
|
||||
|
||||
|
||||
for(size_t i = 1; i <= 10000; ) {
|
||||
rtds2gpu->doorbellReset(*doorbell);
|
||||
rtds2gpu->startOnce(dmaRam.getMemoryBlock(), SAMPLE_COUNT, DATA_OFFSET * 4, DOORBELL_OFFSET * 4);
|
||||
|
||||
// Wait by polling rtds2gpu IP or ...
|
||||
// while(not rtds2gpu->isFinished());
|
||||
|
||||
// Wait by polling (local) doorbell register (= just memory)
|
||||
while(not rtds2gpu->doorbellIsValid(*doorbell));
|
||||
|
||||
|
||||
// copy samples to gpu2rtds IP
|
||||
for(size_t i = 0; i < SAMPLE_COUNT; i++) {
|
||||
gpu2rtds->registerFrames[i] = data[i];
|
||||
}
|
||||
|
||||
// Waiting for gpu2rtds is not strictly required
|
||||
gpu2rtds->startOnce(SAMPLE_COUNT);
|
||||
//while(not gpu2rtds->isFinished());
|
||||
|
||||
if(i % 1000 == 0) {
|
||||
logger->info("Successful iterations {}, data {}", i, data[0]);
|
||||
rtds2gpu->dump();
|
||||
rtds2gpu->dumpDoorbell(data[1]);
|
||||
}
|
||||
}
|
||||
|
||||
logger->info(TXT_GREEN("Passed"));
|
||||
}
|
||||
}
|
||||
|
||||
void gpu_rtds_rtt_start(volatile uint32_t* dataIn, volatile reg_doorbell_t* doorbellIn,
|
||||
volatile uint32_t* dataOut, volatile villas::fpga::ip::ControlRegister* controlRegister);
|
||||
|
||||
void gpu_rtds_rtt_stop();
|
||||
|
||||
Test(fpga, rtds2gpu_rtt_gpu, .description = "Rtds2Gpu RTT via GPU")
|
||||
{
|
||||
auto logger = loggerGetOrCreate("unittest:rtds2gpu");
|
||||
|
||||
/* Collect neccessary IPs */
|
||||
|
||||
auto gpu2rtds = dynamic_cast<villas::fpga::ip::Gpu2Rtds*>(
|
||||
state.cards.front()->lookupIp(villas::fpga::Vlnv("acs.eonerc.rwth-aachen.de:hls:gpu2rtds:")));
|
||||
|
||||
auto rtds2gpu = dynamic_cast<villas::fpga::ip::Rtds2Gpu*>(
|
||||
state.cards.front()->lookupIp(villas::fpga::Vlnv("acs.eonerc.rwth-aachen.de:hls:rtds2gpu:")));
|
||||
|
||||
cr_assert_not_null(gpu2rtds, "No Gpu2Rtds IP found");
|
||||
cr_assert_not_null(rtds2gpu, "No Rtds2Gpu IP not found");
|
||||
|
||||
villas::Plugin* plugin = villas::Plugin::lookup(villas::Plugin::Type::Gpu, "");
|
||||
auto gpuPlugin = dynamic_cast<villas::gpu::GpuFactory*>(plugin);
|
||||
cr_assert_not_null(gpuPlugin, "No GPU plugin found");
|
||||
|
||||
auto gpus = gpuPlugin->make();
|
||||
cr_assert(gpus.size() > 0, "No GPUs found");
|
||||
|
||||
// just get first cpu
|
||||
auto& gpu = gpus.front();
|
||||
|
||||
// allocate memory on GPU and make accessible by to PCIe/FPGA
|
||||
auto gpuRam = gpu->getAllocator().allocate<uint32_t>(SAMPLE_COUNT + 1);
|
||||
cr_assert(gpu->makeAccessibleToPCIeAndVA(gpuRam.getMemoryBlock()));
|
||||
|
||||
// make Gpu2Rtds IP register memory on FPGA accessible to GPU
|
||||
cr_assert(gpu->makeAccessibleFromPCIeOrHostRam(gpu2rtds->getRegisterMemory()));
|
||||
|
||||
auto tr = gpu->translate(gpuRam.getMemoryBlock());
|
||||
|
||||
auto dataIn = reinterpret_cast<uint32_t*>(tr.getLocalAddr(DATA_OFFSET * sizeof(uint32_t)));
|
||||
auto doorbellIn = reinterpret_cast<reg_doorbell_t*>(tr.getLocalAddr(DOORBELL_OFFSET * sizeof(uint32_t)));
|
||||
|
||||
|
||||
auto gpu2rtdsRegisters = gpu->translate(gpu2rtds->getRegisterMemory());
|
||||
|
||||
auto frameRegister = reinterpret_cast<uint32_t*>(gpu2rtdsRegisters.getLocalAddr(gpu2rtds->registerFrameOffset));
|
||||
auto controlRegister = reinterpret_cast<villas::fpga::ip::ControlRegister*>(gpu2rtdsRegisters.getLocalAddr(gpu2rtds->registerControlAddr));
|
||||
|
||||
// auto doorbellInCpu = reinterpret_cast<reg_doorbell_t*>(&gpuRam[DOORBELL_OFFSET]);
|
||||
|
||||
for(auto& ip : state.cards.front()->ips) {
|
||||
if(*ip != villas::fpga::Vlnv("acs.eonerc.rwth-aachen.de:user:rtds_axis:"))
|
||||
continue;
|
||||
|
||||
auto& rtds = dynamic_cast<villas::fpga::ip::Rtds&>(*ip);
|
||||
logger->info("Testing {}", rtds);
|
||||
|
||||
|
||||
// TEST: rtds loopback via switch, this should always work and have RTT=1
|
||||
//cr_assert(rtds.connect(rtds));
|
||||
//logger->info("loopback");
|
||||
//while(1);
|
||||
|
||||
cr_assert(rtds.connect(*rtds2gpu));
|
||||
cr_assert(gpu2rtds->connect(rtds));
|
||||
|
||||
// launch once so they are configured
|
||||
cr_assert(rtds2gpu->startOnce(gpuRam.getMemoryBlock(), SAMPLE_COUNT, DATA_OFFSET * 4, DOORBELL_OFFSET * 4));
|
||||
cr_assert(gpu2rtds->startOnce(SAMPLE_COUNT));
|
||||
|
||||
rtds2gpu->setAutoRestart(true);
|
||||
rtds2gpu->start();
|
||||
|
||||
logger->info("GPU RTT RTDS");
|
||||
|
||||
std::string dummy;
|
||||
|
||||
// logger->info("Press enter to proceed");
|
||||
// std::cin >> dummy;
|
||||
|
||||
gpu_rtds_rtt_start(dataIn, doorbellIn, frameRegister, controlRegister);
|
||||
|
||||
// while(1) {
|
||||
// cr_assert(rtds2gpu->startOnce(gpuRam.getMemoryBlock(), SAMPLE_COUNT, DATA_OFFSET * 4, DOORBELL_OFFSET * 4));
|
||||
// }
|
||||
|
||||
// for(int i = 0; i < 10000; i++) {
|
||||
// while(not doorbellInCpu->is_valid);
|
||||
// logger->debug("received data");
|
||||
// }
|
||||
|
||||
// logger->info("Press enter to cancel");
|
||||
// std::cin >> dummy;
|
||||
|
||||
while(1) {
|
||||
sleep(1);
|
||||
// logger->debug("Current sequence number: {}", doorbellInCpu->seq_nr);
|
||||
logger->debug("Still running");
|
||||
}
|
||||
|
||||
gpu_rtds_rtt_stop();
|
||||
|
||||
|
||||
|
||||
logger->info(TXT_GREEN("Passed"));
|
||||
}
|
||||
}
|
|
@ -45,7 +45,7 @@ Test(fpga, timer, .description = "Timer Counter")
|
|||
|
||||
count++;
|
||||
|
||||
auto timer = reinterpret_cast<villas::fpga::ip::Timer&>(*ip);
|
||||
auto timer = dynamic_cast<villas::fpga::ip::Timer&>(*ip);
|
||||
|
||||
logger->info("Test simple waiting");
|
||||
timer.start(timer.getFrequency() / 10);
|
||||
|
|
Loading…
Add table
Reference in a new issue