1
0
Fork 0
mirror of https://git.rwth-aachen.de/acs/public/villas/node/ synced 2025-03-09 00:00:00 +01:00

Merge branch 'feature/hls-rtds2gpu' into develop

This commit is contained in:
Steffen Vogel 2018-08-21 13:51:32 +02:00
commit 2112038d70
32 changed files with 1900 additions and 164 deletions

@ -1 +1 @@
Subproject commit dd7d75d0aab3801d65f9ff757d82f47f705514af
Subproject commit 9747c6ead6dedff943dbf22ce74e40e9b2622514

View file

@ -33,90 +33,6 @@
"hier_0_axi_dma_axi_dma_0": {
"vlnv": "xilinx.com:ip:axi_dma:7.1",
"memory-view": {
"M_AXI_SG": {
"bram_0_axi_bram_ctrl_0": {
"Mem0": {
"baseaddr": 0,
"highaddr": 8191,
"size": 8192
}
},
"hier_0_axi_dma_axi_dma_1": {
"Reg": {
"baseaddr": 8192,
"highaddr": 12287,
"size": 4096
}
},
"hier_0_axi_dma_axi_dma_0": {
"Reg": {
"baseaddr": 12288,
"highaddr": 16383,
"size": 4096
}
},
"timer_0_axi_timer_0": {
"Reg": {
"baseaddr": 16384,
"highaddr": 20479,
"size": 4096
}
},
"hier_0_axis_interconnect_0_axis_interconnect_0_xbar": {
"Reg": {
"baseaddr": 20480,
"highaddr": 24575,
"size": 4096
}
},
"hier_0_axi_fifo_mm_s_0": {
"Mem0": {
"baseaddr": 24576,
"highaddr": 28671,
"size": 4096
},
"Mem1": {
"baseaddr": 49152,
"highaddr": 57343,
"size": 8192
}
},
"pcie_0_axi_reset_0": {
"Reg": {
"baseaddr": 28672,
"highaddr": 32767,
"size": 4096
}
},
"hier_0_rtds_axis_0": {
"reg0": {
"baseaddr": 32768,
"highaddr": 36863,
"size": 4096
}
},
"hier_0_hls_dft_0": {
"Reg": {
"baseaddr": 36864,
"highaddr": 40959,
"size": 4096
}
},
"pcie_0_axi_pcie_intc_0": {
"Reg": {
"baseaddr": 45056,
"highaddr": 49151,
"size": 4096
}
},
"pcie_0_axi_pcie_0": {
"CTL0": {
"baseaddr": 268435456,
"highaddr": 536870911,
"size": 268435456
}
}
},
"M_AXI_MM2S": {
"pcie_0_axi_pcie_0": {
"BAR0": {
@ -134,17 +50,115 @@
"size": 4294967296
}
}
},
"M_AXI_SG": {
"hier_0_axi_dma_axi_dma_0": {
"Reg": {
"baseaddr": 4096,
"highaddr": 8191,
"size": 4096
}
},
"hier_0_axi_dma_axi_dma_1": {
"Reg": {
"baseaddr": 8192,
"highaddr": 12287,
"size": 4096
}
},
"hier_0_axi_fifo_mm_s_0": {
"Mem0": {
"baseaddr": 12288,
"highaddr": 16383,
"size": 4096
},
"Mem1": {
"baseaddr": 16384,
"highaddr": 24575,
"size": 8192
}
},
"pcie_0_axi_pcie_intc_0": {
"Reg": {
"baseaddr": 24576,
"highaddr": 28671,
"size": 4096
}
},
"pcie_0_axi_reset_0": {
"Reg": {
"baseaddr": 28672,
"highaddr": 32767,
"size": 4096
}
},
"timer_0_axi_timer_0": {
"Reg": {
"baseaddr": 32768,
"highaddr": 36863,
"size": 4096
}
},
"hier_0_hls_dft_0": {
"Reg": {
"baseaddr": 36864,
"highaddr": 40959,
"size": 4096
}
},
"hier_0_rtds_axis_0": {
"reg0": {
"baseaddr": 40960,
"highaddr": 45055,
"size": 4096
}
},
"hier_0_axis_interconnect_0_axis_interconnect_0_xbar": {
"Reg": {
"baseaddr": 45056,
"highaddr": 49151,
"size": 4096
}
},
"bram_0_axi_bram_ctrl_0": {
"Mem0": {
"baseaddr": 49152,
"highaddr": 57343,
"size": 8192
}
},
"hier_0_rtds2gpu_0": {
"Reg": {
"baseaddr": 57344,
"highaddr": 61439,
"size": 4096
}
},
"hier_0_gpu2rtds_0": {
"Reg": {
"baseaddr": 61440,
"highaddr": 65535,
"size": 4096
}
},
"pcie_0_axi_pcie_0": {
"CTL0": {
"baseaddr": 268435456,
"highaddr": 536870911,
"size": 268435456
}
}
}
},
"ports": [
{
"role": "master",
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:1",
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:S01_AXIS",
"name": "MM2S"
},
{
"role": "slave",
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:1",
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:M01_AXIS",
"name": "S2MM"
}
],
@ -178,12 +192,12 @@
"ports": [
{
"role": "master",
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:6",
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:S06_AXIS",
"name": "MM2S"
},
{
"role": "slave",
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:6",
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:M06_AXIS",
"name": "S2MM"
}
],
@ -197,12 +211,12 @@
"ports": [
{
"role": "master",
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:2",
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:S02_AXIS",
"name": "STR_TXD"
},
{
"role": "slave",
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:2",
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:M02_AXIS",
"name": "STR_RXD"
}
],
@ -210,43 +224,143 @@
"interrupt": "pcie_0_axi_pcie_intc_0:2"
}
},
"hier_0_axis_data_fifo_0": {
"vlnv": "xilinx.com:ip:axis_data_fifo:1.1",
"ports": [
{
"role": "master",
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:S03_AXIS",
"name": "AXIS"
},
{
"role": "slave",
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:M03_AXIS",
"name": "AXIS"
}
]
},
"hier_0_axis_data_fifo_1": {
"vlnv": "xilinx.com:ip:axis_data_fifo:1.1",
"ports": [
{
"role": "master",
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:S04_AXIS",
"name": "AXIS"
},
{
"role": "slave",
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:M04_AXIS",
"name": "AXIS"
}
]
},
"hier_0_axis_interconnect_0_axis_interconnect_0_xbar": {
"vlnv": "xilinx.com:ip:axis_switch:1.1",
"ports": [
{
"role": "slave",
"target": "hier_0_rtds_axis_0:m_axis",
"name": "S00_AXIS"
},
{
"role": "master",
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:3",
"name": "M03_AXIS"
"target": "hier_0_rtds_axis_0:s_axis",
"name": "M00_AXIS"
},
{
"role": "slave",
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:3",
"target": "hier_0_axi_dma_axi_dma_0:MM2S",
"name": "S01_AXIS"
},
{
"role": "master",
"target": "hier_0_axi_dma_axi_dma_0:S2MM",
"name": "M01_AXIS"
},
{
"role": "slave",
"target": "hier_0_axi_fifo_mm_s_0:STR_TXD",
"name": "S02_AXIS"
},
{
"role": "master",
"target": "hier_0_axi_fifo_mm_s_0:STR_RXD",
"name": "M02_AXIS"
},
{
"role": "slave",
"target": "hier_0_axis_data_fifo_0:AXIS",
"name": "S03_AXIS"
},
{
"role": "master",
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:4",
"target": "hier_0_axis_data_fifo_0:AXIS",
"name": "M03_AXIS"
},
{
"role": "slave",
"target": "hier_0_axis_data_fifo_1:AXIS",
"name": "S04_AXIS"
},
{
"role": "master",
"target": "hier_0_axis_data_fifo_1:AXIS",
"name": "M04_AXIS"
},
{
"role": "slave",
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:4",
"name": "S04_AXIS"
"target": "hier_0_hls_dft_0:output_r",
"name": "S05_AXIS"
},
{
"role": "master",
"target": "hier_0_hls_dft_0:input_r",
"name": "M05_AXIS"
},
{
"role": "slave",
"target": "hier_0_axi_dma_axi_dma_1:MM2S",
"name": "S06_AXIS"
},
{
"role": "master",
"target": "hier_0_axi_dma_axi_dma_1:S2MM",
"name": "M06_AXIS"
},
{
"role": "slave",
"target": "hier_0_gpu2rtds_0:rtds_output",
"name": "S07_AXIS"
},
{
"role": "master",
"target": "hier_0_rtds2gpu_0:rtds_input",
"name": "M07_AXIS"
}
],
"num_ports": 7
"num_ports": 8
},
"hier_0_gpu2rtds_0": {
"vlnv": "acs.eonerc.rwth-aachen.de:hls:gpu2rtds:1.0",
"ports": [
{
"role": "master",
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:S07_AXIS",
"name": "rtds_output"
}
]
},
"hier_0_hls_dft_0": {
"vlnv": "acs.eonerc.rwth-aachen.de:hls:hls_dft:1.1",
"ports": [
{
"role": "master",
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:5",
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:S05_AXIS",
"name": "output_r"
},
{
"role": "slave",
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:5",
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:M05_AXIS",
"name": "input_r"
}
],
@ -254,17 +368,38 @@
"interrupt": "pcie_0_axi_pcie_intc_0:1"
}
},
"hier_0_rtds2gpu_0": {
"vlnv": "acs.eonerc.rwth-aachen.de:hls:rtds2gpu:1.1",
"memory-view": {
"m_axi_axi_mm": {
"pcie_0_axi_pcie_0": {
"BAR0": {
"baseaddr": 0,
"highaddr": 4294967295,
"size": 4294967296
}
}
}
},
"ports": [
{
"role": "slave",
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:M07_AXIS",
"name": "rtds_input"
}
]
},
"hier_0_rtds_axis_0": {
"vlnv": "acs.eonerc.rwth-aachen.de:user:rtds_axis:1.0",
"ports": [
{
"role": "master",
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:0",
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:S00_AXIS",
"name": "m_axis"
},
{
"role": "slave",
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:0",
"target": "hier_0_axis_interconnect_0_axis_interconnect_0_xbar:M00_AXIS",
"name": "s_axis"
}
],
@ -278,11 +413,11 @@
"vlnv": "xilinx.com:ip:axi_pcie:2.8",
"memory-view": {
"M_AXI": {
"bram_0_axi_bram_ctrl_0": {
"Mem0": {
"baseaddr": 0,
"hier_0_axi_dma_axi_dma_0": {
"Reg": {
"baseaddr": 4096,
"highaddr": 8191,
"size": 8192
"size": 4096
}
},
"hier_0_axi_dma_axi_dma_1": {
@ -292,37 +427,23 @@
"size": 4096
}
},
"hier_0_axi_dma_axi_dma_0": {
"Reg": {
"hier_0_axi_fifo_mm_s_0": {
"Mem0": {
"baseaddr": 12288,
"highaddr": 16383,
"size": 4096
}
},
"timer_0_axi_timer_0": {
"Reg": {
},
"Mem1": {
"baseaddr": 16384,
"highaddr": 20479,
"size": 4096
}
},
"hier_0_axis_interconnect_0_axis_interconnect_0_xbar": {
"Reg": {
"baseaddr": 20480,
"highaddr": 24575,
"size": 4096
"size": 8192
}
},
"hier_0_axi_fifo_mm_s_0": {
"Mem0": {
"pcie_0_axi_pcie_intc_0": {
"Reg": {
"baseaddr": 24576,
"highaddr": 28671,
"size": 4096
},
"Mem1": {
"baseaddr": 49152,
"highaddr": 57343,
"size": 8192
}
},
"pcie_0_axi_reset_0": {
@ -332,8 +453,8 @@
"size": 4096
}
},
"hier_0_rtds_axis_0": {
"reg0": {
"timer_0_axi_timer_0": {
"Reg": {
"baseaddr": 32768,
"highaddr": 36863,
"size": 4096
@ -346,13 +467,41 @@
"size": 4096
}
},
"pcie_0_axi_pcie_intc_0": {
"hier_0_rtds_axis_0": {
"reg0": {
"baseaddr": 40960,
"highaddr": 45055,
"size": 4096
}
},
"hier_0_axis_interconnect_0_axis_interconnect_0_xbar": {
"Reg": {
"baseaddr": 45056,
"highaddr": 49151,
"size": 4096
}
},
"bram_0_axi_bram_ctrl_0": {
"Mem0": {
"baseaddr": 49152,
"highaddr": 57343,
"size": 8192
}
},
"hier_0_rtds2gpu_0": {
"Reg": {
"baseaddr": 57344,
"highaddr": 61439,
"size": 4096
}
},
"hier_0_gpu2rtds_0": {
"Reg": {
"baseaddr": 61440,
"highaddr": 65535,
"size": 4096
}
},
"pcie_0_axi_pcie_0": {
"CTL0": {
"baseaddr": 268435456,

View file

@ -71,8 +71,8 @@ public:
const std::string& port,
bool isMaster)
{
for(auto& [vertexId, vertex] : vertices) {
(void) vertexId;
for(auto& vertexEntry : vertices) {
auto& vertex = vertexEntry.second;
if(vertex->nodeName == node and vertex->portName == port and vertex->isMaster == isMaster)
return vertex;
}
@ -86,7 +86,7 @@ public:
};
class IpNode : public IpCore {
class IpNode : public virtual IpCore {
public:
friend class IpNodeFactory;

View file

@ -43,7 +43,10 @@ public:
bool init();
bool reset();
// memory-mapped to stream (MM2S)
bool write(const MemoryBlock& mem, size_t len);
// stream to memory-mapped (S2MM)
bool read(const MemoryBlock& mem, size_t len);
size_t writeComplete()

View file

@ -0,0 +1,87 @@
#pragma once
#include <villas/memory.hpp>
#include <villas/fpga/ip_node.hpp>
#include <villas/fpga/ips/hls.hpp>
#include <villas/fpga/ips/rtds2gpu/register_types.hpp>
#include <villas/fpga/ips/rtds2gpu/xgpu2rtds_hw.h>
namespace villas {
namespace fpga {
namespace ip {
class Gpu2Rtds : public IpNode, public Hls
{
public:
friend class Gpu2RtdsFactory;
bool init();
void dump(spdlog::level::level_enum logLevel = spdlog::level::info);
bool startOnce(size_t frameSize);
size_t getMaxFrameSize();
const StreamVertex&
getDefaultMasterPort() const
{ return getMasterPort(rtdsOutputStreamPort); }
MemoryBlock
getRegisterMemory() const
{ return MemoryBlock(0, 1 << 10, getAddressSpaceId(registerMemory)); }
private:
bool updateStatus();
public:
static constexpr const char* rtdsOutputStreamPort = "rtds_output";
struct StatusControlRegister { uint32_t
status_ap_vld : 1,
_res : 31;
};
using StatusRegister = axilite_reg_status_t;
static constexpr uintptr_t registerStatusOffset = XGPU2RTDS_CTRL_ADDR_STATUS_DATA;
static constexpr uintptr_t registerStatusCtrlOffset = XGPU2RTDS_CTRL_ADDR_STATUS_CTRL;
static constexpr uintptr_t registerFrameSizeOffset = XGPU2RTDS_CTRL_ADDR_FRAME_SIZE_DATA;
static constexpr uintptr_t registerFrameOffset = XGPU2RTDS_CTRL_ADDR_FRAME_BASE;
static constexpr uintptr_t registerFrameLength = XGPU2RTDS_CTRL_DEPTH_FRAME;
public:
StatusRegister* registerStatus;
StatusControlRegister* registerStatusCtrl;
uint32_t* registerFrameSize;
uint32_t* registerFrames;
size_t maxFrameSize;
bool started;
};
class Gpu2RtdsFactory : public IpNodeFactory {
public:
Gpu2RtdsFactory();
IpCore* create()
{ return new Gpu2Rtds; }
std::string
getName() const
{ return "Gpu2Rtds"; }
std::string
getDescription() const
{ return "HLS Gpu2Rtds IP"; }
Vlnv getCompatibleVlnv() const
{ return {"acs.eonerc.rwth-aachen.de:hls:gpu2rtds:"}; }
};
} // namespace ip
} // namespace fpga
} // namespace villas

View file

@ -0,0 +1,137 @@
#pragma once
#include <villas/memory.hpp>
#include <villas/fpga/ip_node.hpp>
namespace villas {
namespace fpga {
namespace ip {
class Hls : public virtual IpCore
{
public:
virtual bool init()
{
auto& registers = addressTranslations.at(registerMemory);
controlRegister = reinterpret_cast<ControlRegister*>(registers.getLocalAddr(registerControlAddr));
globalIntRegister = reinterpret_cast<GlobalIntRegister*>(registers.getLocalAddr(registerGlobalIntEnableAddr));
ipIntEnableRegister = reinterpret_cast<IpIntRegister*>(registers.getLocalAddr(registerIntEnableAddr));
ipIntStatusRegister = reinterpret_cast<IpIntRegister*>(registers.getLocalAddr(registerIntStatusAddr));
setAutoRestart(false);
setGlobalInterrupt(false);
return true;
}
bool start()
{
controlRegister->ap_start = true;
running = true;
return true;
}
virtual bool isFinished()
{ updateRunningStatus(); return !running; }
bool isRunning()
{ updateRunningStatus(); return running; }
void setAutoRestart(bool enabled) const
{ controlRegister->auto_restart = enabled; }
void setGlobalInterrupt(bool enabled) const
{ globalIntRegister->globalInterruptEnable = enabled; }
void setReadyInterrupt(bool enabled) const
{ ipIntEnableRegister->ap_ready = enabled; }
void setDoneInterrupt(bool enabled) const
{ ipIntEnableRegister->ap_done = enabled; }
bool isIdleBit() const
{ return controlRegister->ap_idle; }
bool isReadyBit() const
{ return controlRegister->ap_ready; }
/// Warning: the corresponding bit is cleared on read of the register, so if
/// not used correctly, this function may never return true. Only use this
/// function if you really know what you are doing!
bool isDoneBit() const
{ return controlRegister->ap_done; }
bool isAutoRestartBit() const
{ return controlRegister->auto_restart; }
private:
void updateRunningStatus()
{
if(running and isIdleBit())
running = false;
}
protected:
/* Memory block handling */
static constexpr const char* registerMemory = "Reg";
virtual std::list<MemoryBlockName> getMemoryBlocks() const
{ return { registerMemory }; }
public:
/* Register definitions */
static constexpr uintptr_t registerControlAddr = 0x00;
static constexpr uintptr_t registerGlobalIntEnableAddr = 0x04;
static constexpr uintptr_t registerIntEnableAddr = 0x08;
static constexpr uintptr_t registerIntStatusAddr = 0x0c;
union ControlRegister {
uint32_t value;
struct { uint32_t
ap_start : 1,
ap_done : 1,
ap_idle : 1,
ap_ready : 1,
_res1 : 3,
auto_restart : 1,
_res2 : 24;
};
};
struct GlobalIntRegister { uint32_t
globalInterruptEnable : 1,
_res : 31;
};
struct IpIntRegister { uint32_t
ap_done : 1,
ap_ready : 1,
_res : 30;
};
protected:
ControlRegister* controlRegister;
GlobalIntRegister* globalIntRegister;
IpIntRegister* ipIntEnableRegister;
IpIntRegister* ipIntStatusRegister;
bool running;
};
} // namespace ip
} // namespace fpga
} // namespace villas

View file

@ -44,6 +44,14 @@ public:
std::list<std::string> getMemoryBlocks() const
{ return { registerMemory }; }
const StreamVertex&
getDefaultSlavePort() const
{ return getSlavePort(slavePort); }
const StreamVertex&
getDefaultMasterPort() const
{ return getMasterPort(masterPort); }
private:
static constexpr const char registerMemory[] = "reg0";
static constexpr const char* irqTs = "irq_ts";

View file

@ -0,0 +1,96 @@
#pragma once
#include <villas/memory.hpp>
#include <villas/fpga/ip_node.hpp>
#include <villas/fpga/ips/hls.hpp>
#include "rtds2gpu/xrtds2gpu.h"
#include "rtds2gpu/register_types.hpp"
namespace villas {
namespace fpga {
namespace ip {
union ControlRegister {
uint32_t value;
struct { uint32_t
ap_start : 1,
ap_done : 1,
ap_idle : 1,
ap_ready : 1,
_res1 : 3,
auto_restart : 1,
_res2 : 24;
};
};
class Rtds2Gpu : public IpNode, public Hls
{
public:
friend class Rtds2GpuFactory;
bool init();
void dump(spdlog::level::level_enum logLevel = spdlog::level::info);
bool startOnce(const MemoryBlock& mem, size_t frameSize, size_t dataOffset, size_t doorbellOffset);
size_t getMaxFrameSize();
void dumpDoorbell(uint32_t doorbellRegister) const;
bool doorbellIsValid(const uint32_t& doorbellRegister) const
{ return reinterpret_cast<const reg_doorbell_t&>(doorbellRegister).is_valid; }
void doorbellReset(uint32_t& doorbellRegister) const
{ doorbellRegister = 0; }
static constexpr const char* registerMemory = "Reg";
std::list<MemoryBlockName> getMemoryBlocks() const
{ return { registerMemory }; }
const StreamVertex&
getDefaultSlavePort() const
{ return getSlavePort(rtdsInputStreamPort); }
private:
bool updateStatus();
private:
static constexpr const char* axiInterface = "m_axi_axi_mm";
static constexpr const char* rtdsInputStreamPort = "rtds_input";
XRtds2gpu xInstance;
axilite_reg_status_t status;
size_t maxFrameSize;
bool started;
};
class Rtds2GpuFactory : public IpNodeFactory {
public:
Rtds2GpuFactory();
IpCore* create()
{ return new Rtds2Gpu; }
std::string
getName() const
{ return "Rtds2Gpu"; }
std::string
getDescription() const
{ return "HLS RTDS2GPU IP"; }
Vlnv getCompatibleVlnv() const
{ return {"acs.eonerc.rwth-aachen.de:hls:rtds2gpu:"}; }
};
} // namespace ip
} // namespace fpga
} // namespace villas

View file

@ -0,0 +1,57 @@
#ifndef REGISTER_TYPES_H
#define REGISTER_TYPES_H
#include <stdint.h>
#include <cstddef>
#include <cstdint>
union axilite_reg_status_t {
uint32_t value;
struct {
uint32_t
last_seq_nr : 16,
last_count : 6,
max_frame_size : 6,
invalid_frame_size : 1,
frame_too_short : 1,
frame_too_long : 1,
is_running : 1;
};
};
union reg_doorbell_t {
uint32_t value;
struct {
uint32_t
seq_nr : 16,
count : 6,
is_valid : 1;
};
constexpr reg_doorbell_t() : value(0) {}
};
template<size_t N, typename T = uint32_t>
struct Rtds2GpuMemoryBuffer {
// this type is only for memory interpretation, it makes no sense to create
// an instance so it's forbidden
Rtds2GpuMemoryBuffer() = delete;
// T can be a more complex type that wraps multiple values
static constexpr size_t rawValueCount = N * (sizeof(T) / 4);
// As of C++14, offsetof() is not working for non-standard layout types (i.e.
// composed of non-POD members). This might work in C++17 though.
// More info: https://gist.github.com/graphitemaster/494f21190bb2c63c5516
//static constexpr size_t doorbellOffset = offsetof(Rtds2GpuMemoryBuffer, doorbell);
//static constexpr size_t dataOffset = offsetof(Rtds2GpuMemoryBuffer, data);
// HACK: This might break horribly, let's just hope C++17 will be there soon
static constexpr size_t dataOffset = 0;
static constexpr size_t doorbellOffset = N * sizeof(Rtds2GpuMemoryBuffer::data);
T data[N];
reg_doorbell_t doorbell;
};
#endif // REGISTER_TYPES_H

View file

@ -0,0 +1,53 @@
// ==============================================================
// File generated by Vivado(TM) HLS - High-Level Synthesis from C, C++ and SystemC
// Version: 2017.3
// Copyright (C) 1986-2017 Xilinx, Inc. All Rights Reserved.
//
// ==============================================================
// CTRL
// 0x00 : Control signals
// bit 0 - ap_start (Read/Write/COH)
// bit 1 - ap_done (Read/COR)
// bit 2 - ap_idle (Read)
// bit 3 - ap_ready (Read)
// bit 7 - auto_restart (Read/Write)
// others - reserved
// 0x04 : Global Interrupt Enable Register
// bit 0 - Global Interrupt Enable (Read/Write)
// others - reserved
// 0x08 : IP Interrupt Enable Register (Read/Write)
// bit 0 - Channel 0 (ap_done)
// bit 1 - Channel 1 (ap_ready)
// others - reserved
// 0x0c : IP Interrupt Status Register (Read/TOW)
// bit 0 - Channel 0 (ap_done)
// bit 1 - Channel 1 (ap_ready)
// others - reserved
// 0x10 : Data signal of frame_size
// bit 31~0 - frame_size[31:0] (Read/Write)
// 0x14 : reserved
// 0x80 : Data signal of status
// bit 31~0 - status[31:0] (Read)
// 0x84 : Control signal of status
// bit 0 - status_ap_vld (Read/COR)
// others - reserved
// 0x40 ~
// 0x7f : Memory 'frame' (16 * 32b)
// Word n : bit [31:0] - frame[n]
// (SC = Self Clear, COR = Clear on Read, TOW = Toggle on Write, COH = Clear on Handshake)
#define XGPU2RTDS_CTRL_ADDR_AP_CTRL 0x00
#define XGPU2RTDS_CTRL_ADDR_GIE 0x04
#define XGPU2RTDS_CTRL_ADDR_IER 0x08
#define XGPU2RTDS_CTRL_ADDR_ISR 0x0c
#define XGPU2RTDS_CTRL_ADDR_FRAME_SIZE_DATA 0x10
#define XGPU2RTDS_CTRL_BITS_FRAME_SIZE_DATA 32
#define XGPU2RTDS_CTRL_ADDR_STATUS_DATA 0x80
#define XGPU2RTDS_CTRL_BITS_STATUS_DATA 32
#define XGPU2RTDS_CTRL_ADDR_STATUS_CTRL 0x84
#define XGPU2RTDS_CTRL_ADDR_FRAME_BASE 0x40
#define XGPU2RTDS_CTRL_ADDR_FRAME_HIGH 0x7f
#define XGPU2RTDS_CTRL_WIDTH_FRAME 32
#define XGPU2RTDS_CTRL_DEPTH_FRAME 16

View file

@ -0,0 +1,113 @@
// ==============================================================
// File generated by Vivado(TM) HLS - High-Level Synthesis from C, C++ and SystemC
// Version: 2017.3
// Copyright (C) 1986-2017 Xilinx, Inc. All Rights Reserved.
//
// ==============================================================
#ifndef XRTDS2GPU_H
#define XRTDS2GPU_H
#ifdef __cplusplus
extern "C" {
#endif
/***************************** Include Files *********************************/
#ifndef __linux__
#include "xil_types.h"
#include "xil_assert.h"
#include "xstatus.h"
#include "xil_io.h"
#else
#include <stdint.h>
#include <assert.h>
#include <dirent.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
#include <stddef.h>
#endif
#include "xrtds2gpu_hw.h"
/**************************** Type Definitions ******************************/
#ifdef __linux__
typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
#else
typedef struct {
u16 DeviceId;
u32 Ctrl_BaseAddress;
} XRtds2gpu_Config;
#endif
typedef struct {
u32 Ctrl_BaseAddress;
u32 IsReady;
} XRtds2gpu;
/***************** Macros (Inline Functions) Definitions *********************/
#ifndef __linux__
#define XRtds2gpu_WriteReg(BaseAddress, RegOffset, Data) \
Xil_Out32((BaseAddress) + (RegOffset), (u32)(Data))
#define XRtds2gpu_ReadReg(BaseAddress, RegOffset) \
Xil_In32((BaseAddress) + (RegOffset))
#else
#define XRtds2gpu_WriteReg(BaseAddress, RegOffset, Data) \
*(volatile u32*)((BaseAddress) + (RegOffset)) = (u32)(Data)
#define XRtds2gpu_ReadReg(BaseAddress, RegOffset) \
*(volatile u32*)((BaseAddress) + (RegOffset))
#define Xil_AssertVoid(expr) assert(expr)
#define Xil_AssertNonvoid(expr) assert(expr)
#define XST_SUCCESS 0
#define XST_DEVICE_NOT_FOUND 2
#define XST_OPEN_DEVICE_FAILED 3
#define XIL_COMPONENT_IS_READY 1
#endif
/************************** Function Prototypes *****************************/
#ifndef __linux__
int XRtds2gpu_Initialize(XRtds2gpu *InstancePtr, u16 DeviceId);
XRtds2gpu_Config* XRtds2gpu_LookupConfig(u16 DeviceId);
int XRtds2gpu_CfgInitialize(XRtds2gpu *InstancePtr, XRtds2gpu_Config *ConfigPtr);
#else
int XRtds2gpu_Initialize(XRtds2gpu *InstancePtr, const char* InstanceName);
int XRtds2gpu_Release(XRtds2gpu *InstancePtr);
#endif
void XRtds2gpu_Start(XRtds2gpu *InstancePtr);
u32 XRtds2gpu_IsDone(XRtds2gpu *InstancePtr);
u32 XRtds2gpu_IsIdle(XRtds2gpu *InstancePtr);
u32 XRtds2gpu_IsReady(XRtds2gpu *InstancePtr);
void XRtds2gpu_EnableAutoRestart(XRtds2gpu *InstancePtr);
void XRtds2gpu_DisableAutoRestart(XRtds2gpu *InstancePtr);
void XRtds2gpu_Set_baseaddr(XRtds2gpu *InstancePtr, u32 Data);
u32 XRtds2gpu_Get_baseaddr(XRtds2gpu *InstancePtr);
void XRtds2gpu_Set_data_offset(XRtds2gpu *InstancePtr, u32 Data);
u32 XRtds2gpu_Get_data_offset(XRtds2gpu *InstancePtr);
void XRtds2gpu_Set_doorbell_offset(XRtds2gpu *InstancePtr, u32 Data);
u32 XRtds2gpu_Get_doorbell_offset(XRtds2gpu *InstancePtr);
void XRtds2gpu_Set_frame_size(XRtds2gpu *InstancePtr, u32 Data);
u32 XRtds2gpu_Get_frame_size(XRtds2gpu *InstancePtr);
u32 XRtds2gpu_Get_status(XRtds2gpu *InstancePtr);
u32 XRtds2gpu_Get_status_vld(XRtds2gpu *InstancePtr);
void XRtds2gpu_InterruptGlobalEnable(XRtds2gpu *InstancePtr);
void XRtds2gpu_InterruptGlobalDisable(XRtds2gpu *InstancePtr);
void XRtds2gpu_InterruptEnable(XRtds2gpu *InstancePtr, u32 Mask);
void XRtds2gpu_InterruptDisable(XRtds2gpu *InstancePtr, u32 Mask);
void XRtds2gpu_InterruptClear(XRtds2gpu *InstancePtr, u32 Mask);
u32 XRtds2gpu_InterruptGetEnabled(XRtds2gpu *InstancePtr);
u32 XRtds2gpu_InterruptGetStatus(XRtds2gpu *InstancePtr);
#ifdef __cplusplus
}
#endif
#endif

View file

@ -0,0 +1,61 @@
// ==============================================================
// File generated by Vivado(TM) HLS - High-Level Synthesis from C, C++ and SystemC
// Version: 2017.3
// Copyright (C) 1986-2017 Xilinx, Inc. All Rights Reserved.
//
// ==============================================================
// CTRL
// 0x00 : Control signals
// bit 0 - ap_start (Read/Write/COH)
// bit 1 - ap_done (Read/COR)
// bit 2 - ap_idle (Read)
// bit 3 - ap_ready (Read)
// bit 7 - auto_restart (Read/Write)
// others - reserved
// 0x04 : Global Interrupt Enable Register
// bit 0 - Global Interrupt Enable (Read/Write)
// others - reserved
// 0x08 : IP Interrupt Enable Register (Read/Write)
// bit 0 - Channel 0 (ap_done)
// bit 1 - Channel 1 (ap_ready)
// others - reserved
// 0x0c : IP Interrupt Status Register (Read/TOW)
// bit 0 - Channel 0 (ap_done)
// bit 1 - Channel 1 (ap_ready)
// others - reserved
// 0x10 : Data signal of baseaddr
// bit 31~0 - baseaddr[31:0] (Read/Write)
// 0x14 : reserved
// 0x18 : Data signal of data_offset
// bit 31~0 - data_offset[31:0] (Read/Write)
// 0x1c : reserved
// 0x20 : Data signal of doorbell_offset
// bit 31~0 - doorbell_offset[31:0] (Read/Write)
// 0x24 : reserved
// 0x28 : Data signal of frame_size
// bit 31~0 - frame_size[31:0] (Read/Write)
// 0x2c : reserved
// 0x30 : Data signal of status
// bit 31~0 - status[31:0] (Read)
// 0x34 : Control signal of status
// bit 0 - status_ap_vld (Read/COR)
// others - reserved
// (SC = Self Clear, COR = Clear on Read, TOW = Toggle on Write, COH = Clear on Handshake)
#define XRTDS2GPU_CTRL_ADDR_AP_CTRL 0x00
#define XRTDS2GPU_CTRL_ADDR_GIE 0x04
#define XRTDS2GPU_CTRL_ADDR_IER 0x08
#define XRTDS2GPU_CTRL_ADDR_ISR 0x0c
#define XRTDS2GPU_CTRL_ADDR_BASEADDR_DATA 0x10
#define XRTDS2GPU_CTRL_BITS_BASEADDR_DATA 32
#define XRTDS2GPU_CTRL_ADDR_DATA_OFFSET_DATA 0x18
#define XRTDS2GPU_CTRL_BITS_DATA_OFFSET_DATA 32
#define XRTDS2GPU_CTRL_ADDR_DOORBELL_OFFSET_DATA 0x20
#define XRTDS2GPU_CTRL_BITS_DOORBELL_OFFSET_DATA 32
#define XRTDS2GPU_CTRL_ADDR_FRAME_SIZE_DATA 0x28
#define XRTDS2GPU_CTRL_BITS_FRAME_SIZE_DATA 32
#define XRTDS2GPU_CTRL_ADDR_STATUS_DATA 0x30
#define XRTDS2GPU_CTRL_BITS_STATUS_DATA 32
#define XRTDS2GPU_CTRL_ADDR_STATUS_CTRL 0x34

View file

@ -34,8 +34,16 @@ set(SOURCES
ips/dma.cpp
ips/bram.cpp
ips/rtds.cpp
ips/rtds2gpu/rtds2gpu.cpp
ips/rtds2gpu/xrtds2gpu.c
ips/rtds2gpu/gpu2rtds.cpp
)
# we don't have much influence on drivers generated by Xilinx, so ignore warnings
set_source_files_properties(ips/rtds2gpu/xrtds2gpu.c
PROPERTIES COMPILE_FLAGS -Wno-int-to-pointer-cast)
include(FindPkgConfig)
pkg_check_modules(JANSSON jansson)

View file

@ -374,6 +374,13 @@ void Gpu::memcpyKernel(const MemoryBlock& src, const MemoryBlock& dst, size_t si
cudaDeviceSynchronize();
}
MemoryTranslation
Gpu::translate(const MemoryBlock& dst)
{
auto& mm = MemoryManager::get();
return mm.getTranslation(masterPciEAddrSpaceId, dst.getAddrSpaceId());
}
std::unique_ptr<villas::MemoryBlock, villas::MemoryBlock::deallocator_fn>
GpuAllocator::allocateBlock(size_t size)
@ -381,29 +388,53 @@ GpuAllocator::allocateBlock(size_t size)
cudaSetDevice(gpu.gpuId);
void* addr;
if(cudaSuccess != cudaMalloc(&addr, size)) {
logger->error("cudaMalloc(..., size={}) failed", size);
throw std::bad_alloc();
}
auto& mm = MemoryManager::get();
// assemble name for this block
std::stringstream name;
name << std::showbase << std::hex << reinterpret_cast<uintptr_t>(addr);
// search for an existing chunk that has enough free memory
auto chunk = std::find_if(chunks.begin(), chunks.end(), [&](const auto& chunk) {
return chunk->getAvailableMemory() >= size;
});
auto blockName = mm.getSlaveAddrSpaceName(getName(), name.str());
auto blockAddrSpaceId = mm.getOrCreateAddressSpace(blockName);
const auto localAddr = reinterpret_cast<uintptr_t>(addr);
std::unique_ptr<MemoryBlock, MemoryBlock::deallocator_fn>
mem(new MemoryBlock(localAddr, size, blockAddrSpaceId), this->free);
if(chunk != chunks.end()) {
logger->debug("Found existing chunk that can host the requested block");
insertMemoryBlock(*mem);
return (*chunk)->allocateBlock(size);
gpu.makeAccessibleToPCIeAndVA(*mem);
} else {
// allocate a new chunk
return mem;
// rounded-up multiple of GPU page size
const size_t chunkSize = size - (size & (GpuPageSize - 1)) + GpuPageSize;
logger->debug("Allocate new chunk of {:#x} bytes", chunkSize);
if(cudaSuccess != cudaMalloc(&addr, chunkSize)) {
logger->error("cudaMalloc(..., size={}) failed", chunkSize);
throw std::bad_alloc();
}
// assemble name for this block
std::stringstream name;
name << std::showbase << std::hex << reinterpret_cast<uintptr_t>(addr);
auto blockName = mm.getSlaveAddrSpaceName(getName(), name.str());
auto blockAddrSpaceId = mm.getOrCreateAddressSpace(blockName);
const auto localAddr = reinterpret_cast<uintptr_t>(addr);
std::unique_ptr<MemoryBlock, MemoryBlock::deallocator_fn>
mem(new MemoryBlock(localAddr, chunkSize, blockAddrSpaceId), this->free);
insertMemoryBlock(*mem);
// already make accessible to CPU
gpu.makeAccessibleToPCIeAndVA(*mem);
// create a new allocator to manage the chunk and push to chunk list
chunks.push_front(std::make_unique<LinearAllocator>(std::move(mem)));
// call again, this time there's a large enough chunk
return allocateBlock(size);
}
}

View file

@ -58,6 +58,9 @@ public:
void memcpyKernel(const MemoryBlock& src, const MemoryBlock& dst, size_t size);
MemoryTranslation
translate(const MemoryBlock& dst);
private:
bool registerIoMemory(const MemoryBlock& mem);
bool registerHostMemory(const MemoryBlock& mem);
@ -81,6 +84,8 @@ private:
class GpuAllocator : public BaseAllocator<GpuAllocator> {
public:
static constexpr size_t GpuPageSize = 64UL << 10;
GpuAllocator(Gpu& gpu);
std::string getName() const;
@ -90,6 +95,8 @@ public:
private:
Gpu& gpu;
// TODO: replace by multimap (key is available memory)
std::list<std::unique_ptr<LinearAllocator>> chunks;
};
class GpuFactory : public Plugin {

View file

@ -41,7 +41,7 @@ IpNode::streamGraph;
bool
IpNodeFactory::configureJson(IpCore& ip, json_t* json_ip)
{
auto& ipNode = reinterpret_cast<IpNode&>(ip);
auto& ipNode = dynamic_cast<IpNode&>(ip);
auto logger = getLogger();
json_t* json_ports = json_object_get(json_ip, "ports");
@ -216,7 +216,7 @@ IpNode::connectLoopback()
logger->debug("switch at: {}", portMaster->nodeName);
// TODO: verify this is really a switch!
auto axiStreamSwitch = reinterpret_cast<ip::AxiStreamSwitch*>(
auto axiStreamSwitch = dynamic_cast<ip::AxiStreamSwitch*>(
card->lookupIp(portMaster->nodeName));
if(axiStreamSwitch == nullptr) {

View file

@ -31,7 +31,7 @@ static BramFactory factory;
bool
BramFactory::configureJson(IpCore& ip, json_t* json_ip)
{
auto& bram = reinterpret_cast<Bram&>(ip);
auto& bram = dynamic_cast<Bram&>(ip);
if(json_unpack(json_ip, "{ s: i }", "size", &bram.size) != 0) {
getLogger()->error("Cannot parse 'size'");

View file

@ -165,7 +165,7 @@ Dma::write(const MemoryBlock& mem, size_t len)
mem.getAddrSpaceId());
const void* buf = reinterpret_cast<void*>(translation.getLocalAddr(0));
logger->debug("Write to address: {:p}", buf);
logger->debug("Write to stream from address {:p}", buf);
return hasScatterGather() ? writeSG(buf, len) : writeSimple(buf, len);
}
@ -180,7 +180,7 @@ Dma::read(const MemoryBlock& mem, size_t len)
mem.getAddrSpaceId());
void* buf = reinterpret_cast<void*>(translation.getLocalAddr(0));
logger->debug("Read from address: {:p}", buf);
logger->debug("Read from stream and write to address {:p}", buf);
return hasScatterGather() ? readSG(buf, len) : readSimple(buf, len);
}

View file

@ -125,7 +125,7 @@ bool
AxiPciExpressBridgeFactory::configureJson(IpCore& ip, json_t* json_ip)
{
auto logger = getLogger();
auto& pcie = reinterpret_cast<AxiPciExpressBridge&>(ip);
auto& pcie = dynamic_cast<AxiPciExpressBridge&>(ip);
for(auto barType : std::list<std::string>{"axi_bars", "pcie_bars"}) {
json_t* json_bars = json_object_get(json_ip, barType.c_str());

View file

@ -0,0 +1,142 @@
#include <unistd.h>
#include <cstring>
#include <villas/memory_manager.hpp>
#include <villas/fpga/ips/gpu2rtds.hpp>
#include "log.hpp"
namespace villas {
namespace fpga {
namespace ip {
static Gpu2RtdsFactory factory;
bool Gpu2Rtds::init()
{
Hls::init();
auto& registers = addressTranslations.at(registerMemory);
registerStatus = reinterpret_cast<StatusRegister*>(registers.getLocalAddr(registerStatusOffset));
registerStatusCtrl = reinterpret_cast<StatusControlRegister*>(registers.getLocalAddr(registerStatusCtrlOffset));
registerFrameSize = reinterpret_cast<uint32_t*>(registers.getLocalAddr(registerFrameSizeOffset));
registerFrames = reinterpret_cast<uint32_t*>(registers.getLocalAddr(registerFrameOffset));
maxFrameSize = getMaxFrameSize();
logger->info("Max. frame size supported: {}", maxFrameSize);
return true;
}
bool
Gpu2Rtds::startOnce(size_t frameSize)
{
*registerFrameSize = frameSize;
start();
return true;
}
void Gpu2Rtds::dump(spdlog::level::level_enum logLevel)
{
const auto frame_size = *registerFrameSize;
auto status = *registerStatus;
logger->log(logLevel, "Gpu2Rtds registers:");
logger->log(logLevel, " Frame size (words): {:#x}", frame_size);
logger->log(logLevel, " Status: {:#x}", status.value);
logger->log(logLevel, " Running: {}", (status.is_running ? "yes" : "no"));
logger->log(logLevel, " Frame too short: {}", (status.frame_too_short ? "yes" : "no"));
logger->log(logLevel, " Frame too long: {}", (status.frame_too_long ? "yes" : "no"));
logger->log(logLevel, " Frame size invalid: {}", (status.invalid_frame_size ? "yes" : "no"));
logger->log(logLevel, " Last count: {}", status.last_count);
logger->log(logLevel, " Last seq. number: {}", status.last_seq_nr);
logger->log(logLevel, " Max. frame size: {}", status.max_frame_size);
}
//bool Gpu2Rtds::startOnce(const MemoryBlock& mem, size_t frameSize, size_t dataOffset, size_t doorbellOffset)
//{
// auto& mm = MemoryManager::get();
// if(frameSize > maxFrameSize) {
// logger->error("Requested frame size of {} exceeds max. frame size of {}",
// frameSize, maxFrameSize);
// return false;
// }
// auto translationFromIp = mm.getTranslation(
// getMasterAddrSpaceByInterface(axiInterface),
// mem.getAddrSpaceId());
// // set address of memory block in HLS IP
// XGpu2Rtds_Set_baseaddr(&xInstance, translationFromIp.getLocalAddr(0));
// XGpu2Rtds_Set_doorbell_offset(&xInstance, doorbellOffset);
// XGpu2Rtds_Set_data_offset(&xInstance, dataOffset);
// XGpu2Rtds_Set_frame_size(&xInstance, frameSize);
// // prepare memory with all zeroes
// auto translationFromProcess = mm.getTranslationFromProcess(mem.getAddrSpaceId());
// auto memory = reinterpret_cast<void*>(translationFromProcess.getLocalAddr(0));
// memset(memory, 0, mem.getSize());
// // start IP
// return start();
//}
//bool
//Gpu2Rtds::updateStatus()
//{
// if(not XGpu2Rtds_Get_status_vld(&xInstance))
// return false;
// status.value = XGpu2Rtds_Get_status(&xInstance);
// return true;
//}
size_t
Gpu2Rtds::getMaxFrameSize()
{
*registerFrameSize = 0;
start();
while(not isFinished());
while(not registerStatusCtrl->status_ap_vld);
axilite_reg_status_t status = *registerStatus;
// logger->debug("(*registerStatus).max_frame_size: {}", (*registerStatus).max_frame_size);
// logger->debug("status.max_frame_size: {}", status.max_frame_size);
// assert(status.max_frame_size == (*registerStatus).max_frame_size);
return status.max_frame_size;
}
//void
//Gpu2Rtds::dumpDoorbell(uint32_t doorbellRegister) const
//{
// auto& doorbell = reinterpret_cast<reg_doorbell_t&>(doorbellRegister);
// logger->info("Doorbell register: {:#08x}", doorbell.value);
// logger->info(" Valid: {}", (doorbell.is_valid ? "yes" : "no"));
// logger->info(" Count: {}", doorbell.count);
// logger->info(" Seq. number: {}", doorbell.seq_nr);
//}
Gpu2RtdsFactory::Gpu2RtdsFactory() :
IpNodeFactory(getName())
{
}
} // namespace ip
} // namespace fpga
} // namespace villas

View file

@ -0,0 +1,131 @@
#include <unistd.h>
#include <cstring>
#include <villas/memory_manager.hpp>
#include <villas/fpga/ips/rtds2gpu.hpp>
#include "log.hpp"
namespace villas {
namespace fpga {
namespace ip {
static Rtds2GpuFactory factory;
bool Rtds2Gpu::init()
{
Hls::init();
xInstance.IsReady = XIL_COMPONENT_IS_READY;
xInstance.Ctrl_BaseAddress = getBaseAddr(registerMemory);
status.value = 0;
started = false;
// maxFrameSize = getMaxFrameSize();
maxFrameSize = 16;
logger->info("Max. frame size supported: {}", maxFrameSize);
return true;
}
void Rtds2Gpu::dump(spdlog::level::level_enum logLevel)
{
const auto baseaddr = XRtds2gpu_Get_baseaddr(&xInstance);
const auto data_offset = XRtds2gpu_Get_data_offset(&xInstance);
const auto doorbell_offset = XRtds2gpu_Get_doorbell_offset(&xInstance);
const auto frame_size = XRtds2gpu_Get_frame_size(&xInstance);
logger->log(logLevel, "Rtds2Gpu registers (IP base {:#x}):", xInstance.Ctrl_BaseAddress);
logger->log(logLevel, " Base address (bytes): {:#x}", baseaddr);
logger->log(logLevel, " Doorbell offset (bytes): {:#x}", doorbell_offset);
logger->log(logLevel, " Data offset (bytes): {:#x}", data_offset);
logger->log(logLevel, " Frame size (words): {:#x}", frame_size);
logger->log(logLevel, " Status: {:#x}", status.value);
logger->log(logLevel, " Running: {}", (status.is_running ? "yes" : "no"));
logger->log(logLevel, " Frame too short: {}", (status.frame_too_short ? "yes" : "no"));
logger->log(logLevel, " Frame too long: {}", (status.frame_too_long ? "yes" : "no"));
logger->log(logLevel, " Frame size invalid: {}", (status.invalid_frame_size ? "yes" : "no"));
logger->log(logLevel, " Last count: {}", status.last_count);
logger->log(logLevel, " Last seq. number: {}", status.last_seq_nr);
logger->log(logLevel, " Max. frame size: {}", status.max_frame_size);
}
bool Rtds2Gpu::startOnce(const MemoryBlock& mem, size_t frameSize, size_t dataOffset, size_t doorbellOffset)
{
auto& mm = MemoryManager::get();
if(frameSize > maxFrameSize) {
logger->error("Requested frame size of {} exceeds max. frame size of {}",
frameSize, maxFrameSize);
return false;
}
auto translationFromIp = mm.getTranslation(
getMasterAddrSpaceByInterface(axiInterface),
mem.getAddrSpaceId());
// set address of memory block in HLS IP
XRtds2gpu_Set_baseaddr(&xInstance, translationFromIp.getLocalAddr(0));
XRtds2gpu_Set_doorbell_offset(&xInstance, doorbellOffset);
XRtds2gpu_Set_data_offset(&xInstance, dataOffset);
XRtds2gpu_Set_frame_size(&xInstance, frameSize);
// prepare memory with all zeroes
auto translationFromProcess = mm.getTranslationFromProcess(mem.getAddrSpaceId());
auto memory = reinterpret_cast<void*>(translationFromProcess.getLocalAddr(0));
memset(memory, 0, mem.getSize());
// start IP
return start();
}
bool
Rtds2Gpu::updateStatus()
{
if(not XRtds2gpu_Get_status_vld(&xInstance))
return false;
status.value = XRtds2gpu_Get_status(&xInstance);
return true;
}
size_t
Rtds2Gpu::getMaxFrameSize()
{
XRtds2gpu_Set_frame_size(&xInstance, 0);
start();
while(not isFinished());
updateStatus();
return status.max_frame_size;
}
void
Rtds2Gpu::dumpDoorbell(uint32_t doorbellRegister) const
{
auto& doorbell = reinterpret_cast<reg_doorbell_t&>(doorbellRegister);
logger->info("Doorbell register: {:#08x}", doorbell.value);
logger->info(" Valid: {}", (doorbell.is_valid ? "yes" : "no"));
logger->info(" Count: {}", doorbell.count);
logger->info(" Seq. number: {}", doorbell.seq_nr);
}
Rtds2GpuFactory::Rtds2GpuFactory() :
IpNodeFactory(getName())
{
}
} // namespace ip
} // namespace fpga
} // namespace villas

View file

@ -0,0 +1,221 @@
// ==============================================================
// File generated by Vivado(TM) HLS - High-Level Synthesis from C, C++ and SystemC
// Version: 2017.3
// Copyright (C) 1986-2017 Xilinx, Inc. All Rights Reserved.
//
// ==============================================================
/***************************** Include Files *********************************/
#include <villas/fpga/ips/rtds2gpu/xrtds2gpu.h>
/************************** Function Implementation *************************/
#ifndef __linux__
int XRtds2gpu_CfgInitialize(XRtds2gpu *InstancePtr, XRtds2gpu_Config *ConfigPtr) {
Xil_AssertNonvoid(InstancePtr != NULL);
Xil_AssertNonvoid(ConfigPtr != NULL);
InstancePtr->Ctrl_BaseAddress = ConfigPtr->Ctrl_BaseAddress;
InstancePtr->IsReady = XIL_COMPONENT_IS_READY;
return XST_SUCCESS;
}
#endif
void XRtds2gpu_Start(XRtds2gpu *InstancePtr) {
u32 Data;
Xil_AssertVoid(InstancePtr != NULL);
Xil_AssertVoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
Data = XRtds2gpu_ReadReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_AP_CTRL) & 0x80;
XRtds2gpu_WriteReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_AP_CTRL, Data | 0x01);
}
u32 XRtds2gpu_IsDone(XRtds2gpu *InstancePtr) {
u32 Data;
Xil_AssertNonvoid(InstancePtr != NULL);
Xil_AssertNonvoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
Data = XRtds2gpu_ReadReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_AP_CTRL);
return (Data >> 1) & 0x1;
}
u32 XRtds2gpu_IsIdle(XRtds2gpu *InstancePtr) {
u32 Data;
Xil_AssertNonvoid(InstancePtr != NULL);
Xil_AssertNonvoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
Data = XRtds2gpu_ReadReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_AP_CTRL);
return (Data >> 2) & 0x1;
}
u32 XRtds2gpu_IsReady(XRtds2gpu *InstancePtr) {
u32 Data;
Xil_AssertNonvoid(InstancePtr != NULL);
Xil_AssertNonvoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
Data = XRtds2gpu_ReadReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_AP_CTRL);
// check ap_start to see if the pcore is ready for next input
return !(Data & 0x1);
}
void XRtds2gpu_EnableAutoRestart(XRtds2gpu *InstancePtr) {
Xil_AssertVoid(InstancePtr != NULL);
Xil_AssertVoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
XRtds2gpu_WriteReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_AP_CTRL, 0x80);
}
void XRtds2gpu_DisableAutoRestart(XRtds2gpu *InstancePtr) {
Xil_AssertVoid(InstancePtr != NULL);
Xil_AssertVoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
XRtds2gpu_WriteReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_AP_CTRL, 0);
}
void XRtds2gpu_Set_baseaddr(XRtds2gpu *InstancePtr, u32 Data) {
Xil_AssertVoid(InstancePtr != NULL);
Xil_AssertVoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
XRtds2gpu_WriteReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_BASEADDR_DATA, Data);
}
u32 XRtds2gpu_Get_baseaddr(XRtds2gpu *InstancePtr) {
u32 Data;
Xil_AssertNonvoid(InstancePtr != NULL);
Xil_AssertNonvoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
Data = XRtds2gpu_ReadReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_BASEADDR_DATA);
return Data;
}
void XRtds2gpu_Set_data_offset(XRtds2gpu *InstancePtr, u32 Data) {
Xil_AssertVoid(InstancePtr != NULL);
Xil_AssertVoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
XRtds2gpu_WriteReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_DATA_OFFSET_DATA, Data);
}
u32 XRtds2gpu_Get_data_offset(XRtds2gpu *InstancePtr) {
u32 Data;
Xil_AssertNonvoid(InstancePtr != NULL);
Xil_AssertNonvoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
Data = XRtds2gpu_ReadReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_DATA_OFFSET_DATA);
return Data;
}
void XRtds2gpu_Set_doorbell_offset(XRtds2gpu *InstancePtr, u32 Data) {
Xil_AssertVoid(InstancePtr != NULL);
Xil_AssertVoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
XRtds2gpu_WriteReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_DOORBELL_OFFSET_DATA, Data);
}
u32 XRtds2gpu_Get_doorbell_offset(XRtds2gpu *InstancePtr) {
u32 Data;
Xil_AssertNonvoid(InstancePtr != NULL);
Xil_AssertNonvoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
Data = XRtds2gpu_ReadReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_DOORBELL_OFFSET_DATA);
return Data;
}
void XRtds2gpu_Set_frame_size(XRtds2gpu *InstancePtr, u32 Data) {
Xil_AssertVoid(InstancePtr != NULL);
Xil_AssertVoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
XRtds2gpu_WriteReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_FRAME_SIZE_DATA, Data);
}
u32 XRtds2gpu_Get_frame_size(XRtds2gpu *InstancePtr) {
u32 Data;
Xil_AssertNonvoid(InstancePtr != NULL);
Xil_AssertNonvoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
Data = XRtds2gpu_ReadReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_FRAME_SIZE_DATA);
return Data;
}
u32 XRtds2gpu_Get_status(XRtds2gpu *InstancePtr) {
u32 Data;
Xil_AssertNonvoid(InstancePtr != NULL);
Xil_AssertNonvoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
Data = XRtds2gpu_ReadReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_STATUS_DATA);
return Data;
}
u32 XRtds2gpu_Get_status_vld(XRtds2gpu *InstancePtr) {
u32 Data;
Xil_AssertNonvoid(InstancePtr != NULL);
Xil_AssertNonvoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
Data = XRtds2gpu_ReadReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_STATUS_CTRL);
return Data & 0x1;
}
void XRtds2gpu_InterruptGlobalEnable(XRtds2gpu *InstancePtr) {
Xil_AssertVoid(InstancePtr != NULL);
Xil_AssertVoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
XRtds2gpu_WriteReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_GIE, 1);
}
void XRtds2gpu_InterruptGlobalDisable(XRtds2gpu *InstancePtr) {
Xil_AssertVoid(InstancePtr != NULL);
Xil_AssertVoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
XRtds2gpu_WriteReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_GIE, 0);
}
void XRtds2gpu_InterruptEnable(XRtds2gpu *InstancePtr, u32 Mask) {
u32 Register;
Xil_AssertVoid(InstancePtr != NULL);
Xil_AssertVoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
Register = XRtds2gpu_ReadReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_IER);
XRtds2gpu_WriteReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_IER, Register | Mask);
}
void XRtds2gpu_InterruptDisable(XRtds2gpu *InstancePtr, u32 Mask) {
u32 Register;
Xil_AssertVoid(InstancePtr != NULL);
Xil_AssertVoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
Register = XRtds2gpu_ReadReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_IER);
XRtds2gpu_WriteReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_IER, Register & (~Mask));
}
void XRtds2gpu_InterruptClear(XRtds2gpu *InstancePtr, u32 Mask) {
Xil_AssertVoid(InstancePtr != NULL);
Xil_AssertVoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
XRtds2gpu_WriteReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_ISR, Mask);
}
u32 XRtds2gpu_InterruptGetEnabled(XRtds2gpu *InstancePtr) {
Xil_AssertNonvoid(InstancePtr != NULL);
Xil_AssertNonvoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
return XRtds2gpu_ReadReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_IER);
}
u32 XRtds2gpu_InterruptGetStatus(XRtds2gpu *InstancePtr) {
Xil_AssertNonvoid(InstancePtr != NULL);
Xil_AssertNonvoid(InstancePtr->IsReady == XIL_COMPONENT_IS_READY);
return XRtds2gpu_ReadReg(InstancePtr->Ctrl_BaseAddress, XRTDS2GPU_CTRL_ADDR_ISR);
}

View file

@ -143,7 +143,7 @@ AxiStreamSwitchFactory::configureJson(IpCore& ip, json_t* json_ip)
auto logger = getLogger();
auto& axiSwitch = reinterpret_cast<AxiStreamSwitch&>(ip);
auto& axiSwitch = dynamic_cast<AxiStreamSwitch&>(ip);
if(json_unpack(json_ip, "{ s: i }", "num_ports", &axiSwitch.num_ports) != 0) {
logger->error("Cannot parse 'num_ports'");

View file

@ -36,7 +36,9 @@ whitelist = [
[ 'xilinx.com', 'ip', 'axi_gpio' ],
[ 'xilinx.com', 'ip', 'axi_bram_ctrl' ],
[ 'xilinx.com', 'ip', 'axis_data_fifo' ],
[ 'xilinx.com', 'ip', 'axi_pcie' ]
[ 'xilinx.com', 'ip', 'axi_pcie' ],
[ 'xilinx.com', 'hls', 'rtds2gpu' ],
[ 'xilinx.com', 'hls', 'mem' ]
]
# List of VLNI ids of AXI4-Stream infrastructure IP cores which do not alter data

View file

@ -27,11 +27,13 @@ set(SOURCES
dma.cpp
fifo.cpp
rtds.cpp
rtds2gpu.cpp
timer.cpp
)
if(CMAKE_CUDA_COMPILER)
list(APPEND SOURCES gpu.cpp)
enable_language(CUDA)
list(APPEND SOURCES gpu.cpp gpu_kernels.cu)
endif()
add_executable(unit-tests ${SOURCES})

View file

@ -46,7 +46,7 @@ Test(fpga, dma, .description = "DMA")
logger->info("Testing {}", *ip);
auto dma = reinterpret_cast<villas::fpga::ip::Dma&>(*ip);
auto dma = dynamic_cast<villas::fpga::ip::Dma&>(*ip);
if(not dma.loopbackPossible()) {
logger->info("Loopback test not possible for {}", *ip);

View file

@ -46,7 +46,7 @@ Test(fpga, fifo, .description = "FIFO")
logger->info("Testing {}", *ip);
auto fifo = reinterpret_cast<villas::fpga::ip::Fifo&>(*ip);
auto fifo = dynamic_cast<villas::fpga::ip::Fifo&>(*ip);
if(not fifo.connectLoopback()) {
continue;

View file

@ -62,7 +62,7 @@ Test(fpga, gpu_dma, .description = "GPU DMA tests")
logger->info("Testing {}", *ip);
auto bram = reinterpret_cast<villas::fpga::ip::Bram*>(ip.get());
auto bram = dynamic_cast<villas::fpga::ip::Bram*>(ip.get());
cr_assert_not_null(bram, "Couldn't find BRAM");
count++;

75
fpga/tests/gpu_kernels.cu Normal file
View file

@ -0,0 +1,75 @@
#include <cstdint>
#include <cstdio>
#include <cuda.h>
#include <cuda_runtime.h>
#include <villas/gpu.hpp>
#include <villas/fpga/ips/rtds2gpu.hpp>
__global__ void
gpu_rtds_rtt_kernel(volatile uint32_t* dataIn, volatile reg_doorbell_t* doorbellIn,
volatile uint32_t* dataOut, volatile villas::fpga::ip::ControlRegister* controlRegister,
int* run)
{
printf("[gpu] gpu kernel go\n");
printf("dataIn: %p\n", dataIn);
printf("doorbellIn: %p\n", doorbellIn);
printf("dataOut: %p\n", dataOut);
printf("control: %p\n", controlRegister);
printf("run: %p\n", run);
// *run = reinterpret_cast<bool*>(malloc(sizeof(bool)));
// **run = true;
uint32_t last_seq;
while(*run) {
// wait for data
// printf("[gpu] wait for data, last_seq=%u\n", last_seq);
while(not (doorbellIn->is_valid and (last_seq != doorbellIn->seq_nr)) and *run);
// printf("doorbell: 0x%08x\n", doorbellIn->value);
last_seq = doorbellIn->seq_nr;
// printf("[gpu] copy data\n");
for(size_t i = 0; i < doorbellIn->count; i++) {
dataOut[i] = dataIn[i];
}
// reset doorbell
// printf("[gpu] reset doorbell\n");
// doorbellIn->value = 0;
// printf("[gpu] signal go for gpu2rtds\n");
controlRegister->ap_start = 1;
}
printf("kernel done\n");
}
static int* run = nullptr;
void gpu_rtds_rtt_start(volatile uint32_t* dataIn, volatile reg_doorbell_t* doorbellIn,
volatile uint32_t* dataOut, volatile villas::fpga::ip::ControlRegister* controlRegister)
{
printf("run: %p\n", run);
if(run == nullptr) {
run = (int*)malloc(sizeof(uint32_t));
cudaHostRegister(run, sizeof(uint32_t), 0);
}
printf("run: %p\n", run);
*run = 1;
gpu_rtds_rtt_kernel<<<1, 1>>>(dataIn, doorbellIn, dataOut, controlRegister, run);
printf("[cpu] kernel launched\n");
}
void gpu_rtds_rtt_stop()
{
*run = 0;
cudaDeviceSynchronize();
}

View file

@ -80,8 +80,12 @@ Test(fpga, rtds, .description = "RTDS")
auto dmaMaster = dma->getMasterPort(dma->mm2sPort);
auto dmaSlave = dma->getSlavePort(dma->s2mmPort);
rtds->connect(rtdsMaster, dmaSlave);
dma->connect(dmaMaster, rtdsSlave);
// rtds->connect(*rtds);
// logger->info("loopback");
// while(1);
// rtds->connect(rtdsMaster, dmaSlave);
// dma->connect(dmaMaster, rtdsSlave);
auto mem = villas::HostRam::getAllocator().allocate<int32_t>(0x100 / sizeof(int32_t));

349
fpga/tests/rtds2gpu.cpp Normal file
View file

@ -0,0 +1,349 @@
/** FIFO unit test.
*
* @file
* @author Steffen Vogel <stvogel@eonerc.rwth-aachen.de>
* @copyright 2017, Steffen Vogel
* @license GNU General Public License (version 3)
*
* VILLASfpga
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*********************************************************************************/
#include <criterion/criterion.h>
#include <iostream>
#include <villas/log.hpp>
#include <villas/memory.hpp>
#include <villas/fpga/card.hpp>
#include <villas/fpga/ips/rtds2gpu.hpp>
#include <villas/fpga/ips/gpu2rtds.hpp>
#include <villas/fpga/ips/switch.hpp>
#include <villas/fpga/ips/dma.hpp>
#include <villas/fpga/ips/rtds.hpp>
#include <villas/gpu.hpp>
#include "global.hpp"
static constexpr size_t SAMPLE_SIZE = 4;
static constexpr size_t SAMPLE_COUNT = 1;
static constexpr size_t FRAME_SIZE = SAMPLE_COUNT * SAMPLE_SIZE;
static constexpr size_t DOORBELL_OFFSET = SAMPLE_COUNT;
static constexpr size_t DATA_OFFSET = 0;
static void dumpMem(const uint32_t* addr, size_t len)
{
const size_t bytesPerLine = 16;
const size_t lines = (len) / bytesPerLine + 1;
const uint8_t* buf = reinterpret_cast<const uint8_t*>(addr);
size_t bytesRead = 0;
for(size_t line = 0; line < lines; line++) {
const unsigned base = line * bytesPerLine;
printf("0x%04x: ", base);
for(size_t i = 0; i < bytesPerLine && bytesRead < len; i++) {
printf("0x%02x ", buf[base + i]);
bytesRead++;
}
puts("");
}
}
Test(fpga, rtds2gpu_loopback_dma, .description = "Rtds2Gpu")
{
auto logger = loggerGetOrCreate("unittest:rtds2gpu");
for(auto& ip : state.cards.front()->ips) {
if(*ip != villas::fpga::Vlnv("acs.eonerc.rwth-aachen.de:hls:rtds2gpu:"))
continue;
logger->info("Testing {}", *ip);
/* Collect neccessary IPs */
auto rtds2gpu = dynamic_cast<villas::fpga::ip::Rtds2Gpu&>(*ip);
auto axiSwitch = dynamic_cast<villas::fpga::ip::AxiStreamSwitch*>(
state.cards.front()->lookupIp(villas::fpga::Vlnv("xilinx.com:ip:axis_switch:")));
auto dma = dynamic_cast<villas::fpga::ip::Dma*>(
state.cards.front()->lookupIp(villas::fpga::Vlnv("xilinx.com:ip:axi_dma:")));
auto gpu2rtds = dynamic_cast<villas::fpga::ip::Gpu2Rtds*>(
state.cards.front()->lookupIp(villas::fpga::Vlnv("acs.eonerc.rwth-aachen.de:hls:gpu2rtds:")));
auto rtds = dynamic_cast<villas::fpga::ip::Rtds*>(
state.cards.front()->lookupIp(villas::fpga::Vlnv("acs.eonerc.rwth-aachen.de:user:rtds_axis:")));
cr_assert_not_null(axiSwitch, "No AXI switch IP found");
cr_assert_not_null(dma, "No DMA IP found");
cr_assert_not_null(gpu2rtds, "No Gpu2Rtds IP found");
cr_assert_not_null(rtds, "RTDS IP not found");
rtds2gpu.dump(spdlog::level::debug);
gpu2rtds->dump(spdlog::level::debug);
/* Allocate and prepare memory */
// allocate space for all samples and doorbell register
auto dmaMemSrc = villas::HostDmaRam::getAllocator(0).allocate<uint32_t>(SAMPLE_COUNT + 1);
auto dmaMemDst = villas::HostDmaRam::getAllocator(0).allocate<uint32_t>(SAMPLE_COUNT + 1);
auto dmaMemDst2 = villas::HostDmaRam::getAllocator(0).allocate<uint32_t>(SAMPLE_COUNT + 1);
memset(&dmaMemSrc, 0x11, dmaMemSrc.getMemoryBlock().getSize());
memset(&dmaMemDst, 0x55, dmaMemDst.getMemoryBlock().getSize());
memset(&dmaMemDst2, 0x77, dmaMemDst2.getMemoryBlock().getSize());
const uint32_t* dataSrc = &dmaMemSrc[DATA_OFFSET];
const uint32_t* dataDst = &dmaMemDst[DATA_OFFSET];
const uint32_t* dataDst2 = &dmaMemDst2[0];
dumpMem(dataSrc, dmaMemSrc.getMemoryBlock().getSize());
dumpMem(dataDst, dmaMemDst.getMemoryBlock().getSize());
dumpMem(dataDst2, dmaMemDst2.getMemoryBlock().getSize());
// connect AXI Stream from DMA to Rtds2Gpu IP
cr_assert(dma->connect(rtds2gpu));
cr_assert(rtds2gpu.startOnce(dmaMemDst.getMemoryBlock(), SAMPLE_COUNT, DATA_OFFSET * 4, DOORBELL_OFFSET * 4),
"Preparing Rtds2Gpu IP failed");
cr_assert(dma->write(dmaMemSrc.getMemoryBlock(), FRAME_SIZE),
"Starting DMA MM2S transfer failed");
cr_assert(dma->writeComplete(),
"DMA failed");
while(not rtds2gpu.isFinished());
const uint32_t* doorbellDst = &dmaMemDst[DOORBELL_OFFSET];
rtds2gpu.dump(spdlog::level::info);
rtds2gpu.dumpDoorbell(*doorbellDst);
cr_assert(memcmp(dataSrc, dataDst, FRAME_SIZE) == 0, "Memory not equal");
for(size_t i = 0; i < SAMPLE_COUNT; i++) {
gpu2rtds->registerFrames[i] = dmaMemDst[i];
}
// connect AXI Stream from Gpu2Rtds IP to DMA
cr_assert(gpu2rtds->connect(*dma));
cr_assert(dma->read(dmaMemDst2.getMemoryBlock(), FRAME_SIZE),
"Starting DMA S2MM transfer failed");
cr_assert(gpu2rtds->startOnce(SAMPLE_COUNT),
"Preparing Gpu2Rtds IP failed");
cr_assert(dma->readComplete(),
"DMA failed");
while(not gpu2rtds->isFinished());
cr_assert(memcmp(dataSrc, dataDst2, FRAME_SIZE) == 0, "Memory not equal");
dumpMem(dataSrc, dmaMemSrc.getMemoryBlock().getSize());
dumpMem(dataDst, dmaMemDst.getMemoryBlock().getSize());
dumpMem(dataDst2, dmaMemDst2.getMemoryBlock().getSize());
logger->info(TXT_GREEN("Passed"));
}
}
Test(fpga, rtds2gpu_rtt_cpu, .description = "Rtds2Gpu RTT via CPU")
{
auto logger = loggerGetOrCreate("unittest:rtds2gpu");
/* Collect neccessary IPs */
auto gpu2rtds = dynamic_cast<villas::fpga::ip::Gpu2Rtds*>(
state.cards.front()->lookupIp(villas::fpga::Vlnv("acs.eonerc.rwth-aachen.de:hls:gpu2rtds:")));
auto rtds2gpu = dynamic_cast<villas::fpga::ip::Rtds2Gpu*>(
state.cards.front()->lookupIp(villas::fpga::Vlnv("acs.eonerc.rwth-aachen.de:hls:rtds2gpu:")));
cr_assert_not_null(gpu2rtds, "No Gpu2Rtds IP found");
cr_assert_not_null(rtds2gpu, "No Rtds2Gpu IP not found");
for(auto& ip : state.cards.front()->ips) {
if(*ip != villas::fpga::Vlnv("acs.eonerc.rwth-aachen.de:user:rtds_axis:"))
continue;
auto& rtds = dynamic_cast<villas::fpga::ip::Rtds&>(*ip);
logger->info("Testing {}", rtds);
auto dmaRam = villas::HostDmaRam::getAllocator().allocate<uint32_t>(SAMPLE_COUNT + 1);
uint32_t* data = &dmaRam[DATA_OFFSET];
uint32_t* doorbell = &dmaRam[DOORBELL_OFFSET];
// TEST: rtds loopback via switch, this should always work and have RTT=1
//cr_assert(rtds.connect(rtds));
//logger->info("loopback");
//while(1);
cr_assert(rtds.connect(*rtds2gpu));
cr_assert(gpu2rtds->connect(rtds));
for(size_t i = 1; i <= 10000; ) {
rtds2gpu->doorbellReset(*doorbell);
rtds2gpu->startOnce(dmaRam.getMemoryBlock(), SAMPLE_COUNT, DATA_OFFSET * 4, DOORBELL_OFFSET * 4);
// Wait by polling rtds2gpu IP or ...
// while(not rtds2gpu->isFinished());
// Wait by polling (local) doorbell register (= just memory)
while(not rtds2gpu->doorbellIsValid(*doorbell));
// copy samples to gpu2rtds IP
for(size_t i = 0; i < SAMPLE_COUNT; i++) {
gpu2rtds->registerFrames[i] = data[i];
}
// Waiting for gpu2rtds is not strictly required
gpu2rtds->startOnce(SAMPLE_COUNT);
//while(not gpu2rtds->isFinished());
if(i % 1000 == 0) {
logger->info("Successful iterations {}, data {}", i, data[0]);
rtds2gpu->dump();
rtds2gpu->dumpDoorbell(data[1]);
}
}
logger->info(TXT_GREEN("Passed"));
}
}
void gpu_rtds_rtt_start(volatile uint32_t* dataIn, volatile reg_doorbell_t* doorbellIn,
volatile uint32_t* dataOut, volatile villas::fpga::ip::ControlRegister* controlRegister);
void gpu_rtds_rtt_stop();
Test(fpga, rtds2gpu_rtt_gpu, .description = "Rtds2Gpu RTT via GPU")
{
auto logger = loggerGetOrCreate("unittest:rtds2gpu");
/* Collect neccessary IPs */
auto gpu2rtds = dynamic_cast<villas::fpga::ip::Gpu2Rtds*>(
state.cards.front()->lookupIp(villas::fpga::Vlnv("acs.eonerc.rwth-aachen.de:hls:gpu2rtds:")));
auto rtds2gpu = dynamic_cast<villas::fpga::ip::Rtds2Gpu*>(
state.cards.front()->lookupIp(villas::fpga::Vlnv("acs.eonerc.rwth-aachen.de:hls:rtds2gpu:")));
cr_assert_not_null(gpu2rtds, "No Gpu2Rtds IP found");
cr_assert_not_null(rtds2gpu, "No Rtds2Gpu IP not found");
villas::Plugin* plugin = villas::Plugin::lookup(villas::Plugin::Type::Gpu, "");
auto gpuPlugin = dynamic_cast<villas::gpu::GpuFactory*>(plugin);
cr_assert_not_null(gpuPlugin, "No GPU plugin found");
auto gpus = gpuPlugin->make();
cr_assert(gpus.size() > 0, "No GPUs found");
// just get first cpu
auto& gpu = gpus.front();
// allocate memory on GPU and make accessible by to PCIe/FPGA
auto gpuRam = gpu->getAllocator().allocate<uint32_t>(SAMPLE_COUNT + 1);
cr_assert(gpu->makeAccessibleToPCIeAndVA(gpuRam.getMemoryBlock()));
// make Gpu2Rtds IP register memory on FPGA accessible to GPU
cr_assert(gpu->makeAccessibleFromPCIeOrHostRam(gpu2rtds->getRegisterMemory()));
auto tr = gpu->translate(gpuRam.getMemoryBlock());
auto dataIn = reinterpret_cast<uint32_t*>(tr.getLocalAddr(DATA_OFFSET * sizeof(uint32_t)));
auto doorbellIn = reinterpret_cast<reg_doorbell_t*>(tr.getLocalAddr(DOORBELL_OFFSET * sizeof(uint32_t)));
auto gpu2rtdsRegisters = gpu->translate(gpu2rtds->getRegisterMemory());
auto frameRegister = reinterpret_cast<uint32_t*>(gpu2rtdsRegisters.getLocalAddr(gpu2rtds->registerFrameOffset));
auto controlRegister = reinterpret_cast<villas::fpga::ip::ControlRegister*>(gpu2rtdsRegisters.getLocalAddr(gpu2rtds->registerControlAddr));
// auto doorbellInCpu = reinterpret_cast<reg_doorbell_t*>(&gpuRam[DOORBELL_OFFSET]);
for(auto& ip : state.cards.front()->ips) {
if(*ip != villas::fpga::Vlnv("acs.eonerc.rwth-aachen.de:user:rtds_axis:"))
continue;
auto& rtds = dynamic_cast<villas::fpga::ip::Rtds&>(*ip);
logger->info("Testing {}", rtds);
// TEST: rtds loopback via switch, this should always work and have RTT=1
//cr_assert(rtds.connect(rtds));
//logger->info("loopback");
//while(1);
cr_assert(rtds.connect(*rtds2gpu));
cr_assert(gpu2rtds->connect(rtds));
// launch once so they are configured
cr_assert(rtds2gpu->startOnce(gpuRam.getMemoryBlock(), SAMPLE_COUNT, DATA_OFFSET * 4, DOORBELL_OFFSET * 4));
cr_assert(gpu2rtds->startOnce(SAMPLE_COUNT));
rtds2gpu->setAutoRestart(true);
rtds2gpu->start();
logger->info("GPU RTT RTDS");
std::string dummy;
// logger->info("Press enter to proceed");
// std::cin >> dummy;
gpu_rtds_rtt_start(dataIn, doorbellIn, frameRegister, controlRegister);
// while(1) {
// cr_assert(rtds2gpu->startOnce(gpuRam.getMemoryBlock(), SAMPLE_COUNT, DATA_OFFSET * 4, DOORBELL_OFFSET * 4));
// }
// for(int i = 0; i < 10000; i++) {
// while(not doorbellInCpu->is_valid);
// logger->debug("received data");
// }
// logger->info("Press enter to cancel");
// std::cin >> dummy;
while(1) {
sleep(1);
// logger->debug("Current sequence number: {}", doorbellInCpu->seq_nr);
logger->debug("Still running");
}
gpu_rtds_rtt_stop();
logger->info(TXT_GREEN("Passed"));
}
}

View file

@ -45,7 +45,7 @@ Test(fpga, timer, .description = "Timer Counter")
count++;
auto timer = reinterpret_cast<villas::fpga::ip::Timer&>(*ip);
auto timer = dynamic_cast<villas::fpga::ip::Timer&>(*ip);
logger->info("Test simple waiting");
timer.start(timer.getFrequency() / 10);