1
0
Fork 0
mirror of https://git.rwth-aachen.de/acs/public/villas/node/ synced 2025-03-09 00:00:00 +01:00
VILLASnode/fpga/tests/unit/gpu_kernels.cu
Alexandra b39e4a0ace feat: new smu node-type
Signed-off-by: Alexandra <alexandra.bach@eonerc.rwth-aachen.de>
2025-01-14 14:42:39 +00:00

79 lines
2.2 KiB
Text

/** GPU CUDA kernel
*
* Author: Daniel Krebs <github@daniel-krebs.net>
* SPDX-FileCopyrightText: 2017 Daniel Krebs <github@daniel-krebs.net>
* SPDX-License-Identifier: Apache-2.0
*********************************************************************************/
#include <cstdint>
#include <cstdio>
#include <cuda.h>
#include <cuda_runtime.h>
#include <villas/gpu.hpp>
#include <villas/fpga/ips/rtds2gpu.hpp>
__global__ void
gpu_rtds_rtt_kernel(volatile uint32_t* dataIn, volatile reg_doorbell_t* doorbellIn,
volatile uint32_t* dataOut, volatile villas::fpga::ip::ControlRegister* controlRegister,
int* run)
{
printf("[gpu] gpu kernel go\n");
printf("dataIn: %p\n", dataIn);
printf("doorbellIn: %p\n", doorbellIn);
printf("dataOut: %p\n", dataOut);
printf("control: %p\n", controlRegister);
printf("run: %p\n", run);
// *run = reinterpret_cast<bool*>(malloc(sizeof(bool)));
// **run = true;
uint32_t last_seq;
while (*run) {
// wait for data
// printf("[gpu] wait for data, last_seq=%u\n", last_seq);
while (not (doorbellIn->is_valid and (last_seq != doorbellIn->seq_nr)) and *run);
// printf("doorbell: 0x%08x\n", doorbellIn->value);
last_seq = doorbellIn->seq_nr;
// printf("[gpu] copy data\n");
for (size_t i = 0; i < doorbellIn->count; i++) {
dataOut[i] = dataIn[i];
}
// reset doorbell
// printf("[gpu] reset doorbell\n");
// doorbellIn->value = 0;
// printf("[gpu] signal go for gpu2rtds\n");
controlRegister->ap_start = 1;
}
printf("kernel done\n");
}
static int* run = nullptr;
void gpu_rtds_rtt_start(volatile uint32_t* dataIn, volatile reg_doorbell_t* doorbellIn,
volatile uint32_t* dataOut, volatile villas::fpga::ip::ControlRegister* controlRegister)
{
printf("run: %p\n", run);
if (run == nullptr) {
run = (int*)malloc(sizeof(uint32_t));
cudaHostRegister(run, sizeof(uint32_t), 0);
}
printf("run: %p\n", run);
*run = 1;
gpu_rtds_rtt_kernel<<<1, 1>>>(dataIn, doorbellIn, dataOut, controlRegister, run);
printf("[cpu] kernel launched\n");
}
void gpu_rtds_rtt_stop()
{
*run = 0;
cudaDeviceSynchronize();
}