/** GPU unit tests.
 *
 * @author Steffen Vogel
 * @copyright 2017-2018, Steffen Vogel
 * @license GNU General Public License (version 3)
 *
 * VILLASfpga
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *********************************************************************************/

// NOTE(review): the targets of the bare #include directives below, and the
// template arguments of dynamic_cast / std::list / std::dynamic_pointer_cast /
// allocate further down, are not visible in this copy of the file. They are
// left untouched here - restore them from version control.
#include
#include
#include
#include
#include
#include
#include
#include

#include "global.hpp"

#include
#include

using namespace villas;

/**
 * GPU memory copy test (suite "fpga", test "gpu_dma").
 *
 * Outline:
 *  1. Take the first card from state.cards and look up the "cuda" plugin;
 *     the test aborts if the plugin or a GPU is missing. Only the first
 *     GPU is used.
 *  2. For every IP of the card that matches the AXI BRAM controller VLNV:
 *     - allocate two buffers each from the BRAM allocator, HostRam and
 *       HostDmaRam and register them with the GPU via
 *       makeAccessibleFromPCIeOrHostRam();
 *     - allocate two GPU buffers and register them via
 *       makeAccessibleToPCIeAndVA();
 *     - select one source and one destination buffer (currently dmaRam0 ->
 *       gpuMem1, other combinations are kept as commented-out alternatives);
 *     - run every available copy routine ("cudaMemcpy", "CUDA kernel" and,
 *       if an AXI DMA IP is found and connectLoopback() succeeds,
 *       "DMA memcpy"): fill the source with random data, copy, then compare
 *       source and destination with memcmp();
 *     - dump the memory graph.
 *  3. Assert that at least one BRAM IP was processed.
 *
 * NOTE(review): the outcome of each comparison is only logged as
 * Passed/Failed; it is never asserted, so a data mismatch does not fail this
 * test. Confirm whether that is intentional.
 */
// cppcheck-suppress unknownMacro
Test(fpga, gpu_dma, .description = "GPU DMA tests")
{
	// NOTE(review): the logger is named "unit-test:dma" although this is the
	// GPU test - possibly carried over from the DMA test; confirm.
	auto logger = logging.get("unit-test:dma");

	auto &card = state.cards.front();

	auto gpuPlugin = Plugin::Registry("cuda");
	cr_assert_not_null(gpuPlugin, "No GPU plugin found");

	auto gpus = gpuPlugin->make();
	cr_assert(gpus.size() > 0, "No GPUs found");

	// Only the first GPU is used
	auto &gpu = gpus.front();

	// Number of BRAM IPs that were actually exercised
	size_t count = 0;
	for (auto &ip : card->ips) {
		// Skip every IP that is not an AXI BRAM controller
		if (*ip != fpga::Vlnv("xilinx.com:ip:axi_bram_ctrl:"))
			continue;

		logger->info("Testing {}", *ip);

		auto bram = dynamic_cast(ip.get());
		cr_assert_not_null(bram, "Couldn't find BRAM");

		count++;

		// 4 * 1024 = 4096; used below as the length for the random fill,
		// the copy routines and memcmp()
		size_t len = 4 * (1 << 10);

		/* Allocate memory to use with DMA */

		// Two buffers from the BRAM's own allocator, registered with the GPU
		auto bram0 = bram->getAllocator().allocate(len);
		auto bram1 = bram->getAllocator().allocate(len);

		gpu->makeAccessibleFromPCIeOrHostRam(bram0.getMemoryBlock());
		gpu->makeAccessibleFromPCIeOrHostRam(bram1.getMemoryBlock());

		// Two buffers from HostRam, registered with the GPU
		auto hostRam0 = HostRam::getAllocator().allocate(len);
		auto hostRam1 = HostRam::getAllocator().allocate(len);

		gpu->makeAccessibleFromPCIeOrHostRam(hostRam0.getMemoryBlock());
		gpu->makeAccessibleFromPCIeOrHostRam(hostRam1.getMemoryBlock());

		// Two buffers from HostDmaRam, registered with the GPU
		auto dmaRam0 = HostDmaRam::getAllocator().allocate(len);
		auto dmaRam1 = HostDmaRam::getAllocator().allocate(len);

		gpu->makeAccessibleFromPCIeOrHostRam(dmaRam0.getMemoryBlock());
		gpu->makeAccessibleFromPCIeOrHostRam(dmaRam1.getMemoryBlock());

		// Two buffers from the GPU allocator; note the size is 64 << 10
		// (65536), i.e. larger than len
		auto gpuMem0 = gpu->getAllocator().allocate(64 << 10);
		auto gpuMem1 = gpu->getAllocator().allocate(64 << 10);

		gpu->makeAccessibleToPCIeAndVA(gpuMem0.getMemoryBlock());
		gpu->makeAccessibleToPCIeAndVA(gpuMem1.getMemoryBlock());

		// Source / destination selection. Exactly one src and one dst line
		// must be active; the remaining combinations are kept for manual
		// experiments. Buffers that are not selected are still allocated and
		// registered above but take no part in the copies.

//		auto &src = bram0;
//		auto &dst = bram1;

//		auto &src = hostRam0;
//		auto &dst = hostRam1;

		auto &src = dmaRam0;
//		auto &dst = dmaRam1;

//		auto &src = gpuMem0;
		auto &dst = gpuMem1;

		// Copy routines under test as (display name, callable) entries. The
		// lambdas capture by reference; they are only invoked inside this
		// loop iteration, while src, dst, len and gpu are still alive.
		// NOTE(review): all copy calls pass (src, dst, len) - the parameter
		// order expected by memcpySync/memcpyKernel/memcpy is not visible
		// here; confirm.
		std::list>> memcpyFuncs = {
			{"cudaMemcpy", [&]() {gpu->memcpySync(src.getMemoryBlock(), dst.getMemoryBlock(), len);}},
			{"CUDA kernel", [&]() {gpu->memcpyKernel(src.getMemoryBlock(), dst.getMemoryBlock(), len);}},
		};

		auto dmaIp = card->lookupIp(fpga::Vlnv("xilinx.com:ip:axi_dma:"));
		auto dma = std::dynamic_pointer_cast(dmaIp);

		// The DMA variant is optional: it is only added when the cast yields
		// a non-null pointer and connectLoopback() reports success
		// (connectLoopback() is not called on a null pointer thanks to
		// short-circuit evaluation).
		if (dma != nullptr and dma->connectLoopback()) {
			memcpyFuncs.push_back({
				"DMA memcpy", [&]() {
					// Both blocks must be made accessible to the DMA first.
					// If either call fails the lambda returns without
					// copying and without reporting the reason; the
					// subsequent memcmp() is then performed on an
					// un-copied destination.
					if (not dma->makeAccesibleFromVA(src.getMemoryBlock()) or
					    not dma->makeAccesibleFromVA(dst.getMemoryBlock())) {
						return;
					}
					dma->memcpy(src.getMemoryBlock(), dst.getMemoryBlock(), len);
				}});
		}

		for (auto& [name, memcpyFunc] : memcpyFuncs) {
			logger->info("Testing {}", name);

			/* Get new random data */
			// The source is refilled before every routine; the test aborts
			// if fewer than len units of random data were obtained.
			const size_t lenRandom = utils::read_random(&src, len);
			cr_assert(len == lenRandom, "Failed to get random data");

			memcpyFunc();

			// NOTE(review): &src / &dst presumably rely on the accessor type
			// overloading operator& to return the address of the mapped
			// buffer; if it does not, this compares the accessor objects
			// themselves rather than the copied data - verify.
			const bool success = memcmp(&src, &dst, len) == 0;

			// Result is reported only; it does not influence the test verdict
			logger->info(" {}", success ?
			             CLR_GRN("Passed") :
			             CLR_RED("Failed"));
		}

		// Dump the memory graph once per tested BRAM
		MemoryManager::getGraph().dump();
	}

	// The test is meaningless if the loop body above never ran
	cr_assert(count > 0, "No BRAM found");
}