2023-09-08 11:35:18 +02:00
|
|
|
/* GPU management.
|
2018-06-25 17:03:09 +02:00
|
|
|
*
|
2023-01-07 17:20:15 +01:00
|
|
|
* Author: Daniel Krebs <github@daniel-krebs.net>
|
2023-09-08 11:35:18 +02:00
|
|
|
* SPDX-FileCopyrightText: 2017 Institute for Automation of Complex Power Systems, RWTH Aachen University
|
2023-01-07 17:20:15 +01:00
|
|
|
* SPDX-License-Identifier: Apache-2.0
|
2023-09-08 11:35:18 +02:00
|
|
|
*/
|
2018-06-25 17:03:09 +02:00
|
|
|
|
2018-05-15 17:35:45 +02:00
|
|
|
#include <cstdint>
|
2024-02-29 19:34:27 +01:00
|
|
|
#include <cstdio>
|
2018-05-15 17:35:45 +02:00
|
|
|
#include <sys/mman.h>
|
|
|
|
|
|
|
|
#include <algorithm>
|
2024-02-29 19:34:27 +01:00
|
|
|
#include <memory>
|
2018-05-15 17:35:45 +02:00
|
|
|
|
2018-04-13 15:59:34 +02:00
|
|
|
#include <villas/gpu.hpp>
|
2024-08-14 17:58:14 +02:00
|
|
|
#include <villas/kernel/devices/pci_device.hpp>
|
2024-02-29 19:34:27 +01:00
|
|
|
#include <villas/log.hpp>
|
2018-08-21 13:28:07 +02:00
|
|
|
#include <villas/memory_manager.hpp>
|
2018-05-15 17:35:45 +02:00
|
|
|
|
|
|
|
#include <cuda.h>
|
|
|
|
#include <cuda_runtime.h>
|
|
|
|
#include <gdrapi.h>
|
|
|
|
|
|
|
|
#include "kernels.hpp"
|
2018-04-13 15:59:34 +02:00
|
|
|
|
2020-06-14 22:11:15 +02:00
|
|
|
using namespace villas::gpu;
|
2018-04-13 15:59:34 +02:00
|
|
|
|
2018-05-15 17:35:45 +02:00
|
|
|
static GpuFactory gpuFactory;
|
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
// Allocator for blocks living in this GPU's global memory. Blocks are
// addressed through the GPU's master PCIe address space.
GpuAllocator::GpuAllocator(Gpu &gpu)
    : BaseAllocator(gpu.masterPciEAddrSpaceId), gpu(gpu) {
  // Deallocator invoked via the MemoryBlock deleter: hand the memory back
  // to CUDA, then unregister the block from this allocator.
  free = [&](MemoryBlock *block) {
    cudaSetDevice(gpu.gpuId);

    const auto status = cudaFree(reinterpret_cast<void *>(block->getOffset()));
    if (status != cudaSuccess)
      logger->warn("cudaFree() failed for {:#x} of size {:#x}",
                   block->getOffset(), block->getSize());

    removeMemoryBlock(*block);
  };
}
|
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
// Human-readable allocator name; unique per backing address space.
std::string villas::gpu::GpuAllocator::getName() const {
  std::stringstream ss;
  ss << "GpuAlloc" << getAddrSpaceId();
  return ss.str();
}
|
|
|
|
|
2024-02-29 20:35:00 +01:00
|
|
|
// Construct the GPU factory plugin, registered under the "cuda" plugin name.
// NOTE(review): `logger` is listed before the Plugin base initializer, but
// base classes are always initialized before members regardless of the
// member-initializer ordering here.
GpuFactory::GpuFactory()
    : logger(villas::Log::get("gpu:factory")),
      Plugin("cuda", "CUDA capable GPUs") {}
|
2018-05-15 17:35:45 +02:00
|
|
|
|
2022-08-30 12:01:47 -04:00
|
|
|
// Required to be defined here for PIMPL to compile
|
2024-02-29 19:34:27 +01:00
|
|
|
Gpu::~Gpu() {
  // Drop this GPU's master PCIe address space from the global memory graph.
  MemoryManager::get().removeAddressSpace(masterPciEAddrSpaceId);
}
|
|
|
|
|
2022-08-30 12:01:47 -04:00
|
|
|
// We use PIMPL in order to hide gdrcopy types from the public header
|
2018-05-15 17:35:45 +02:00
|
|
|
class Gpu::impl {
public:
  // Handle returned by gdr_open() in the Gpu constructor; nullptr when
  // /dev/gdrdrv could not be opened (checked before every GDRcopy call).
  gdr_t gdr;

  // PCI device descriptor of this GPU; its slot is filled in Gpu::init()
  // from the CUDA device properties.
  struct pci_device pdev;
};
|
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
std::string Gpu::getName() const {
|
|
|
|
cudaDeviceProp deviceProp;
|
|
|
|
if (cudaGetDeviceProperties(&deviceProp, gpuId) != cudaSuccess) {
|
|
|
|
// Logger not yet availabe
|
2024-07-29 12:35:42 +02:00
|
|
|
villas::Log::get("gpu")->error("Cannot retrieve properties for GPU {}",
|
|
|
|
gpuId);
|
2024-02-29 19:34:27 +01:00
|
|
|
throw std::exception();
|
|
|
|
}
|
2018-05-15 17:35:45 +02:00
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
std::stringstream name;
|
|
|
|
name << "gpu" << gpuId << "(" << deviceProp.name << ")";
|
2018-05-15 17:35:45 +02:00
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
return name.str();
|
2018-05-15 17:35:45 +02:00
|
|
|
}
|
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
bool Gpu::registerIoMemory(const MemoryBlock &mem) {
|
|
|
|
auto &mm = MemoryManager::get();
|
|
|
|
const auto pciAddrSpaceId = mm.getPciAddressSpace();
|
|
|
|
|
|
|
|
// Check if we need to map anything at all, maybe it's already reachable
|
|
|
|
try {
|
|
|
|
// TODO: there might already be a path through the graph, but there's no
|
|
|
|
// overlapping window, so this will fail badly!
|
|
|
|
auto translation =
|
|
|
|
mm.getTranslation(masterPciEAddrSpaceId, mem.getAddrSpaceId());
|
|
|
|
if (translation.getSize() >= mem.getSize())
|
|
|
|
// There is already a sufficient path
|
|
|
|
logger->debug("Already mapped through another mapping");
|
|
|
|
return true;
|
|
|
|
else logger->warn("There's already a mapping, but too small");
|
|
|
|
} catch (const std::out_of_range &) {
|
|
|
|
// Not yet reachable, that's okay, proceed
|
|
|
|
}
|
|
|
|
|
|
|
|
// In order to register IO memory with CUDA, it has to be mapped to the VA
|
|
|
|
// space of the current process (requirement of CUDA API). Check this now.
|
|
|
|
MemoryManager::AddressSpaceId mappedBaseAddrSpaceId;
|
|
|
|
try {
|
|
|
|
auto path = mm.findPath(mm.getProcessAddressSpace(), mem.getAddrSpaceId());
|
|
|
|
// First node in path is the mapped memory space whose virtual address
|
|
|
|
// we need to hand to CUDA
|
|
|
|
mappedBaseAddrSpaceId = path.front();
|
|
|
|
} catch (const std::out_of_range &) {
|
|
|
|
logger->error("Memory not reachable from process, but required by CUDA");
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Determine the base address of the mapped memory region needed by CUDA
|
|
|
|
const auto translationProcess =
|
|
|
|
mm.getTranslationFromProcess(mappedBaseAddrSpaceId);
|
|
|
|
const uintptr_t baseAddrForProcess = translationProcess.getLocalAddr(0);
|
|
|
|
|
|
|
|
// Now check that the memory is also reachable via PCIe bus, otherwise GPU
|
|
|
|
// has no means to access it.
|
|
|
|
uintptr_t baseAddrOnPci;
|
|
|
|
size_t sizeOnPci;
|
|
|
|
try {
|
|
|
|
auto translationPci =
|
|
|
|
mm.getTranslation(pciAddrSpaceId, mappedBaseAddrSpaceId);
|
|
|
|
baseAddrOnPci = translationPci.getLocalAddr(0);
|
|
|
|
sizeOnPci = translationPci.getSize();
|
|
|
|
} catch (const std::out_of_range &) {
|
|
|
|
logger->error("Memory is not reachable via PCIe bus");
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (sizeOnPci < mem.getSize()) {
|
|
|
|
logger->warn(
|
|
|
|
"VA mapping of IO memory is too small: {:#x} instead of {:#x} bytes",
|
|
|
|
sizeOnPci, mem.getSize());
|
|
|
|
logger->warn("If something later on fails or behaves strangely, this might "
|
|
|
|
"be the cause!");
|
|
|
|
}
|
|
|
|
|
|
|
|
cudaSetDevice(gpuId);
|
|
|
|
|
|
|
|
auto baseAddrVA = reinterpret_cast<void *>(baseAddrForProcess);
|
|
|
|
if (cudaHostRegister(baseAddrVA, sizeOnPci, cudaHostRegisterIoMemory) !=
|
|
|
|
cudaSuccess) {
|
|
|
|
logger->error("Cannot register IO memory for block {}",
|
|
|
|
mem.getAddrSpaceId());
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
void *devicePointer = nullptr;
|
|
|
|
if (cudaHostGetDevicePointer(&devicePointer, baseAddrVA, 0) != cudaSuccess) {
|
|
|
|
logger->error("Cannot retrieve device pointer for IO memory");
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
mm.createMapping(reinterpret_cast<uintptr_t>(devicePointer), baseAddrOnPci,
|
|
|
|
sizeOnPci, "CudaIoMem", masterPciEAddrSpaceId,
|
|
|
|
pciAddrSpaceId);
|
|
|
|
|
|
|
|
return true;
|
2018-05-15 17:35:45 +02:00
|
|
|
}
|
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
bool Gpu::registerHostMemory(const MemoryBlock &mem) {
|
|
|
|
auto &mm = MemoryManager::get();
|
2018-05-15 17:35:45 +02:00
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
auto translation = mm.getTranslationFromProcess(mem.getAddrSpaceId());
|
|
|
|
auto localBase = reinterpret_cast<void *>(translation.getLocalAddr(0));
|
2018-05-15 17:35:45 +02:00
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
int ret = cudaHostRegister(localBase, mem.getSize(), 0);
|
|
|
|
if (ret != cudaSuccess) {
|
|
|
|
logger->error(
|
|
|
|
"Cannot register memory block {} addr={:p} size={:#x} to CUDA: ret={}",
|
|
|
|
mem.getAddrSpaceId(), localBase, mem.getSize(), ret);
|
|
|
|
return false;
|
|
|
|
}
|
2018-05-15 17:35:45 +02:00
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
void *devicePointer = nullptr;
|
|
|
|
ret = cudaHostGetDevicePointer(&devicePointer, localBase, 0);
|
|
|
|
if (ret != cudaSuccess) {
|
|
|
|
logger->error("Cannot retrieve device pointer for IO memory: ret={}", ret);
|
|
|
|
return false;
|
|
|
|
}
|
2018-05-15 17:35:45 +02:00
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
mm.createMapping(reinterpret_cast<uintptr_t>(devicePointer), 0, mem.getSize(),
|
|
|
|
"CudaHostMem", masterPciEAddrSpaceId, mem.getAddrSpaceId());
|
2018-05-15 17:35:45 +02:00
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
return true;
|
2018-05-15 17:35:45 +02:00
|
|
|
}
|
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
// Expose a block of THIS GPU's memory both to the local process (via a
// GDRcopy BAR mapping) and to peer devices on the PCIe bus (via a DMA
// address). On success, two mappings are added to the memory graph:
// process -> block and PCIe bus -> block. Returns false (after logging) on
// any failure; note that partially-created GDRcopy state (pinned buffer,
// BAR mapping) is not rolled back on a later failure.
bool Gpu::makeAccessibleToPCIeAndVA(const MemoryBlock &mem) {
  // GDRcopy is mandatory for this path; the handle is opened in the ctor.
  if (pImpl->gdr == nullptr) {
    logger->warn("GDRcopy not available");
    return false;
  }

  auto &mm = MemoryManager::get();

  // Reject blocks that do not live in this GPU's memory.
  try {
    auto path = mm.findPath(masterPciEAddrSpaceId, mem.getAddrSpaceId());
    // If first hop is the PCIe bus, we know that memory is off-GPU
    if (path.front() == mm.getPciAddressSpace())
      throw std::out_of_range("Memory block is outside of this GPU");

  } catch (const std::out_of_range &) {
    logger->error("Trying to map non-GPU memory block");
    return false;
  }

  logger->debug("retrieve complete device pointer from point of view of GPU");

  // Retrieve complete device pointer from point of view of GPU
  auto translation =
      mm.getTranslation(masterPciEAddrSpaceId, mem.getAddrSpaceId());
  CUdeviceptr devptr = translation.getLocalAddr(0);

  int ret;

  // Required to set this flag before mapping
  unsigned int enable = 1;
  ret =
      cuPointerSetAttribute(&enable, CU_POINTER_ATTRIBUTE_SYNC_MEMOPS, devptr);
  if (ret != CUDA_SUCCESS) {
    logger->error("Cannot set pointer attributes on memory block {}: {}",
                  mem.getAddrSpaceId(), ret);
    return false;
  }

  // Pin the device memory so GDRcopy can map its backing BAR pages.
  gdr_mh_t mh;
  ret = gdr_pin_buffer(pImpl->gdr, devptr, mem.getSize(), 0, 0, &mh);
  if (ret != 0) {
    logger->error("Cannot pin memory block {} via gdrcopy: {}",
                  mem.getAddrSpaceId(), ret);
    return false;
  }

  // Map the pinned buffer into this process' address space.
  void *bar = nullptr;
  ret = gdr_map(pImpl->gdr, mh, &bar, mem.getSize());
  if (ret != 0) {
    logger->error("Cannot map memory block {} via gdrcopy: {}",
                  mem.getAddrSpaceId(), ret);
    return false;
  }

  // Query mapping details (page-aligned VA, mapped size) for this handle.
  gdr_info_t info;
  ret = gdr_get_info(pImpl->gdr, mh, &info);
  if (ret != 0) {
    logger->error("Cannot get info for mapping of memory block {}: {}",
                  mem.getAddrSpaceId(), ret);
    return false;
  }

  // The BAR mapping is page-aligned; shift by the sub-page offset of devptr
  // to get the user pointer that corresponds to the start of the block.
  const uintptr_t offset = info.va - devptr;
  const uintptr_t userPtr = reinterpret_cast<uintptr_t>(bar) + offset;

  logger->debug("BAR ptr: {:p}", bar);
  logger->debug("info.va: {:#x}", info.va);
  logger->debug("info.mapped_size: {:#x}", info.mapped_size);
  logger->debug("info.page_size: {:#x}", info.page_size);
  logger->debug("offset: {:#x}", offset);
  logger->debug("user pointer: {:#x}", userPtr);

  // Mapping to access memory block from process
  mm.createMapping(userPtr, 0, info.mapped_size, "GDRcopy",
                   mm.getProcessAddressSpace(), mem.getAddrSpaceId());

  // Retrieve bus address
  // NOTE(review): gdr_map_dma is a non-upstream GDRcopy extension here; the
  // literal 3 appears to select the DMA mapping type — confirm against the
  // patched gdrapi.h in use.
  uint64_t addr[8];
  ret = gdr_map_dma(pImpl->gdr, mh, 3, 0, 0, addr, 8);

  for (int i = 0; i < ret; i++)
    logger->debug("DMA addr[{}]: {:#x}", i, addr[i]);

  // A physically fragmented block would report several DMA addresses; only
  // contiguous (single-address) blocks are handled so far.
  if (ret != 1) {
    logger->error("Only one DMA address per block supported at the moment");
    return false;
  }

  // Mapping to access memory block from peer devices via PCIe
  mm.createMapping(addr[0], 0, mem.getSize(), "GDRcopyDMA",
                   mm.getPciAddressSpace(), mem.getAddrSpaceId());

  return true;
}
|
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
bool Gpu::makeAccessibleFromPCIeOrHostRam(const MemoryBlock &mem) {
|
|
|
|
// Check which kind of memory this is and where it resides
|
|
|
|
// There are two possibilities:
|
|
|
|
// - Host memory not managed by CUDA
|
|
|
|
// - IO memory somewhere on the PCIe bus
|
|
|
|
|
|
|
|
auto &mm = MemoryManager::get();
|
|
|
|
|
|
|
|
bool isIoMemory = false;
|
|
|
|
try {
|
|
|
|
auto path = mm.findPath(mm.getPciAddressSpace(), mem.getAddrSpaceId());
|
|
|
|
isIoMemory = true;
|
|
|
|
} catch (const std::out_of_range &) {
|
|
|
|
// Not reachable via PCI -> not IO memory
|
|
|
|
}
|
|
|
|
|
|
|
|
if (isIoMemory) {
|
|
|
|
logger->debug("Memory block {} is assumed to be IO memory",
|
|
|
|
mem.getAddrSpaceId());
|
|
|
|
|
|
|
|
return registerIoMemory(mem);
|
|
|
|
} else {
|
|
|
|
logger->debug("Memory block {} is assumed to be non-CUDA host memory",
|
|
|
|
mem.getAddrSpaceId());
|
|
|
|
|
|
|
|
return registerHostMemory(mem);
|
|
|
|
}
|
2018-05-15 17:35:45 +02:00
|
|
|
}
|
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
void Gpu::memcpySync(const MemoryBlock &src, const MemoryBlock &dst,
|
|
|
|
size_t size) {
|
|
|
|
auto &mm = MemoryManager::get();
|
2018-05-15 17:35:45 +02:00
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
auto src_translation =
|
|
|
|
mm.getTranslation(masterPciEAddrSpaceId, src.getAddrSpaceId());
|
|
|
|
const void *src_buf =
|
|
|
|
reinterpret_cast<void *>(src_translation.getLocalAddr(0));
|
2018-05-15 17:35:45 +02:00
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
auto dst_translation =
|
|
|
|
mm.getTranslation(masterPciEAddrSpaceId, dst.getAddrSpaceId());
|
|
|
|
void *dst_buf = reinterpret_cast<void *>(dst_translation.getLocalAddr(0));
|
2018-05-15 17:35:45 +02:00
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
cudaSetDevice(gpuId);
|
|
|
|
cudaMemcpy(dst_buf, src_buf, size, cudaMemcpyDefault);
|
2018-05-15 17:35:45 +02:00
|
|
|
}
|
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
// Copy `size` bytes between two blocks using the kernel_memcpy device kernel
// (single thread), then synchronize so the copy is complete on return.
void Gpu::memcpyKernel(const MemoryBlock &src, const MemoryBlock &dst,
                       size_t size) {
  auto &mm = MemoryManager::get();

  auto srcXlat = mm.getTranslation(masterPciEAddrSpaceId, src.getAddrSpaceId());
  auto from = reinterpret_cast<uint8_t *>(srcXlat.getLocalAddr(0));

  auto dstXlat = mm.getTranslation(masterPciEAddrSpaceId, dst.getAddrSpaceId());
  auto to = reinterpret_cast<uint8_t *>(dstXlat.getLocalAddr(0));

  cudaSetDevice(gpuId);
  kernel_memcpy<<<1, 1>>>(to, from, size);
  cudaDeviceSynchronize();
}
|
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
// Translation from this GPU's master PCIe address space into the given block.
MemoryTranslation Gpu::translate(const MemoryBlock &dst) {
  return MemoryManager::get().getTranslation(masterPciEAddrSpaceId,
                                             dst.getAddrSpaceId());
}
|
|
|
|
|
2018-05-15 17:35:45 +02:00
|
|
|
std::unique_ptr<villas::MemoryBlock, villas::MemoryBlock::deallocator_fn>
|
2024-02-29 19:34:27 +01:00
|
|
|
GpuAllocator::allocateBlock(size_t size) {
|
|
|
|
cudaSetDevice(gpu.gpuId);
|
2018-05-15 17:35:45 +02:00
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
void *addr;
|
|
|
|
auto &mm = MemoryManager::get();
|
2018-05-15 17:35:45 +02:00
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
// Search for an existing chunk that has enough free memory
|
|
|
|
auto chunk =
|
|
|
|
std::find_if(chunks.begin(), chunks.end(), [&](const auto &chunk) {
|
|
|
|
return chunk->getAvailableMemory() >= size;
|
|
|
|
});
|
2018-07-20 16:46:55 +02:00
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
if (chunk != chunks.end()) {
|
|
|
|
logger->debug("Found existing chunk that can host the requested block");
|
2018-07-20 16:46:55 +02:00
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
return (*chunk)->allocateBlock(size);
|
|
|
|
} else {
|
|
|
|
// Allocate a new chunk
|
2018-05-15 17:35:45 +02:00
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
// Rounded-up multiple of GPU page size
|
|
|
|
const size_t chunkSize = size - (size & (GpuPageSize - 1)) + GpuPageSize;
|
|
|
|
logger->debug("Allocate new chunk of {:#x} bytes", chunkSize);
|
2018-07-20 16:46:55 +02:00
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
if (cudaSuccess != cudaMalloc(&addr, chunkSize)) {
|
|
|
|
logger->error("cudaMalloc(..., size={}) failed", chunkSize);
|
|
|
|
throw std::bad_alloc();
|
|
|
|
}
|
2018-07-20 16:46:55 +02:00
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
// Assemble name for this block
|
|
|
|
std::stringstream name;
|
|
|
|
name << std::showbase << std::hex << reinterpret_cast<uintptr_t>(addr);
|
2018-05-15 17:35:45 +02:00
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
auto blockName = mm.getSlaveAddrSpaceName(getName(), name.str());
|
|
|
|
auto blockAddrSpaceId = mm.getOrCreateAddressSpace(blockName);
|
2018-05-15 17:35:45 +02:00
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
const auto localAddr = reinterpret_cast<uintptr_t>(addr);
|
|
|
|
std::unique_ptr<MemoryBlock, MemoryBlock::deallocator_fn> mem(
|
|
|
|
new MemoryBlock(localAddr, chunkSize, blockAddrSpaceId), this->free);
|
2018-05-15 17:35:45 +02:00
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
insertMemoryBlock(*mem);
|
2018-05-15 17:35:45 +02:00
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
// Already make accessible to CPU
|
|
|
|
gpu.makeAccessibleToPCIeAndVA(*mem);
|
2018-05-15 17:35:45 +02:00
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
// Create a new allocator to manage the chunk and push to chunk list
|
|
|
|
chunks.push_front(std::make_unique<LinearAllocator>(std::move(mem)));
|
2018-07-20 16:46:55 +02:00
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
// Call again, this time there's a large enough chunk
|
|
|
|
return allocateBlock(size);
|
|
|
|
}
|
2018-05-15 17:35:45 +02:00
|
|
|
}
|
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
// Construct the wrapper for CUDA device `gpuId`. Also tries to open the
// GDRcopy driver; a null handle later disables makeAccessibleToPCIeAndVA().
Gpu::Gpu(int gpuId) : pImpl{std::make_unique<impl>()}, gpuId(gpuId) {
  logger = villas::Log::get(getName());

  pImpl->gdr = gdr_open();
  if (pImpl->gdr == nullptr)
    logger->warn("No GDRcopy support enabled, cannot open /dev/gdrdrv");
}
|
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
bool Gpu::init() {
|
|
|
|
auto &mm = MemoryManager::get();
|
2018-05-15 17:35:45 +02:00
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
const auto gpuPciEAddrSpaceName =
|
|
|
|
mm.getMasterAddrSpaceName(getName(), "pcie");
|
|
|
|
masterPciEAddrSpaceId = mm.getOrCreateAddressSpace(gpuPciEAddrSpaceName);
|
2018-05-15 17:35:45 +02:00
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
allocator = std::make_unique<GpuAllocator>(*this);
|
2018-05-15 17:35:45 +02:00
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
cudaDeviceProp deviceProp;
|
|
|
|
cudaGetDeviceProperties(&deviceProp, gpuId);
|
2018-05-15 17:35:45 +02:00
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
pImpl->pdev.slot = {deviceProp.pciDomainID, deviceProp.pciBusID,
|
|
|
|
deviceProp.pciDeviceID, 0};
|
2018-05-15 17:35:45 +02:00
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
struct pci_region *pci_regions = nullptr;
|
|
|
|
const size_t pci_num_regions = pci_get_regions(&pImpl->pdev, &pci_regions);
|
|
|
|
for (size_t i = 0; i < pci_num_regions; i++) {
|
|
|
|
const size_t region_size = pci_regions[i].end - pci_regions[i].start + 1;
|
|
|
|
logger->info("BAR{}: bus addr={:#x} size={:#x}", pci_regions[i].num,
|
|
|
|
pci_regions[i].start, region_size);
|
2018-05-15 17:35:45 +02:00
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
char name[] = "BARx";
|
|
|
|
name[3] = '0' + pci_regions[i].num;
|
2018-05-15 17:35:45 +02:00
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
auto gpuBarXAddrSpaceName = mm.getSlaveAddrSpaceName(getName(), name);
|
|
|
|
auto gpuBarXAddrSpaceId = mm.getOrCreateAddressSpace(gpuBarXAddrSpaceName);
|
2018-05-15 17:35:45 +02:00
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
mm.createMapping(pci_regions[i].start, 0, region_size,
|
|
|
|
std::string("PCI-") + name, mm.getPciAddressSpace(),
|
|
|
|
gpuBarXAddrSpaceId);
|
|
|
|
}
|
2018-05-15 17:35:45 +02:00
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
free(pci_regions);
|
2018-05-15 17:35:45 +02:00
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
return true;
|
2018-05-15 17:35:45 +02:00
|
|
|
}
|
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
std::list<std::unique_ptr<Gpu>> GpuFactory::make() {
|
|
|
|
int deviceCount = 0;
|
|
|
|
cudaGetDeviceCount(&deviceCount);
|
2018-05-15 17:35:45 +02:00
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
std::list<std::unique_ptr<Gpu>> gpuList;
|
2018-05-15 17:35:45 +02:00
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
for (int gpuId = 0; gpuId < deviceCount; gpuId++) {
|
|
|
|
if (cudaSetDevice(gpuId) != cudaSuccess) {
|
|
|
|
logger->warn("Cannot activate GPU {}", gpuId);
|
|
|
|
continue;
|
|
|
|
}
|
2018-05-15 17:35:45 +02:00
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
auto gpu = std::make_unique<Gpu>(gpuId);
|
2018-05-15 17:35:45 +02:00
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
if (not gpu->init()) {
|
|
|
|
logger->warn("Cannot initialize GPU {}", gpuId);
|
|
|
|
continue;
|
|
|
|
}
|
2018-05-15 17:35:45 +02:00
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
gpuList.emplace_back(std::move(gpu));
|
|
|
|
}
|
2018-05-15 17:35:45 +02:00
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
logger->info("Initialized {} GPUs", gpuList.size());
|
|
|
|
for (auto &gpu : gpuList) {
|
|
|
|
logger->debug(" - {}", gpu->getName());
|
|
|
|
}
|
2018-05-15 17:35:45 +02:00
|
|
|
|
2024-02-29 19:34:27 +01:00
|
|
|
return gpuList;
|
2018-05-15 17:35:45 +02:00
|
|
|
}
|