mirror of
https://github.com/hermitcore/libhermit.git
synced 2025-03-30 00:00:15 +01:00
support for ODP using RDMA (#92)
-- optionally enable ODP (if supported) -- dynamically choose first active device port -- add memory allocator test
This commit is contained in:
parent
b5d5c0a3bb
commit
77536f5280
3 changed files with 188 additions and 58 deletions
|
@ -27,6 +27,7 @@
|
||||||
|
|
||||||
#define _GNU_SOURCE
|
#define _GNU_SOURCE
|
||||||
|
|
||||||
|
#include <stdbool.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <arpa/inet.h>
|
#include <arpa/inet.h>
|
||||||
#include <infiniband/verbs.h>
|
#include <infiniband/verbs.h>
|
||||||
|
@ -42,12 +43,15 @@
|
||||||
|
|
||||||
|
|
||||||
#ifdef __RDMA_MIGRATION__
|
#ifdef __RDMA_MIGRATION__
|
||||||
|
#define IB_USE_ODP (0)
|
||||||
|
|
||||||
#define IB_USED_PORT (1)
|
|
||||||
#define IB_CQ_ENTRIES (1)
|
#define IB_CQ_ENTRIES (1)
|
||||||
#define IB_MAX_INLINE_DATA (0)
|
#define IB_MAX_INLINE_DATA (0)
|
||||||
#define IB_MAX_DEST_RD_ATOMIC (1)
|
#define IB_MAX_DEST_RD_ATOMIC (1)
|
||||||
#define IB_MIN_RNR_TIMER (1)
|
#define IB_MIN_RNR_TIMER (1)
|
||||||
|
#define IB_MAX_SEND_WR (8192) // TODO: should be
|
||||||
|
// com_hndl.dev_attr_ex.orig_attr.max_qp_wr
|
||||||
|
// fix for mlx_5 adapter
|
||||||
#define IB_MAX_RECV_WR (1)
|
#define IB_MAX_RECV_WR (1)
|
||||||
#define IB_MAX_SEND_SGE (1)
|
#define IB_MAX_SEND_SGE (1)
|
||||||
#define IB_MAX_RECV_SGE (1)
|
#define IB_MAX_RECV_SGE (1)
|
||||||
|
@ -70,18 +74,19 @@ typedef struct qp_info {
|
||||||
} qp_info_t;
|
} qp_info_t;
|
||||||
|
|
||||||
typedef struct com_hndl {
|
typedef struct com_hndl {
|
||||||
struct ibv_context *ctx; /* device context */
|
struct ibv_context *ctx; /* device context */
|
||||||
struct ibv_device_attr dev_attr; /* device attributes */
|
struct ibv_device_attr_ex dev_attr_ex; /* extended device attributes */
|
||||||
struct ibv_port_attr port_attr; /* port attributes */
|
struct ibv_port_attr port_attr; /* port attributes */
|
||||||
struct ibv_pd *pd; /* protection domain */
|
struct ibv_pd *pd; /* protection domain */
|
||||||
struct ibv_mr **mrs; /* memory regions */
|
struct ibv_mr **mrs; /* memory regions */
|
||||||
struct ibv_cq *cq; /* completion queue */
|
struct ibv_cq *cq; /* completion queue */
|
||||||
struct ibv_qp *qp; /* queue pair */
|
struct ibv_qp *qp; /* queue pair */
|
||||||
struct ibv_comp_channel *comp_chan; /* completion event channel */
|
struct ibv_comp_channel *comp_chan; /* comp. event channel */
|
||||||
qp_info_t loc_qp_info;
|
qp_info_t loc_qp_info;
|
||||||
qp_info_t rem_qp_info;
|
qp_info_t rem_qp_info;
|
||||||
uint8_t *buf; /* the guest memory (with potential gaps!) */
|
uint8_t used_port; /* port of the IB device */
|
||||||
size_t mr_cnt; /* number of memory regions */
|
uint8_t *buf; /* the guest memory (with potential gaps!) */
|
||||||
|
size_t mr_cnt; /* number of memory regions */
|
||||||
} com_hndl_t;
|
} com_hndl_t;
|
||||||
|
|
||||||
|
|
||||||
|
@ -134,13 +139,16 @@ void print_send_wr_info(uint64_t id)
|
||||||
static void
|
static void
|
||||||
init_com_hndl(size_t mem_chunk_cnt, mem_chunk_t *mem_chunks)
|
init_com_hndl(size_t mem_chunk_cnt, mem_chunk_t *mem_chunks)
|
||||||
{
|
{
|
||||||
|
/* initialize com_hndl */
|
||||||
|
memset(&com_hndl, 0, sizeof(com_hndl));
|
||||||
|
|
||||||
/* the guest physical memory is the communication buffer */
|
/* the guest physical memory is the communication buffer */
|
||||||
com_hndl.buf = guest_mem;
|
com_hndl.buf = guest_mem;
|
||||||
com_hndl.mr_cnt = mem_chunk_cnt;
|
com_hndl.mr_cnt = mem_chunk_cnt;
|
||||||
|
|
||||||
struct ibv_device **device_list;
|
struct ibv_device **device_list = NULL;
|
||||||
struct ibv_device *ib_pp_device;
|
int num_devices = 0;
|
||||||
int num_devices;
|
bool active_port_found = false;
|
||||||
|
|
||||||
/* determine first available device */
|
/* determine first available device */
|
||||||
if ((device_list = ibv_get_device_list(&num_devices)) == NULL) {
|
if ((device_list = ibv_get_device_list(&num_devices)) == NULL) {
|
||||||
|
@ -151,34 +159,86 @@ init_com_hndl(size_t mem_chunk_cnt, mem_chunk_t *mem_chunks)
|
||||||
strerror(errno));
|
strerror(errno));
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
if (num_devices != 0) {
|
|
||||||
ib_pp_device = device_list[0];
|
/* find device with active port */
|
||||||
} else {
|
size_t cur_dev = 0;
|
||||||
fprintf(stderr,
|
for (cur_dev=0; cur_dev<num_devices; ++cur_dev){
|
||||||
"[ERROR] Could not find any IB device. Abort!\n");
|
/* open the device context */
|
||||||
exit(EXIT_FAILURE);
|
if ((com_hndl.ctx = ibv_open_device(device_list[cur_dev])) == NULL) {
|
||||||
}
|
fprintf(stderr,
|
||||||
|
"[ERROR] Could not open the device context "
|
||||||
/* open the device context and create protection domain */
|
"- %d (%s). Abort!\n",
|
||||||
if ((com_hndl.ctx = ibv_open_device(ib_pp_device)) == NULL) {
|
errno,
|
||||||
fprintf(stderr,
|
strerror(errno));
|
||||||
"[ERROR] Could not open the device context "
|
exit(EXIT_FAILURE);
|
||||||
"- %d (%s). Abort!\n",
|
}
|
||||||
errno,
|
|
||||||
strerror(errno));
|
/* query extended device capabilities (e.g., to check for ODP support */
|
||||||
exit(EXIT_FAILURE);
|
struct ibv_query_device_ex_input device_ex_input;
|
||||||
}
|
if (ibv_query_device_ex(com_hndl.ctx, &device_ex_input, &com_hndl.dev_attr_ex) < 0) {
|
||||||
|
fprintf(stderr,
|
||||||
/* query device capability (e.g., to determine 'max_qp_wr') */
|
"[ERROR] Could not query extended device attributes "
|
||||||
if (ibv_query_device(com_hndl.ctx, &com_hndl.dev_attr) < 0) {
|
"- %d (%s). Abort!\n",
|
||||||
fprintf(stderr,
|
errno,
|
||||||
"[ERROR] Could not query device attributes "
|
strerror(errno));
|
||||||
"- %d (%s). Abort!\n",
|
exit(EXIT_FAILURE);
|
||||||
errno,
|
}
|
||||||
strerror(errno));
|
|
||||||
|
/* determine port count via normal device query (necessary for mlx_5) */
|
||||||
|
if (ibv_query_device(com_hndl.ctx, &com_hndl.dev_attr_ex.orig_attr) < 0) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"[ERROR] Could not query normal device attributes "
|
||||||
|
"- %d (%s). Abort!\n",
|
||||||
|
errno,
|
||||||
|
strerror(errno));
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* check all ports */
|
||||||
|
size_t num_ports = com_hndl.dev_attr_ex.orig_attr.phys_port_cnt;
|
||||||
|
for (size_t cur_port=0; cur_port<=num_ports; ++cur_port) {
|
||||||
|
/* query current port */
|
||||||
|
if (ibv_query_port(com_hndl.ctx, cur_port, &com_hndl.port_attr) < 0){
|
||||||
|
fprintf(stderr,
|
||||||
|
"[ERROR] Could not query port %u "
|
||||||
|
"- %d (%s). Abort!\n",
|
||||||
|
cur_port,
|
||||||
|
errno,
|
||||||
|
strerror(errno));
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (com_hndl.port_attr.state == IBV_PORT_ACTIVE) {
|
||||||
|
active_port_found = 1;
|
||||||
|
com_hndl.used_port = cur_port;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* close this device if no active port was found */
|
||||||
|
if (!active_port_found) {
|
||||||
|
if (ibv_close_device(com_hndl.ctx) < 0) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"[ERROR] Could not close the device context "
|
||||||
|
"- %d (%s). Abort!\n",
|
||||||
|
errno,
|
||||||
|
strerror(errno));
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!active_port_found) {
|
||||||
|
fprintf(stderr, "[ERROR] No active port found. Abort!\n");
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fprintf(stderr, "[INFO] Using device '%s' and port %u\n",
|
||||||
|
ibv_get_device_name(device_list[cur_dev]),
|
||||||
|
com_hndl.used_port);
|
||||||
/* allocate protection domain */
|
/* allocate protection domain */
|
||||||
if ((com_hndl.pd = ibv_alloc_pd(com_hndl.ctx)) == NULL) {
|
if ((com_hndl.pd = ibv_alloc_pd(com_hndl.ctx)) == NULL) {
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
|
@ -189,27 +249,22 @@ init_com_hndl(size_t mem_chunk_cnt, mem_chunk_t *mem_chunks)
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* determine LID */
|
|
||||||
if (ibv_query_port(com_hndl.ctx, IB_USED_PORT, &com_hndl.port_attr) < 0){
|
|
||||||
fprintf(stderr,
|
|
||||||
"[ERROR] Could not query port %u "
|
|
||||||
"- %d (%s). Abort!\n",
|
|
||||||
IB_USED_PORT,
|
|
||||||
errno,
|
|
||||||
strerror(errno));
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* register guest memory chunks with the protection domain */
|
/* register guest memory chunks with the protection domain */
|
||||||
int i = 0;
|
int i = 0;
|
||||||
com_hndl.mrs = (struct ibv_mr**)malloc(sizeof(struct ibv_mr*)*com_hndl.mr_cnt);
|
com_hndl.mrs = (struct ibv_mr**)malloc(sizeof(struct ibv_mr*)*com_hndl.mr_cnt);
|
||||||
|
|
||||||
|
int access_flags = (IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE);
|
||||||
|
if ((IB_USE_ODP) &&
|
||||||
|
(com_hndl.dev_attr_ex.odp_caps.general_caps & IBV_ODP_SUPPORT) &&
|
||||||
|
(com_hndl.dev_attr_ex.odp_caps.per_transport_caps.rc_odp_caps & IBV_ODP_SUPPORT_WRITE)) {
|
||||||
|
access_flags |= IBV_ACCESS_ON_DEMAND;
|
||||||
|
}
|
||||||
|
|
||||||
for (i=0; i<com_hndl.mr_cnt; ++i) {
|
for (i=0; i<com_hndl.mr_cnt; ++i) {
|
||||||
if ((com_hndl.mrs[i] = ibv_reg_mr(com_hndl.pd,
|
if ((com_hndl.mrs[i] = ibv_reg_mr(com_hndl.pd,
|
||||||
mem_chunks[i].ptr,
|
mem_chunks[i].ptr,
|
||||||
mem_chunks[i].size,
|
mem_chunks[i].size,
|
||||||
IBV_ACCESS_LOCAL_WRITE |
|
access_flags)) == NULL) {
|
||||||
IBV_ACCESS_REMOTE_WRITE)) == NULL) {
|
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
"[ERROR] Could not register the memory region #%d (ptr: %llx; size: %llu) "
|
"[ERROR] Could not register the memory region #%d (ptr: %llx; size: %llu) "
|
||||||
"- %d (%s). Abort!\n",
|
"- %d (%s). Abort!\n",
|
||||||
|
@ -257,7 +312,7 @@ init_com_hndl(size_t mem_chunk_cnt, mem_chunk_t *mem_chunks)
|
||||||
.send_cq = com_hndl.cq,
|
.send_cq = com_hndl.cq,
|
||||||
.recv_cq = com_hndl.cq,
|
.recv_cq = com_hndl.cq,
|
||||||
.cap = {
|
.cap = {
|
||||||
.max_send_wr = com_hndl.dev_attr.max_qp_wr,
|
.max_send_wr = IB_MAX_SEND_WR,
|
||||||
.max_recv_wr = IB_MAX_RECV_WR,
|
.max_recv_wr = IB_MAX_RECV_WR,
|
||||||
.max_send_sge = IB_MAX_SEND_SGE,
|
.max_send_sge = IB_MAX_SEND_SGE,
|
||||||
.max_recv_sge = IB_MAX_RECV_SGE,
|
.max_recv_sge = IB_MAX_RECV_SGE,
|
||||||
|
@ -278,7 +333,7 @@ init_com_hndl(size_t mem_chunk_cnt, mem_chunk_t *mem_chunks)
|
||||||
struct ibv_qp_attr attr = {
|
struct ibv_qp_attr attr = {
|
||||||
.qp_state = IBV_QPS_INIT,
|
.qp_state = IBV_QPS_INIT,
|
||||||
.pkey_index = 0,
|
.pkey_index = 0,
|
||||||
.port_num = IB_USED_PORT,
|
.port_num = com_hndl.used_port,
|
||||||
.qp_access_flags = (IBV_ACCESS_REMOTE_WRITE)
|
.qp_access_flags = (IBV_ACCESS_REMOTE_WRITE)
|
||||||
};
|
};
|
||||||
if (ibv_modify_qp(com_hndl.qp,
|
if (ibv_modify_qp(com_hndl.qp,
|
||||||
|
@ -406,7 +461,7 @@ con_com_buf(void) {
|
||||||
.sl = 0,
|
.sl = 0,
|
||||||
.src_path_bits = 0,
|
.src_path_bits = 0,
|
||||||
.dlid = com_hndl.rem_qp_info.lid,
|
.dlid = com_hndl.rem_qp_info.lid,
|
||||||
.port_num = IB_USED_PORT,
|
.port_num = com_hndl.used_port,
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
if (ibv_modify_qp(com_hndl.qp,
|
if (ibv_modify_qp(com_hndl.qp,
|
||||||
|
@ -581,7 +636,7 @@ create_send_list_entry (void *addr, size_t addr_size, void *page, size_t page_si
|
||||||
send_list_last = send_list_last->next;
|
send_list_last = send_list_last->next;
|
||||||
}
|
}
|
||||||
/* we have to request a CQE if max_send_wr is reached to avoid overflows */
|
/* we have to request a CQE if max_send_wr is reached to avoid overflows */
|
||||||
if ((++send_list_length%com_hndl.dev_attr.max_qp_wr) == 0) {
|
if ((++send_list_length%com_hndl.dev_attr_ex.orig_attr.max_qp_wr) == 0) {
|
||||||
send_list_last->send_flags = IBV_SEND_SIGNALED;
|
send_list_last->send_flags = IBV_SEND_SIGNALED;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -9,6 +9,7 @@ add_executable(argv_envp argv_envp.c)
|
||||||
add_executable(hello++ hello++.cpp)
|
add_executable(hello++ hello++.cpp)
|
||||||
add_executable(hellof hellof.f90)
|
add_executable(hellof hellof.f90)
|
||||||
add_executable(pi pi.go)
|
add_executable(pi pi.go)
|
||||||
|
add_executable(allocator allocator.c)
|
||||||
|
|
||||||
add_executable(endless endless.c)
|
add_executable(endless endless.c)
|
||||||
target_compile_options(endless PRIVATE -fopenmp)
|
target_compile_options(endless PRIVATE -fopenmp)
|
||||||
|
|
74
usr/tests/allocator.c
Normal file
74
usr/tests/allocator.c
Normal file
|
@ -0,0 +1,74 @@
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <hermit/syscall.h>
|
||||||
|
|
||||||
|
static
|
||||||
|
uint64_t memparse(const char *ptr)
|
||||||
|
{
|
||||||
|
// local pointer to end of parsed string
|
||||||
|
char *endptr;
|
||||||
|
|
||||||
|
// parse number
|
||||||
|
uint64_t size = strtoull(ptr, &endptr, 0);
|
||||||
|
|
||||||
|
// parse size extension, intentional fall-through
|
||||||
|
switch (*endptr) {
|
||||||
|
case 'E':
|
||||||
|
case 'e':
|
||||||
|
size <<= 10;
|
||||||
|
case 'P':
|
||||||
|
case 'p':
|
||||||
|
size <<= 10;
|
||||||
|
case 'T':
|
||||||
|
case 't':
|
||||||
|
size <<= 10;
|
||||||
|
case 'G':
|
||||||
|
case 'g':
|
||||||
|
size <<= 10;
|
||||||
|
case 'M':
|
||||||
|
case 'm':
|
||||||
|
size <<= 10;
|
||||||
|
case 'K':
|
||||||
|
case 'k':
|
||||||
|
size <<= 10;
|
||||||
|
endptr++;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return size;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char **argv) {
|
||||||
|
if (argc != 2) {
|
||||||
|
fprintf(stderr, "ERROR: You have to specify the size of the memory region to be allocated!\n");
|
||||||
|
exit(-1);
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t alloc_size = memparse(argv[1]);
|
||||||
|
printf("Allocating %llu bytes ...\n", alloc_size);
|
||||||
|
char *mem_reg = (char*)malloc(alloc_size);
|
||||||
|
if (mem_reg == NULL) {
|
||||||
|
fprintf(stderr, "ERROR: Could not allocate %llu bytes of memory!\n", alloc_size);
|
||||||
|
exit(-1);
|
||||||
|
}
|
||||||
|
memset(mem_reg, 0x42, alloc_size);
|
||||||
|
printf("Allocated and mapped!\n");
|
||||||
|
|
||||||
|
uint32_t cnt = 0;
|
||||||
|
while(1) {
|
||||||
|
uint64_t i;
|
||||||
|
for (i=0; i<alloc_size; ++i) {
|
||||||
|
if (mem_reg[i] != 0x42) {
|
||||||
|
fprintf(stderr, "ERROR: mem_reg[%llu] = 0x%x != 0x42!\n", i, mem_reg[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
printf("Sleeping ...");
|
||||||
|
fflush(stdout);
|
||||||
|
sys_msleep(500);
|
||||||
|
printf("%d!\n", cnt++);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
Loading…
Add table
Reference in a new issue