mirror of
https://git.rwth-aachen.de/acs/public/villas/node/
synced 2025-03-09 00:00:00 +01:00
This commit should be able to create a connection between two nodes.
This commit isn't checked for bugs and no memory management is done. Furthermore, no poll threads are implemented.
This commit is contained in:
parent
ae22048e0d
commit
a0f0410e73
3 changed files with 230 additions and 45 deletions
|
@ -38,15 +38,26 @@
|
|||
/* Forward declarations */
|
||||
struct format_type;
|
||||
|
||||
enum poll_mode_e
|
||||
{
|
||||
EVENT,
|
||||
BUSY
|
||||
};
|
||||
|
||||
struct infiniband {
|
||||
struct rdma_cm_id *id;
|
||||
struct rdma_event_channel *ec;
|
||||
|
||||
struct ibv_pd *pd;
|
||||
struct ibv_cq *cq;
|
||||
struct ibv_comp_channel *comp_channel;
|
||||
struct context_s {
|
||||
struct ibv_pd *pd;
|
||||
struct ibv_cq *cq;
|
||||
struct ibv_comp_channel *comp_channel;
|
||||
} ctx;
|
||||
|
||||
pthread_t cq_poller_thread;
|
||||
struct poll_s {
|
||||
enum poll_mode_e poll_mode;
|
||||
pthread_t cq_poller_thread;
|
||||
} poll;
|
||||
|
||||
struct connection_s {
|
||||
struct addrinfo *src_addr;
|
||||
|
@ -54,13 +65,18 @@ struct infiniband {
|
|||
const int timeout;
|
||||
enum rdma_port_space port_space;
|
||||
|
||||
struct ibv_qp *qp;
|
||||
struct ibv_mr *mr_payload;
|
||||
struct r_addr_key_s *r_addr_key;
|
||||
} conn;
|
||||
|
||||
int is_source;
|
||||
struct init_s {
|
||||
int cq_size;
|
||||
enum ibv_qp_type qp_type;
|
||||
int max_send_wr;
|
||||
int max_recv_wr;
|
||||
} init;
|
||||
|
||||
int is_source;
|
||||
};
|
||||
|
||||
/** @see node_type::reverse */
|
||||
|
|
|
@ -28,38 +28,169 @@
|
|||
#include <villas/format_type.h>
|
||||
#include <rdma/rdma_cma.h>
|
||||
|
||||
static int infiniband_addr_resolved(struct rdma_cm_id *id)
|
||||
static void ib_create_busy_poll(struct node *n, struct rdma_cm_id *id)
|
||||
{
|
||||
struct infiniband *ib = (struct infiniband *) n->_vd;
|
||||
|
||||
// Create completion queue and bind to channel
|
||||
ib->ctx.cq = ibv_create_cq(ib->id->verbs, ib->init.cq_size, NULL, NULL, 0);
|
||||
if(!ib->ctx.cq)
|
||||
error("Could not create completion queue in node %s.", node_name(n));
|
||||
|
||||
//ToDo: Create poll pthread
|
||||
}
|
||||
|
||||
static void ib_create_event(struct node *n, struct rdma_cm_id *id)
|
||||
{
|
||||
int ret;
|
||||
struct infiniband *ib = (struct infiniband *) n->_vd;
|
||||
|
||||
// Create completion channel
|
||||
ib->ctx.comp_channel = ibv_create_comp_channel(ib->id->verbs);
|
||||
if(!ib->ctx.comp_channel)
|
||||
error("Could not create completion channel in node %s.", node_name(n));
|
||||
|
||||
// Create completion queue and bind to channel
|
||||
ib->ctx.cq = ibv_create_cq(ib->id->verbs,
|
||||
ib->init.cq_size,
|
||||
NULL,
|
||||
ib->ctx.comp_channel,
|
||||
0);
|
||||
if(!ib->ctx.cq)
|
||||
error("Could not create completion queue in node %s.", node_name(n));
|
||||
|
||||
// Request notifications from completion queue
|
||||
ret = ibv_req_notify_cq(ib->ctx.cq, 0);
|
||||
if(ret)
|
||||
error("Failed to request notifiy CQ in node %s: %s",
|
||||
node_name(n), gai_strerror(ret));
|
||||
|
||||
//ToDo: Create poll pthread
|
||||
}
|
||||
|
||||
static void ib_build_ibv(struct node *n, struct rdma_cm_id *id)
|
||||
{
|
||||
struct infiniband *ib = (struct infiniband *) n->_vd;
|
||||
int ret;
|
||||
|
||||
//Allocate protection domain
|
||||
ib->ctx.pd = ibv_alloc_pd(ib->id->verbs);
|
||||
if(!ib->ctx.pd)
|
||||
error("Could not allocate protection domain in node %s.", node_name(n));
|
||||
|
||||
// Initiate poll mode
|
||||
switch(ib->poll.poll_mode)
|
||||
{
|
||||
case EVENT:
|
||||
ib_create_event(n, id);
|
||||
break;
|
||||
case BUSY:
|
||||
ib_create_busy_poll(n, id);
|
||||
break;
|
||||
}
|
||||
|
||||
// Prepare Queue Pair (QP) attributes
|
||||
struct ibv_qp_init_attr qp_attr;
|
||||
qp_attr.send_cq = ib->ctx.cq;
|
||||
qp_attr.recv_cq = ib->ctx.cq;
|
||||
qp_attr.qp_type = ib->init.qp_type;
|
||||
|
||||
qp_attr.cap.max_send_wr = ib->init.max_send_wr;
|
||||
qp_attr.cap.max_recv_wr = ib->init.max_recv_wr;
|
||||
qp_attr.cap.max_send_sge = 1;
|
||||
qp_attr.cap.max_recv_sge = 1;
|
||||
|
||||
//ToDo: Set maximum inline data
|
||||
|
||||
// Create the actual QP
|
||||
ret = rdma_create_qp(id, ib->ctx.pd, &qp_attr);
|
||||
if(ret)
|
||||
error("Failed to create Queue Pair in node %s.", node_name(n));
|
||||
|
||||
info("Successfully created Queue Pair in node %s.", node_name(n));
|
||||
}
|
||||
|
||||
static int ib_addr_resolved(struct node *n, struct rdma_cm_id *id)
|
||||
{
|
||||
struct infiniband *ib = (struct infiniband *) n->_vd;
|
||||
int ret;
|
||||
|
||||
// Build all components from IB Verbs
|
||||
ib_build_ibv(n, id);
|
||||
|
||||
// Resolve address
|
||||
ret = rdma_resolve_route(id, ib->conn.timeout);
|
||||
if(ret)
|
||||
error("Failed to resolve route in node %s.", node_name(n));
|
||||
|
||||
info("Successfully resolved address node %s", node_name(n));
|
||||
|
||||
//ToDo: create check if data can be send inline
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int infiniband_route_resolved(struct rdma_cm_id *id)
|
||||
static int ib_route_resolved(struct node *n, struct rdma_cm_id *id)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ib_build_ibv(n, id);
|
||||
|
||||
//ToDo: Post receive WRs
|
||||
|
||||
struct rdma_conn_param cm_params;
|
||||
memset(&cm_params, 0, sizeof(cm_params));
|
||||
|
||||
// Send connection request
|
||||
ret = rdma_connect(id, &cm_params);
|
||||
if(ret)
|
||||
error("Failed to connect in node %s.", node_name(n));
|
||||
|
||||
info("Route resolved and called rdma_connect");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int infiniband_connect_request(struct rdma_cm_id *id)
|
||||
static int ib_connect_request(struct node *n, struct rdma_cm_id *id)
|
||||
{
|
||||
int ret;
|
||||
info("Received a connection request!");
|
||||
|
||||
ib_build_ibv(n, id);
|
||||
|
||||
//ToDo: Post receive WRs
|
||||
|
||||
struct rdma_conn_param cm_params;
|
||||
memset(&cm_params, 0, sizeof(cm_params));
|
||||
|
||||
// Accept connection request
|
||||
ret = rdma_accept(id, &cm_params);
|
||||
if(ret)
|
||||
error("Failed to connect in node %s.", node_name(n));
|
||||
|
||||
info("Successfully accepted connection request.");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int infiniband_event(struct rdma_cm_event *event)
|
||||
static int ib_event(struct node *n, struct rdma_cm_event *event)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
switch(event->event)
|
||||
{
|
||||
case RDMA_CM_EVENT_ADDR_RESOLVED:
|
||||
ret = infiniband_addr_resolved(event->id);
|
||||
ret = ib_addr_resolved(n, event->id);
|
||||
break;
|
||||
case RDMA_CM_EVENT_ADDR_ERROR:
|
||||
error("Address resolution (rdma_resolve_addr) failed!");
|
||||
case RDMA_CM_EVENT_ROUTE_RESOLVED:
|
||||
ret = infiniband_route_resolved(event->id);
|
||||
ret = ib_route_resolved(n, event->id);
|
||||
break;
|
||||
case RDMA_CM_EVENT_ROUTE_ERROR:
|
||||
error("Route resolution (rdma_resovle_route) failed!");
|
||||
case RDMA_CM_EVENT_CONNECT_REQUEST:
|
||||
ret = ib_connect_request(n, event->id);
|
||||
break;
|
||||
case RDMA_CM_EVENT_CONNECT_ERROR:
|
||||
error("An error has occurred trying to establish a connection!");
|
||||
|
@ -76,27 +207,38 @@ static int infiniband_event(struct rdma_cm_event *event)
|
|||
return ret;
|
||||
}
|
||||
|
||||
int infiniband_reverse(struct node *n)
|
||||
int ib_reverse(struct node *n)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
int infiniband_parse(struct node *n, json_t *cfg)
|
||||
int ib_parse(struct node *n, json_t *cfg)
|
||||
{
|
||||
struct infiniband *ib = (struct infiniband *) n->_vd;
|
||||
|
||||
int ret;
|
||||
const char *local = NULL;
|
||||
const char *remote = NULL;
|
||||
const char *port_space = NULL;
|
||||
const int timeout;
|
||||
const char *port_space = "RDMA_PC_TCP";
|
||||
const char *poll_mode = "BUSY";
|
||||
const char *qp_type = "IBV_QPT_RC";
|
||||
int timeout = 1000;
|
||||
int cq_size = 10;
|
||||
int max_send_wr = 100;
|
||||
int max_recv_wr = 100;
|
||||
|
||||
json_error_t err;
|
||||
ret = json_unpack_ex(cfg, &err, 0, "{ s?: s, s?: s, s?: s, s?: i}",
|
||||
ret = json_unpack_ex(cfg, &err, 0, "{ s?: s, s?: s, s?: s, s?: i, \
|
||||
s?: s, s?: i, s?: s, s?: i, s?: i}",
|
||||
"remote", &remote,
|
||||
"local", &local,
|
||||
"rdma_port_space", &port_space,
|
||||
"resolution_timeout", &timeout
|
||||
"resolution_timeout", &timeout,
|
||||
"poll_mode", &poll_mode,
|
||||
"cq_size", &cq_size,
|
||||
"qp_type", &qp_type,
|
||||
"max_send_wr", &max_send_wr,
|
||||
"max_recv_wr", &max_recv_wr
|
||||
);
|
||||
if(ret)
|
||||
jerror(&err, "Failed to parse configuration of node %s", node_name(n));
|
||||
|
@ -108,7 +250,7 @@ int infiniband_parse(struct node *n, json_t *cfg)
|
|||
local, node_name(n), gai_strerror(ret));
|
||||
}
|
||||
|
||||
// Translate port space and create rdma_cm_id object
|
||||
// Translate port space
|
||||
if(strcmp(port_space, "RDMA_PS_IPOIB") == 0) ib->conn.port_space = RDMA_PS_IPOIB;
|
||||
else if(strcmp(port_space, "RDMA_PS_TCP") == 0) ib->conn.port_space = RDMA_PS_TCP;
|
||||
else if(strcmp(port_space, "RDMA_PS_UDP") == 0) ib->conn.port_space = RDMA_PS_UDP;
|
||||
|
@ -117,7 +259,31 @@ int infiniband_parse(struct node *n, json_t *cfg)
|
|||
error("Failed to translate rdma_port_space in node %s. %s is not a valid \
|
||||
port space supported by rdma_cma.h!", node_name(n), port_space);
|
||||
}
|
||||
|
||||
// Translate poll mode
|
||||
if(strcmp(poll_mode, "EVENT") == 0) ib->poll.poll_mode = EVENT;
|
||||
else if(strcmp(poll_mode, "BUSY") == 0) ib->poll.poll_mode = BUSY;
|
||||
else {
|
||||
error("Failed to translate poll_mode in node %s. %s is not a valid \
|
||||
poll mode!", node_name(n), poll_mode);
|
||||
}
|
||||
|
||||
// Set completion queue size
|
||||
ib->init.cq_size = cq_size;
|
||||
|
||||
// Translate QP type
|
||||
if(strcmp(qp_type, "IBV_QPT_RC") == 0) ib->init.qp_type = IBV_QPT_RC;
|
||||
else if(strcmp(qp_type, "IBV_QPT_UC") == 0) ib->init.qp_type = IBV_QPT_UC;
|
||||
else if(strcmp(qp_type, "IBV_QPT_UD") == 0) ib->init.qp_type = IBV_QPT_UD;
|
||||
else {
|
||||
error("Failed to translate qp_type in node %s. %s is not a valid \
|
||||
qp_type!", node_name(n), qp_type);
|
||||
}
|
||||
|
||||
//Set max. send and receive Work Requests
|
||||
ib->init.max_send_wr = max_send_wr;
|
||||
ib->init.max_recv_wr = max_recv_wr;
|
||||
|
||||
//Check if node is a source and connect to target
|
||||
if(remote)
|
||||
{
|
||||
|
@ -137,17 +303,17 @@ int infiniband_parse(struct node *n, json_t *cfg)
|
|||
return 0;
|
||||
}
|
||||
|
||||
char * infiniband_print(struct node *n)
|
||||
char * ib_print(struct node *n)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
int infiniband_destroy(struct node *n)
|
||||
int ib_destroy(struct node *n)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
int infiniband_start(struct node *n)
|
||||
int ib_start(struct node *n)
|
||||
{
|
||||
struct infiniband *ib = (struct infiniband *) n->_vd;
|
||||
struct rdma_cm_event *event = NULL;
|
||||
|
@ -180,7 +346,10 @@ int infiniband_start(struct node *n)
|
|||
if(ib->is_source)
|
||||
{
|
||||
// Resolve address
|
||||
ret = rdma_resolve_addr(ib->id, NULL, ib->conn.dst_addr->ai_addr, ib->conn.timeout);
|
||||
ret = rdma_resolve_addr(ib->id,
|
||||
NULL,
|
||||
ib->conn.dst_addr->ai_addr,
|
||||
ib->conn.timeout);
|
||||
if(ret) {
|
||||
error("Failed to resolve remote address after %ims of node %s: %s",
|
||||
ib->conn.timeout, node_name(n), gai_strerror(ret));
|
||||
|
@ -190,68 +359,70 @@ int infiniband_start(struct node *n)
|
|||
|
||||
// Several events should occur on the event channel, to make
|
||||
// sure the nodes are succesfully connected.
|
||||
info("Starting to monitor events on rdma_cm_id.\n");
|
||||
info("Starting to monitor events on rdma_cm_id on node %s.",
|
||||
node_name(n));
|
||||
|
||||
while(rdma_get_cm_event(ib->ec, &event) == 0)
|
||||
{
|
||||
struct rdma_cm_event event_copy;
|
||||
memcpy(&event_copy, event, sizeof(*event));
|
||||
|
||||
if(infiniband_event(&event_copy))
|
||||
if(ib_event(n, &event_copy))
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int infiniband_stop(struct node *n)
|
||||
int ib_stop(struct node *n)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
int infiniband_init(struct super_node *n)
|
||||
int ib_init(struct super_node *n)
|
||||
{
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int infiniband_deinit()
|
||||
int ib_deinit()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
int infiniband_read(struct node *n, struct sample *smps[], unsigned cnt)
|
||||
int ib_read(struct node *n, struct sample *smps[], unsigned cnt)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
int infiniband_write(struct node *n, struct sample *smps[], unsigned cnt)
|
||||
int ib_write(struct node *n, struct sample *smps[], unsigned cnt)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
int infiniband_fd(struct node *n)
|
||||
int ib_fd(struct node *n)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct plugin p = {
|
||||
.name = "infiniband",
|
||||
.description = "Infiniband)",
|
||||
.description = "Infiniband",
|
||||
.type = PLUGIN_TYPE_NODE,
|
||||
.node = {
|
||||
.vectorize = 0,
|
||||
.size = sizeof(struct infiniband),
|
||||
.reverse = infiniband_reverse,
|
||||
.parse = infiniband_parse,
|
||||
.print = infiniband_print,
|
||||
.start = infiniband_start,
|
||||
.destroy = infiniband_destroy,
|
||||
.stop = infiniband_stop,
|
||||
.init = infiniband_init,
|
||||
.deinit = infiniband_deinit,
|
||||
.read = infiniband_read,
|
||||
.write = infiniband_write,
|
||||
.fd = infiniband_fd
|
||||
.reverse = ib_reverse,
|
||||
.parse = ib_parse,
|
||||
.print = ib_print,
|
||||
.start = ib_start,
|
||||
.destroy = ib_destroy,
|
||||
.stop = ib_stop,
|
||||
.init = ib_init,
|
||||
.deinit = ib_deinit,
|
||||
.read = ib_read,
|
||||
.write = ib_write,
|
||||
.fd = ib_fd
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -101,8 +101,6 @@ RUN dnf -y install \
|
|||
libibumad-devel \
|
||||
perftest
|
||||
|
||||
|
||||
|
||||
# Build & Install Criterion
|
||||
RUN cd /tmp && \
|
||||
git clone --recursive https://github.com/Snaipe/Criterion && \
|
||||
|
|
Loading…
Add table
Reference in a new issue