1
0
Fork 0
mirror of https://git.rwth-aachen.de/acs/public/villas/node/ synced 2025-03-09 00:00:00 +01:00

Implemented request to resolve address and built an rdma_cm_event framework

This commit is contained in:
Dennis Potter 2018-06-22 13:02:41 +02:00
parent 1528603a88
commit 4220ff8111
2 changed files with 149 additions and 11 deletions

View file

@ -40,6 +40,8 @@ struct format_type;
/** Per-node state of the infiniband node-type. */
struct infiniband {
/* Communication identifier; created by rdma_create_id() in infiniband_start(). */
struct rdma_cm_id *id;
/* Event channel created in infiniband_start(); connection manager events are read from it. */
struct rdma_event_channel *ec;
/* pd, cq and comp_channel are not referenced by the code visible in this change. */
struct ibv_pd *pd;
struct ibv_cq *cq;
struct ibv_comp_channel *comp_channel;
@ -47,14 +49,18 @@ struct infiniband {
/* Presumably the thread polling cq -- TODO confirm, it is not used in the visible code. */
pthread_t cq_poller_thread;
/* Connection settings and per-connection resources. */
struct connection_s {
/* Textual addresses; not referenced by the code visible in this change. */
char *src_ip_addr;
char *dst_ip_addr;
/* Result of getaddrinfo() on the "local" setting; infiniband_start() binds to its ai_addr. */
struct addrinfo *src_addr;
/* Result of getaddrinfo() on the "remote" setting; only filled in when "remote" is configured. */
struct addrinfo *dst_addr;
/*
 * Timeout handed to rdma_resolve_addr(); logged with an "ms" suffix.
 * NOTE(review): being const, this member cannot be assigned after the struct
 * has been created, and no visible code stores the parsed
 * "resolution_timeout" here -- consider dropping the qualifier.
 */
const int timeout;
/* Port space selected by the "rdma_port_space" setting; passed to rdma_create_id(). */
enum rdma_port_space port_space;
/* qp, mr_payload and r_addr_key are not referenced by the code visible in this change. */
struct ibv_qp *qp;
struct ibv_mr *mr_payload;
struct r_addr_key_s *r_addr_key;
} conn;
/* 1 if a "remote" address is configured (the node connects to it), 0 otherwise. */
int is_source;
};
/** @see node_type::reverse */
@ -76,7 +82,7 @@ int infiniband_destroy(struct node *n);
int infiniband_stop(struct node *n);
/** @see node_type::init */
int infiniband_init();
int infiniband_init(struct super_node *n);
/** @see node_type::deinit */
int infiniband_deinit();

View file

@ -26,25 +26,54 @@
#include <errno.h>
#include <string.h>
#include <villas/plugin.h>
#include <villas/utils.h>
#include <villas/format_type.h>
#include <rdma/rdma_cma.h>
static void infiniband_log_cb(struct infiniband *ib, void *userdata, int level, const char *str)
/**
 * Handler invoked for RDMA_CM_EVENT_ADDR_RESOLVED events.
 *
 * Currently a stub that ignores its argument.
 *
 * @param id Communication identifier taken from the event.
 * @return Always 0.
 */
static int infiniband_addr_resolved(struct rdma_cm_id *id)
{
	(void) id; /* not used yet */

	return 0;
}
static void infiniband_connect_cb(struct infiniband *ib, void *userdata, int result)
/**
 * Handler invoked for RDMA_CM_EVENT_ROUTE_RESOLVED events.
 *
 * Currently a stub that ignores its argument.
 *
 * @param id Communication identifier taken from the event.
 * @return Always 0.
 */
static int infiniband_route_resolved(struct rdma_cm_id *id)
{
	(void) id; /* not used yet */

	return 0;
}
static void infiniband_disconnect_cb(struct infiniband *ib, void *userdata, int result)
/**
 * Placeholder handler for connection requests.
 *
 * Currently a stub that ignores its argument; it is not called by the code
 * visible in this file section.
 *
 * @param id Communication identifier taken from the event.
 * @return Always 0.
 */
static int infiniband_connect_request(struct rdma_cm_id *id)
{
	(void) id; /* not used yet */

	return 0;
}
static void infiniband_message_cb(struct infiniband *ib, void *userdata)
static int infiniband_event(struct rdma_cm_event *event)
{
}
int ret = 0;
static void infiniband_subscribe_cb(struct infiniband *ib, void *userdata, int mid, int qos_count, const int *granted_qos)
{
switch(event->event)
{
case RDMA_CM_EVENT_ADDR_RESOLVED:
ret = infiniband_addr_resolved(event->id);
break;
case RDMA_CM_EVENT_ADDR_ERROR:
error("Address resolution (rdma_resolve_addr) failed!");
case RDMA_CM_EVENT_ROUTE_RESOLVED:
ret = infiniband_route_resolved(event->id);
break;
case RDMA_CM_EVENT_ROUTE_ERROR:
error("Route resolution (rdma_resovle_route) failed!");
case RDMA_CM_EVENT_CONNECT_REQUEST:
break;
case RDMA_CM_EVENT_CONNECT_ERROR:
error("An error has occurred trying to establish a connection!");
case RDMA_CM_EVENT_REJECTED:
error("Connection request or response was rejected by the remote end point!");
case RDMA_CM_EVENT_ESTABLISHED:
ret = 1;
break;
default:
error("Unknown event occurred: %u",
event->event);
}
return ret;
}
int infiniband_reverse(struct node *n)
@ -54,6 +83,57 @@ int infiniband_reverse(struct node *n)
/**
 * Parse the JSON configuration of an infiniband node.
 *
 * Settings read from @p cfg:
 *  - "local":              address the node binds to (required)
 *  - "remote":             address to connect to; if present the node is a source
 *  - "rdma_port_space":    "RDMA_PS_IPOIB", "RDMA_PS_TCP", "RDMA_PS_UDP" or
 *                          "RDMA_PS_IB" (required)
 *  - "resolution_timeout": timeout in ms handed to rdma_resolve_addr()
 *
 * @param n   Node whose private data (n->_vd) is filled in.
 * @param cfg JSON object holding the settings above.
 * @return 0 on success, -1 on failure (only reached if error()/jerror() return).
 */
int infiniband_parse(struct node *n, json_t *cfg)
{
	// Used when "resolution_timeout" is not configured
	enum { DEFAULT_RESOLUTION_TIMEOUT_MS = 1000 };

	struct infiniband *ib = (struct infiniband *) n->_vd;

	int ret;
	const char *local = NULL;
	const char *remote = NULL;
	const char *port_space = NULL;

	// Must be a writable int: json_unpack_ex() stores the value through the pointer
	int timeout = DEFAULT_RESOLUTION_TIMEOUT_MS;

	json_error_t err;
	ret = json_unpack_ex(cfg, &err, 0, "{ s?: s, s?: s, s?: s, s?: i}",
		"remote", &remote,
		"local", &local,
		"rdma_port_space", &port_space,
		"resolution_timeout", &timeout
	);
	if(ret) {
		jerror(&err, "Failed to parse configuration of node %s", node_name(n));
		return -1;
	}

	// All keys are optional for the unpacker, but these two are used
	// unconditionally below and must not be NULL.
	if(!local) {
		error("Missing setting 'local' in node %s", node_name(n));
		return -1;
	}

	if(!port_space) {
		error("Missing setting 'rdma_port_space' in node %s", node_name(n));
		return -1;
	}

	// conn.timeout is declared `const int`, so it cannot be assigned directly.
	// NOTE(review): writing through a non-const lvalue assumes the instance
	// behind n->_vd is not itself defined const (presumably allocated
	// storage -- confirm). Preferably drop the qualifier from the member.
	*(int *) &ib->conn.timeout = timeout;

	// Translate IP:PORT to a struct addrinfo
	ret = getaddrinfo(local, NULL, NULL, &ib->conn.src_addr);
	if(ret) {
		error("Failed to resolve local address '%s' of node %s: %s",
			local, node_name(n), gai_strerror(ret));
		return -1;
	}

	// Translate port space
	if(strcmp(port_space, "RDMA_PS_IPOIB") == 0)
		ib->conn.port_space = RDMA_PS_IPOIB;
	else if(strcmp(port_space, "RDMA_PS_TCP") == 0)
		ib->conn.port_space = RDMA_PS_TCP;
	else if(strcmp(port_space, "RDMA_PS_UDP") == 0)
		ib->conn.port_space = RDMA_PS_UDP;
	else if(strcmp(port_space, "RDMA_PS_IB") == 0)
		ib->conn.port_space = RDMA_PS_IB;
	else {
		error("Failed to translate rdma_port_space in node %s. %s is not a valid "
			"port space supported by rdma_cma.h!", node_name(n), port_space);
		return -1;
	}

	// A configured remote address makes this node a source which connects to the target
	ib->is_source = remote ? 1 : 0;

	if(ib->is_source) {
		// Translate address info
		ret = getaddrinfo(remote, NULL, NULL, &ib->conn.dst_addr);
		if(ret) {
			error("Failed to resolve remote address '%s' of node %s: %s",
				remote, node_name(n), gai_strerror(ret));
			return -1;
		}
	}

	return 0;
}
@ -69,6 +149,57 @@ int infiniband_destroy(struct node *n)
int infiniband_start(struct node *n)
{
struct infiniband *ib = (struct infiniband *) n->_vd;
struct rdma_cm_event *event = NULL;
int ret;
// Create event channel
ib->ec = rdma_create_event_channel();
if(!ib->ec) {
error("Failed to create event channel in node %s!",
node_name(n));
}
ret = rdma_create_id(ib->ec, &ib->id, NULL, ib->conn.port_space);
if(ret) {
error("Failed to create rdma_cm_id of node %s: %s",
node_name(n), gai_strerror(ret));
}
info("Succesfully created CM RDMA ID of node %s",
node_name(n));
// Bind rdma_cm_id to the HCA
ret = rdma_bind_addr(ib->id, ib->conn.src_addr->ai_addr);
if(ret) {
error("Failed to bind to local device of node %s: %s",
node_name(n), gai_strerror(ret));
}
info("Bound to Infiniband device of node %s",
node_name(n));
if(ib->is_source)
{
// Resolve address
ret = rdma_resolve_addr(ib->id, NULL, ib->conn.dst_addr->ai_addr, ib->conn.timeout);
if(ret) {
error("Failed to resolve remote address after %ims of node %s: %s",
ib->conn.timeout, node_name(n), gai_strerror(ret));
}
}
// Several events should occur on the event channel, to make
// sure the nodes are succesfully connected.
info("Starting to monitor events on rdma_cm_id.\n");
while(rdma_get_cm_event(ib->ec, &event) == 0)
{
struct rdma_cm_event event_copy;
memcpy(&event_copy, event, sizeof(*event));
if(infiniband_event(&event_copy))
break;
}
return 0;
}
@ -77,8 +208,9 @@ int infiniband_stop(struct node *n)
return 0;
}
int infiniband_init()
/**
 * Node-type initialisation hook; currently a no-op.
 *
 * @param n Super node, unused so far.
 * @return Always 0.
 */
int infiniband_init(struct super_node *n)
{
	(void) n; /* not used yet */

	return 0;
}