/** Node type: infiniband
 *
 * @author Dennis Potter <dennis@dennispotter.eu>
 * @copyright 2018, Institute for Automation of Complex Power Systems, EONERC
 * @license GNU General Public License (version 3)
 *
 * VILLASnode
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *********************************************************************************/

#include <string.h>

#include <villas/nodes/infiniband.h>
#include <villas/plugin.h>
#include <villas/utils.h>
#include <villas/format_type.h>
#include <villas/memory.h>
#include <villas/pool.h>

#include <rdma/rdma_cma.h>

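/* Connection setup follows the librdmacm event flow: a node configured with a
 * "remote" address acts as a source, resolves address and route and calls
 * rdma_connect(), while a target node listens on its rdma_cm_id and accepts
 * incoming connection requests. All RDMA CM events are dispatched by
 * ib_event() further below. */
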
int ib_cleanup(struct node *n)
{
    struct infiniband *ib = (struct infiniband *) n->_vd;
    info("Starting to clean up");

    // Destroy QP
    rdma_destroy_qp(ib->ctx.id);
    info("Destroyed QP");

    // Deregister memory regions
    ibv_dereg_mr(ib->mem.mr_recv);
    if(ib->is_source)
        ibv_dereg_mr(ib->mem.mr_send);
    info("Deregistered memory regions");

    // Destroy pools
    pool_destroy(&ib->mem.p_recv);
    pool_destroy(&ib->mem.p_send);
    info("Destroyed memory pools");

    rdma_destroy_id(ib->ctx.id);
    info("Destroyed rdma_cm_id");

    return 0;
}

int ib_post_recv_wrs(struct node *n)
{
    struct infiniband *ib = (struct infiniband *) n->_vd;
    struct ibv_recv_wr wr, *bad_wr = NULL;
    int ret;
    struct ibv_sge sge;

    // Prepare receive Scatter/Gather element
    sge.addr = (uintptr_t) pool_get(&ib->mem.p_recv);
    sge.length = ib->mem.p_recv.blocksz;
    sge.lkey = ib->mem.mr_recv->lkey;

    // Prepare a receive Work Request
    wr.wr_id = (uintptr_t) sge.addr;
    wr.next = NULL;
    wr.sg_list = &sge;
    wr.num_sge = 1;

    // Post Work Request
    ret = ibv_post_recv(ib->ctx.id->qp, &wr, &bad_wr);

    return ret;
}

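/* Work Completion callbacks. The polling threads (ib_event_thread and
 * ib_busy_poll_thread) call ib->poll.on_compl for every batch of Work
 * Completions retrieved from the Completion Queue. */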
void ib_completion_target(struct node* n, struct ibv_wc* wc, int* size)
{
    //ToDo: No implementation yet. This is still handled in ib_read
}

void ib_completion_source(struct node* n, struct ibv_wc* wc, int* size)
{
    struct infiniband *ib = (struct infiniband *)((struct node *)n)->_vd;

    for(int i=0; i<*size; i++)
    {
        // On disconnect, the QP is set to the error state and will be flushed
        if(wc[i].status == IBV_WC_WR_FLUSH_ERR)
        {
            ib->poll.stopThread = 1;
            return;
        }

        if(wc[i].status != IBV_WC_SUCCESS)
            warn("Work Completion status was not IBV_WC_SUCCESS in node %s: %i",
                node_name(n), wc[i].status);
    }
}

void * ib_event_thread(void *n)
{
    struct infiniband *ib = (struct infiniband *)((struct node *)n)->_vd;
    struct ibv_wc wc[ib->cq_size];
    int size;

    while(1)
    {
        // Function blocks, until an event occurs
        ibv_get_cq_event(ib->ctx.comp_channel, &ib->ctx.cq, NULL);

        // Poll as long as WCs are available
        while((size = ibv_poll_cq(ib->ctx.cq, ib->cq_size, wc)))
            ib->poll.on_compl(n, wc, &size);

        // Request a new event in the CQ and acknowledge event
        ibv_req_notify_cq(ib->ctx.cq, 0);
        ibv_ack_cq_events(ib->ctx.cq, 1);
    }
}

void * ib_busy_poll_thread(void *n)
{
    struct infiniband *ib = (struct infiniband *)((struct node *)n)->_vd;
    struct ibv_wc wc[ib->cq_size];
    int size;

    while(1)
    {
        // Poll as long as WCs are available
        while((size = ibv_poll_cq(ib->ctx.cq, ib->cq_size, wc)))
            ib->poll.on_compl(n, wc, &size);

        if(ib->poll.stopThread)
            return NULL;
    }
}

static void ib_init_wc_poll(struct node *n)
{
    int ret;
    struct infiniband *ib = (struct infiniband *) n->_vd;
    ib->ctx.comp_channel = NULL;

    if(ib->poll.poll_mode == EVENT)
    {
        // Create completion channel
        ib->ctx.comp_channel = ibv_create_comp_channel(ib->ctx.id->verbs);
        if(!ib->ctx.comp_channel)
            error("Could not create completion channel in node %s.", node_name(n));
    }

    // Create completion queue and bind to channel (or NULL)
    ib->ctx.cq = ibv_create_cq(ib->ctx.id->verbs,
                ib->cq_size,
                NULL,
                ib->ctx.comp_channel,
                0);
    if(!ib->ctx.cq)
        error("Could not create completion queue in node %s.", node_name(n));

    if(ib->poll.poll_mode == EVENT)
    {
        // Request notifications from completion queue
        ret = ibv_req_notify_cq(ib->ctx.cq, 0);
        if(ret)
            error("Failed to request notification from CQ in node %s: %s",
                node_name(n), gai_strerror(ret));
    }

    // Initialize polling pthread
    //ToDo: Remove if(is_source)
    if(ib->is_source)
    {
        ret = pthread_create(&ib->poll.cq_poller_thread, NULL, ib->poll.poll_func, n);
        if(ret)
        {
            error("Failed to create poll thread of node %s: %s",
                node_name(n), gai_strerror(ret));
        }
    }
}

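/* Builds all IB Verbs resources once the node's rdma_cm_id is bound to a
 * device: allocates the protection domain, sets up Work Completion polling,
 * creates the Queue Pair, initializes and registers the memory pools and
 * pre-posts receive Work Requests until the Receive Queue is filled. */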
static void ib_build_ibv(struct node *n)
{
    struct infiniband *ib = (struct infiniband *) n->_vd;
    int ret;

    // Allocate protection domain
    ib->ctx.pd = ibv_alloc_pd(ib->ctx.id->verbs);
    if(!ib->ctx.pd)
        error("Could not allocate protection domain in node %s.", node_name(n));
    info("Allocated Protection Domain");

    // Initiate poll mode
    ib_init_wc_poll(n);

    // Prepare remaining Queue Pair (QP) attributes
    ib->qp_init.send_cq = ib->ctx.cq;
    ib->qp_init.recv_cq = ib->ctx.cq;

    //ToDo: Set maximum inline data

    // Create the actual QP
    ret = rdma_create_qp(ib->ctx.id, ib->ctx.pd, &ib->qp_init);
    if(ret)
        error("Failed to create Queue Pair in node %s.", node_name(n));

    info("Created Queue Pair.");

    // Allocate memory
    ib->mem.p_recv.state = STATE_DESTROYED;
    ib->mem.p_recv.queue.state = STATE_DESTROYED;

    // Set pool size to maximum size of Receive Queue
    ret = pool_init(&ib->mem.p_recv,
            ib->qp_init.cap.max_recv_wr,
            sizeof(double),
            &memtype_heap);
    if(ret)
    {
        error("Failed to init recv memory pool of node %s: %s",
            node_name(n), gai_strerror(ret));
    }

    //ToDo: initialize r_addr_key struct if mode is RDMA

    // Register memory for IB Device. Not necessary if data is sent
    // exclusively inline
    ib->mem.mr_recv = ibv_reg_mr(
            ib->ctx.pd,
            (char*)&ib->mem.p_recv+ib->mem.p_recv.buffer_off,
            ib->mem.p_recv.len,
            IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE);
    if(!ib->mem.mr_recv) {
        error("Failed to register mr_recv with ibv_reg_mr of node %s.",
            node_name(n));
    }
    info("Allocated receive memory.");

    if(ib->is_source)
    {
        ib->mem.p_send.state = STATE_DESTROYED;
        ib->mem.p_send.queue.state = STATE_DESTROYED;

        // Set pool size to maximum size of Send Queue
        ret = pool_init(&ib->mem.p_send,
                ib->qp_init.cap.max_send_wr,
                sizeof(double),
                &memtype_heap);
        if(ret)
        {
            error("Failed to init send memory of node %s: %s",
                node_name(n), gai_strerror(ret));
        }

        //ToDo: initialize r_addr_key struct if mode is RDMA

        // Register memory for IB Device. Not necessary if data is sent
        // exclusively inline
        ib->mem.mr_send = ibv_reg_mr(
                ib->ctx.pd,
                (char*)&ib->mem.p_send+ib->mem.p_send.buffer_off,
                ib->mem.p_send.len,
                IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE);
        if(!ib->mem.mr_send) {
            error("Failed to register mr_send with ibv_reg_mr of node %s.",
                node_name(n));
        }
        info("Allocated send memory.");
    }

    // Post Receive Work Requests to be able to receive data
    // Fill complete Receive Queue during initialization
    for(int i=0; i<ib->qp_init.cap.max_recv_wr; i++)
    {
        ret = ib_post_recv_wrs(n);
        if(ret)
        {
            error("Failed to post initial receive Work Requests of node %s.",
                node_name(n));
        }
    }
    info("Filled the complete Receive Queue.");
}

static int ib_addr_resolved(struct node *n)
{
    struct infiniband *ib = (struct infiniband *) n->_vd;
    int ret;

    info("Successfully resolved address.");

    // Build all components from IB Verbs
    ib_build_ibv(n);

    // Resolve route
    ret = rdma_resolve_route(ib->ctx.id, ib->conn.timeout);
    if(ret)
        error("Failed to resolve route in node %s.", node_name(n));

    //ToDo: create check if data can be sent inline

    return 0;
}

static int ib_route_resolved(struct node *n)
{
    struct infiniband *ib = (struct infiniband *) n->_vd;
    int ret;

    info("Successfully resolved route.");

    //ToDo: Post receive WRs

    struct rdma_conn_param cm_params;
    memset(&cm_params, 0, sizeof(cm_params));

    // Send connection request
    ret = rdma_connect(ib->ctx.id, &cm_params);
    if(ret)
        error("Failed to connect in node %s.", node_name(n));

    info("Called rdma_connect.");

    return 0;
}

static int ib_connect_request(struct node *n, struct rdma_cm_id *id)
{
    struct infiniband *ib = (struct infiniband *) n->_vd;
    int ret;
    info("Received a connection request!");

    ib->ctx.id = id;
    ib_build_ibv(n);

    struct rdma_conn_param cm_params;
    memset(&cm_params, 0, sizeof(cm_params));

    // Accept connection request
    ret = rdma_accept(ib->ctx.id, &cm_params);
    if(ret)
        error("Failed to accept connection request in node %s.", node_name(n));

    info("Successfully accepted connection request.");

    return 0;
}

static int ib_event(struct node *n, struct rdma_cm_event *event)
{
    int ret = 0;

    switch(event->event)
    {
        case RDMA_CM_EVENT_ADDR_RESOLVED:
            ret = ib_addr_resolved(n);
            break;
        case RDMA_CM_EVENT_ADDR_ERROR:
            error("Address resolution (rdma_resolve_addr) failed!");
        case RDMA_CM_EVENT_ROUTE_RESOLVED:
            ret = ib_route_resolved(n);
            break;
        case RDMA_CM_EVENT_ROUTE_ERROR:
            error("Route resolution (rdma_resolve_route) failed!");
        case RDMA_CM_EVENT_CONNECT_REQUEST:
            ret = ib_connect_request(n, event->id);
            break;
        case RDMA_CM_EVENT_CONNECT_ERROR:
            error("An error has occurred trying to establish a connection!");
        case RDMA_CM_EVENT_REJECTED:
            error("Connection request or response was rejected by the remote end point!");
        case RDMA_CM_EVENT_ESTABLISHED:
            info("Connection established!");
            ret = 1;
            break;
        case RDMA_CM_EVENT_DISCONNECTED:
            ret = ib_cleanup(n);
            break;
        default:
            error("Unknown event occurred: %u", event->event);
    }

    return ret;
}

int ib_reverse(struct node *n)
{
    return 0;
}

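/* Parses the node configuration. All settings are optional: "local", "remote",
 * "rdma_port_space", "resolution_timeout", "poll_mode" ("EVENT" or "BUSY"),
 * "cq_size", "qp_type" (IBV_QPT_RC, IBV_QPT_UC or IBV_QPT_UD), "max_send_wr"
 * and "max_recv_wr". A node with a "remote" address acts as a source,
 * otherwise as a target.
 *
 * A minimal sketch of the settings object this function unpacks (shown as
 * JSON; the addresses are placeholders and the port is currently fixed to
 * 13337 inside ib_parse):
 *
 *   {
 *       "local": "10.0.0.1",
 *       "remote": "10.0.0.2",
 *       "rdma_port_space": "RDMA_PS_TCP",
 *       "poll_mode": "BUSY",
 *       "qp_type": "IBV_QPT_RC",
 *       "cq_size": 10,
 *       "max_send_wr": 100,
 *       "max_recv_wr": 100
 *   }
 */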
int ib_parse(struct node *n, json_t *cfg)
{
    struct infiniband *ib = (struct infiniband *) n->_vd;

    int ret;
    const char *local = NULL;
    const char *remote = NULL;
    const char *port_space = "RDMA_PS_TCP";
    const char *poll_mode = "BUSY";
    const char *qp_type = "IBV_QPT_RC";
    int timeout = 1000;
    int cq_size = 10;
    int max_send_wr = 100;
    int max_recv_wr = 100;

    json_error_t err;
    ret = json_unpack_ex(cfg, &err, 0, "{ s?: s, s?: s, s?: s, s?: i, \
                    s?: s, s?: i, s?: s, s?: i, s?: i}",
        "remote", &remote,
        "local", &local,
        "rdma_port_space", &port_space,
        "resolution_timeout", &timeout,
        "poll_mode", &poll_mode,
        "cq_size", &cq_size,
        "qp_type", &qp_type,
        "max_send_wr", &max_send_wr,
        "max_recv_wr", &max_recv_wr
    );
    if(ret)
        jerror(&err, "Failed to parse configuration of node %s", node_name(n));

    // Translate IP:PORT to a struct addrinfo
    //ToDo: Fix fixed port
    ret = getaddrinfo(local, (char *)"13337", NULL, &ib->conn.src_addr);
    if(ret)
    {
        error("Failed to resolve local address '%s' of node %s: %s",
            local, node_name(n), gai_strerror(ret));
    }

    // Translate port space
    if(strcmp(port_space, "RDMA_PS_IPOIB") == 0)    ib->conn.port_space = RDMA_PS_IPOIB;
    else if(strcmp(port_space, "RDMA_PS_TCP") == 0) ib->conn.port_space = RDMA_PS_TCP;
    else if(strcmp(port_space, "RDMA_PS_UDP") == 0) ib->conn.port_space = RDMA_PS_UDP;
    else if(strcmp(port_space, "RDMA_PS_IB") == 0)  ib->conn.port_space = RDMA_PS_IB;
    else {
        error("Failed to translate rdma_port_space in node %s. %s is not a valid \
            port space supported by rdma_cma.h!", node_name(n), port_space);
    }

    // Set timeout
    ib->conn.timeout = timeout;

    // Translate poll mode
    if(strcmp(poll_mode, "EVENT") == 0)
    {
        ib->poll.poll_mode = EVENT;
        ib->poll.poll_func = ib_event_thread;
    }
    else if(strcmp(poll_mode, "BUSY") == 0)
    {
        ib->poll.poll_mode = BUSY;
        ib->poll.poll_func = ib_busy_poll_thread;
    }
    else
    {
        error("Failed to translate poll_mode in node %s. %s is not a valid \
            poll mode!", node_name(n), poll_mode);
    }

    // Set completion queue size
    ib->cq_size = cq_size;

    // Translate QP type
    if(strcmp(qp_type, "IBV_QPT_RC") == 0)      ib->qp_init.qp_type = IBV_QPT_RC;
    else if(strcmp(qp_type, "IBV_QPT_UC") == 0) ib->qp_init.qp_type = IBV_QPT_UC;
    else if(strcmp(qp_type, "IBV_QPT_UD") == 0) ib->qp_init.qp_type = IBV_QPT_UD;
    else {
        error("Failed to translate qp_type in node %s. %s is not a valid \
            qp_type!", node_name(n), qp_type);
    }

    // Set max. send and receive Work Requests
    //ToDo: Set hint that max_*_wr can only be a value of 1<<x
    ib->qp_init.cap.max_send_wr = max_send_wr;
    ib->qp_init.cap.max_recv_wr = max_recv_wr;

    // Set remaining QP attributes
    ib->qp_init.cap.max_send_sge = 1;
    ib->qp_init.cap.max_recv_sge = 1;

    // Check if node is a source and connect to target
    if(remote)
    {
        ib->is_source = 1;

        // Translate address info
        //ToDo: Fix fixed port
        ret = getaddrinfo(remote, (char *)"13337", NULL, &ib->conn.dst_addr);
        if(ret)
        {
            error("Failed to resolve remote address '%s' of node %s: %s",
                remote, node_name(n), gai_strerror(ret));
        }

        // Set correct Work Completion function
        ib->poll.on_compl = ib_completion_source;
    }
    else
    {
        ib->is_source = 0;

        // Set correct Work Completion function
        ib->poll.on_compl = ib_completion_target;
    }

    return 0;
}

char * ib_print(struct node *n)
{
    return 0;
}

int ib_destroy(struct node *n)
{
    return 0;
}

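/* Monitors the RDMA CM event channel for a disconnect initiated by the remote
 * side. On RDMA_CM_EVENT_DISCONNECTED the rdma_disconnect_called flag is set
 * and the node is stopped via node_stop(), so that ib_stop() calls
 * ib_cleanup() directly instead of waiting for the event again. */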
void * ib_stop_thread(void *n)
{
    struct node *node = (struct node *) n;
    struct infiniband *ib = (struct infiniband *)((struct node *)n)->_vd;
    struct rdma_cm_event *event;

    while(rdma_get_cm_event(ib->ctx.ec, &event) == 0)
    {
        if(event->event == RDMA_CM_EVENT_DISCONNECTED)
        {
            ib->conn.rdma_disconnect_called = 1;

            node_stop(node);
            return NULL;
        }
    }

    return NULL;
}

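/* Starts the node: creates the RDMA CM event channel and rdma_cm_id, binds it
 * to the local address and then either resolves the remote address (source) or
 * starts listening (target). CM events are processed via ib_event() until the
 * connection is established; afterwards ib_stop_thread is spawned to watch for
 * disconnects. */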
int ib_start(struct node *n)
{
    struct infiniband *ib = (struct infiniband *) n->_vd;
    struct rdma_cm_event *event = NULL;
    int ret;

    // Create event channel
    ib->ctx.ec = rdma_create_event_channel();
    if(!ib->ctx.ec) {
        error("Failed to create event channel in node %s!",
            node_name(n));
    }

    ret = rdma_create_id(ib->ctx.ec, &ib->ctx.id, NULL, ib->conn.port_space);
    if(ret)
    {
        error("Failed to create rdma_cm_id of node %s: %s",
            node_name(n), gai_strerror(ret));
    }
    info("Successfully created rdma_cm_id.");

    // Bind rdma_cm_id to the HCA
    ret = rdma_bind_addr(ib->ctx.id, ib->conn.src_addr->ai_addr);
    if(ret)
    {
        error("Failed to bind to local device of node %s: %s",
            node_name(n), gai_strerror(ret));
    }
    info("Bound rdma_cm_id to Infiniband device.");

    if(ib->is_source)
    {
        // Resolve address
        ret = rdma_resolve_addr(ib->ctx.id,
                    NULL,
                    ib->conn.dst_addr->ai_addr,
                    ib->conn.timeout);
        if(ret)
        {
            error("Failed to resolve remote address after %ims of node %s: %s",
                ib->conn.timeout, node_name(n), gai_strerror(ret));
        }
    }
    else
    {
        // The ID will be overwritten for the target. If the event type is
        // RDMA_CM_EVENT_CONNECT_REQUEST, then this references a new id for
        // that communication.
        ib->ctx.listen_id = ib->ctx.id;

        // Listen on rdma_cm_id for events
        ret = rdma_listen(ib->ctx.listen_id, 10);
        if(ret)
        {
            error("Failed to listen to rdma_cm_id on node %s", node_name(n));
        }
    }

    // Several events should occur on the event channel, to make
    // sure the nodes are successfully connected.
    info("Starting to monitor events on rdma_cm_id.");

    while(rdma_get_cm_event(ib->ctx.ec, &event) == 0)
    {
        struct rdma_cm_event event_copy;
        memcpy(&event_copy, event, sizeof(*event));

        rdma_ack_cm_event(event);

        if(ib_event(n, &event_copy))
            break;
    }

    ret = pthread_create(&ib->conn.stop_thread, NULL, ib_stop_thread, n);
    if(ret)
    {
        error("Failed to create thread to monitor disconnects in node %s: %s",
            node_name(n), gai_strerror(ret));
    }

    return 0;
}

int ib_stop(struct node *n)
{
    struct infiniband *ib = (struct infiniband *) n->_vd;
    struct rdma_cm_event *event = NULL;
    int ret;

    // Call RDMA disconnect function
    // Will flush all outstanding WRs to the Completion Queue and
    // will call RDMA_CM_EVENT_DISCONNECTED if that is done.
    ret = rdma_disconnect(ib->ctx.id);
    if(ret)
    {
        error("Error while calling rdma_disconnect in node %s: %s",
            node_name(n), gai_strerror(ret));
    }
    info("Called rdma_disconnect.");

    // If the disconnected event already occurred, directly call the cleanup function
    if(ib->conn.rdma_disconnect_called)
    {
        ib_cleanup(n);
    }
    // Else, wait for the event to occur
    else
    {
        ib->conn.rdma_disconnect_called = 1;

        rdma_get_cm_event(ib->ctx.ec, &event);

        ib_event(n, event);

        rdma_ack_cm_event(event);
    }

    return 0;
}

int ib_init(struct super_node *n)
{
    return 0;
}

int ib_deinit()
{
    return 0;
}

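/* Reads up to cnt samples by polling the Completion Queue directly. As the
 * comment inside notes, this is a provisional implementation for testing; Work
 * Completion handling is meant to move into the polling threads. */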
int ib_read(struct node *n, struct sample *smps[], unsigned cnt)
{
    // Create separate thread for polling! This implementation is just
    // for testing purposes
    struct infiniband *ib = (struct infiniband *) n->_vd;
    int ret;
    struct ibv_wc wc[cnt];
    union {
        double f;
        int64_t i;
    } *data;

    ret = ibv_poll_cq(ib->ctx.cq, cnt, wc);

    if(ret)
    {
        data = malloc(ret*sizeof(double));

        for(int i=0; i<ret; i++)
        {
            if(wc[i].status == IBV_WC_WR_FLUSH_ERR)
            {
                free(data);
                return 0;
            }

            if(wc[i].status != IBV_WC_SUCCESS)
            {
                warn("Work Completion status was not IBV_WC_SUCCESS in node %s", node_name(n));
                ret--;
            }
            else
                data[i].f = *(double*)(wc[i].wr_id);
        }

        smps[0]->length = ret;
        smps[0]->capacity = cnt;
        memcpy(smps[0]->data, data, ret*sizeof(double));

        free(data);
    }

    return ret;
}

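/* Writes the values of smps[0] by chaining one send Work Request per value and
 * posting the whole list with a single ibv_post_send() call. With send_inline
 * set, the payload is copied at post time, so it does not have to be staged in
 * the registered send memory region first (see the comment inside). */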
int ib_write(struct node *n, struct sample *smps[], unsigned cnt)
{
    /* Send pool is not used at this moment! */
    struct infiniband *ib = (struct infiniband *) n->_vd;
    int ret;
    struct ibv_send_wr wr[smps[0]->length], *bad_wr = NULL;
    struct ibv_sge sge[smps[0]->length];

    memset(&wr, 0, sizeof(wr));

    //ToDo: Place this into configuration and create checks if settings are valid
    int send_inline = 1;

    for(int i=0; i<smps[0]->length; i++)
    {
        // If data is sent inline, it is not necessary to copy data to protected
        // memory region first.
        if(send_inline)
        {
            sge[i].addr = (uint64_t)&smps[0]->data[i].f;
            sge[i].length = sizeof(double);
        }
        else
        {
            //- copy value to send_region
            //- give pointer to start of array
        }

        // Set Send Work Request
        wr[i].wr_id = 0; //ToDo: set this to a useful value
        wr[i].sg_list = &sge[i];
        wr[i].num_sge = 1; //ToDo: Right now only smps[0] is sg_list. This can be extended;
                           //furthermore we should break the transaction up if inline mode
                           //is selected

        if(i == (smps[0]->length-1))
            wr[i].next = NULL;
        else
            wr[i].next = &wr[i+1];

        wr[i].send_flags = IBV_SEND_SIGNALED | (send_inline ? IBV_SEND_INLINE : 0);
        wr[i].imm_data = htonl(0); //ToDo: set this to a useful value
        wr[i].opcode = IBV_WR_SEND_WITH_IMM;
    }

    // Send linked list of Work Requests
    ret = ibv_post_send(ib->ctx.id->qp, wr, &bad_wr);
    if(ret)
    {
        error("Failed to send message in node %s: %s",
            node_name(n), gai_strerror(ret));

        return -ret;
    }

    return cnt;
}

int ib_fd(struct node *n)
{
    return 0;
}

static struct plugin p = {
    .name           = "infiniband",
    .description    = "Infiniband",
    .type           = PLUGIN_TYPE_NODE,
    .node           = {
        .vectorize  = 0,
        .size       = sizeof(struct infiniband),
        .reverse    = ib_reverse,
        .parse      = ib_parse,
        .print      = ib_print,
        .start      = ib_start,
        .destroy    = ib_destroy,
        .stop       = ib_stop,
        .init       = ib_init,
        .deinit     = ib_deinit,
        .read       = ib_read,
        .write      = ib_write,
        .fd         = ib_fd
    }
};

REGISTER_PLUGIN(&p)
LIST_INIT_STATIC(&p.node.instances)