2018-06-21 11:56:28 +02:00
/** Node type: infiniband
*
* @ author Dennis Potter < dennis @ dennispotter . eu >
* @ copyright 2018 , Institute for Automation of Complex Power Systems , EONERC
* @ license GNU General Public License ( version 3 )
*
* VILLASnode
*
* This program is free software : you can redistribute it and / or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation , either version 3 of the License , or
* any later version .
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* GNU General Public License for more details .
*
* You should have received a copy of the GNU General Public License
* along with this program . If not , see < http : //www.gnu.org/licenses/>.
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
#include <errno.h>
#include <math.h>
#include <string.h>

#include <villas/nodes/infiniband.h>
#include <villas/plugin.h>
#include <villas/utils.h>
#include <villas/format_type.h>
#include <villas/memory.h>
#include <villas/memory/ib.h>
2018-06-24 13:02:04 +02:00
2018-07-16 10:54:15 +02:00
static int ib_disconnect ( struct node * n )
2018-06-28 12:46:16 +02:00
{
2018-07-03 17:39:06 +02:00
struct infiniband * ib = ( struct infiniband * ) n - > _vd ;
2018-07-05 13:57:25 +02:00
debug ( LOG_IB | 1 , " Starting to clean up " ) ;
2018-06-30 18:20:30 +02:00
2018-07-07 15:34:07 +02:00
rdma_disconnect ( ib - > ctx . id ) ;
2018-07-03 17:39:06 +02:00
// Destroy QP
rdma_destroy_qp ( ib - > ctx . id ) ;
2018-07-05 13:57:25 +02:00
debug ( LOG_IB | 3 , " Destroyed QP " ) ;
2018-06-28 12:46:16 +02:00
2018-07-16 10:54:15 +02:00
// Set available receive WRs and stack top to zero
2018-07-07 14:36:23 +02:00
ib - > conn . available_recv_wrs = 0 ;
2018-07-14 16:46:23 +02:00
ib - > conn . send_wc_stack . top = 0 ;
2018-07-08 15:00:47 +02:00
return ib - > stopThreads ;
2018-06-28 12:46:16 +02:00
}
2018-06-27 17:01:47 +02:00
2018-07-08 15:32:28 +02:00
static void ib_build_ibv ( struct node * n )
2018-06-23 14:53:37 +02:00
{
2018-07-03 18:01:49 +02:00
struct infiniband * ib = ( struct infiniband * ) n - > _vd ;
2018-07-08 15:32:28 +02:00
int ret ;
2018-07-05 13:57:25 +02:00
2018-07-08 15:32:28 +02:00
debug ( LOG_IB | 1 , " Starting to build IBV components " ) ;
2018-07-03 18:01:49 +02:00
2018-07-08 15:32:28 +02:00
// Create completion queues. No completion channel!)
2018-07-15 13:51:18 +02:00
ib - > ctx . recv_cq = ibv_create_cq ( ib - > ctx . id - > verbs , ib - > recv_cq_size , NULL , NULL , 0 ) ;
2018-07-04 19:04:08 +02:00
if ( ! ib - > ctx . recv_cq )
2018-07-05 13:57:25 +02:00
error ( " Could not create receive completion queue in node %s " , node_name ( n ) ) ;
debug ( LOG_IB | 3 , " Created receive Completion Queue " ) ;
2018-07-03 18:01:49 +02:00
2018-07-15 13:51:18 +02:00
ib - > ctx . send_cq = ibv_create_cq ( ib - > ctx . id - > verbs , ib - > send_cq_size , NULL , NULL , 0 ) ;
2018-07-04 19:04:08 +02:00
if ( ! ib - > ctx . send_cq )
2018-07-05 13:57:25 +02:00
error ( " Could not create send completion queue in node %s " , node_name ( n ) ) ;
debug ( LOG_IB | 3 , " Created send Completion Queue " ) ;
2018-07-03 18:01:49 +02:00
// Prepare remaining Queue Pair (QP) attributes
ib - > qp_init . send_cq = ib - > ctx . send_cq ;
ib - > qp_init . recv_cq = ib - > ctx . recv_cq ;
// Create the actual QP
ret = rdma_create_qp ( ib - > ctx . id , ib - > ctx . pd , & ib - > qp_init ) ;
2018-07-04 19:04:08 +02:00
if ( ret )
2018-07-05 13:57:25 +02:00
error ( " Failed to create Queue Pair in node %s " , node_name ( n ) ) ;
2018-07-03 18:01:49 +02:00
2018-07-05 13:57:25 +02:00
debug ( LOG_IB | 3 , " Created Queue Pair with %i receive and %i send elements " ,
2018-07-03 18:01:49 +02:00
ib - > qp_init . cap . max_recv_wr , ib - > qp_init . cap . max_send_wr ) ;
2018-07-15 13:51:18 +02:00
if ( ib - > conn . send_inline )
2018-07-14 15:20:24 +02:00
info ( " Maximum inline size is set to %i byte " , ib - > qp_init . cap . max_inline_data ) ;
2018-06-23 14:53:37 +02:00
}
2018-06-27 11:21:28 +02:00
static int ib_addr_resolved ( struct node * n )
2018-06-23 14:53:37 +02:00
{
2018-07-03 17:39:06 +02:00
struct infiniband * ib = ( struct infiniband * ) n - > _vd ;
int ret ;
2018-06-23 14:53:37 +02:00
2018-07-05 13:57:25 +02:00
debug ( LOG_IB | 1 , " Successfully resolved address " ) ;
2018-06-23 19:05:33 +02:00
2018-07-03 17:39:06 +02:00
// Build all components from IB Verbs
ib_build_ibv ( n ) ;
2018-06-23 14:53:37 +02:00
2018-07-03 17:39:06 +02:00
// Resolve address
ret = rdma_resolve_route ( ib - > ctx . id , ib - > conn . timeout ) ;
2018-07-04 19:04:08 +02:00
if ( ret )
2018-07-05 13:57:25 +02:00
error ( " Failed to resolve route in node %s " , node_name ( n ) ) ;
2018-06-23 14:53:37 +02:00
2018-07-03 17:39:06 +02:00
return 0 ;
2018-06-21 11:56:28 +02:00
}
2018-06-27 11:21:28 +02:00
static int ib_route_resolved ( struct node * n )
2018-06-21 11:56:28 +02:00
{
2018-07-03 17:39:06 +02:00
struct infiniband * ib = ( struct infiniband * ) n - > _vd ;
int ret ;
2018-06-23 14:53:37 +02:00
2018-07-03 17:39:06 +02:00
struct rdma_conn_param cm_params ;
memset ( & cm_params , 0 , sizeof ( cm_params ) ) ;
2018-06-23 14:53:37 +02:00
2018-07-03 17:39:06 +02:00
// Send connection request
ret = rdma_connect ( ib - > ctx . id , & cm_params ) ;
2018-07-04 19:04:08 +02:00
if ( ret )
2018-07-05 13:57:25 +02:00
error ( " Failed to connect in node %s " , node_name ( n ) ) ;
2018-06-23 14:53:37 +02:00
2018-07-05 13:57:25 +02:00
debug ( LOG_IB | 1 , " Called rdma_connect " ) ;
2018-06-30 18:20:30 +02:00
2018-07-03 17:39:06 +02:00
return 0 ;
2018-06-21 11:56:28 +02:00
}
2018-06-23 14:53:37 +02:00
static int ib_connect_request ( struct node * n , struct rdma_cm_id * id )
2018-06-21 11:56:28 +02:00
{
2018-07-03 17:39:06 +02:00
struct infiniband * ib = ( struct infiniband * ) n - > _vd ;
int ret ;
2018-07-05 13:57:25 +02:00
debug ( LOG_IB | 1 , " Received a connection request! " ) ;
2018-06-23 14:53:37 +02:00
2018-07-03 17:39:06 +02:00
ib - > ctx . id = id ;
ib_build_ibv ( n ) ;
2018-06-30 18:20:30 +02:00
2018-07-03 17:39:06 +02:00
struct rdma_conn_param cm_params ;
memset ( & cm_params , 0 , sizeof ( cm_params ) ) ;
2018-06-23 14:53:37 +02:00
2018-07-03 17:39:06 +02:00
// Accept connection request
ret = rdma_accept ( ib - > ctx . id , & cm_params ) ;
2018-07-04 19:04:08 +02:00
if ( ret )
2018-07-05 13:57:25 +02:00
error ( " Failed to connect in node %s " , node_name ( n ) ) ;
2018-06-23 14:53:37 +02:00
2018-07-05 13:57:25 +02:00
info ( " Successfully accepted connection request in node %s " , node_name ( n ) ) ;
2018-06-30 18:20:30 +02:00
2018-07-03 17:39:06 +02:00
return 0 ;
2018-06-21 11:56:28 +02:00
}
2018-06-23 14:53:37 +02:00
int ib_reverse ( struct node * n )
2018-06-21 11:56:28 +02:00
{
2018-07-16 10:34:50 +02:00
struct infiniband * ib = ( struct infiniband * ) n - > _vd ;
SWAP ( ib - > conn . src_addr , ib - > conn . dst_addr ) ;
2018-07-03 18:01:49 +02:00
return 0 ;
2018-06-21 11:56:28 +02:00
}
2018-06-23 14:53:37 +02:00
// Size of the scratch buffers that hold a copy of a configured address
enum { IB_ADDR_BUFLEN = 512 };

/**
 * Copy an address string into a scratch buffer and split it into host and port.
 *
 * The configured string is never modified; splitting happens in the copy.
 * Tokens are separated by the same delimiter set the parser has always used.
 *
 * @param addr Configured address string, may be NULL.
 * @param buf  Scratch buffer which receives the copy and backs host and port.
 * @param len  Size of buf in bytes.
 * @param host Set to the first token, or NULL on failure.
 * @param port Set to the second token, or NULL if there is none.
 * @return 0 on success, -1 if addr is NULL, contains no host or does not fit into buf.
 */
static int ib_split_address(const char *addr, char *buf, size_t len, char **host, char **port)
{
	static const char delims[] = " : ";
	char *cur;

	*host = NULL;
	*port = NULL;

	if (!addr || strlen(addr) >= len)
		return -1;

	memcpy(buf, addr, strlen(addr) + 1);

	// Skip leading delimiters; nothing left means there is no host
	cur = buf + strspn(buf, delims);
	if (*cur == '\0')
		return -1;

	*host = cur;
	cur += strcspn(cur, delims);
	if (*cur == '\0')
		return 0; // Host only, no port

	// Terminate the host and skip the delimiters in front of the port
	*cur++ = '\0';
	cur += strspn(cur, delims);
	if (*cur == '\0')
		return 0;

	*port = cur;
	cur[strcspn(cur, delims)] = '\0';

	return 0;
}

/**
 * Parse the JSON configuration of an infiniband node.
 *
 * Reads the top-level settings and the optional input and output blocks,
 * translates the textual enums (port space, poll mode, QP type), fills the
 * Queue Pair init attributes and resolves the local and, if an output block
 * exists, the remote address with getaddrinfo(). A node with an output block
 * is marked as source.
 *
 * Invalid or missing values are reported through error() / jerror().
 *
 * @param n   The node to configure.
 * @param cfg The JSON object holding the node configuration.
 * @return Always 0.
 */
int ib_parse(struct node *n, json_t *cfg)
{
	struct infiniband *ib = (struct infiniband *) n->_vd;

	int ret;

	// Strings handed out by json_unpack_ex() are owned by the JSON object
	const char *local = NULL;
	const char *remote = NULL;

	// Default values, used when a setting is absent
	const char *port_space = " RDMA_PS_TCP ";
	const char *poll_mode = " BUSY ";
	const char *qp_type = " IBV_QPT_RC ";
	int timeout = 1000;
	int recv_cq_size = 128;
	int send_cq_size = 128;
	int max_send_wr = 128;
	int max_recv_wr = 128;
	int max_inline_data = 0;
	int send_inline = 1;
	int vectorize_in = 1;
	int vectorize_out = 1;
	int buffer_subtraction = 2;

	char local_buf[IB_ADDR_BUFLEN];
	char *local_host, *local_port;

	// Parse JSON files and copy to local variables
	json_t *json_in = NULL;
	json_t *json_out = NULL;
	json_error_t err;

	ret = json_unpack_ex(cfg, &err, 0, " {s?: o, s?: o, s?: s, s?: s} ",
		" in ", &json_in,
		" out ", &json_out,
		" qp_type ", &qp_type,
		" rdma_port_space ", &port_space
	);
	if (ret)
		jerror(&err, " Failed to parse in/out json blocks ");

	if (json_in) {
		ret = json_unpack_ex(json_in, &err, 0, " { s?: s, s?: s, s?: i, s?: i, s?: i, s?: i} ",
			" address ", &local,
			" poll_mode ", &poll_mode,
			" cq_size ", &recv_cq_size,
			" max_wrs ", &max_recv_wr,
			" vectorize ", &vectorize_in,
			" buffer_subtraction ", &buffer_subtraction
		);
		if (ret)
			jerror(&err, " Failed to parse input configuration of node %s ", node_name(n));
	}

	if (json_out) {
		ret = json_unpack_ex(json_out, &err, 0, " { s?: s, s?: i, s?: i, s?: i, s?: i, s?: i, s?: i,} ",
			" address ", &remote,
			" resolution_timeout ", &timeout,
			" cq_size ", &send_cq_size,
			" max_wrs ", &max_send_wr,
			" max_inline_data ", &max_inline_data,
			" send_inline ", &send_inline,
			" vectorize ", &vectorize_out
		);
		if (ret)
			jerror(&err, " Failed to parse output configuration of node %s ", node_name(n));

		ib->is_source = 1;

		debug(LOG_IB | 3, " Node %s is up as source and target ", node_name(n));
	}
	else {
		ib->is_source = 0;

		debug(LOG_IB | 3, " Node %s is up as target ", node_name(n));
	}

	// Set vectorize mode. Do not print, since framework will print this information
	n->in.vectorize = vectorize_in;
	n->out.vectorize = vectorize_out;

	// Set buffer subtraction
	ib->conn.buffer_subtraction = buffer_subtraction;

	debug(LOG_IB | 4, " Set buffer subtraction to %i in node %s ", buffer_subtraction, node_name(n));

	// Translate IP:PORT to a struct addrinfo. The local address is mandatory,
	// ib_create_bind_id() binds the rdma_cm_id to it.
	if (ib_split_address(local, local_buf, sizeof(local_buf), &local_host, &local_port))
		error(" Missing or invalid local address in node %s ", node_name(n));

	ret = getaddrinfo(local_host, local_port, NULL, &ib->conn.src_addr);
	if (ret)
		error(" Failed to resolve local address '%s' of node %s: %s ",
			local, node_name(n), gai_strerror(ret));

	debug(LOG_IB | 4, " Translated %s:%s to a struct addrinfo in node %s ", local_host, local_port, node_name(n));

	// Translate port space
	if (strcmp(port_space, " RDMA_PS_IPOIB ") == 0)
		ib->conn.port_space = RDMA_PS_IPOIB;
	else if (strcmp(port_space, " RDMA_PS_TCP ") == 0)
		ib->conn.port_space = RDMA_PS_TCP;
	else if (strcmp(port_space, " RDMA_PS_UDP ") == 0)
		ib->conn.port_space = RDMA_PS_UDP;
	else if (strcmp(port_space, " RDMA_PS_IB ") == 0)
		ib->conn.port_space = RDMA_PS_IB;
	else
		error(" Failed to translate rdma_port_space in node %s. %s is not a valid \
port space supported by rdma_cma . h ! ", node_name(n), port_space);

	debug(LOG_IB | 4, " Translated %s to enum rdma_port_space in node %s ", port_space, node_name(n));

	// Set timeout
	ib->conn.timeout = timeout;

	debug(LOG_IB | 4, " Set timeout to %i in node %s ", timeout, node_name(n));

	// Translate poll mode
	if (strcmp(poll_mode, " EVENT ") == 0)
		ib->poll_mode = EVENT;
	else if (strcmp(poll_mode, " BUSY ") == 0)
		ib->poll_mode = BUSY;
	else
		error(" Failed to translate poll_mode in node %s. %s is not a valid \
poll mode ! ", node_name(n), poll_mode);

	debug(LOG_IB | 4, " Set poll mode to %s in node %s ", poll_mode, node_name(n));

	// Set completion queue size
	ib->recv_cq_size = recv_cq_size;
	ib->send_cq_size = send_cq_size;

	debug(LOG_IB | 4, " Set Completion Queue size to %i & %i (in & out) in node %s ",
		recv_cq_size, send_cq_size, node_name(n));

	// Translate QP type
	if (strcmp(qp_type, " IBV_QPT_RC ") == 0)
		ib->qp_init.qp_type = IBV_QPT_RC;
	else if (strcmp(qp_type, " IBV_QPT_UC ") == 0)
		ib->qp_init.qp_type = IBV_QPT_UC;
	else if (strcmp(qp_type, " IBV_QPT_UD ") == 0)
		ib->qp_init.qp_type = IBV_QPT_UD;
	else
		error(" Failed to translate qp_type in node %s. %s is not a valid \
qp_type ! ", node_name(n), qp_type);

	debug(LOG_IB | 4, " Set Queue Pair type to %s in node %s ", qp_type, node_name(n));

	// Translate inline mode
	ib->conn.send_inline = send_inline;

	debug(LOG_IB | 4, " Set send_inline to %i in node %s ", send_inline, node_name(n));

	// Set max. send and receive Work Requests
	ib->qp_init.cap.max_send_wr = max_send_wr;
	ib->qp_init.cap.max_recv_wr = max_recv_wr;

	debug(LOG_IB | 4, " Set max_send_wr and max_recv_wr in node %s to %i and %i, respectively ",
		node_name(n), max_send_wr, max_recv_wr);

	// Set available receive Work Requests to 0
	ib->conn.available_recv_wrs = 0;

	// Set remaining QP attributes
	ib->qp_init.cap.max_send_sge = 1;
	ib->qp_init.cap.max_recv_sge = 1;

	// Set number of bytes to be send inline
	ib->qp_init.cap.max_inline_data = max_inline_data;

	// If node will send data, set remote address
	if (ib->is_source) {
		char remote_buf[IB_ADDR_BUFLEN];
		char *remote_host, *remote_port;

		// A source without a remote address cannot connect anywhere
		if (ib_split_address(remote, remote_buf, sizeof(remote_buf), &remote_host, &remote_port))
			error(" Missing or invalid remote address in node %s ", node_name(n));

		ret = getaddrinfo(remote_host, remote_port, NULL, &ib->conn.dst_addr);
		if (ret)
			error(" Failed to resolve remote address '%s' of node %s: %s ",
				remote, node_name(n), gai_strerror(ret));

		debug(LOG_IB | 4, " Translated %s:%s to a struct addrinfo ", remote_host, remote_port);
	}

	return 0;
}
2018-07-05 15:30:33 +02:00
int ib_check ( struct node * n )
{
struct infiniband * ib = ( struct infiniband * ) n - > _vd ;
info ( " Starting check of node %s " , node_name ( n ) ) ;
2018-07-15 13:51:18 +02:00
// Check if read substraction makes sense
if ( ib - > conn . buffer_subtraction < 2 * n - > in . vectorize )
error ( " The buffer substraction value must be bigger than 2 * in.vectorize " ) ;
if ( ib - > conn . buffer_subtraction > = ib - > qp_init . cap . max_recv_wr - n - > in . vectorize )
error ( " The buffer substraction value cannot be smaller than in.max_wrs - in.vectorize " ) ;
2018-07-05 15:30:33 +02:00
// Check if the set value is a power of 2, and warn the user if this is not the case
int max_send_pow = ( int ) pow ( 2 , ceil ( log2 ( ib - > qp_init . cap . max_send_wr ) ) ) ;
int max_recv_pow = ( int ) pow ( 2 , ceil ( log2 ( ib - > qp_init . cap . max_recv_wr ) ) ) ;
2018-07-13 12:21:59 +02:00
if ( ib - > qp_init . cap . max_send_wr ! = max_send_pow ) {
2018-07-07 14:36:23 +02:00
warn ( " Max nr. of send WRs (%i) is not a power of 2! It will be changed to a power of 2: %i " ,
2018-07-05 15:30:33 +02:00
ib - > qp_init . cap . max_send_wr , max_send_pow ) ;
2018-07-13 12:21:59 +02:00
// Change it now, because otherwise errors are possible in ib_start().
ib - > qp_init . cap . max_send_wr = max_send_pow ;
}
if ( ib - > qp_init . cap . max_recv_wr ! = max_recv_pow ) {
2018-07-07 14:36:23 +02:00
warn ( " Max nr. of recv WRs (%i) is not a power of 2! It will be changed to a power of 2: %i " ,
2018-07-05 15:30:33 +02:00
ib - > qp_init . cap . max_recv_wr , max_recv_pow ) ;
2018-07-13 12:21:59 +02:00
// Change it now, because otherwise errors are possible in ib_start().
ib - > qp_init . cap . max_recv_wr = max_recv_pow ;
}
2018-07-05 15:30:33 +02:00
// Check maximum size of max_recv_wr and max_send_wr
if ( ib - > qp_init . cap . max_send_wr > 8192 )
warn ( " Max number of send WRs (%i) is bigger than send queue! " , ib - > qp_init . cap . max_send_wr ) ;
if ( ib - > qp_init . cap . max_recv_wr > 8192 )
warn ( " Max number of receive WRs (%i) is bigger than send queue! " , ib - > qp_init . cap . max_recv_wr ) ;
2018-07-13 13:50:30 +02:00
// Warn user if he changed the default inline value
if ( ib - > qp_init . cap . max_inline_data ! = 0 )
warn ( " You changed the default value of max_inline_data. This might influence the maximum number of outstanding Work Requests in the Queue Pair and can be a reason for the Queue Pair creation to fail " ) ;
2018-07-14 15:20:24 +02:00
// Check if inline mode is set to a valid value
2018-07-15 13:51:18 +02:00
if ( ib - > conn . send_inline ! = 0 & & ib - > conn . send_inline ! = 1 )
error ( " send_inline has to be set to either 0 or 1! %i is not a valid value " , ib - > conn . send_inline ) ;
2018-07-14 15:20:24 +02:00
2018-07-05 15:30:33 +02:00
info ( " Finished check of node %s " , node_name ( n ) ) ;
return 0 ;
}
2018-06-23 14:53:37 +02:00
/**
 * Return a textual description of the node.
 *
 * Not implemented: no string is produced.
 *
 * @param n The node to describe (unused).
 * @return Always a null pointer.
 */
char * ib_print(struct node *n)
{
	(void) n;

	return NULL;
}
2018-06-23 14:53:37 +02:00
/**
 * Destroy the node.
 *
 * Currently a no-op: nothing is released here.
 *
 * @param n The node to destroy (unused).
 * @return Always 0.
 */
int ib_destroy(struct node *n)
{
	(void) n;

	return 0;
}
2018-07-16 10:54:15 +02:00
static void ib_create_bind_id ( struct node * n )
2018-07-15 16:37:52 +02:00
{
struct infiniband * ib = ( struct infiniband * ) n - > _vd ;
int ret ;
2018-07-16 10:54:15 +02:00
// Create rdma_cm_id
2018-07-15 16:37:52 +02:00
ret = rdma_create_id ( ib - > ctx . ec , & ib - > ctx . id , NULL , ib - > conn . port_space ) ;
if ( ret )
error ( " Failed to create rdma_cm_id of node %s: %s " , node_name ( n ) , gai_strerror ( ret ) ) ;
debug ( LOG_IB | 3 , " Created rdma_cm_id " ) ;
// Bind rdma_cm_id to the HCA
ret = rdma_bind_addr ( ib - > ctx . id , ib - > conn . src_addr - > ai_addr ) ;
if ( ret )
error ( " Failed to bind to local device of node %s: %s " ,
node_name ( n ) , gai_strerror ( ret ) ) ;
debug ( LOG_IB | 3 , " Bound rdma_cm_id to Infiniband device " ) ;
// The ID will be overwritten for the target. If the event type is
// RDMA_CM_EVENT_CONNECT_REQUEST, >then this references a new id for
// that communication.
ib - > ctx . listen_id = ib - > ctx . id ;
}
2018-07-16 10:54:15 +02:00
static void ib_continue_as_listen ( struct node * n , struct rdma_cm_event * event )
2018-07-15 16:37:52 +02:00
{
struct infiniband * ib = ( struct infiniband * ) n - > _vd ;
int ret ;
warn ( " Trying to continue as listening node " ) ;
// Acknowledge event
rdma_ack_cm_event ( event ) ;
// Destroy ID
rdma_destroy_id ( ib - > ctx . listen_id ) ;
// Create rdma_cm_id and bind to device
ib_create_bind_id ( n ) ;
// Listen to id for events
ret = rdma_listen ( ib - > ctx . listen_id , 10 ) ;
if ( ret )
error ( " Failed to listen to rdma_cm_id on node %s " , node_name ( n ) ) ;
2018-07-16 10:54:15 +02:00
// Node is not a source (and will not send data
2018-07-15 16:37:52 +02:00
ib - > is_source = 0 ;
info ( " Node %s is set to listening mode " , node_name ( n ) ) ;
}
2018-07-14 16:46:23 +02:00
/**
 * Signal handler which terminates the calling thread.
 *
 * Installed for SIGUSR1 by the rdma_cm event thread so that ib_stop() can end
 * that thread while it waits for events.
 *
 * @param signo The received signal number (unused).
 */
static void sigHandler(int signo)
{
	(void) signo;

	info(" Node was already disconnected. Exiting thread with pthread_exit() ");

	pthread_exit(NULL);
}
2018-07-15 16:37:52 +02:00
2018-07-05 18:26:32 +02:00
void * ib_rdma_cm_event_thread ( void * n )
{
struct node * node = ( struct node * ) n ;
2018-07-07 12:49:22 +02:00
struct infiniband * ib = ( struct infiniband * ) node - > _vd ;
2018-07-05 18:26:32 +02:00
struct rdma_cm_event * event ;
2018-07-14 16:46:23 +02:00
struct sigaction sa ;
2018-07-07 12:49:22 +02:00
int ret = 0 ;
2018-07-14 16:46:23 +02:00
// Register signal handler, in case event channel blocks and we can't exit thread
sa . sa_handler = sigHandler ;
sigaction ( SIGUSR1 , & sa , NULL ) ;
2018-07-07 13:08:08 +02:00
debug ( LOG_IB | 1 , " Started rdma_cm_event thread of node %s " , node_name ( node ) ) ;
2018-07-05 18:26:32 +02:00
2018-07-07 12:49:22 +02:00
// Wait until node is completely started
2018-07-07 13:08:08 +02:00
while ( node - > state ! = STATE_STARTED ) ;
2018-07-07 12:49:22 +02:00
2018-07-07 13:08:08 +02:00
// Monitor event channel
2018-07-07 12:49:22 +02:00
while ( rdma_get_cm_event ( ib - > ctx . ec , & event ) = = 0 ) {
2018-07-05 18:26:32 +02:00
2018-07-07 13:08:08 +02:00
switch ( event - > event ) {
2018-07-07 12:49:22 +02:00
case RDMA_CM_EVENT_ADDR_RESOLVED :
debug ( LOG_IB | 2 , " Received RDMA_CM_EVENT_ADDR_RESOLVED " ) ;
2018-07-05 18:26:32 +02:00
2018-07-07 12:49:22 +02:00
ret = ib_addr_resolved ( n ) ;
break ;
2018-07-05 18:26:32 +02:00
2018-07-07 12:49:22 +02:00
case RDMA_CM_EVENT_ADDR_ERROR :
debug ( LOG_IB | 2 , " Received RDMA_CM_EVENT_ADDR_ERROR " ) ;
2018-07-15 16:37:52 +02:00
warn ( " Address resolution (rdma_resolve_addr) failed! " ) ;
ib_continue_as_listen ( n , event ) ;
2018-07-03 18:01:49 +02:00
2018-07-07 12:49:22 +02:00
break ;
case RDMA_CM_EVENT_ROUTE_RESOLVED :
debug ( LOG_IB | 2 , " Received RDMA_CM_EVENT_ROUTE_RESOLVED " ) ;
2018-07-05 13:57:25 +02:00
2018-07-07 12:49:22 +02:00
ret = ib_route_resolved ( n ) ;
break ;
2018-07-05 13:57:25 +02:00
2018-07-07 12:49:22 +02:00
case RDMA_CM_EVENT_ROUTE_ERROR :
debug ( LOG_IB | 2 , " Received RDMA_CM_EVENT_ROUTE_ERROR " ) ;
2018-07-15 16:37:52 +02:00
warn ( " Route resolution (rdma_resovle_route) failed! " ) ;
ib_continue_as_listen ( n , event ) ;
2018-07-03 18:01:49 +02:00
2018-07-07 12:49:22 +02:00
break ;
case RDMA_CM_EVENT_CONNECT_REQUEST :
debug ( LOG_IB | 2 , " Received RDMA_CM_EVENT_CONNECT_REQUEST " ) ;
2018-07-07 13:08:08 +02:00
ret = ib_connect_request ( n , event - > id ) ;
2018-07-07 12:49:22 +02:00
break ;
case RDMA_CM_EVENT_CONNECT_ERROR :
debug ( LOG_IB | 2 , " Received RDMA_CM_EVENT_CONNECT_ERROR " ) ;
2018-07-15 16:37:52 +02:00
warn ( " An error has occurred trying to establish a connection! " ) ;
ib_continue_as_listen ( n , event ) ;
2018-07-07 12:49:22 +02:00
break ;
case RDMA_CM_EVENT_REJECTED :
debug ( LOG_IB | 2 , " Received RDMA_CM_EVENT_REJECTED " ) ;
2018-07-15 16:37:52 +02:00
warn ( " Connection request or response was rejected by the remote end point! " ) ;
ib_continue_as_listen ( n , event ) ;
2018-07-07 12:49:22 +02:00
break ;
2018-07-15 16:37:52 +02:00
2018-07-07 12:49:22 +02:00
case RDMA_CM_EVENT_ESTABLISHED :
debug ( LOG_IB | 2 , " Received RDMA_CM_EVENT_ESTABLISHED " ) ;
2018-07-07 13:08:08 +02:00
node - > state = STATE_CONNECTED ;
2018-07-07 12:49:22 +02:00
info ( " Connection established in node %s " , node_name ( n ) ) ;
break ;
case RDMA_CM_EVENT_DISCONNECTED :
debug ( LOG_IB | 2 , " Received RDMA_CM_EVENT_DISCONNECTED " ) ;
2018-07-07 14:36:23 +02:00
node - > state = STATE_STARTED ;
ret = ib_disconnect ( n ) ;
2018-07-14 16:46:23 +02:00
if ( ! ret )
info ( " Host disconnected. Ready to accept new connections. " ) ;
2018-07-07 15:34:07 +02:00
2018-07-07 14:36:23 +02:00
break ;
case RDMA_CM_EVENT_TIMEWAIT_EXIT :
2018-07-07 12:49:22 +02:00
break ;
default :
2018-07-07 13:08:08 +02:00
error ( " Unknown event occurred: %u " , event - > event ) ;
2018-07-03 18:01:49 +02:00
}
2018-07-07 12:49:22 +02:00
2018-07-07 13:08:08 +02:00
rdma_ack_cm_event ( event ) ;
2018-07-07 12:49:22 +02:00
2018-07-08 15:00:47 +02:00
if ( ret )
2018-07-07 12:49:22 +02:00
break ;
2018-07-03 18:01:49 +02:00
}
2018-07-07 12:49:22 +02:00
2018-07-03 18:01:49 +02:00
return NULL ;
2018-06-28 12:46:16 +02:00
}
2018-06-23 14:53:37 +02:00
int ib_start ( struct node * n )
2018-06-21 11:56:28 +02:00
{
2018-07-03 18:01:49 +02:00
struct infiniband * ib = ( struct infiniband * ) n - > _vd ;
int ret ;
2018-07-05 13:57:25 +02:00
debug ( LOG_IB | 1 , " Started ib_start " ) ;
2018-07-03 18:01:49 +02:00
// Create event channel
2018-07-07 12:49:22 +02:00
ib - > ctx . ec = rdma_create_event_channel ( ) ;
if ( ! ib - > ctx . ec )
2018-07-03 18:01:49 +02:00
error ( " Failed to create event channel in node %s! " , node_name ( n ) ) ;
2018-07-05 13:57:25 +02:00
debug ( LOG_IB | 3 , " Created event channel " ) ;
2018-07-15 16:37:52 +02:00
// Create rdma_cm_id and bind to device
ib_create_bind_id ( n ) ;
2018-07-07 15:34:07 +02:00
2018-07-13 12:21:59 +02:00
// Initialize send Work Completion stack
ib - > conn . send_wc_stack . top = 0 ;
ib - > conn . send_wc_stack . array = alloc ( ib - > qp_init . cap . max_recv_wr * sizeof ( uint64_t ) ) ;
debug ( LOG_IB | 3 , " Initialized Work Completion Stack " ) ;
2018-07-07 15:34:07 +02:00
2018-07-16 10:54:15 +02:00
// Resolve address or listen to rdma_cm_id
2018-07-04 19:04:08 +02:00
if ( ib - > is_source ) {
2018-07-03 18:01:49 +02:00
// Resolve address
2018-07-07 14:36:23 +02:00
ret = rdma_resolve_addr ( ib - > ctx . id , NULL , ib - > conn . dst_addr - > ai_addr , ib - > conn . timeout ) ;
2018-07-04 19:04:08 +02:00
if ( ret )
2018-07-03 18:01:49 +02:00
error ( " Failed to resolve remote address after %ims of node %s: %s " ,
ib - > conn . timeout , node_name ( n ) , gai_strerror ( ret ) ) ;
}
2018-07-04 19:04:08 +02:00
else {
2018-07-03 18:01:49 +02:00
// Listen on rdma_cm_id for events
ret = rdma_listen ( ib - > ctx . listen_id , 10 ) ;
2018-07-04 19:04:08 +02:00
if ( ret )
2018-07-03 18:01:49 +02:00
error ( " Failed to listen to rdma_cm_id on node %s " , node_name ( n ) ) ;
2018-07-05 13:57:25 +02:00
debug ( LOG_IB | 3 , " Started to listen to rdma_cm_id " ) ;
2018-07-03 18:01:49 +02:00
}
2018-07-07 12:49:22 +02:00
//Allocate protection domain
ib - > ctx . pd = ibv_alloc_pd ( ib - > ctx . id - > verbs ) ;
if ( ! ib - > ctx . pd )
error ( " Could not allocate protection domain in node %s " , node_name ( n ) ) ;
debug ( LOG_IB | 3 , " Allocated Protection Domain " ) ;
2018-07-03 18:01:49 +02:00
// Several events should occur on the event channel, to make
// sure the nodes are succesfully connected.
2018-07-05 13:57:25 +02:00
debug ( LOG_IB | 1 , " Starting to monitor events on rdma_cm_id " ) ;
2018-07-03 18:01:49 +02:00
2018-07-05 18:26:32 +02:00
//Create thread to monitor rdma_cm_event channel
2018-07-07 12:49:22 +02:00
ret = pthread_create ( & ib - > conn . rdma_cm_event_thread , NULL , ib_rdma_cm_event_thread , n ) ;
2018-07-04 19:04:08 +02:00
if ( ret )
2018-07-07 12:49:22 +02:00
error ( " Failed to create thread to monitor rdma_cm events in node %s: %s " ,
2018-07-03 18:01:49 +02:00
node_name ( n ) , gai_strerror ( ret ) ) ;
return 0 ;
2018-06-21 11:56:28 +02:00
}
2018-06-23 14:53:37 +02:00
int ib_stop ( struct node * n )
2018-06-21 11:56:28 +02:00
{
2018-07-03 17:39:06 +02:00
struct infiniband * ib = ( struct infiniband * ) n - > _vd ;
int ret ;
2018-06-28 12:46:16 +02:00
2018-07-07 14:36:23 +02:00
debug ( LOG_IB | 1 , " Called ib_stop " ) ;
ib - > stopThreads = 1 ;
2018-07-03 17:39:06 +02:00
// Call RDMA disconnect function
// Will flush all outstanding WRs to the Completion Queue and
// will call RDMA_CM_EVENT_DISCONNECTED if that is done.
2018-07-14 16:46:23 +02:00
if ( n - > state = = STATE_CONNECTED ) {
2018-07-07 15:34:07 +02:00
ret = rdma_disconnect ( ib - > ctx . id ) ;
2018-07-04 19:04:08 +02:00
2018-07-14 16:46:23 +02:00
if ( ret )
error ( " Error while calling rdma_disconnect in node %s: %s " ,
node_name ( n ) , gai_strerror ( ret ) ) ;
debug ( LOG_IB | 3 , " Called rdma_disconnect " ) ;
}
else {
2018-07-16 10:54:15 +02:00
// Since cannot use an event to unblock rdma_cm_get_event, we send
// SIGUSR1 to the thread and kill it.
2018-07-14 16:46:23 +02:00
pthread_kill ( ib - > conn . rdma_cm_event_thread , SIGUSR1 ) ;
debug ( LOG_IB | 3 , " Called pthread_kill() " ) ;
}
info ( " Disconnecting... Waiting for threads to join. " ) ;
2018-07-07 14:36:23 +02:00
// Wait for event thread to join
ret = pthread_join ( ib - > conn . rdma_cm_event_thread , NULL ) ;
if ( ret )
error ( " Error while joining rdma_cm_event_thread in node %s: %i " , node_name ( n ) , ret ) ;
debug ( LOG_IB | 3 , " Joined rdma_cm_event_thread " ) ;
// Destroy RDMA CM ID
rdma_destroy_id ( ib - > ctx . id ) ;
debug ( LOG_IB | 3 , " Destroyed rdma_cm_id " ) ;
// Dealloc Protection Domain
ibv_dealloc_pd ( ib - > ctx . pd ) ;
debug ( LOG_IB | 3 , " Destroyed protection domain " ) ;
// Destroy event channel
rdma_destroy_event_channel ( ib - > ctx . ec ) ;
debug ( LOG_IB | 3 , " Destroyed event channel " ) ;
2018-06-30 18:20:30 +02:00
2018-07-07 14:36:23 +02:00
info ( " Successfully stopped %s " , node_name ( n ) ) ;
2018-06-28 12:46:16 +02:00
2018-07-03 17:39:06 +02:00
return 0 ;
2018-06-21 11:56:28 +02:00
}
2018-07-11 18:14:29 +02:00
int ib_read ( struct node * n , struct sample * smps [ ] , unsigned cnt , unsigned * release )
2018-06-21 11:56:28 +02:00
{
2018-07-01 12:56:03 +02:00
struct infiniband * ib = ( struct infiniband * ) n - > _vd ;
2018-07-12 17:49:17 +02:00
struct ibv_wc wc [ cnt ] ;
2018-07-11 18:14:29 +02:00
struct ibv_recv_wr wr [ cnt ] , * bad_wr = NULL ;
struct ibv_sge sge [ cnt ] ;
2018-07-04 19:04:08 +02:00
struct ibv_mr * mr ;
2018-07-12 17:49:17 +02:00
int ret = 0 , wcs = 0 , read_values = 0 , max_wr_post ;
2018-07-01 12:56:03 +02:00
2018-07-05 13:57:25 +02:00
debug ( LOG_IB | 15 , " ib_read is called " ) ;
2018-07-07 12:49:22 +02:00
if ( n - > state = = STATE_CONNECTED ) {
2018-07-03 17:39:06 +02:00
2018-07-12 17:49:17 +02:00
max_wr_post = cnt ;
2018-07-03 17:39:06 +02:00
2018-07-12 17:49:17 +02:00
// Poll Completion Queue
// If we've already posted enough receive WRs, try to pull cnt
2018-07-15 13:51:18 +02:00
if ( ib - > conn . available_recv_wrs > = ( ib - > qp_init . cap . max_recv_wr - ib - > conn . buffer_subtraction ) ) {
2018-07-12 17:49:17 +02:00
while ( 1 ) {
wcs = ibv_poll_cq ( ib - > ctx . recv_cq , cnt , wc ) ;
if ( wcs ) {
debug ( LOG_IB | 10 , " Received %i Work Completions " , wcs ) ;
2018-07-03 17:39:06 +02:00
2018-07-12 17:49:17 +02:00
read_values = wcs ; // Value to return
max_wr_post = wcs ; // Make space free in smps[]
2018-07-08 15:00:47 +02:00
2018-07-07 12:49:22 +02:00
break ;
}
}
2018-07-01 12:56:03 +02:00
2018-07-12 17:49:17 +02:00
// All samples (wcs * received + unposted) should be released. Let
// *release be equal to allocated.
//
// This is set in the framework, before this function was called
}
else {
ib - > conn . available_recv_wrs + = max_wr_post ;
* release = 0 ; // While we fill the receive queue, we always use all samples
2018-07-07 12:49:22 +02:00
}
2018-07-05 13:57:25 +02:00
2018-07-12 17:49:17 +02:00
// Get Memory Region
mr = memory_ib_get_mr ( smps [ 0 ] ) ;
2018-07-07 12:49:22 +02:00
2018-07-12 17:49:17 +02:00
for ( int i = 0 ; i < max_wr_post ; i + + ) {
// Prepare receive Scatter/Gather element
sge [ i ] . addr = ( uint64_t ) & smps [ i ] - > data ;
sge [ i ] . length = SAMPLE_DATA_LEN ( DEFAULT_SAMPLELEN ) ;
sge [ i ] . lkey = mr - > lkey ;
// Prepare a receive Work Request
wr [ i ] . wr_id = ( uintptr_t ) smps [ i ] ;
wr [ i ] . next = & wr [ i + 1 ] ;
wr [ i ] . sg_list = & sge [ i ] ;
wr [ i ] . num_sge = 1 ;
}
2018-07-07 12:49:22 +02:00
2018-07-12 17:49:17 +02:00
wr [ max_wr_post - 1 ] . next = NULL ;
2018-07-07 12:49:22 +02:00
2018-07-12 17:49:17 +02:00
debug ( LOG_IB | 5 , " Prepared %i new receive Work Requests " , max_wr_post ) ;
debug ( LOG_IB | 5 , " %i receive Work Requests in Receive Queue " , ib - > conn . available_recv_wrs ) ;
2018-07-07 12:49:22 +02:00
2018-07-12 17:49:17 +02:00
// Post list of Work Requests
ret = ibv_post_recv ( ib - > ctx . id - > qp , & wr [ 0 ] , & bad_wr ) ;
2018-07-07 12:49:22 +02:00
2018-07-12 17:49:17 +02:00
if ( ret )
error ( " Was unable to post receive WR in node %s: %i, bad WR ID: 0x%lx " ,
node_name ( n ) , ret , bad_wr - > wr_id ) ;
2018-07-01 12:56:03 +02:00
2018-07-12 17:49:17 +02:00
debug ( LOG_IB | 10 , " Succesfully posted receive Work Requests " ) ;
2018-07-08 14:05:48 +02:00
2018-07-12 17:49:17 +02:00
// Doesn't start, if wcs == 0
for ( int j = 0 ; j < wcs ; j + + ) {
if ( ! ( ( wc [ j ] . opcode & IBV_WC_RECV ) & & wc [ j ] . status = = IBV_WC_SUCCESS ) )
read_values - - ;
2018-07-08 14:05:48 +02:00
2018-07-12 17:49:17 +02:00
if ( wc [ j ] . status = = IBV_WC_WR_FLUSH_ERR )
debug ( LOG_IB | 5 , " Received IBV_WC_WR_FLUSH_ERR (ib_read). Ignore it. " ) ;
else if ( wc [ j ] . status ! = IBV_WC_SUCCESS )
warn ( " Work Completion status was not IBV_WC_SUCCES in node %s: %i " ,
node_name ( n ) , wc [ j ] . status ) ;
smps [ j ] = ( struct sample * ) ( wc [ j ] . wr_id ) ;
smps [ j ] - > length = wc [ j ] . byte_len / sizeof ( double ) ;
}
2018-07-08 14:05:48 +02:00
}
2018-07-12 17:49:17 +02:00
return read_values ;
2018-06-21 11:56:28 +02:00
}
2018-07-12 17:49:17 +02:00
int ib_write ( struct node * n , struct sample * smps [ ] , unsigned cnt , unsigned * release )
2018-06-21 11:56:28 +02:00
{
2018-07-01 12:56:03 +02:00
struct infiniband * ib = ( struct infiniband * ) n - > _vd ;
2018-07-11 18:14:29 +02:00
struct ibv_send_wr wr [ cnt ] , * bad_wr = NULL ;
struct ibv_sge sge [ cnt ] ;
2018-07-12 17:49:17 +02:00
struct ibv_wc wc [ cnt ] ;
2018-07-04 19:04:08 +02:00
struct ibv_mr * mr ;
2018-07-12 17:49:17 +02:00
2018-07-01 12:56:03 +02:00
int ret ;
2018-07-12 17:49:17 +02:00
int sent = 0 ; //Used for first loop: prepare work requests to post to send queue
2018-07-01 12:56:03 +02:00
2018-07-05 13:57:25 +02:00
debug ( LOG_IB | 10 , " ib_write is called " ) ;
2018-07-12 17:49:17 +02:00
* release = 0 ;
2018-07-07 12:49:22 +02:00
if ( n - > state = = STATE_CONNECTED ) {
2018-07-08 15:00:47 +02:00
// First, write
2018-07-05 13:57:25 +02:00
2018-07-07 12:49:22 +02:00
// Get Memory Region
mr = memory_ib_get_mr ( smps [ 0 ] ) ;
2018-07-01 12:56:03 +02:00
2018-07-12 17:49:17 +02:00
for ( sent = 0 ; sent < cnt ; sent + + ) {
// Set Scatter/Gather element to data of sample
sge [ sent ] . addr = ( uint64_t ) & smps [ sent ] - > data ;
sge [ sent ] . length = smps [ sent ] - > length * sizeof ( double ) ;
sge [ sent ] . lkey = mr - > lkey ;
2018-07-01 12:56:03 +02:00
2018-07-13 13:50:30 +02:00
// Check if data can be send inline
int send_inline = ( sge [ sent ] . length < ib - > qp_init . cap . max_inline_data ) ?
2018-07-15 13:51:18 +02:00
ib - > conn . send_inline : 0 ;
2018-07-13 13:50:30 +02:00
debug ( LOG_IB | 10 , " Sample will be send inline [0/1]: %i " , send_inline ) ;
2018-07-07 12:49:22 +02:00
// Set Send Work Request
2018-07-12 17:49:17 +02:00
wr [ sent ] . wr_id = send_inline ? 0 : ( uintptr_t ) smps [ sent ] ; // This way the sample can be release in WC
wr [ sent ] . sg_list = & sge [ sent ] ;
wr [ sent ] . num_sge = 1 ;
2018-07-14 15:20:24 +02:00
wr [ sent ] . next = & wr [ sent + 1 ] ;
2018-07-07 12:49:22 +02:00
2018-07-12 17:49:17 +02:00
wr [ sent ] . send_flags = IBV_SEND_SIGNALED | ( send_inline < < 3 ) ;
wr [ sent ] . imm_data = htonl ( 0 ) ; //ToDo: set this to a useful value
wr [ sent ] . opcode = IBV_WR_SEND_WITH_IMM ;
2018-07-05 13:57:25 +02:00
}
2018-07-01 12:56:03 +02:00
2018-07-14 15:20:24 +02:00
debug ( LOG_IB | 10 , " Prepared %i send Work Requests " , cnt ) ;
wr [ cnt - 1 ] . next = NULL ;
2018-07-12 17:49:17 +02:00
// Send linked list of Work Requests
2018-07-07 12:49:22 +02:00
ret = ibv_post_send ( ib - > ctx . id - > qp , wr , & bad_wr ) ;
2018-07-12 17:49:17 +02:00
debug ( LOG_IB | 4 , " Posted send Work Requests " ) ;
// Reorder list. Place inline and unposted samples to the top
// m will always be equal or smaller than *release
for ( int m = 0 ; m < cnt ; m + + ) {
// We can't use wr_id as identifier, since it is 0 for inline
// elements
if ( ret & & ( wr [ m ] . sg_list = = bad_wr - > sg_list ) ) {
// The remaining work requests will be bad. Ripple through list
// and prepare them to be released
debug ( LOG_IB | 4 , " Bad WR occured with ID: 0x%lx and S/G address: 0x%px: %i " ,
bad_wr - > wr_id , bad_wr - > sg_list , ret ) ;
while ( 1 ) {
smps [ * release ] = smps [ m ] ;
( * release ) + + ; // Increment number of samples to be released
sent - - ; // Decrement the number of succesfully posted elements
if ( + + m = = cnt ) break ;
}
}
else if ( wr [ m ] . send_flags & IBV_SEND_INLINE ) {
smps [ * release ] = smps [ m ] ;
( * release ) + + ;
}
2018-07-01 12:56:03 +02:00
2018-07-07 12:49:22 +02:00
}
2018-07-01 12:56:03 +02:00
2018-07-12 17:49:17 +02:00
debug ( LOG_IB | 4 , " %i samples will be released " , * release ) ;
2018-07-08 15:00:47 +02:00
2018-07-13 12:21:59 +02:00
// Always poll cnt items from Receive Queue. If there is not enough space in
// smps, we temporarily save it on a stack
ret = ibv_poll_cq ( ib - > ctx . send_cq , cnt , wc ) ;
2018-07-08 15:00:47 +02:00
2018-07-12 17:49:17 +02:00
for ( int i = 0 ; i < ret ; i + + ) {
if ( wc [ i ] . status ! = IBV_WC_SUCCESS & & wc [ i ] . status ! = IBV_WC_WR_FLUSH_ERR )
2018-07-08 15:00:47 +02:00
warn ( " Work Completion status was not IBV_WC_SUCCES in node %s: %i " ,
2018-07-12 17:49:17 +02:00
node_name ( n ) , wc [ i ] . status ) ;
2018-07-08 15:00:47 +02:00
2018-07-12 17:49:17 +02:00
// Release only samples which were not send inline
if ( wc [ i ] . wr_id ) {
2018-07-13 12:21:59 +02:00
if ( cnt - * release > 0 ) {
smps [ * release ] = ( struct sample * ) ( wc [ i ] . wr_id ) ;
( * release ) + + ;
}
else {
ib - > conn . send_wc_stack . array [ ib - > conn . send_wc_stack . top ] = wc [ i ] . wr_id ;
ib - > conn . send_wc_stack . top + + ;
}
}
}
// Check if we still have some space and try to get rid of some addresses on our stack
if ( ib - > conn . send_wc_stack . top > 0 ) {
int empty_smps = cnt - * release ;
for ( int i = 0 ; i < empty_smps ; i + + ) {
ib - > conn . send_wc_stack . top - - ;
smps [ * release ] = ( struct sample * ) ib - > conn . send_wc_stack . array [ ib - > conn . send_wc_stack . top ] ;
2018-07-12 17:49:17 +02:00
( * release ) + + ;
2018-07-13 12:21:59 +02:00
if ( ib - > conn . send_wc_stack . top = = 0 ) break ;
2018-07-12 17:49:17 +02:00
}
2018-07-08 15:00:47 +02:00
}
2018-07-01 12:56:03 +02:00
}
2018-07-12 17:49:17 +02:00
return sent ;
2018-06-21 11:56:28 +02:00
}
2018-06-23 14:53:37 +02:00
/**
 * File descriptor callback of the Infiniband node type.
 *
 * The node is not inspected; the function yields the constant 0 for every
 * node.
 *
 * @param n  Node (unused).
 * @return Always 0.
 */
int ib_fd(struct node *n)
{
	(void) n; // Not needed to produce the constant result

	return 0;
}
static struct plugin p = {
2018-07-16 08:10:22 +02:00
. name = " infiniband " ,
. description = " Infiniband interface (libibverbs, librdmacm) " ,
. type = PLUGIN_TYPE_NODE ,
. node = {
. vectorize = 0 ,
. size = sizeof ( struct infiniband ) ,
. reverse = ib_reverse ,
. parse = ib_parse ,
2018-07-05 15:30:33 +02:00
. check = ib_check ,
2018-07-16 08:10:22 +02:00
. print = ib_print ,
. start = ib_start ,
. destroy = ib_destroy ,
. stop = ib_stop ,
. read = ib_read ,
. write = ib_write ,
. fd = ib_fd ,
. memory_type = memory_ib
2018-07-03 17:39:06 +02:00
}
2018-06-21 11:56:28 +02:00
} ;
REGISTER_PLUGIN ( & p )
LIST_INIT_STATIC ( & p . node . instances )