1
0
Fork 0
mirror of https://github.com/hermitcore/libhermit.git synced 2025-03-09 00:00:03 +01:00

Added other pingpong types: UC, UD, SRQ, XSRQ

This commit is contained in:
Annika Wierichs 2018-02-19 14:31:31 +01:00
parent f7b14f39ed
commit 4de2db0ccc
14 changed files with 4095 additions and 374 deletions

View file

@ -1,6 +1,6 @@
/*
* Copyright (c) 2010, Stefan Lankes, RWTH Aachen University
* Copyright (c) 2018, Annika Wierichs, RWTH Aachen University
* 2018, Annika Wierichs, RWTH Aachen University
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -69,99 +69,95 @@ extern const size_t image_size;
// KVM I/O ports corresponding to verbs API functions
#define UHYVE_PORT_SET_IB_POOL_ADDR 0x610
#define UHYVE_PORT_IBV_WC_STATUS_STR 0x611
#define UHYVE_PORT_IBV_RATE_TO_MULT 0x612
#define UHYVE_PORT_MULT_TO_IBV_RATE 0x613
#define UHYVE_PORT_IBV_RATE_TO_MBPS 0x614
#define UHYVE_PORT_MBPS_TO_IBV_RATE 0x615
#define UHYVE_PORT_IBV_CQ_EX_TO_CQ 0x616
#define UHYVE_PORT_IBV_START_POLL 0x617
#define UHYVE_PORT_IBV_NEXT_POLL 0x618
#define UHYVE_PORT_IBV_END_POLL 0x619
#define UHYVE_PORT_IBV_WC_READ_OPCODE 0x61A
#define UHYVE_PORT_IBV_WC_READ_VENDOR_ERR 0x61B
#define UHYVE_PORT_IBV_WC_READ_BYTE_LEN 0x61C
#define UHYVE_PORT_IBV_WC_READ_IMM_DATA 0x61D
#define UHYVE_PORT_IBV_WC_READ_INVALIDATED_RKEY 0x61E
#define UHYVE_PORT_IBV_WC_READ_QP_NUM 0x61F
#define UHYVE_PORT_IBV_WC_READ_SRC_QP 0x620
#define UHYVE_PORT_IBV_WC_READ_WC_FLAGS 0x621
#define UHYVE_PORT_IBV_WC_READ_SLID 0x622
#define UHYVE_PORT_IBV_WC_READ_SL 0x623
#define UHYVE_PORT_IBV_WC_READ_DLID_PATH_BITS 0x624
#define UHYVE_PORT_IBV_WC_READ_COMPLETION_TS 0x625
#define UHYVE_PORT_IBV_WC_READ_CVLAN 0x626
#define UHYVE_PORT_IBV_WC_READ_FLOW_TAG 0x627
#define UHYVE_PORT_IBV_POST_WQ_RECV 0x628
#define UHYVE_PORT_IBV_GET_DEVICE_LIST 0x629
#define UHYVE_PORT_IBV_FREE_DEVICE_LIST 0x62A
#define UHYVE_PORT_IBV_GET_DEVICE_NAME 0x62B
#define UHYVE_PORT_IBV_GET_DEVICE_GUID 0x62C
#define UHYVE_PORT_IBV_OPEN_DEVICE 0x62D
#define UHYVE_PORT_IBV_CLOSE_DEVICE 0x62E
#define UHYVE_PORT_IBV_GET_ASYNC_EVENT 0x62F
#define UHYVE_PORT_IBV_ACK_ASYNC_EVENT 0x630
#define UHYVE_PORT_IBV_QUERY_DEVICE 0x631
#define UHYVE_PORT_IBV_QUERY_PORT 0x632
#define UHYVE_PORT_IBV_QUERY_GID 0x633
#define UHYVE_PORT_IBV_QUERY_PKEY 0x634
#define UHYVE_PORT_IBV_ALLOC_PD 0x635
#define UHYVE_PORT_IBV_DEALLOC_PD 0x636
#define UHYVE_PORT_IBV_CREATE_FLOW 0x637
#define UHYVE_PORT_IBV_DESTROY_FLOW 0x638
#define UHYVE_PORT_IBV_OPEN_XRCD 0x639
#define UHYVE_PORT_IBV_CLOSE_XRCD 0x63A
#define UHYVE_PORT_IBV_REG_MR 0x63B
#define UHYVE_PORT_IBV_REREG_MR 0x63C
#define UHYVE_PORT_IBV_DEREG_MR 0x63D
#define UHYVE_PORT_IBV_ALLOC_MW 0x63E
#define UHYVE_PORT_IBV_DEALLOC_MW 0x63F
#define UHYVE_PORT_IBV_INC_RKEY 0x640
#define UHYVE_PORT_IBV_BIND_MW 0x641
#define UHYVE_PORT_IBV_CREATE_COMP_CHANNEL 0x642
#define UHYVE_PORT_IBV_DESTROY_COMP_CHANNEL 0x643
#define UHYVE_PORT_IBV_CREATE_CQ 0x644
#define UHYVE_PORT_IBV_CREATE_CQ_EX 0x645
#define UHYVE_PORT_IBV_RESIZE_CQ 0x646
#define UHYVE_PORT_IBV_DESTROY_CQ 0x647
#define UHYVE_PORT_IBV_GET_CQ_EVENT 0x648
#define UHYVE_PORT_IBV_ACK_CQ_EVENTS 0x649
#define UHYVE_PORT_IBV_POLL_CQ 0x64A
#define UHYVE_PORT_IBV_REQ_NOTIFY_CQ 0x64B
#define UHYVE_PORT_IBV_CREATE_SRQ 0x64C
#define UHYVE_PORT_IBV_CREATE_SRQ_EX 0x64D
#define UHYVE_PORT_IBV_MODIFY_SRQ 0x64E
#define UHYVE_PORT_IBV_QUERY_SRQ 0x64F
#define UHYVE_PORT_IBV_GET_SRQ_NUM 0x650
#define UHYVE_PORT_IBV_DESTROY_SRQ 0x651
#define UHYVE_PORT_IBV_POST_SRQ_RECV 0x652
#define UHYVE_PORT_IBV_CREATE_QP 0x653
#define UHYVE_PORT_IBV_CREATE_QP_EX 0x654
#define UHYVE_PORT_IBV_QUERY_RT_VALUES_EX 0x655
#define UHYVE_PORT_IBV_QUERY_DEVICE_EX 0x656
#define UHYVE_PORT_IBV_OPEN_QP 0x657
#define UHYVE_PORT_IBV_MODIFY_QP 0x658
#define UHYVE_PORT_IBV_QUERY_QP 0x659
#define UHYVE_PORT_IBV_DESTROY_QP 0x65A
#define UHYVE_PORT_IBV_CREATE_WQ 0x65B
#define UHYVE_PORT_IBV_MODIFY_WQ 0x65C
#define UHYVE_PORT_IBV_DESTROY_WQ 0x65D
#define UHYVE_PORT_IBV_CREATE_RWQ_IND_TABLE 0x65E
#define UHYVE_PORT_IBV_DESTROY_RWQ_IND_TABLE 0x65F
#define UHYVE_PORT_IBV_POST_SEND 0x660
#define UHYVE_PORT_IBV_POST_RECV 0x661
#define UHYVE_PORT_IBV_CREATE_AH 0x662
#define UHYVE_PORT_IBV_INIT_AH_FROM_WC 0x663
#define UHYVE_PORT_IBV_CREATE_AH_FROM_WC 0x664
#define UHYVE_PORT_IBV_DESTROY_AH 0x665
#define UHYVE_PORT_IBV_ATTACH_MCAST 0x666
#define UHYVE_PORT_IBV_DETACH_MCAST 0x667
#define UHYVE_PORT_IBV_FORK_INIT 0x668
#define UHYVE_PORT_IBV_NODE_TYPE_STR 0x669
#define UHYVE_PORT_IBV_PORT_STATE_STR 0x66A
#define UHYVE_PORT_IBV_EVENT_TYPE_STR 0x66B
// #define UHYVE_PORT_IBV_RESOLVE_ETH_L2_FROM_GID 0x66C
#define UHYVE_PORT_IBV_IS_QPT_SUPPORTED 0x66D
#define UHYVE_PORT_IBV_RATE_TO_MULT 0x611
#define UHYVE_PORT_MULT_TO_IBV_RATE 0x612
#define UHYVE_PORT_IBV_RATE_TO_MBPS 0x613
#define UHYVE_PORT_MBPS_TO_IBV_RATE 0x614
#define UHYVE_PORT_IBV_CQ_EX_TO_CQ 0x615
#define UHYVE_PORT_IBV_START_POLL 0x616
#define UHYVE_PORT_IBV_NEXT_POLL 0x617
#define UHYVE_PORT_IBV_END_POLL 0x618
#define UHYVE_PORT_IBV_WC_READ_OPCODE 0x619
#define UHYVE_PORT_IBV_WC_READ_VENDOR_ERR 0x61A
#define UHYVE_PORT_IBV_WC_READ_BYTE_LEN 0x61B
#define UHYVE_PORT_IBV_WC_READ_IMM_DATA 0x61C
#define UHYVE_PORT_IBV_WC_READ_INVALIDATED_RKEY 0x61D
#define UHYVE_PORT_IBV_WC_READ_QP_NUM 0x61E
#define UHYVE_PORT_IBV_WC_READ_SRC_QP 0x61F
#define UHYVE_PORT_IBV_WC_READ_WC_FLAGS 0x620
#define UHYVE_PORT_IBV_WC_READ_SLID 0x621
#define UHYVE_PORT_IBV_WC_READ_SL 0x622
#define UHYVE_PORT_IBV_WC_READ_DLID_PATH_BITS 0x623
#define UHYVE_PORT_IBV_WC_READ_COMPLETION_TS 0x624
#define UHYVE_PORT_IBV_WC_READ_CVLAN 0x625
#define UHYVE_PORT_IBV_WC_READ_FLOW_TAG 0x626
#define UHYVE_PORT_IBV_POST_WQ_RECV 0x627
#define UHYVE_PORT_IBV_GET_DEVICE_LIST 0x628
#define UHYVE_PORT_IBV_FREE_DEVICE_LIST 0x629
#define UHYVE_PORT_IBV_GET_DEVICE_NAME 0x62A
#define UHYVE_PORT_IBV_GET_DEVICE_GUID 0x62B
#define UHYVE_PORT_IBV_OPEN_DEVICE 0x62C
#define UHYVE_PORT_IBV_CLOSE_DEVICE 0x62D
#define UHYVE_PORT_IBV_GET_ASYNC_EVENT 0x62E
#define UHYVE_PORT_IBV_ACK_ASYNC_EVENT 0x62F
#define UHYVE_PORT_IBV_QUERY_DEVICE 0x630
#define UHYVE_PORT_IBV_QUERY_PORT 0x631
#define UHYVE_PORT_IBV_QUERY_GID 0x632
#define UHYVE_PORT_IBV_QUERY_PKEY 0x633
#define UHYVE_PORT_IBV_ALLOC_PD 0x634
#define UHYVE_PORT_IBV_DEALLOC_PD 0x635
#define UHYVE_PORT_IBV_CREATE_FLOW 0x636
#define UHYVE_PORT_IBV_DESTROY_FLOW 0x637
#define UHYVE_PORT_IBV_OPEN_XRCD 0x638
#define UHYVE_PORT_IBV_CLOSE_XRCD 0x639
#define UHYVE_PORT_IBV_REG_MR 0x63A
#define UHYVE_PORT_IBV_REREG_MR 0x63B
#define UHYVE_PORT_IBV_DEREG_MR 0x63C
#define UHYVE_PORT_IBV_ALLOC_MW 0x63D
#define UHYVE_PORT_IBV_DEALLOC_MW 0x63E
#define UHYVE_PORT_IBV_INC_RKEY 0x63F
#define UHYVE_PORT_IBV_BIND_MW 0x640
#define UHYVE_PORT_IBV_CREATE_COMP_CHANNEL 0x641
#define UHYVE_PORT_IBV_DESTROY_COMP_CHANNEL 0x642
#define UHYVE_PORT_IBV_CREATE_CQ 0x643
#define UHYVE_PORT_IBV_CREATE_CQ_EX 0x644
#define UHYVE_PORT_IBV_RESIZE_CQ 0x645
#define UHYVE_PORT_IBV_DESTROY_CQ 0x646
#define UHYVE_PORT_IBV_GET_CQ_EVENT 0x647
#define UHYVE_PORT_IBV_ACK_CQ_EVENTS 0x648
#define UHYVE_PORT_IBV_POLL_CQ 0x649
#define UHYVE_PORT_IBV_REQ_NOTIFY_CQ 0x64A
#define UHYVE_PORT_IBV_CREATE_SRQ 0x64B
#define UHYVE_PORT_IBV_CREATE_SRQ_EX 0x64C
#define UHYVE_PORT_IBV_MODIFY_SRQ 0x64D
#define UHYVE_PORT_IBV_QUERY_SRQ 0x64E
#define UHYVE_PORT_IBV_GET_SRQ_NUM 0x64F
#define UHYVE_PORT_IBV_DESTROY_SRQ 0x650
#define UHYVE_PORT_IBV_POST_SRQ_RECV 0x651
#define UHYVE_PORT_IBV_CREATE_QP 0x652
#define UHYVE_PORT_IBV_CREATE_QP_EX 0x653
#define UHYVE_PORT_IBV_QUERY_RT_VALUES_EX 0x654
#define UHYVE_PORT_IBV_QUERY_DEVICE_EX 0x655
#define UHYVE_PORT_IBV_OPEN_QP 0x656
#define UHYVE_PORT_IBV_MODIFY_QP 0x657
#define UHYVE_PORT_IBV_QUERY_QP 0x658
#define UHYVE_PORT_IBV_DESTROY_QP 0x659
#define UHYVE_PORT_IBV_CREATE_WQ 0x65A
#define UHYVE_PORT_IBV_MODIFY_WQ 0x65B
#define UHYVE_PORT_IBV_DESTROY_WQ 0x65C
#define UHYVE_PORT_IBV_CREATE_RWQ_IND_TABLE 0x65D
#define UHYVE_PORT_IBV_DESTROY_RWQ_IND_TABLE 0x65E
#define UHYVE_PORT_IBV_POST_SEND 0x65F
#define UHYVE_PORT_IBV_POST_RECV 0x660
#define UHYVE_PORT_IBV_CREATE_AH 0x661
#define UHYVE_PORT_IBV_INIT_AH_FROM_WC 0x662
#define UHYVE_PORT_IBV_CREATE_AH_FROM_WC 0x663
#define UHYVE_PORT_IBV_DESTROY_AH 0x664
#define UHYVE_PORT_IBV_ATTACH_MCAST 0x665
#define UHYVE_PORT_IBV_DETACH_MCAST 0x666
#define UHYVE_PORT_IBV_FORK_INIT 0x667
// #define UHYVE_PORT_IBV_RESOLVE_ETH_L2_FROM_GID 0x668
#define UHYVE_PORT_IBV_IS_QPT_SUPPORTED 0x669
#define BUILTIN_EXPECT(exp, b) __builtin_expect((exp), (b))
//#define BUILTIN_EXPECT(exp, b) (exp)

View file

@ -425,7 +425,9 @@ enum ibv_wc_status {
IBV_WC_INV_EEC_STATE_ERR,
IBV_WC_FATAL_ERR,
IBV_WC_RESP_TIMEOUT_ERR,
IBV_WC_GENERAL_ERR
IBV_WC_GENERAL_ERR,
IBV_WC_TM_ERR,
IBV_WC_TM_RNDV_INCOMPLETE,
};
enum ibv_wc_opcode {

View file

@ -29,7 +29,22 @@
* This file implements the verbs API functions. Each function performs necessary
* pointer conversions for its parameters, writes its arguments struct to uhyve's
* KVM I/O port that belongs to the function, and reverts changes to the parameters
* before returning. Functions requiring non-trivial conversions are listed first.
* before returning.
*
* Functions requiring non-trivial conversions are listed first:
* - ibv_post_send
* - ibv_post_wq_recv
* - ibv_post_srq_recv
* - ibv_post_recv
* - ibv_create_rwq_ind_table
* - ibv_open_xrcd
*
* A few trivial functions that match enum values with a const char* string are not
* forwarded to uhyve and entirely implemented in HermitCore; they are listed second:
* - ibv_wc_status_str
* - ibv_node_type_str
* - ibv_port_state_str
* - ibv_event_type_str
*/
@ -52,7 +67,6 @@
/* } */
/*
* ibv_post_send
*/
@ -478,20 +492,115 @@ struct ibv_xrcd * ibv_open_xrcd(struct ibv_context * context, struct ibv_xrcd_in
* ibv_wc_status_str
*/
typedef struct {
// Parameters:
enum ibv_wc_status status;
// Return value:
const char * ret;
} __attribute__((packed)) uhyve_ibv_wc_status_str_t;
const char * ibv_wc_status_str(enum ibv_wc_status status) {
uhyve_ibv_wc_status_str_t uhyve_args;
uhyve_args.status = status;
static const char *const wc_status_str[] = {
[IBV_WC_SUCCESS] = "success",
[IBV_WC_LOC_LEN_ERR] = "local length error",
[IBV_WC_LOC_QP_OP_ERR] = "local QP operation error",
[IBV_WC_LOC_EEC_OP_ERR] = "local EE context operation error",
[IBV_WC_LOC_PROT_ERR] = "local protection error",
[IBV_WC_WR_FLUSH_ERR] = "Work Request Flushed Error",
[IBV_WC_MW_BIND_ERR] = "memory management operation error",
[IBV_WC_BAD_RESP_ERR] = "bad response error",
[IBV_WC_LOC_ACCESS_ERR] = "local access error",
[IBV_WC_REM_INV_REQ_ERR] = "remote invalid request error",
[IBV_WC_REM_ACCESS_ERR] = "remote access error",
[IBV_WC_REM_OP_ERR] = "remote operation error",
[IBV_WC_RETRY_EXC_ERR] = "transport retry counter exceeded",
[IBV_WC_RNR_RETRY_EXC_ERR] = "RNR retry counter exceeded",
[IBV_WC_LOC_RDD_VIOL_ERR] = "local RDD violation error",
[IBV_WC_REM_INV_RD_REQ_ERR] = "remote invalid RD request",
[IBV_WC_REM_ABORT_ERR] = "aborted error",
[IBV_WC_INV_EECN_ERR] = "invalid EE context number",
[IBV_WC_INV_EEC_STATE_ERR] = "invalid EE context state",
[IBV_WC_FATAL_ERR] = "fatal error",
[IBV_WC_RESP_TIMEOUT_ERR] = "response timeout error",
[IBV_WC_GENERAL_ERR] = "general error",
[IBV_WC_TM_ERR] = "TM error",
[IBV_WC_TM_RNDV_INCOMPLETE] = "TM software rendezvous",
};
uhyve_send(UHYVE_PORT_IBV_WC_STATUS_STR, (unsigned) virt_to_phys((size_t) &uhyve_args));
if (status < IBV_WC_SUCCESS || status > IBV_WC_TM_RNDV_INCOMPLETE)
return "unknown";
return uhyve_args.ret;
return wc_status_str[status];
}
/*
 * ibv_node_type_str
 *
 * Translates a node type into a constant, human-readable string. The lookup
 * is done locally and is not forwarded to uhyve. Any value outside the range
 * IBV_NODE_CA .. IBV_NODE_USNIC_UDP is reported as "unknown".
 */

const char * ibv_node_type_str(enum ibv_node_type node_type) {
	static const char *const names[] = {
		[IBV_NODE_CA]        = "InfiniBand channel adapter",
		[IBV_NODE_SWITCH]    = "InfiniBand switch",
		[IBV_NODE_ROUTER]    = "InfiniBand router",
		[IBV_NODE_RNIC]      = "iWARP NIC",
		[IBV_NODE_USNIC]     = "usNIC",
		[IBV_NODE_USNIC_UDP] = "usNIC UDP",
	};

	/* Only index the table once the value is known to lie inside it. */
	const int in_table = node_type >= IBV_NODE_CA &&
			     node_type <= IBV_NODE_USNIC_UDP;

	return in_table ? names[node_type] : "unknown";
}
/*
 * ibv_port_state_str
 *
 * Translates a port state into a constant, human-readable string. The lookup
 * is done locally and is not forwarded to uhyve. Any value outside the range
 * IBV_PORT_NOP .. IBV_PORT_ACTIVE_DEFER is reported as "unknown".
 */

const char * ibv_port_state_str(enum ibv_port_state port_state) {
	static const char *const names[] = {
		[IBV_PORT_NOP]          = "no state change (NOP)",
		[IBV_PORT_DOWN]         = "down",
		[IBV_PORT_INIT]         = "init",
		[IBV_PORT_ARMED]        = "armed",
		[IBV_PORT_ACTIVE]       = "active",
		[IBV_PORT_ACTIVE_DEFER] = "active defer"
	};

	/* Only index the table once the value is known to lie inside it. */
	const int in_table = port_state >= IBV_PORT_NOP &&
			     port_state <= IBV_PORT_ACTIVE_DEFER;

	return in_table ? names[port_state] : "unknown";
}
/*
 * ibv_event_type_str
 *
 * Translates an asynchronous event type into a constant, human-readable
 * string. The lookup is done locally and is not forwarded to uhyve.
 *
 * Returns "unknown" for any value that has no entry in the table. The bound
 * is derived from the table itself, so every listed event (including
 * IBV_EVENT_WQ_FATAL, the last entry) resolves to its description.
 */

const char * ibv_event_type_str(enum ibv_event_type event) {
	static const char *const event_type_str[] = {
		[IBV_EVENT_CQ_ERR]              = "CQ error",
		[IBV_EVENT_QP_FATAL]            = "local work queue catastrophic error",
		[IBV_EVENT_QP_REQ_ERR]          = "invalid request local work queue error",
		[IBV_EVENT_QP_ACCESS_ERR]       = "local access violation work queue error",
		[IBV_EVENT_COMM_EST]            = "communication established",
		[IBV_EVENT_SQ_DRAINED]          = "send queue drained",
		[IBV_EVENT_PATH_MIG]            = "path migrated",
		[IBV_EVENT_PATH_MIG_ERR]        = "path migration request error",
		[IBV_EVENT_DEVICE_FATAL]        = "local catastrophic error",
		[IBV_EVENT_PORT_ACTIVE]         = "port active",
		[IBV_EVENT_PORT_ERR]            = "port error",
		[IBV_EVENT_LID_CHANGE]          = "LID change",
		[IBV_EVENT_PKEY_CHANGE]         = "P_Key change",
		[IBV_EVENT_SM_CHANGE]           = "SM change",
		[IBV_EVENT_SRQ_ERR]             = "SRQ catastrophic error",
		[IBV_EVENT_SRQ_LIMIT_REACHED]   = "SRQ limit reached",
		[IBV_EVENT_QP_LAST_WQE_REACHED] = "last WQE reached",
		[IBV_EVENT_CLIENT_REREGISTER]   = "client reregistration",
		[IBV_EVENT_GID_CHANGE]          = "GID table change",
		[IBV_EVENT_WQ_FATAL]            = "WQ fatal"
	};
	const size_t num_entries = sizeof event_type_str / sizeof event_type_str[0];

	/*
	 * Bound the index by the table size rather than by a hard-coded
	 * enumerator, and reject slots the designated initializers left empty.
	 */
	if (event < IBV_EVENT_CQ_ERR || (size_t) event >= num_entries ||
	    !event_type_str[event])
		return "unknown";

	return event_type_str[event];
}
@ -1840,7 +1949,7 @@ typedef struct {
int ibv_get_srq_num(struct ibv_srq * srq, uint32_t * srq_num) {
uhyve_ibv_get_srq_num_t uhyve_args;
uhyve_args.srq = srq;
uhyve_args.srq_num = srq_num;
uhyve_args.srq_num = (uint32_t *) guest_to_host((size_t) srq_num);
uhyve_send(UHYVE_PORT_IBV_GET_SRQ_NUM, (unsigned) virt_to_phys((size_t) &uhyve_args));
@ -2320,69 +2429,6 @@ int ibv_fork_init() {
}
/*
* ibv_node_type_str
*/
typedef struct {
// Parameters:
enum ibv_node_type node_type;
// Return value:
const char * ret;
} __attribute__((packed)) uhyve_ibv_node_type_str_t;
const char * ibv_node_type_str(enum ibv_node_type node_type) {
uhyve_ibv_node_type_str_t uhyve_args;
uhyve_args.node_type = node_type;
uhyve_send(UHYVE_PORT_IBV_NODE_TYPE_STR, (unsigned) virt_to_phys((size_t) &uhyve_args));
return uhyve_args.ret;
}
/*
* ibv_port_state_str
*/
typedef struct {
// Parameters:
enum ibv_port_state port_state;
// Return value:
const char * ret;
} __attribute__((packed)) uhyve_ibv_port_state_str_t;
const char * ibv_port_state_str(enum ibv_port_state port_state) {
uhyve_ibv_port_state_str_t uhyve_args;
uhyve_args.port_state = port_state;
uhyve_send(UHYVE_PORT_IBV_PORT_STATE_STR, (unsigned) virt_to_phys((size_t) &uhyve_args));
return uhyve_args.ret;
}
/*
* ibv_event_type_str
*/
typedef struct {
// Parameters:
enum ibv_event_type event;
// Return value:
const char * ret;
} __attribute__((packed)) uhyve_ibv_event_type_str_t;
const char * ibv_event_type_str(enum ibv_event_type event) {
uhyve_ibv_event_type_str_t uhyve_args;
uhyve_args.event = event;
uhyve_send(UHYVE_PORT_IBV_EVENT_TYPE_STR, (unsigned) virt_to_phys((size_t) &uhyve_args));
return uhyve_args.ret;
}
/*
* ibv_resolve_eth_l2_from_gid
*/

View file

@ -49,20 +49,6 @@ static inline unsigned long long rdtsc() {
return val;
}
/*
* ibv_wc_status_str
*/
void call_ibv_wc_status_str(struct kvm_run * run, uint8_t * guest_mem) {
printf("LOG: UHYVE - call_ibv_wc_status_str\n");
unsigned data = *((unsigned*) ((size_t) run + run->io.data_offset));
uhyve_ibv_wc_status_str_t * args = (uhyve_ibv_wc_status_str_t *) (guest_mem + data);
use_ib_mem_pool = true;
args->ret = ibv_wc_status_str(args->status);
use_ib_mem_pool = false;
}
/*
* ibv_rate_to_mult
@ -1386,51 +1372,6 @@ void call_ibv_fork_init(struct kvm_run * run, uint8_t * guest_mem) {
}
/*
* ibv_node_type_str
*/
void call_ibv_node_type_str(struct kvm_run * run, uint8_t * guest_mem) {
printf("LOG: UHYVE - call_ibv_node_type_str\n");
unsigned data = *((unsigned*) ((size_t) run + run->io.data_offset));
uhyve_ibv_node_type_str_t * args = (uhyve_ibv_node_type_str_t *) (guest_mem + data);
use_ib_mem_pool = true;
args->ret = ibv_node_type_str(args->node_type);
use_ib_mem_pool = false;
}
/*
* ibv_port_state_str
*/
void call_ibv_port_state_str(struct kvm_run * run, uint8_t * guest_mem) {
printf("LOG: UHYVE - call_ibv_port_state_str\n");
unsigned data = *((unsigned*) ((size_t) run + run->io.data_offset));
uhyve_ibv_port_state_str_t * args = (uhyve_ibv_port_state_str_t *) (guest_mem + data);
use_ib_mem_pool = true;
args->ret = ibv_port_state_str(args->port_state);
use_ib_mem_pool = false;
}
/*
* ibv_event_type_str
*/
void call_ibv_event_type_str(struct kvm_run * run, uint8_t * guest_mem) {
printf("LOG: UHYVE - call_ibv_event_type_str\n");
unsigned data = *((unsigned*) ((size_t) run + run->io.data_offset));
uhyve_ibv_event_type_str_t * args = (uhyve_ibv_event_type_str_t *) (guest_mem + data);
use_ib_mem_pool = true;
args->ret = ibv_event_type_str(args->event);
use_ib_mem_pool = false;
}
/*
* ibv_resolve_eth_l2_from_gid
*/

View file

@ -51,108 +51,98 @@ extern bool use_ib_mem_pool;
typedef enum {
UHYVE_PORT_SET_IB_POOL_ADDR = 0x610,
UHYVE_PORT_IBV_WC_STATUS_STR = 0x611,
UHYVE_PORT_IBV_RATE_TO_MULT = 0x612,
UHYVE_PORT_MULT_TO_IBV_RATE = 0x613,
UHYVE_PORT_IBV_RATE_TO_MBPS = 0x614,
UHYVE_PORT_MBPS_TO_IBV_RATE = 0x615,
UHYVE_PORT_IBV_CQ_EX_TO_CQ = 0x616,
UHYVE_PORT_IBV_START_POLL = 0x617,
UHYVE_PORT_IBV_NEXT_POLL = 0x618,
UHYVE_PORT_IBV_END_POLL = 0x619,
UHYVE_PORT_IBV_WC_READ_OPCODE = 0x61A,
UHYVE_PORT_IBV_WC_READ_VENDOR_ERR = 0x61B,
UHYVE_PORT_IBV_WC_READ_BYTE_LEN = 0x61C,
UHYVE_PORT_IBV_WC_READ_IMM_DATA = 0x61D,
UHYVE_PORT_IBV_WC_READ_INVALIDATED_RKEY = 0x61E,
UHYVE_PORT_IBV_WC_READ_QP_NUM = 0x61F,
UHYVE_PORT_IBV_WC_READ_SRC_QP = 0x620,
UHYVE_PORT_IBV_WC_READ_WC_FLAGS = 0x621,
UHYVE_PORT_IBV_WC_READ_SLID = 0x622,
UHYVE_PORT_IBV_WC_READ_SL = 0x623,
UHYVE_PORT_IBV_WC_READ_DLID_PATH_BITS = 0x624,
UHYVE_PORT_IBV_WC_READ_COMPLETION_TS = 0x625,
UHYVE_PORT_IBV_WC_READ_CVLAN = 0x626,
UHYVE_PORT_IBV_WC_READ_FLOW_TAG = 0x627,
UHYVE_PORT_IBV_POST_WQ_RECV = 0x628,
UHYVE_PORT_IBV_GET_DEVICE_LIST = 0x629,
UHYVE_PORT_IBV_FREE_DEVICE_LIST = 0x62A,
UHYVE_PORT_IBV_GET_DEVICE_NAME = 0x62B,
UHYVE_PORT_IBV_GET_DEVICE_GUID = 0x62C,
UHYVE_PORT_IBV_OPEN_DEVICE = 0x62D,
UHYVE_PORT_IBV_CLOSE_DEVICE = 0x62E,
UHYVE_PORT_IBV_GET_ASYNC_EVENT = 0x62F,
UHYVE_PORT_IBV_ACK_ASYNC_EVENT = 0x630,
UHYVE_PORT_IBV_QUERY_DEVICE = 0x631,
UHYVE_PORT_IBV_QUERY_PORT = 0x632,
UHYVE_PORT_IBV_QUERY_GID = 0x633,
UHYVE_PORT_IBV_QUERY_PKEY = 0x634,
UHYVE_PORT_IBV_ALLOC_PD = 0x635,
UHYVE_PORT_IBV_DEALLOC_PD = 0x636,
UHYVE_PORT_IBV_CREATE_FLOW = 0x637,
UHYVE_PORT_IBV_DESTROY_FLOW = 0x638,
UHYVE_PORT_IBV_OPEN_XRCD = 0x639,
UHYVE_PORT_IBV_CLOSE_XRCD = 0x63A,
UHYVE_PORT_IBV_REG_MR = 0x63B,
UHYVE_PORT_IBV_REREG_MR = 0x63C,
UHYVE_PORT_IBV_DEREG_MR = 0x63D,
UHYVE_PORT_IBV_ALLOC_MW = 0x63E,
UHYVE_PORT_IBV_DEALLOC_MW = 0x63F,
UHYVE_PORT_IBV_INC_RKEY = 0x640,
UHYVE_PORT_IBV_BIND_MW = 0x641,
UHYVE_PORT_IBV_CREATE_COMP_CHANNEL = 0x642,
UHYVE_PORT_IBV_DESTROY_COMP_CHANNEL = 0x643,
UHYVE_PORT_IBV_CREATE_CQ = 0x644,
UHYVE_PORT_IBV_CREATE_CQ_EX = 0x645,
UHYVE_PORT_IBV_RESIZE_CQ = 0x646,
UHYVE_PORT_IBV_DESTROY_CQ = 0x647,
UHYVE_PORT_IBV_GET_CQ_EVENT = 0x648,
UHYVE_PORT_IBV_ACK_CQ_EVENTS = 0x649,
UHYVE_PORT_IBV_POLL_CQ = 0x64A,
UHYVE_PORT_IBV_REQ_NOTIFY_CQ = 0x64B,
UHYVE_PORT_IBV_CREATE_SRQ = 0x64C,
UHYVE_PORT_IBV_CREATE_SRQ_EX = 0x64D,
UHYVE_PORT_IBV_MODIFY_SRQ = 0x64E,
UHYVE_PORT_IBV_QUERY_SRQ = 0x64F,
UHYVE_PORT_IBV_GET_SRQ_NUM = 0x650,
UHYVE_PORT_IBV_DESTROY_SRQ = 0x651,
UHYVE_PORT_IBV_POST_SRQ_RECV = 0x652,
UHYVE_PORT_IBV_CREATE_QP = 0x653,
UHYVE_PORT_IBV_CREATE_QP_EX = 0x654,
UHYVE_PORT_IBV_QUERY_RT_VALUES_EX = 0x655,
UHYVE_PORT_IBV_QUERY_DEVICE_EX = 0x656,
UHYVE_PORT_IBV_OPEN_QP = 0x657,
UHYVE_PORT_IBV_MODIFY_QP = 0x658,
UHYVE_PORT_IBV_QUERY_QP = 0x659,
UHYVE_PORT_IBV_DESTROY_QP = 0x65A,
UHYVE_PORT_IBV_CREATE_WQ = 0x65B,
UHYVE_PORT_IBV_MODIFY_WQ = 0x65C,
UHYVE_PORT_IBV_DESTROY_WQ = 0x65D,
UHYVE_PORT_IBV_CREATE_RWQ_IND_TABLE = 0x65E,
UHYVE_PORT_IBV_DESTROY_RWQ_IND_TABLE = 0x65F,
UHYVE_PORT_IBV_POST_SEND = 0x660,
UHYVE_PORT_IBV_POST_RECV = 0x661,
UHYVE_PORT_IBV_CREATE_AH = 0x662,
UHYVE_PORT_IBV_INIT_AH_FROM_WC = 0x663,
UHYVE_PORT_IBV_CREATE_AH_FROM_WC = 0x664,
UHYVE_PORT_IBV_DESTROY_AH = 0x665,
UHYVE_PORT_IBV_ATTACH_MCAST = 0x666,
UHYVE_PORT_IBV_DETACH_MCAST = 0x667,
UHYVE_PORT_IBV_FORK_INIT = 0x668,
UHYVE_PORT_IBV_NODE_TYPE_STR = 0x669,
UHYVE_PORT_IBV_PORT_STATE_STR = 0x66A,
UHYVE_PORT_IBV_EVENT_TYPE_STR = 0x66B,
// UHYVE_PORT_IBV_RESOLVE_ETH_L2_FROM_GID = 0x66C,
UHYVE_PORT_IBV_IS_QPT_SUPPORTED = 0x66D,
UHYVE_PORT_IBV_RATE_TO_MULT = 0x611,
UHYVE_PORT_MULT_TO_IBV_RATE = 0x612,
UHYVE_PORT_IBV_RATE_TO_MBPS = 0x613,
UHYVE_PORT_MBPS_TO_IBV_RATE = 0x614,
UHYVE_PORT_IBV_CQ_EX_TO_CQ = 0x615,
UHYVE_PORT_IBV_START_POLL = 0x616,
UHYVE_PORT_IBV_NEXT_POLL = 0x617,
UHYVE_PORT_IBV_END_POLL = 0x618,
UHYVE_PORT_IBV_WC_READ_OPCODE = 0x619,
UHYVE_PORT_IBV_WC_READ_VENDOR_ERR = 0x61A,
UHYVE_PORT_IBV_WC_READ_BYTE_LEN = 0x61B,
UHYVE_PORT_IBV_WC_READ_IMM_DATA = 0x61C,
UHYVE_PORT_IBV_WC_READ_INVALIDATED_RKEY = 0x61D,
UHYVE_PORT_IBV_WC_READ_QP_NUM = 0x61E,
UHYVE_PORT_IBV_WC_READ_SRC_QP = 0x61F,
UHYVE_PORT_IBV_WC_READ_WC_FLAGS = 0x620,
UHYVE_PORT_IBV_WC_READ_SLID = 0x621,
UHYVE_PORT_IBV_WC_READ_SL = 0x622,
UHYVE_PORT_IBV_WC_READ_DLID_PATH_BITS = 0x623,
UHYVE_PORT_IBV_WC_READ_COMPLETION_TS = 0x624,
UHYVE_PORT_IBV_WC_READ_CVLAN = 0x625,
UHYVE_PORT_IBV_WC_READ_FLOW_TAG = 0x626,
UHYVE_PORT_IBV_POST_WQ_RECV = 0x627,
UHYVE_PORT_IBV_GET_DEVICE_LIST = 0x628,
UHYVE_PORT_IBV_FREE_DEVICE_LIST = 0x629,
UHYVE_PORT_IBV_GET_DEVICE_NAME = 0x62A,
UHYVE_PORT_IBV_GET_DEVICE_GUID = 0x62B,
UHYVE_PORT_IBV_OPEN_DEVICE = 0x62C,
UHYVE_PORT_IBV_CLOSE_DEVICE = 0x62D,
UHYVE_PORT_IBV_GET_ASYNC_EVENT = 0x62E,
UHYVE_PORT_IBV_ACK_ASYNC_EVENT = 0x62F,
UHYVE_PORT_IBV_QUERY_DEVICE = 0x630,
UHYVE_PORT_IBV_QUERY_PORT = 0x631,
UHYVE_PORT_IBV_QUERY_GID = 0x632,
UHYVE_PORT_IBV_QUERY_PKEY = 0x633,
UHYVE_PORT_IBV_ALLOC_PD = 0x634,
UHYVE_PORT_IBV_DEALLOC_PD = 0x635,
UHYVE_PORT_IBV_CREATE_FLOW = 0x636,
UHYVE_PORT_IBV_DESTROY_FLOW = 0x637,
UHYVE_PORT_IBV_OPEN_XRCD = 0x638,
UHYVE_PORT_IBV_CLOSE_XRCD = 0x639,
UHYVE_PORT_IBV_REG_MR = 0x63A,
UHYVE_PORT_IBV_REREG_MR = 0x63B,
UHYVE_PORT_IBV_DEREG_MR = 0x63C,
UHYVE_PORT_IBV_ALLOC_MW = 0x63D,
UHYVE_PORT_IBV_DEALLOC_MW = 0x63E,
UHYVE_PORT_IBV_INC_RKEY = 0x63F,
UHYVE_PORT_IBV_BIND_MW = 0x640,
UHYVE_PORT_IBV_CREATE_COMP_CHANNEL = 0x641,
UHYVE_PORT_IBV_DESTROY_COMP_CHANNEL = 0x642,
UHYVE_PORT_IBV_CREATE_CQ = 0x643,
UHYVE_PORT_IBV_CREATE_CQ_EX = 0x644,
UHYVE_PORT_IBV_RESIZE_CQ = 0x645,
UHYVE_PORT_IBV_DESTROY_CQ = 0x646,
UHYVE_PORT_IBV_GET_CQ_EVENT = 0x647,
UHYVE_PORT_IBV_ACK_CQ_EVENTS = 0x648,
UHYVE_PORT_IBV_POLL_CQ = 0x649,
UHYVE_PORT_IBV_REQ_NOTIFY_CQ = 0x64A,
UHYVE_PORT_IBV_CREATE_SRQ = 0x64B,
UHYVE_PORT_IBV_CREATE_SRQ_EX = 0x64C,
UHYVE_PORT_IBV_MODIFY_SRQ = 0x64D,
UHYVE_PORT_IBV_QUERY_SRQ = 0x64E,
UHYVE_PORT_IBV_GET_SRQ_NUM = 0x64F,
UHYVE_PORT_IBV_DESTROY_SRQ = 0x650,
UHYVE_PORT_IBV_POST_SRQ_RECV = 0x651,
UHYVE_PORT_IBV_CREATE_QP = 0x652,
UHYVE_PORT_IBV_CREATE_QP_EX = 0x653,
UHYVE_PORT_IBV_QUERY_RT_VALUES_EX = 0x654,
UHYVE_PORT_IBV_QUERY_DEVICE_EX = 0x655,
UHYVE_PORT_IBV_OPEN_QP = 0x656,
UHYVE_PORT_IBV_MODIFY_QP = 0x657,
UHYVE_PORT_IBV_QUERY_QP = 0x658,
UHYVE_PORT_IBV_DESTROY_QP = 0x659,
UHYVE_PORT_IBV_CREATE_WQ = 0x65A,
UHYVE_PORT_IBV_MODIFY_WQ = 0x65B,
UHYVE_PORT_IBV_DESTROY_WQ = 0x65C,
UHYVE_PORT_IBV_CREATE_RWQ_IND_TABLE = 0x65D,
UHYVE_PORT_IBV_DESTROY_RWQ_IND_TABLE = 0x65E,
UHYVE_PORT_IBV_POST_SEND = 0x65F,
UHYVE_PORT_IBV_POST_RECV = 0x660,
UHYVE_PORT_IBV_CREATE_AH = 0x661,
UHYVE_PORT_IBV_INIT_AH_FROM_WC = 0x662,
UHYVE_PORT_IBV_CREATE_AH_FROM_WC = 0x663,
UHYVE_PORT_IBV_DESTROY_AH = 0x664,
UHYVE_PORT_IBV_ATTACH_MCAST = 0x665,
UHYVE_PORT_IBV_DETACH_MCAST = 0x666,
UHYVE_PORT_IBV_FORK_INIT = 0x667,
// UHYVE_PORT_IBV_RESOLVE_ETH_L2_FROM_GID = 0x668,
UHYVE_PORT_IBV_IS_QPT_SUPPORTED = 0x669,
} uhyve_ibv_t;
typedef struct {
// Parameters:
enum ibv_wc_status status;
// Return value:
const char * ret;
} __attribute__((packed)) uhyve_ibv_wc_status_str_t;
typedef struct {
// Parameters:
@ -831,27 +821,6 @@ typedef struct {
int ret;
} __attribute__((packed)) uhyve_ibv_fork_init_t;
typedef struct {
// Parameters:
enum ibv_node_type node_type;
// Return value:
const char * ret;
} __attribute__((packed)) uhyve_ibv_node_type_str_t;
typedef struct {
// Parameters:
enum ibv_port_state port_state;
// Return value:
const char * ret;
} __attribute__((packed)) uhyve_ibv_port_state_str_t;
typedef struct {
// Parameters:
enum ibv_event_type event;
// Return value:
const char * ret;
} __attribute__((packed)) uhyve_ibv_event_type_str_t;
// typedef struct {
// // Parameters:
// struct ibv_context * context;
@ -872,7 +841,6 @@ typedef struct {
void call_ibv_wc_status_str (struct kvm_run * run, uint8_t * guest_mem);
void call_ibv_rate_to_mult (struct kvm_run * run, uint8_t * guest_mem);
void call_mult_to_ibv_rate (struct kvm_run * run, uint8_t * guest_mem);
void call_ibv_rate_to_mbps (struct kvm_run * run, uint8_t * guest_mem);
@ -960,9 +928,6 @@ void call_ibv_destroy_ah (struct kvm_run * run, uint8_t * guest_me
void call_ibv_attach_mcast (struct kvm_run * run, uint8_t * guest_mem);
void call_ibv_detach_mcast (struct kvm_run * run, uint8_t * guest_mem);
void call_ibv_fork_init (struct kvm_run * run, uint8_t * guest_mem);
void call_ibv_node_type_str (struct kvm_run * run, uint8_t * guest_mem);
void call_ibv_port_state_str (struct kvm_run * run, uint8_t * guest_mem);
void call_ibv_event_type_str (struct kvm_run * run, uint8_t * guest_mem);
// void call_ibv_resolve_eth_l2_from_gid(struct kvm_run * run, uint8_t * guest_mem);
void call_ibv_is_qpt_supported (struct kvm_run * run, uint8_t * guest_mem);

View file

@ -1276,9 +1276,6 @@ static int vcpu_loop(void)
call_ibv_post_srq_recv(run, guest_mem);
break;
case UHYVE_PORT_IBV_WC_STATUS_STR:
call_ibv_wc_status_str(run, guest_mem);
break;
case UHYVE_PORT_IBV_RATE_TO_MULT:
call_ibv_rate_to_mult(run, guest_mem);
break;
@ -1525,15 +1522,6 @@ static int vcpu_loop(void)
case UHYVE_PORT_IBV_FORK_INIT:
call_ibv_fork_init(run, guest_mem);
break;
case UHYVE_PORT_IBV_NODE_TYPE_STR:
call_ibv_node_type_str(run, guest_mem);
break;
case UHYVE_PORT_IBV_PORT_STATE_STR:
call_ibv_port_state_str(run, guest_mem);
break;
case UHYVE_PORT_IBV_EVENT_TYPE_STR:
call_ibv_event_type_str(run, guest_mem);
break;
/* case UHYVE_PORT_IBV_RESOLVE_ETH_L2_FROM_GID: */
/* call_ibv_resolve_eth_l2_from_gid(run, guest_mem); */
/* break; */

View file

@ -10,8 +10,18 @@ add_executable(hello++ hello++.cpp)
add_executable(hellof hellof.f90)
add_executable(pi pi.go)
#add_executable(ib-pingpong ib/pingpong.c ib/pingpong.h ib/pingpong-ud.c)
# InfiniBand Pingpongs
# Reliable Connected
add_executable(ib-rc-pingpong ib/pingpong.c ib/rc_pingpong.c)
# Unreliable Datagram
add_executable(ib-ud-pingpong ib/pingpong.c ib/ud_pingpong.c)
# Unreliable Connected
add_executable(ib-uc-pingpong ib/pingpong.c ib/uc_pingpong.c)
# Shared Receive Queue
add_executable(ib-srq-pingpong ib/pingpong.c ib/srq_pingpong.c)
# Shared Receive Queue, eXtended Reliable Connected
add_executable(ib-xsrq-pingpong ib/pingpong.c ib/xsrq_pingpong.c)
add_executable(test-malloc test-malloc.c)
add_executable(test-malloc-mt test-malloc-mt.c)

View file

@ -1,6 +1,7 @@
/*
* Copyright (c) 2006 Cisco Systems. All rights reserved.
* 2018 Annika Wierichs, RWTH Aachen. All rights reserved.
* Copyright (c) 2006 Cisco Systems.
* 2018 Annika Wierichs, RWTH Aachen.
* All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU

View file

@ -1,6 +1,7 @@
/*
* Copyright (c) 2006 Cisco Systems. All rights reserved.
* 2018 Annika Wierichs, RWTH Aachen. All rights reserved.
* Copyright (c) 2006 Cisco Systems.
* 2018 Annika Wierichs, RWTH Aachen.
* All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU

View file

@ -1,6 +1,8 @@
/*
* Copyright (c) 2005 Topspin Communications. All rights reserved.
* 2018 Annika Wierichs, RWTH Aachen. All rights reserved.
* Copyright (c) 2005 Topspin Communications.
* 2018 Annika Wierichs, RWTH Aachen.
* All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file

995
usr/tests/ib/srq_pingpong.c Normal file
View file

@ -0,0 +1,995 @@
/*
* Copyright (c) 2005 Topspin Communications. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#define _GNU_SOURCE
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <netdb.h>
#include <malloc.h>
#include <getopt.h>
#include <netinet/in.h>
#include <time.h>
#include "pingpong.h"
/* Constants used by the SRQ pingpong test. */
enum {
/* NOTE(review): presumably the work-request IDs that tag receive and send
 * requests so completions can be told apart - confirm against the poll loop. */
PINGPONG_RECV_WRID = 1,
PINGPONG_SEND_WRID = 2,
/* Capacity of the per-QP arrays in struct pingpong_context; also the number
 * of address records pp_client_exch_dest() sends and receives. */
MAX_QP = 256,
};
/* NOTE(review): presumably the system page size used when allocating the
 * message buffer - confirm where it is assigned. */
static int page_size;
/* NOTE(review): presumably a flag that enables checking of received payloads
 * - confirm against the option parsing. */
static int validate_buf;
/*
 * State of one pingpong run: the verbs objects it works with plus its
 * bookkeeping. Several queue pairs are kept in a single context; the field
 * name suggests they share one SRQ (TODO confirm where the QPs are created).
 */
struct pingpong_context {
struct ibv_context *context;
struct ibv_comp_channel *channel;
struct ibv_pd *pd;
struct ibv_mr *mr;
struct ibv_cq *cq;
struct ibv_srq *srq;
/* Queue pairs; pp_connect_ctx() configures entries 0 .. num_qp-1. */
struct ibv_qp *qp[MAX_QP];
/* NOTE(review): presumably the message buffer and its length in bytes - confirm. */
char *buf;
int size;
int send_flags;
/* Number of entries of qp[] that are in use. */
int num_qp;
int rx_depth;
/* NOTE(review): presumably per-QP outstanding work-request state - confirm. */
int pending[MAX_QP];
struct ibv_port_attr portinfo;
};
/*
 * Addressing information for one queue pair, exchanged between client and
 * server over TCP as the text record "lid:qpn:psn:gid" (hex fields).
 */
struct pingpong_dest {
/* Local identifier; used as the destination LID (ah_attr.dlid) of the peer. */
int lid;
/* Queue pair number; used as dest_qp_num when connecting to the peer. */
int qpn;
/* Packet sequence number; the peer's value becomes rq_psn, our own sq_psn. */
int psn;
/* Global identifier; a non-zero interface_id switches on global routing. */
union ibv_gid gid;
};
static int pp_connect_ctx(struct pingpong_context *ctx, int port, enum ibv_mtu mtu,
int sl, const struct pingpong_dest *my_dest,
const struct pingpong_dest *dest, int sgid_idx)
{
int i;
for (i = 0; i < ctx->num_qp; ++i) {
struct ibv_qp_attr attr = {
.qp_state = IBV_QPS_RTR,
.path_mtu = mtu,
.dest_qp_num = dest[i].qpn,
.rq_psn = dest[i].psn,
.max_dest_rd_atomic = 1,
.min_rnr_timer = 12,
.ah_attr = {
.is_global = 0,
.dlid = dest[i].lid,
.sl = sl,
.src_path_bits = 0,
.port_num = port
}
};
if (dest->gid.global.interface_id) {
attr.ah_attr.is_global = 1;
attr.ah_attr.grh.hop_limit = 1;
attr.ah_attr.grh.dgid = dest->gid;
attr.ah_attr.grh.sgid_index = sgid_idx;
}
if (ibv_modify_qp(ctx->qp[i], &attr,
IBV_QP_STATE |
IBV_QP_AV |
IBV_QP_PATH_MTU |
IBV_QP_DEST_QPN |
IBV_QP_RQ_PSN |
IBV_QP_MAX_DEST_RD_ATOMIC |
IBV_QP_MIN_RNR_TIMER)) {
fprintf(stderr, "Failed to modify QP[%d] to RTR\n", i);
return 1;
}
attr.qp_state = IBV_QPS_RTS;
attr.timeout = 14;
attr.retry_cnt = 7;
attr.rnr_retry = 7;
attr.sq_psn = my_dest[i].psn;
attr.max_rd_atomic = 1;
if (ibv_modify_qp(ctx->qp[i], &attr,
IBV_QP_STATE |
IBV_QP_TIMEOUT |
IBV_QP_RETRY_CNT |
IBV_QP_RNR_RETRY |
IBV_QP_SQ_PSN |
IBV_QP_MAX_QP_RD_ATOMIC)) {
fprintf(stderr, "Failed to modify QP[%d] to RTS\n", i);
return 1;
}
}
return 0;
}
/*
 * Client half of the TCP handshake that exchanges QP addressing information.
 *
 * Connects to servername:port, sends all MAX_QP entries of my_dest as
 * fixed-size "lid:qpn:psn:gid" text records, reads MAX_QP records back and
 * finally sends "done".
 *
 * Returns a malloc()ed array of MAX_QP remote destinations that the caller
 * must free(), or NULL on any failure (connection, short or failed I/O,
 * unparsable record, out of memory).
 */
static struct pingpong_dest *pp_client_exch_dest(const char *servername, int port,
						 const struct pingpong_dest *my_dest)
{
	struct addrinfo *res, *t;
	struct addrinfo hints = {
		.ai_family   = AF_UNSPEC,
		.ai_socktype = SOCK_STREAM
	};
	char *service;
	char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"];
	int n;
	int r;
	int i;
	int sockfd = -1;
	struct pingpong_dest *rem_dest = NULL;
	char gid[33];

	if (asprintf(&service, "%d", port) < 0)
		return NULL;

	/* getaddrinfo() reports failure with any non-zero value, not only negative ones. */
	n = getaddrinfo(servername, service, &hints, &res);
	if (n != 0) {
		fprintf(stderr, "Error for %s:%d\n", servername, port);
		free(service);
		return NULL;
	}

	for (t = res; t; t = t->ai_next) {
		sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
		if (sockfd >= 0) {
			if (!connect(sockfd, t->ai_addr, t->ai_addrlen))
				break;
			close(sockfd);
			sockfd = -1;
		}
	}

	freeaddrinfo(res);
	free(service);

	if (sockfd < 0) {
		fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port);
		return NULL;
	}

	for (i = 0; i < MAX_QP; ++i) {
		gid_to_wire_gid(&my_dest[i].gid, gid);
		snprintf(msg, sizeof msg, "%04x:%06x:%06x:%s", my_dest[i].lid,
			 my_dest[i].qpn, my_dest[i].psn, gid);
		if (write(sockfd, msg, sizeof msg) != sizeof msg) {
			fprintf(stderr, "Couldn't send local address\n");
			goto fail;
		}
	}

	rem_dest = malloc(MAX_QP * sizeof *rem_dest);
	if (!rem_dest)
		goto fail;

	for (i = 0; i < MAX_QP; ++i) {
		n = 0;
		while (n < (int) sizeof msg) {
			r = read(sockfd, msg + n, sizeof msg - n);
			/* r == 0 means the peer closed the socket; looping again would never end. */
			if (r <= 0) {
				if (r < 0)
					perror("client read");
				fprintf(stderr, "%d/%d: Couldn't read remote address [%d]\n",
					n, (int) sizeof msg, i);
				goto fail;
			}
			n += r;
		}

		/* The record comes from the network: terminate it and bound the GID field. */
		msg[sizeof msg - 1] = '\0';
		if (sscanf(msg, "%x:%x:%x:%32s", &rem_dest[i].lid, &rem_dest[i].qpn,
			   &rem_dest[i].psn, gid) != 4) {
			fprintf(stderr, "Couldn't parse remote address [%d]\n", i);
			goto fail;
		}
		wire_gid_to_gid(gid, &rem_dest[i].gid);
	}

	if (write(sockfd, "done", sizeof "done") != sizeof "done") {
		perror("client write");
		goto fail;
	}

	close(sockfd);
	return rem_dest;

fail:
	/* Never hand a partially filled array back to the caller. */
	free(rem_dest);
	close(sockfd);
	return NULL;
}
/*
 * Server half of the TCP handshake that exchanges QP addressing information.
 *
 * Listens on `port`, accepts one connection, reads MAX_QP fixed-size
 * "lid:qpn:psn:gid" records from the client, connects the local QPs to them
 * with pp_connect_ctx(), sends the MAX_QP local records back and waits for
 * the client's "done".
 *
 * Returns a malloc()ed array of MAX_QP remote destinations that the caller
 * must free(), or NULL on any failure.
 */
static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx,
						 int ib_port, enum ibv_mtu mtu,
						 int port, int sl,
						 const struct pingpong_dest *my_dest,
						 int sgid_idx)
{
	struct addrinfo *res, *t;
	struct addrinfo hints = {
		.ai_flags    = AI_PASSIVE,
		.ai_family   = AF_UNSPEC,
		.ai_socktype = SOCK_STREAM
	};
	char *service;
	char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"];
	int n;
	int r;
	int i;
	int sockfd = -1, connfd;
	struct pingpong_dest *rem_dest = NULL;
	char gid[33];

	if (asprintf(&service, "%d", port) < 0)
		return NULL;

	/* getaddrinfo() reports failure with any non-zero value, not only negative ones. */
	n = getaddrinfo(NULL, service, &hints, &res);
	if (n != 0) {
		fprintf(stderr, "Error for port %d\n", port);
		free(service);
		return NULL;
	}

	for (t = res; t; t = t->ai_next) {
		sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
		if (sockfd >= 0) {
			n = 1;

			setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n);

			if (!bind(sockfd, t->ai_addr, t->ai_addrlen))
				break;
			close(sockfd);
			sockfd = -1;
		}
	}

	freeaddrinfo(res);
	free(service);

	if (sockfd < 0) {
		fprintf(stderr, "Couldn't listen to port %d\n", port);
		return NULL;
	}

	if (listen(sockfd, 1) < 0) {
		fprintf(stderr, "Couldn't listen to port %d\n", port);
		close(sockfd);
		return NULL;
	}

	connfd = accept(sockfd, NULL, NULL);
	close(sockfd);
	if (connfd < 0) {
		fprintf(stderr, "accept() failed\n");
		return NULL;
	}

	rem_dest = malloc(MAX_QP * sizeof *rem_dest);
	if (!rem_dest)
		goto fail;

	for (i = 0; i < MAX_QP; ++i) {
		n = 0;
		while (n < (int) sizeof msg) {
			r = read(connfd, msg + n, sizeof msg - n);
			/* r == 0 means the peer closed the socket; looping again would never end. */
			if (r <= 0) {
				if (r < 0)
					perror("server read");
				fprintf(stderr, "%d/%d: Couldn't read remote address [%d]\n",
					n, (int) sizeof msg, i);
				goto fail;
			}
			n += r;
		}

		/* The record comes from the network: terminate it and bound the GID field. */
		msg[sizeof msg - 1] = '\0';
		if (sscanf(msg, "%x:%x:%x:%32s", &rem_dest[i].lid, &rem_dest[i].qpn,
			   &rem_dest[i].psn, gid) != 4) {
			fprintf(stderr, "Couldn't parse remote address [%d]\n", i);
			goto fail;
		}
		wire_gid_to_gid(gid, &rem_dest[i].gid);
	}

	if (pp_connect_ctx(ctx, ib_port, mtu, sl, my_dest, rem_dest,
			   sgid_idx)) {
		fprintf(stderr, "Couldn't connect to remote QP\n");
		goto fail;
	}

	for (i = 0; i < MAX_QP; ++i) {
		gid_to_wire_gid(&my_dest[i].gid, gid);
		snprintf(msg, sizeof msg, "%04x:%06x:%06x:%s", my_dest[i].lid,
			 my_dest[i].qpn, my_dest[i].psn, gid);
		if (write(connfd, msg, sizeof msg) != sizeof msg) {
			fprintf(stderr, "Couldn't send local address\n");
			goto fail;
		}
	}

	/* Wait for the client's "done" so the socket is not closed under it. */
	if (read(connfd, msg, sizeof msg) != sizeof "done") {
		perror("server read");
		goto fail;
	}

	close(connfd);
	return rem_dest;

fail:
	/* Never hand a partially filled array back to the caller. */
	free(rem_dest);
	close(connfd);
	return NULL;
}
static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
int num_qp, int rx_depth, int port,
int use_event)
{
struct pingpong_context *ctx;
int i;
ctx = calloc(1, sizeof *ctx);
if (!ctx)
return NULL;
ctx->size = size;
ctx->send_flags = IBV_SEND_SIGNALED;
ctx->num_qp = num_qp;
ctx->rx_depth = rx_depth;
ctx->buf = memalign(page_size, size);
if (!ctx->buf) {
fprintf(stderr, "Couldn't allocate work buf.\n");
goto clean_ctx;
}
memset(ctx->buf, 0, size);
ctx->context = ibv_open_device(ib_dev);
if (!ctx->context) {
fprintf(stderr, "Couldn't get context for %s\n",
ibv_get_device_name(ib_dev));
goto clean_buffer;
}
if (use_event) {
ctx->channel = ibv_create_comp_channel(ctx->context);
if (!ctx->channel) {
fprintf(stderr, "Couldn't create completion channel\n");
goto clean_device;
}
} else
ctx->channel = NULL;
ctx->pd = ibv_alloc_pd(ctx->context);
if (!ctx->pd) {
fprintf(stderr, "Couldn't allocate PD\n");
goto clean_comp_channel;
}
ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size, IBV_ACCESS_LOCAL_WRITE);
if (!ctx->mr) {
fprintf(stderr, "Couldn't register MR\n");
goto clean_pd;
}
ctx->cq = ibv_create_cq(ctx->context, rx_depth + num_qp, NULL,
ctx->channel, 0);
if (!ctx->cq) {
fprintf(stderr, "Couldn't create CQ\n");
goto clean_mr;
}
{
struct ibv_srq_init_attr attr = {
.attr = {
.max_wr = rx_depth,
.max_sge = 1
}
};
ctx->srq = ibv_create_srq(ctx->pd, &attr);
if (!ctx->srq) {
fprintf(stderr, "Couldn't create SRQ\n");
goto clean_cq;
}
}
for (i = 0; i < num_qp; ++i) {
struct ibv_qp_attr attr;
struct ibv_qp_init_attr init_attr = {
.send_cq = ctx->cq,
.recv_cq = ctx->cq,
.srq = ctx->srq,
.cap = {
.max_send_wr = 1,
.max_send_sge = 1,
},
.qp_type = IBV_QPT_RC
};
ctx->qp[i] = ibv_create_qp(ctx->pd, &init_attr);
if (!ctx->qp[i]) {
fprintf(stderr, "Couldn't create QP[%d]\n", i);
goto clean_qps;
}
ibv_query_qp(ctx->qp[i], &attr, IBV_QP_CAP, &init_attr);
if (init_attr.cap.max_inline_data >= size) {
ctx->send_flags |= IBV_SEND_INLINE;
}
}
for (i = 0; i < num_qp; ++i) {
struct ibv_qp_attr attr = {
.qp_state = IBV_QPS_INIT,
.pkey_index = 0,
.port_num = port,
.qp_access_flags = 0
};
if (ibv_modify_qp(ctx->qp[i], &attr,
IBV_QP_STATE |
IBV_QP_PKEY_INDEX |
IBV_QP_PORT |
IBV_QP_ACCESS_FLAGS)) {
fprintf(stderr, "Failed to modify QP[%d] to INIT\n", i);
goto clean_qps_full;
}
}
return ctx;
clean_qps_full:
i = num_qp;
clean_qps:
for (--i; i >= 0; --i)
ibv_destroy_qp(ctx->qp[i]);
ibv_destroy_srq(ctx->srq);
clean_cq:
ibv_destroy_cq(ctx->cq);
clean_mr:
ibv_dereg_mr(ctx->mr);
clean_pd:
ibv_dealloc_pd(ctx->pd);
clean_comp_channel:
if (ctx->channel)
ibv_destroy_comp_channel(ctx->channel);
clean_device:
ibv_close_device(ctx->context);
clean_buffer:
free(ctx->buf);
clean_ctx:
free(ctx);
return NULL;
}
/*
 * Release everything pp_init_ctx() created, in reverse order of creation:
 * the first num_qp QPs, the SRQ, the CQ, the MR, the PD, the completion
 * channel (if one exists) and the device context, then the buffer and ctx.
 *
 * Stops at the first verbs call that fails, prints which one, and returns 1
 * without releasing the remaining resources. Returns 0 on success.
 */
static int pp_close_ctx(struct pingpong_context *ctx, int num_qp)
{
	int idx = 0;

	while (idx < num_qp) {
		if (ibv_destroy_qp(ctx->qp[idx]) != 0) {
			fprintf(stderr, "Couldn't destroy QP[%d]\n", idx);
			return 1;
		}
		++idx;
	}

	if (ibv_destroy_srq(ctx->srq) != 0) {
		fprintf(stderr, "Couldn't destroy SRQ\n");
		return 1;
	}

	if (ibv_destroy_cq(ctx->cq) != 0) {
		fprintf(stderr, "Couldn't destroy CQ\n");
		return 1;
	}

	if (ibv_dereg_mr(ctx->mr) != 0) {
		fprintf(stderr, "Couldn't deregister MR\n");
		return 1;
	}

	if (ibv_dealloc_pd(ctx->pd) != 0) {
		fprintf(stderr, "Couldn't deallocate PD\n");
		return 1;
	}

	/* The channel only exists when event mode was requested. */
	if (ctx->channel != NULL &&
	    ibv_destroy_comp_channel(ctx->channel) != 0) {
		fprintf(stderr, "Couldn't destroy completion channel\n");
		return 1;
	}

	if (ibv_close_device(ctx->context) != 0) {
		fprintf(stderr, "Couldn't release context\n");
		return 1;
	}

	free(ctx->buf);
	free(ctx);

	return 0;
}
static int pp_post_recv(struct pingpong_context *ctx, int n)
{
struct ibv_sge list = {
.addr = (uintptr_t) ctx->buf,
.length = ctx->size,
.lkey = ctx->mr->lkey
};
struct ibv_recv_wr wr = {
.wr_id = PINGPONG_RECV_WRID,
.sg_list = &list,
.num_sge = 1,
};
struct ibv_recv_wr *bad_wr;
int i;
for (i = 0; i < n; ++i)
if (ibv_post_srq_recv(ctx->srq, &wr, &bad_wr))
break;
return i;
}
static int pp_post_send(struct pingpong_context *ctx, int qp_index)
{
struct ibv_sge list = {
.addr = (uintptr_t) ctx->buf,
.length = ctx->size,
.lkey = ctx->mr->lkey
};
struct ibv_send_wr wr = {
.wr_id = PINGPONG_SEND_WRID,
.sg_list = &list,
.num_sge = 1,
.opcode = IBV_WR_SEND,
.send_flags = ctx->send_flags,
};
struct ibv_send_wr *bad_wr;
return ibv_post_send(ctx->qp[qp_index], &wr, &bad_wr);
}
/*
 * Look up the index within ctx->qp[] of the queue pair whose number is qpn,
 * searching the first num_qp entries in order.
 *
 * Returns the index of the first match, or -1 when none matches.
 */
static int find_qp(int qpn, struct pingpong_context *ctx, int num_qp)
{
	int idx = 0;

	while (idx < num_qp) {
		if (ctx->qp[idx]->qp_num == qpn)
			return idx;
		++idx;
	}

	return -1;
}
/*
 * Print the command-line help to stdout. argv0 is the program name shown in
 * the two invocation examples.
 */
static void usage(const char *argv0)
{
	/* Option descriptions contain no conversions, so they are written verbatim. */
	static const char option_help[] =
		"\n"
		"Options:\n"
		" -p, --port=<port> listen on/connect to port <port> (default 18515)\n"
		" -d, --ib-dev=<dev> use IB device <dev> (default first device found)\n"
		" -i, --ib-port=<port> use port <port> of IB device (default 1)\n"
		" -s, --size=<size> size of message to exchange (default 4096)\n"
		" -m, --mtu=<size> path MTU (default 1024)\n"
		" -q, --num-qp=<num> number of QPs to use (default 16)\n"
		" -r, --rx-depth=<dep> number of receives to post at a time (default 500)\n"
		" -n, --iters=<iters> number of exchanges per QP(default 1000)\n"
		" -l, --sl=<sl> service level value\n"
		" -e, --events sleep on CQ events (default poll)\n"
		" -g, --gid-idx=<gid index> local port gid index\n"
		" -c, --chk validate received buffer\n";

	printf("Usage:\n"
	       " %s start a server and wait for connection\n"
	       " %s <host> connect to server at <host>\n",
	       argv0, argv0);
	fputs(option_help, stdout);
}
/*
 * SRQ pingpong test driver.
 *
 * Parses the command line, sets up num_qp RC queue pairs on a shared receive
 * queue, exchanges addressing information with the peer over TCP (as client
 * when a host name is given, as server otherwise), then bounces a message
 * until `iters` sends and `iters` receives have completed in total, and
 * prints throughput figures.
 *
 * Returns 0 on success and 1 on any error.
 */
int main(int argc, char *argv[])
{
	struct ibv_device **dev_list;
	struct ibv_device *ib_dev;
	struct ibv_wc *wc;
	struct pingpong_context *ctx;
	struct pingpong_dest my_dest[MAX_QP];
	struct pingpong_dest *rem_dest;
	struct timeval start, end;
	char *ib_devname = NULL;
	char *servername = NULL;
	unsigned int port = 18515;
	int ib_port = 1;
	unsigned int size = 4096;
	enum ibv_mtu mtu = IBV_MTU_1024;
	unsigned int num_qp = 16;
	unsigned int rx_depth = 500;
	unsigned int iters = 1000;
	int use_event = 0;
	int routs;
	int rcnt, scnt;
	int num_wc;
	int i;
	int num_cq_events = 0;
	int sl = 0;
	int gidx = -1;

	srand48(getpid() * time(NULL));

	while (1) {
		int c;

		static struct option long_options[] = {
			{ .name = "port",     .has_arg = 1, .val = 'p' },
			{ .name = "ib-dev",   .has_arg = 1, .val = 'd' },
			{ .name = "ib-port",  .has_arg = 1, .val = 'i' },
			{ .name = "size",     .has_arg = 1, .val = 's' },
			{ .name = "mtu",      .has_arg = 1, .val = 'm' },
			{ .name = "num-qp",   .has_arg = 1, .val = 'q' },
			{ .name = "rx-depth", .has_arg = 1, .val = 'r' },
			{ .name = "iters",    .has_arg = 1, .val = 'n' },
			{ .name = "sl",       .has_arg = 1, .val = 'l' },
			{ .name = "events",   .has_arg = 0, .val = 'e' },
			{ .name = "gid-idx",  .has_arg = 1, .val = 'g' },
			{ .name = "chk",      .has_arg = 0, .val = 'c' },
			{}
		};

		/*
		 * -c is a plain flag, matching --chk (has_arg = 0) and the usage
		 * text; a trailing ':' here would make it swallow the next argument.
		 */
		c = getopt_long(argc, argv, "p:d:i:s:m:q:r:n:l:eg:c",
				long_options, NULL);
		if (c == -1)
			break;

		switch (c) {
		case 'p':
			port = strtoul(optarg, NULL, 0);
			if (port > 65535) {
				usage(argv[0]);
				return 1;
			}
			break;

		case 'd':
			ib_devname = strdupa(optarg);
			break;

		case 'i':
			ib_port = strtol(optarg, NULL, 0);
			if (ib_port < 1) {
				usage(argv[0]);
				return 1;
			}
			break;

		case 's':
			size = strtoul(optarg, NULL, 0);
			if (size < 1) {
				usage(argv[0]);
				return 1;
			}
			break;

		case 'm':
			mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0));
			if (mtu == 0) {
				usage(argv[0]);
				return 1;
			}
			break;

		case 'q':
			num_qp = strtoul(optarg, NULL, 0);
			break;

		case 'r':
			rx_depth = strtoul(optarg, NULL, 0);
			break;

		case 'n':
			iters = strtoul(optarg, NULL, 0);
			break;

		case 'l':
			sl = strtol(optarg, NULL, 0);
			break;

		case 'e':
			++use_event;
			break;

		case 'g':
			gidx = strtol(optarg, NULL, 0);
			break;

		case 'c':
			validate_buf = 1;
			break;

		default:
			usage(argv[0]);
			return 1;
		}
	}

	if (optind == argc - 1)
		servername = strdupa(argv[optind]);
	else if (optind < argc) {
		usage(argv[0]);
		return 1;
	}

	/* my_dest[] and the context's per-QP arrays hold at most MAX_QP entries. */
	if (num_qp < 1 || num_qp > MAX_QP) {
		fprintf(stderr, "num-qp %u is out of range (1..%d)\n",
			num_qp, MAX_QP);
		return 1;
	}

	if (num_qp > rx_depth) {
		fprintf(stderr, "rx_depth %d is too small for %d QPs -- "
			"must have at least one receive per QP.\n",
			rx_depth, num_qp);
		return 1;
	}

	/* One poll can return at most a completion per posted receive plus one send per QP. */
	num_wc = num_qp + rx_depth;
	wc     = alloca(num_wc * sizeof *wc);

	page_size = PAGE_SIZE;

	dev_list = ibv_get_device_list(NULL);
	if (!dev_list) {
		perror("Failed to get IB devices list");
		return 1;
	}

	if (!ib_devname) {
		ib_dev = *dev_list;
		if (!ib_dev) {
			fprintf(stderr, "No IB devices found\n");
			return 1;
		}
	} else {
		for (i = 0; dev_list[i]; ++i)
			if (!strcmp(ibv_get_device_name(dev_list[i]), ib_devname))
				break;
		ib_dev = dev_list[i];
		if (!ib_dev) {
			fprintf(stderr, "IB device %s not found\n", ib_devname);
			return 1;
		}
	}

	ctx = pp_init_ctx(ib_dev, size, num_qp, rx_depth, ib_port, use_event);
	if (!ctx)
		return 1;

	routs = pp_post_recv(ctx, ctx->rx_depth);
	if (routs < ctx->rx_depth) {
		fprintf(stderr, "Couldn't post receive (%d)\n", routs);
		return 1;
	}

	if (use_event)
		if (ibv_req_notify_cq(ctx->cq, 0)) {
			fprintf(stderr, "Couldn't request CQ notification\n");
			return 1;
		}

	/* Unused entries stay zero; the handshake always transfers all MAX_QP of them. */
	memset(my_dest, 0, sizeof my_dest);

	if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) {
		fprintf(stderr, "Couldn't get port info\n");
		return 1;
	}
	for (i = 0; i < num_qp; ++i) {
		my_dest[i].qpn = ctx->qp[i]->qp_num;
		my_dest[i].psn = lrand48() & 0xffffff;
		my_dest[i].lid = ctx->portinfo.lid;
		if (ctx->portinfo.link_layer != IBV_LINK_LAYER_ETHERNET
		    && !my_dest[i].lid) {
			fprintf(stderr, "Couldn't get local LID\n");
			return 1;
		}

		if (gidx >= 0) {
			if (ibv_query_gid(ctx->context, ib_port, gidx,
					  &my_dest[i].gid)) {
				fprintf(stderr, "Could not get local gid for "
					"gid index %d\n", gidx);
				return 1;
			}
		} else
			memset(&my_dest[i].gid, 0, sizeof my_dest[i].gid);

		printf(" local address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x, \n",
		       my_dest[i].lid, my_dest[i].qpn, my_dest[i].psn);
	}

	if (servername)
		rem_dest = pp_client_exch_dest(servername, port, my_dest);
	else
		rem_dest = pp_server_exch_dest(ctx, ib_port, mtu, port, sl,
					       my_dest, gidx);

	if (!rem_dest)
		return 1;

	for (i = 0; i < num_qp; ++i) {
		printf(" remote address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x\n",
		       rem_dest[i].lid, rem_dest[i].qpn, rem_dest[i].psn);
	}

	/* The server already connected its QPs inside pp_server_exch_dest(). */
	if (servername)
		if (pp_connect_ctx(ctx, ib_port, mtu, sl, my_dest, rem_dest, gidx))
			return 1;

	if (servername) {
		if (validate_buf)
			for (i = 0; i < size; i += page_size)
				ctx->buf[i] = i / page_size % sizeof(char);

		/* The client starts the exchange with one send on every QP. */
		for (i = 0; i < num_qp; ++i) {
			if (pp_post_send(ctx, i)) {
				fprintf(stderr, "Couldn't post send\n");
				return 1;
			}
			ctx->pending[i] = PINGPONG_SEND_WRID | PINGPONG_RECV_WRID;
		}
	} else
		for (i = 0; i < num_qp; ++i)
			ctx->pending[i] = PINGPONG_RECV_WRID;

	if (gettimeofday(&start, NULL)) {
		perror("gettimeofday");
		return 1;
	}

	rcnt = scnt = 0;
	while (rcnt < iters || scnt < iters) {
		if (use_event) {
			struct ibv_cq *ev_cq;
			void *ev_ctx;

			if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) {
				fprintf(stderr, "Failed to get cq_event\n");
				return 1;
			}

			++num_cq_events;

			if (ev_cq != ctx->cq) {
				fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq);
				return 1;
			}

			if (ibv_req_notify_cq(ctx->cq, 0)) {
				fprintf(stderr, "Couldn't request CQ notification\n");
				return 1;
			}
		}

		{
			int ne, qp_ind;

			/* In polling mode spin until at least one completion arrives. */
			do {
				ne = ibv_poll_cq(ctx->cq, num_wc, wc);
				if (ne < 0) {
					fprintf(stderr, "poll CQ failed %d\n", ne);
					return 1;
				}
			} while (!use_event && ne < 1);

			for (i = 0; i < ne; ++i) {
				if (wc[i].status != IBV_WC_SUCCESS) {
					fprintf(stderr, "Failed status %s (%d) for wr_id %d\n",
						ibv_wc_status_str(wc[i].status), wc[i].status, (int) wc[i].wr_id);
					return 1;
				}

				qp_ind = find_qp(wc[i].qp_num, ctx, num_qp);
				if (qp_ind < 0) {
					fprintf(stderr, "Couldn't find QPN %06x\n",
						wc[i].qp_num);
					return 1;
				}

				switch ((int) wc[i].wr_id) {
				case PINGPONG_SEND_WRID:
					++scnt;
					break;

				case PINGPONG_RECV_WRID:
					/* Refill the SRQ once no more than one receive per QP is left. */
					if (--routs <= num_qp) {
						routs += pp_post_recv(ctx, ctx->rx_depth - routs);
						if (routs < ctx->rx_depth) {
							fprintf(stderr,
								"Couldn't post receive (%d)\n",
								routs);
							return 1;
						}
					}

					++rcnt;
					break;

				default:
					fprintf(stderr, "Completion for unknown wr_id %d\n",
						(int) wc[i].wr_id);
					return 1;
				}

				/* Send again on this QP once both its send and its receive have completed. */
				ctx->pending[qp_ind] &= ~(int) wc[i].wr_id;
				if (scnt < iters && !ctx->pending[qp_ind]) {
					if (pp_post_send(ctx, qp_ind)) {
						fprintf(stderr, "Couldn't post send\n");
						return 1;
					}
					ctx->pending[qp_ind] = PINGPONG_RECV_WRID |
							       PINGPONG_SEND_WRID;
				}
			}
		}
	}

	if (gettimeofday(&end, NULL)) {
		perror("gettimeofday");
		return 1;
	}

	{
		float usec = (end.tv_sec - start.tv_sec) * 1000000 +
			     (end.tv_usec - start.tv_usec);
		long long bytes = (long long) size * iters * 2;

		printf("%lld bytes in %.2f seconds = %.2f Mbit/sec\n",
		       bytes, usec / 1000000., bytes * 8. / usec);
		printf("%d iters in %.2f seconds = %.2f usec/iter\n",
		       iters, usec / 1000000., usec / iters);

		if ((!servername) && (validate_buf)) {
			for (i = 0; i < size; i += page_size)
				if (ctx->buf[i] != i / page_size % sizeof(char))
					printf("invalid data in page %d\n",
					       i / page_size);
		}
	}

	ibv_ack_cq_events(ctx->cq, num_cq_events);

	if (pp_close_ctx(ctx, num_qp))
		return 1;

	ibv_free_device_list(dev_list);
	free(rem_dest);

	return 0;
}

872
usr/tests/ib/uc_pingpong.c Normal file
View file

@ -0,0 +1,872 @@
/*
* Copyright (c) 2005 Topspin Communications.
* 2018 Annika Wierichs, RWTH Aachen.
* All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <netdb.h>
#include <netinet/in.h>
#include <malloc.h>
#include <getopt.h>
#include <time.h>
#include "pingpong.h"
/*
 * Work-request IDs. The two values are distinct bits: main() ORs them
 * together in pingpong_context.pending and clears them again with the wr_id
 * of each completion.
 */
enum {
/* wr_id placed on every receive posted by pp_post_recv() */
PINGPONG_RECV_WRID = 1,
/* wr_id placed on every send posted by pp_post_send() */
PINGPONG_SEND_WRID = 2,
};
/* Alignment of the work buffer and stride of the validation loops; assigned from PAGE_SIZE in main(). */
static int page_size;
/* Set to 1 by the -c/--chk option; enables the buffer fill/check loops in main(). */
static int validate_buf;
/*
 * All verbs resources owned by one run of the test. Created by pp_init_ctx()
 * and released by pp_close_ctx().
 */
struct pingpong_context {
/* device context returned by ibv_open_device() */
struct ibv_context *context;
/* completion channel; NULL unless event mode was requested */
struct ibv_comp_channel *channel;
/* protection domain that owns mr and qp */
struct ibv_pd *pd;
/* memory registration covering buf */
struct ibv_mr *mr;
/* single CQ used as both send and receive CQ of qp */
struct ibv_cq *cq;
/* the one queue pair (created with type IBV_QPT_UC) */
struct ibv_qp *qp;
/* work buffer of `size` bytes, allocated with memalign(page_size, size) */
char *buf;
/* length of buf in bytes; also the length of every SGE posted */
int size;
/* flags for posted sends: IBV_SEND_SIGNALED, plus IBV_SEND_INLINE when the QP reports max_inline_data >= size */
int send_flags;
/* max_recv_wr of the QP and the number of receives main() keeps posted */
int rx_depth;
/* bitmask of PINGPONG_*_WRID values still awaited */
int pending;
/* port attributes filled in by pp_get_port_info() in main() */
struct ibv_port_attr portinfo;
};
/*
 * Addressing information for one QP. It is exchanged with the peer over TCP
 * as the hexadecimal text "lid:qpn:psn:gid".
 */
struct pingpong_dest {
/* port LID */
int lid;
/* queue pair number */
int qpn;
/* initial packet sequence number (main() masks it to 24 bits) */
int psn;
/* port GID; all zero when no GID index was given */
union ibv_gid gid;
};
/*
 * Connect the local QP to the remote one described by dest: move it to RTR
 * with the peer's LID/QPN/PSN (adding a GRH when the peer's GID has a
 * non-zero interface ID), then to RTS with my_psn as the send PSN.
 *
 * Returns 0 on success and 1 if either ibv_modify_qp() call fails.
 */
static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn,
			  enum ibv_mtu mtu, int sl,
			  struct pingpong_dest *dest, int sgid_idx)
{
	const int rtr_mask = IBV_QP_STATE |
			     IBV_QP_AV |
			     IBV_QP_PATH_MTU |
			     IBV_QP_DEST_QPN |
			     IBV_QP_RQ_PSN;
	const int rts_mask = IBV_QP_STATE |
			     IBV_QP_SQ_PSN;
	struct ibv_qp_attr attr = { .qp_state = IBV_QPS_RTR };

	/* Receive side: where the peer lives and what it will send first. */
	attr.path_mtu              = mtu;
	attr.dest_qp_num           = dest->qpn;
	attr.rq_psn                = dest->psn;
	attr.ah_attr.is_global     = 0;
	attr.ah_attr.dlid          = dest->lid;
	attr.ah_attr.sl            = sl;
	attr.ah_attr.src_path_bits = 0;
	attr.ah_attr.port_num      = port;

	if (dest->gid.global.interface_id != 0) {
		attr.ah_attr.is_global      = 1;
		attr.ah_attr.grh.hop_limit  = 1;
		attr.ah_attr.grh.dgid       = dest->gid;
		attr.ah_attr.grh.sgid_index = sgid_idx;
	}

	if (ibv_modify_qp(ctx->qp, &attr, rtr_mask) != 0) {
		fprintf(stderr, "Failed to modify QP to RTR\n");
		return 1;
	}

	/* Send side: the same attr structure is reused for the RTS step. */
	attr.qp_state = IBV_QPS_RTS;
	attr.sq_psn   = my_psn;

	if (ibv_modify_qp(ctx->qp, &attr, rts_mask) != 0) {
		fprintf(stderr, "Failed to modify QP to RTS\n");
		return 1;
	}

	return 0;
}
/*
 * Client half of the TCP handshake that exchanges QP addressing information.
 *
 * Connects to servername:port, sends my_dest as one fixed-size
 * "lid:qpn:psn:gid" text record, reads the server's record back and answers
 * with "done".
 *
 * Returns a malloc()ed remote destination that the caller must free(), or
 * NULL on any failure (connection, I/O, unparsable record, out of memory).
 */
static struct pingpong_dest *pp_client_exch_dest(const char *servername, int port,
						 const struct pingpong_dest *my_dest)
{
	struct addrinfo *res, *t;
	struct addrinfo hints = {
		.ai_family   = AF_UNSPEC,
		.ai_socktype = SOCK_STREAM
	};
	char *service;
	char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"];
	int n;
	int sockfd = -1;
	struct pingpong_dest *rem_dest = NULL;
	char gid[33];

	if (asprintf(&service, "%d", port) < 0)
		return NULL;

	/* getaddrinfo() reports failure with any non-zero value, not only negative ones. */
	n = getaddrinfo(servername, service, &hints, &res);
	if (n != 0) {
		fprintf(stderr, "Error for %s:%d\n", servername, port);
		free(service);
		return NULL;
	}

	for (t = res; t; t = t->ai_next) {
		sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
		if (sockfd >= 0) {
			if (!connect(sockfd, t->ai_addr, t->ai_addrlen))
				break;
			close(sockfd);
			sockfd = -1;
		}
	}

	freeaddrinfo(res);
	free(service);

	if (sockfd < 0) {
		fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port);
		return NULL;
	}

	gid_to_wire_gid(&my_dest->gid, gid);
	snprintf(msg, sizeof msg, "%04x:%06x:%06x:%s", my_dest->lid, my_dest->qpn,
		 my_dest->psn, gid);
	if (write(sockfd, msg, sizeof msg) != sizeof msg) {
		fprintf(stderr, "Couldn't send local address\n");
		goto out;
	}

	if (read(sockfd, msg, sizeof msg) != sizeof msg ||
	    write(sockfd, "done", sizeof "done") != sizeof "done") {
		perror("client read/write");
		fprintf(stderr, "Couldn't read/write remote address\n");
		goto out;
	}

	rem_dest = malloc(sizeof *rem_dest);
	if (!rem_dest)
		goto out;

	/* The record comes from the network: terminate it and bound the GID field. */
	msg[sizeof msg - 1] = '\0';
	if (sscanf(msg, "%x:%x:%x:%32s", &rem_dest->lid, &rem_dest->qpn,
		   &rem_dest->psn, gid) != 4) {
		fprintf(stderr, "Couldn't parse remote address\n");
		free(rem_dest);
		rem_dest = NULL;
		goto out;
	}
	wire_gid_to_gid(gid, &rem_dest->gid);

out:
	close(sockfd);
	return rem_dest;
}
/*
 * Server half of the TCP handshake that exchanges QP addressing information.
 *
 * Listens on `port`, accepts one connection, reads the client's
 * "lid:qpn:psn:gid" record, connects the local QP to it with
 * pp_connect_ctx(), sends the local record back and waits for the client's
 * "done".
 *
 * Returns a malloc()ed remote destination that the caller must free(), or
 * NULL on any failure.
 */
static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx,
						 int ib_port, enum ibv_mtu mtu,
						 int port, int sl,
						 const struct pingpong_dest *my_dest,
						 int sgid_idx)
{
	struct addrinfo *res, *t;
	struct addrinfo hints = {
		.ai_flags    = AI_PASSIVE,
		.ai_family   = AF_UNSPEC,
		.ai_socktype = SOCK_STREAM
	};
	char *service;
	char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"];
	int n;
	int sockfd = -1, connfd;
	struct pingpong_dest *rem_dest = NULL;
	char gid[33];

	if (asprintf(&service, "%d", port) < 0)
		return NULL;

	/* getaddrinfo() reports failure with any non-zero value, not only negative ones. */
	n = getaddrinfo(NULL, service, &hints, &res);
	if (n != 0) {
		fprintf(stderr, "Error for port %d\n", port);
		free(service);
		return NULL;
	}

	for (t = res; t; t = t->ai_next) {
		sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
		if (sockfd >= 0) {
			n = 1;

			setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n);

			if (!bind(sockfd, t->ai_addr, t->ai_addrlen))
				break;
			close(sockfd);
			sockfd = -1;
		}
	}

	freeaddrinfo(res);
	free(service);

	if (sockfd < 0) {
		fprintf(stderr, "Couldn't listen to port %d\n", port);
		return NULL;
	}

	if (listen(sockfd, 1) < 0) {
		fprintf(stderr, "Couldn't listen to port %d\n", port);
		close(sockfd);
		return NULL;
	}

	connfd = accept(sockfd, NULL, NULL);
	close(sockfd);
	if (connfd < 0) {
		fprintf(stderr, "accept() failed\n");
		return NULL;
	}

	n = read(connfd, msg, sizeof msg);
	if (n != sizeof msg) {
		perror("server read");
		fprintf(stderr, "%d/%d: Couldn't read remote address\n", n, (int) sizeof msg);
		goto out;
	}

	rem_dest = malloc(sizeof *rem_dest);
	if (!rem_dest)
		goto out;

	/* The record comes from the network: terminate it and bound the GID field. */
	msg[sizeof msg - 1] = '\0';
	if (sscanf(msg, "%x:%x:%x:%32s", &rem_dest->lid, &rem_dest->qpn,
		   &rem_dest->psn, gid) != 4) {
		fprintf(stderr, "Couldn't parse remote address\n");
		free(rem_dest);
		rem_dest = NULL;
		goto out;
	}
	wire_gid_to_gid(gid, &rem_dest->gid);

	if (pp_connect_ctx(ctx, ib_port, my_dest->psn, mtu, sl, rem_dest,
			   sgid_idx)) {
		fprintf(stderr, "Couldn't connect to remote QP\n");
		free(rem_dest);
		rem_dest = NULL;
		goto out;
	}

	gid_to_wire_gid(&my_dest->gid, gid);
	snprintf(msg, sizeof msg, "%04x:%06x:%06x:%s", my_dest->lid, my_dest->qpn,
		 my_dest->psn, gid);
	/* The trailing read waits for the client's "done" acknowledgement. */
	if (write(connfd, msg, sizeof msg) != sizeof msg ||
	    read(connfd, msg, sizeof msg) != sizeof "done") {
		fprintf(stderr, "Couldn't send/recv local address\n");
		free(rem_dest);
		rem_dest = NULL;
		goto out;
	}

out:
	close(connfd);
	return rem_dest;
}
static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
int rx_depth, int port,
int use_event)
{
struct pingpong_context *ctx;
ctx = calloc(1, sizeof *ctx);
if (!ctx)
return NULL;
ctx->size = size;
ctx->send_flags = IBV_SEND_SIGNALED;
ctx->rx_depth = rx_depth;
ctx->buf = memalign(page_size, size);
if (!ctx->buf) {
fprintf(stderr, "Couldn't allocate work buf.\n");
goto clean_ctx;
}
/* FIXME memset(ctx->buf, 0, size); */
memset(ctx->buf, 0x7b, size);
ctx->context = ibv_open_device(ib_dev);
if (!ctx->context) {
fprintf(stderr, "Couldn't get context for %s\n",
ibv_get_device_name(ib_dev));
goto clean_buffer;
}
if (use_event) {
ctx->channel = ibv_create_comp_channel(ctx->context);
if (!ctx->channel) {
fprintf(stderr, "Couldn't create completion channel\n");
goto clean_device;
}
} else
ctx->channel = NULL;
ctx->pd = ibv_alloc_pd(ctx->context);
if (!ctx->pd) {
fprintf(stderr, "Couldn't allocate PD\n");
goto clean_comp_channel;
}
ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size, IBV_ACCESS_LOCAL_WRITE);
if (!ctx->mr) {
fprintf(stderr, "Couldn't register MR\n");
goto clean_pd;
}
ctx->cq = ibv_create_cq(ctx->context, rx_depth + 1, NULL,
ctx->channel, 0);
if (!ctx->cq) {
fprintf(stderr, "Couldn't create CQ\n");
goto clean_mr;
}
{
struct ibv_qp_attr attr;
struct ibv_qp_init_attr init_attr = {
.send_cq = ctx->cq,
.recv_cq = ctx->cq,
.cap = {
.max_send_wr = 1,
.max_recv_wr = rx_depth,
.max_send_sge = 1,
.max_recv_sge = 1
},
.qp_type = IBV_QPT_UC
};
ctx->qp = ibv_create_qp(ctx->pd, &init_attr);
if (!ctx->qp) {
fprintf(stderr, "Couldn't create QP\n");
goto clean_cq;
}
ibv_query_qp(ctx->qp, &attr, IBV_QP_CAP, &init_attr);
if (init_attr.cap.max_inline_data >= size) {
ctx->send_flags |= IBV_SEND_INLINE;
}
}
{
struct ibv_qp_attr attr = {
.qp_state = IBV_QPS_INIT,
.pkey_index = 0,
.port_num = port,
.qp_access_flags = 0
};
if (ibv_modify_qp(ctx->qp, &attr,
IBV_QP_STATE |
IBV_QP_PKEY_INDEX |
IBV_QP_PORT |
IBV_QP_ACCESS_FLAGS)) {
fprintf(stderr, "Failed to modify QP to INIT\n");
goto clean_qp;
}
}
return ctx;
clean_qp:
ibv_destroy_qp(ctx->qp);
clean_cq:
ibv_destroy_cq(ctx->cq);
clean_mr:
ibv_dereg_mr(ctx->mr);
clean_pd:
ibv_dealloc_pd(ctx->pd);
clean_comp_channel:
if (ctx->channel)
ibv_destroy_comp_channel(ctx->channel);
clean_device:
ibv_close_device(ctx->context);
clean_buffer:
free(ctx->buf);
clean_ctx:
free(ctx);
return NULL;
}
/*
 * Release everything pp_init_ctx() created, in reverse order of creation:
 * the QP, the CQ, the MR, the PD, the completion channel (if one exists) and
 * the device context, then the buffer and ctx itself.
 *
 * Stops at the first verbs call that fails, prints which one, and returns 1
 * without releasing the remaining resources. Returns 0 on success.
 */
static int pp_close_ctx(struct pingpong_context *ctx)
{
	if (ibv_destroy_qp(ctx->qp) != 0) {
		fprintf(stderr, "Couldn't destroy QP\n");
		return 1;
	}

	if (ibv_destroy_cq(ctx->cq) != 0) {
		fprintf(stderr, "Couldn't destroy CQ\n");
		return 1;
	}

	if (ibv_dereg_mr(ctx->mr) != 0) {
		fprintf(stderr, "Couldn't deregister MR\n");
		return 1;
	}

	if (ibv_dealloc_pd(ctx->pd) != 0) {
		fprintf(stderr, "Couldn't deallocate PD\n");
		return 1;
	}

	/* The channel only exists when event mode was requested. */
	if (ctx->channel != NULL &&
	    ibv_destroy_comp_channel(ctx->channel) != 0) {
		fprintf(stderr, "Couldn't destroy completion channel\n");
		return 1;
	}

	if (ibv_close_device(ctx->context) != 0) {
		fprintf(stderr, "Couldn't release context\n");
		return 1;
	}

	free(ctx->buf);
	free(ctx);

	return 0;
}
static int pp_post_recv(struct pingpong_context *ctx, int n)
{
struct ibv_sge list = {
.addr = (uintptr_t) ctx->buf,
.length = ctx->size,
.lkey = ctx->mr->lkey
};
struct ibv_recv_wr wr = {
.wr_id = PINGPONG_RECV_WRID,
.sg_list = &list,
.num_sge = 1,
};
struct ibv_recv_wr *bad_wr;
int i;
for (i = 0; i < n; ++i)
if (ibv_post_recv(ctx->qp, &wr, &bad_wr))
break;
return i;
}
static int pp_post_send(struct pingpong_context *ctx)
{
struct ibv_sge list = {
.addr = (uintptr_t) ctx->buf,
.length = ctx->size,
.lkey = ctx->mr->lkey
};
struct ibv_send_wr wr = {
.wr_id = PINGPONG_SEND_WRID,
.sg_list = &list,
.num_sge = 1,
.opcode = IBV_WR_SEND,
.send_flags = ctx->send_flags,
};
struct ibv_send_wr *bad_wr;
return ibv_post_send(ctx->qp, &wr, &bad_wr);
}
/*
 * Print the command-line help to stdout. argv0 is the program name shown in
 * the two invocation examples.
 */
static void usage(const char *argv0)
{
	/* Option descriptions contain no conversions, so they are written verbatim. */
	static const char option_help[] =
		"\n"
		"Options:\n"
		" -p, --port=<port> listen on/connect to port <port> (default 18515)\n"
		" -d, --ib-dev=<dev> use IB device <dev> (default first device found)\n"
		" -i, --ib-port=<port> use port <port> of IB device (default 1)\n"
		" -s, --size=<size> size of message to exchange (default 4096)\n"
		" -m, --mtu=<size> path MTU (default 1024)\n"
		" -r, --rx-depth=<dep> number of receives to post at a time (default 500)\n"
		" -n, --iters=<iters> number of exchanges (default 1000)\n"
		" -l, --sl=<sl> service level value\n"
		" -e, --events sleep on CQ events (default poll)\n"
		" -g, --gid-idx=<gid index> local port gid index\n"
		" -c, --chk validate received buffer\n";

	printf("Usage:\n"
	       " %s start a server and wait for connection\n"
	       " %s <host> connect to server at <host>\n",
	       argv0, argv0);
	fputs(option_help, stdout);
}
int main(int argc, char *argv[])
{
struct ibv_device **dev_list;
struct ibv_device *ib_dev;
struct pingpong_context *ctx;
struct pingpong_dest my_dest;
struct pingpong_dest *rem_dest;
struct timeval start, end;
char *ib_devname = NULL;
char *servername = NULL;
unsigned int port = 18515;
int ib_port = 1;
unsigned int size = 4096;
enum ibv_mtu mtu = IBV_MTU_1024;
unsigned int rx_depth = 500;
unsigned int iters = 1000;
int use_event = 0;
int routs;
int rcnt, scnt;
int num_cq_events = 0;
int sl = 0;
int gidx = -1;
char gid[33];
srand48(getpid() * time(NULL));
while (1) {
int c;
static struct option long_options[] = {
{ .name = "port", .has_arg = 1, .val = 'p' },
{ .name = "ib-dev", .has_arg = 1, .val = 'd' },
{ .name = "ib-port", .has_arg = 1, .val = 'i' },
{ .name = "size", .has_arg = 1, .val = 's' },
{ .name = "mtu", .has_arg = 1, .val = 'm' },
{ .name = "rx-depth", .has_arg = 1, .val = 'r' },
{ .name = "iters", .has_arg = 1, .val = 'n' },
{ .name = "sl", .has_arg = 1, .val = 'l' },
{ .name = "events", .has_arg = 0, .val = 'e' },
{ .name = "gid-idx", .has_arg = 1, .val = 'g' },
{ .name = "chk", .has_arg = 0, .val = 'c' },
{}
};
c = getopt_long(argc, argv, "p:d:i:s:m:r:n:l:eg:c:",
long_options, NULL);
if (c == -1)
break;
switch (c) {
case 'p':
port = strtoul(optarg, NULL, 0);
if (port > 65535) {
usage(argv[0]);
return 1;
}
break;
case 'd':
ib_devname = strdupa(optarg);
break;
case 'i':
ib_port = strtol(optarg, NULL, 0);
if (ib_port < 1) {
usage(argv[0]);
return 1;
}
break;
case 's':
size = strtoul(optarg, NULL, 0);
break;
case 'm':
mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0));
if (mtu == 0) {
usage(argv[0]);
return 1;
}
break;
case 'r':
rx_depth = strtoul(optarg, NULL, 0);
break;
case 'n':
iters = strtoul(optarg, NULL, 0);
break;
case 'l':
sl = strtol(optarg, NULL, 0);
break;
case 'e':
++use_event;
break;
case 'g':
gidx = strtol(optarg, NULL, 0);
break;
case 'c':
validate_buf = 1;
break;
default:
usage(argv[0]);
return 1;
}
}
if (optind == argc - 1)
servername = strdupa(argv[optind]);
else if (optind < argc) {
usage(argv[0]);
return 1;
}
page_size = PAGE_SIZE;
dev_list = ibv_get_device_list(NULL);
if (!dev_list) {
perror("Failed to get IB devices list");
return 1;
}
if (!ib_devname) {
ib_dev = *dev_list;
if (!ib_dev) {
fprintf(stderr, "No IB devices found\n");
return 1;
}
} else {
int i;
for (i = 0; dev_list[i]; ++i)
if (!strcmp(ibv_get_device_name(dev_list[i]), ib_devname))
break;
ib_dev = dev_list[i];
if (!ib_dev) {
fprintf(stderr, "IB device %s not found\n", ib_devname);
return 1;
}
}
ctx = pp_init_ctx(ib_dev, size, rx_depth, ib_port, use_event);
if (!ctx)
return 1;
routs = pp_post_recv(ctx, ctx->rx_depth);
if (routs < ctx->rx_depth) {
fprintf(stderr, "Couldn't post receive (%d)\n", routs);
return 1;
}
if (use_event)
if (ibv_req_notify_cq(ctx->cq, 0)) {
fprintf(stderr, "Couldn't request CQ notification\n");
return 1;
}
if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) {
fprintf(stderr, "Couldn't get port info\n");
return 1;
}
my_dest.lid = ctx->portinfo.lid;
if (ctx->portinfo.link_layer != IBV_LINK_LAYER_ETHERNET &&
!my_dest.lid) {
fprintf(stderr, "Couldn't get local LID\n");
return 1;
}
if (gidx >= 0) {
if (ibv_query_gid(ctx->context, ib_port, gidx, &my_dest.gid)) {
fprintf(stderr, "can't read sgid of index %d\n", gidx);
return 1;
}
} else
memset(&my_dest.gid, 0, sizeof my_dest.gid);
my_dest.qpn = ctx->qp->qp_num;
my_dest.psn = lrand48() & 0xffffff;
printf(" local address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x\n",
my_dest.lid, my_dest.qpn, my_dest.psn);
if (servername)
rem_dest = pp_client_exch_dest(servername, port, &my_dest);
else
rem_dest = pp_server_exch_dest(ctx, ib_port, mtu, port, sl,
&my_dest, gidx);
if (!rem_dest)
return 1;
printf(" remote address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x\n",
rem_dest->lid, rem_dest->qpn, rem_dest->psn);
if (servername)
if (pp_connect_ctx(ctx, ib_port, my_dest.psn, mtu, sl, rem_dest,
gidx))
return 1;
ctx->pending = PINGPONG_RECV_WRID;
if (servername) {
if (validate_buf)
for (int i = 0; i < size; i += page_size)
ctx->buf[i] = i / page_size % sizeof(char);
if (pp_post_send(ctx)) {
fprintf(stderr, "Couldn't post send\n");
return 1;
}
ctx->pending |= PINGPONG_SEND_WRID;
}
if (gettimeofday(&start, NULL)) {
perror("gettimeofday");
return 1;
}
rcnt = scnt = 0;
while (rcnt < iters || scnt < iters) {
if (use_event) {
struct ibv_cq *ev_cq;
void *ev_ctx;
if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) {
fprintf(stderr, "Failed to get cq_event\n");
return 1;
}
++num_cq_events;
if (ev_cq != ctx->cq) {
fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq);
return 1;
}
if (ibv_req_notify_cq(ctx->cq, 0)) {
fprintf(stderr, "Couldn't request CQ notification\n");
return 1;
}
}
{
struct ibv_wc wc[2];
int ne, i;
do {
ne = ibv_poll_cq(ctx->cq, 2, wc);
if (ne < 0) {
fprintf(stderr, "poll CQ failed %d\n", ne);
return 1;
}
} while (!use_event && ne < 1);
for (i = 0; i < ne; ++i) {
if (wc[i].status != IBV_WC_SUCCESS) {
fprintf(stderr, "Failed status %s (%d) for wr_id %d\n",
ibv_wc_status_str(wc[i].status),
wc[i].status, (int) wc[i].wr_id);
return 1;
}
switch ((int) wc[i].wr_id) {
case PINGPONG_SEND_WRID:
++scnt;
break;
case PINGPONG_RECV_WRID:
if (--routs <= 1) {
routs += pp_post_recv(ctx, ctx->rx_depth - routs);
if (routs < ctx->rx_depth) {
fprintf(stderr,
"Couldn't post receive (%d)\n",
routs);
return 1;
}
}
++rcnt;
break;
default:
fprintf(stderr, "Completion for unknown wr_id %d\n",
(int) wc[i].wr_id);
return 1;
}
ctx->pending &= ~(int) wc[i].wr_id;
if (scnt < iters && !ctx->pending) {
if (pp_post_send(ctx)) {
fprintf(stderr, "Couldn't post send\n");
return 1;
}
ctx->pending = PINGPONG_RECV_WRID |
PINGPONG_SEND_WRID;
}
}
}
}
if (gettimeofday(&end, NULL)) {
perror("gettimeofday");
return 1;
}
{
float usec = (end.tv_sec - start.tv_sec) * 1000000 +
(end.tv_usec - start.tv_usec);
long long bytes = (long long) size * iters * 2;
printf("%lld bytes in %.2f seconds = %.2f Mbit/sec\n",
bytes, usec / 1000000., bytes * 8. / usec);
printf("%d iters in %.2f seconds = %.2f usec/iter\n",
iters, usec / 1000000., usec / iters);
if ((!servername) && (validate_buf)) {
for (int i = 0; i < size; i += page_size)
if (ctx->buf[i] != i / page_size % sizeof(char))
printf("invalid data in page %d\n",
i / page_size);
}
}
ibv_ack_cq_events(ctx->cq, num_cq_events);
if (pp_close_ctx(ctx))
return 1;
ibv_free_device_list(dev_list);
free(rem_dest);
return 0;
}

879
usr/tests/ib/ud_pingpong.c Normal file
View file

@ -0,0 +1,879 @@
/*
* Copyright (c) 2005 Topspin Communications.
* 2018 Annika Wierichs, RWTH Aachen.
* All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <netdb.h>
#include <malloc.h>
#include <getopt.h>
#include <netinet/in.h>
#include <time.h>
#include "pingpong.h"
/*
 * Work-request IDs attached to posted receives and sends.
 *
 * The two values are distinct bits: main() ORs them into ctx->pending
 * when a request is outstanding and clears the matching bit when the
 * completion with that wr_id is polled.
 */
enum {
PINGPONG_RECV_WRID = 1,
PINGPONG_SEND_WRID = 2,
};
/*
 * Alignment passed to memalign() for the work buffer and stride of the
 * buffer-validation loops; main() assigns it from PAGE_SIZE.
 */
static int page_size;
/* Non-zero when the -c/--chk option asked main() to validate the buffer. */
static int validate_buf;
/*
 * Verbs resources and bookkeeping for one ping-pong endpoint.
 * Filled in by pp_init_ctx() (and pp_connect_ctx() for the address handle)
 * and released by pp_close_ctx().
 */
struct pingpong_context {
/* Device context returned by ibv_open_device(). */
struct ibv_context *context;
/* Completion channel; NULL unless the caller asked for CQ events. */
struct ibv_comp_channel *channel;
/* Protection domain owning the MR, QP and AH. */
struct ibv_pd *pd;
/* Memory region registered over buf (size + 40 bytes). */
struct ibv_mr *mr;
/* Completion queue shared by the send and receive side of qp. */
struct ibv_cq *cq;
/* The UD queue pair. */
struct ibv_qp *qp;
/* Address handle for the remote side; created in pp_connect_ctx(). */
struct ibv_ah *ah;
/* Work buffer of size + 40 bytes. */
char *buf;
/* Payload size in bytes. */
int size;
/* Flags for every send: IBV_SEND_SIGNALED, optionally IBV_SEND_INLINE. */
int send_flags;
/* Number of receive requests kept posted. */
int rx_depth;
/* Bitmask of PINGPONG_*_WRID values whose completion is outstanding. */
int pending;
/* Port attributes; main() fills this in via pp_get_port_info(). */
struct ibv_port_attr portinfo;
};
/*
 * Addressing information of one endpoint. Both sides exchange this over
 * TCP as the text message "lid:qpn:psn:gid" (hexadecimal fields).
 */
struct pingpong_dest {
/* Local identifier of the port. */
int lid;
/* Queue pair number. */
int qpn;
/* Packet sequence number (main() picks a random 24-bit value). */
int psn;
/* GID of the port; all zero when no GID index was given. */
union ibv_gid gid;
};
static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn,
int sl, struct pingpong_dest *dest, int sgid_idx)
{
struct ibv_ah_attr ah_attr = {
.is_global = 0,
.dlid = dest->lid,
.sl = sl,
.src_path_bits = 0,
.port_num = port
};
struct ibv_qp_attr attr = {
.qp_state = IBV_QPS_RTR
};
if (ibv_modify_qp(ctx->qp, &attr, IBV_QP_STATE)) {
fprintf(stderr, "Failed to modify QP to RTR\n");
return 1;
}
attr.qp_state = IBV_QPS_RTS;
attr.sq_psn = my_psn;
if (ibv_modify_qp(ctx->qp, &attr,
IBV_QP_STATE |
IBV_QP_SQ_PSN)) {
fprintf(stderr, "Failed to modify QP to RTS\n");
return 1;
}
if (dest->gid.global.interface_id) {
ah_attr.is_global = 1;
ah_attr.grh.hop_limit = 1;
ah_attr.grh.dgid = dest->gid;
ah_attr.grh.sgid_index = sgid_idx;
}
ctx->ah = ibv_create_ah(ctx->pd, &ah_attr);
if (!ctx->ah) {
fprintf(stderr, "Failed to create AH\n");
return 1;
}
return 0;
}
/*
 * Client side of the out-of-band address exchange.
 *
 * Connects to servername:port over TCP, sends the local address as the
 * text message "lid:qpn:psn:gid" (hexadecimal, sizeof msg bytes including
 * the terminating NUL), reads the server's address in the same format and
 * acknowledges it with "done".
 *
 * Returns a malloc()ed pingpong_dest describing the server, which the
 * caller must free(), or NULL on any failure (a message is printed to
 * stderr).
 */
static struct pingpong_dest *pp_client_exch_dest(const char *servername, int port,
						 const struct pingpong_dest *my_dest)
{
	struct addrinfo *res, *t;
	struct addrinfo hints = {
		.ai_family   = AF_UNSPEC,
		.ai_socktype = SOCK_STREAM
	};
	char *service;
	char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"];
	int n;
	int sockfd = -1;
	struct pingpong_dest *rem_dest = NULL;
	char gid[33];
	unsigned int lid, qpn, psn;

	if (asprintf(&service, "%d", port) < 0)
		return NULL;

	/*
	 * getaddrinfo() reports failure with a non-zero code whose sign is
	 * not specified, so test against zero rather than for "negative".
	 */
	n = getaddrinfo(servername, service, &hints, &res);
	if (n != 0) {
		fprintf(stderr, "Error for %s:%d\n", servername, port);
		free(service);
		return NULL;
	}

	/* Try every returned address until one connects. */
	for (t = res; t; t = t->ai_next) {
		sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
		if (sockfd >= 0) {
			if (!connect(sockfd, t->ai_addr, t->ai_addrlen))
				break;
			close(sockfd);
			sockfd = -1;
		}
	}

	freeaddrinfo(res);
	free(service);

	if (sockfd < 0) {
		fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port);
		return NULL;
	}

	gid_to_wire_gid(&my_dest->gid, gid);
	/* Bounded formatting: out-of-range fields must not overrun msg. */
	snprintf(msg, sizeof msg, "%04x:%06x:%06x:%s",
		 (unsigned int) my_dest->lid, (unsigned int) my_dest->qpn,
		 (unsigned int) my_dest->psn, gid);
	if (write(sockfd, msg, sizeof msg) != sizeof msg) {
		fprintf(stderr, "Couldn't send local address\n");
		goto out;
	}

	if (read(sockfd, msg, sizeof msg) != sizeof msg ||
	    write(sockfd, "done", sizeof "done") != sizeof "done") {
		perror("client read/write");
		fprintf(stderr, "Couldn't read/write remote address\n");
		goto out;
	}

	rem_dest = malloc(sizeof *rem_dest);
	if (!rem_dest)
		goto out;

	/*
	 * The peer's bytes are untrusted: force termination and bound the
	 * GID field to the 32 characters that fit into gid[].
	 */
	msg[sizeof msg - 1] = '\0';
	if (sscanf(msg, "%x:%x:%x:%32s", &lid, &qpn, &psn, gid) != 4) {
		fprintf(stderr, "Couldn't parse remote address\n");
		free(rem_dest);
		rem_dest = NULL;
		goto out;
	}
	rem_dest->lid = lid;
	rem_dest->qpn = qpn;
	rem_dest->psn = psn;
	wire_gid_to_gid(gid, &rem_dest->gid);

out:
	close(sockfd);
	return rem_dest;
}
/*
 * Server side of the out-of-band address exchange.
 *
 * Listens on the given TCP port, accepts one connection, reads the
 * client's address ("lid:qpn:psn:gid", hexadecimal), connects the QP to it
 * via pp_connect_ctx(), sends the local address back in the same format
 * and waits for the client's "done" acknowledgement.
 *
 * Returns a malloc()ed pingpong_dest describing the client, which the
 * caller must free(), or NULL on any failure (a message is printed to
 * stderr).
 */
static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx,
						 int ib_port, int port, int sl,
						 const struct pingpong_dest *my_dest,
						 int sgid_idx)
{
	struct addrinfo *res, *t;
	struct addrinfo hints = {
		.ai_flags    = AI_PASSIVE,
		.ai_family   = AF_UNSPEC,
		.ai_socktype = SOCK_STREAM
	};
	char *service;
	char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"];
	int n;
	int sockfd = -1, connfd;
	struct pingpong_dest *rem_dest = NULL;
	char gid[33];
	unsigned int lid, qpn, psn;

	if (asprintf(&service, "%d", port) < 0)
		return NULL;

	/*
	 * getaddrinfo() reports failure with a non-zero code whose sign is
	 * not specified, so test against zero rather than for "negative".
	 */
	n = getaddrinfo(NULL, service, &hints, &res);
	if (n != 0) {
		fprintf(stderr, "Error for port %d\n", port);
		free(service);
		return NULL;
	}

	/* Bind to the first address that works. */
	for (t = res; t; t = t->ai_next) {
		sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
		if (sockfd >= 0) {
			n = 1;

			setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n);

			if (!bind(sockfd, t->ai_addr, t->ai_addrlen))
				break;
			close(sockfd);
			sockfd = -1;
		}
	}

	freeaddrinfo(res);
	free(service);

	if (sockfd < 0) {
		fprintf(stderr, "Couldn't listen to port %d\n", port);
		return NULL;
	}

	if (listen(sockfd, 1) < 0) {
		fprintf(stderr, "Couldn't listen to port %d\n", port);
		close(sockfd);
		return NULL;
	}
	connfd = accept(sockfd, NULL, NULL);
	close(sockfd);
	if (connfd < 0) {
		fprintf(stderr, "accept() failed\n");
		return NULL;
	}

	n = read(connfd, msg, sizeof msg);
	if (n != sizeof msg) {
		perror("server read");
		fprintf(stderr, "%d/%d: Couldn't read remote address\n", n, (int) sizeof msg);
		goto out;
	}

	rem_dest = malloc(sizeof *rem_dest);
	if (!rem_dest)
		goto out;

	/*
	 * The peer's bytes are untrusted: force termination and bound the
	 * GID field to the 32 characters that fit into gid[].
	 */
	msg[sizeof msg - 1] = '\0';
	if (sscanf(msg, "%x:%x:%x:%32s", &lid, &qpn, &psn, gid) != 4) {
		fprintf(stderr, "Couldn't parse remote address\n");
		free(rem_dest);
		rem_dest = NULL;
		goto out;
	}
	rem_dest->lid = lid;
	rem_dest->qpn = qpn;
	rem_dest->psn = psn;
	wire_gid_to_gid(gid, &rem_dest->gid);

	/* The QP must be connected before the client is told to start. */
	if (pp_connect_ctx(ctx, ib_port, my_dest->psn, sl, rem_dest,
			   sgid_idx)) {
		fprintf(stderr, "Couldn't connect to remote QP\n");
		free(rem_dest);
		rem_dest = NULL;
		goto out;
	}

	gid_to_wire_gid(&my_dest->gid, gid);
	/* Bounded formatting: out-of-range fields must not overrun msg. */
	snprintf(msg, sizeof msg, "%04x:%06x:%06x:%s",
		 (unsigned int) my_dest->lid, (unsigned int) my_dest->qpn,
		 (unsigned int) my_dest->psn, gid);
	if (write(connfd, msg, sizeof msg) != sizeof msg ||
	    read(connfd, msg, sizeof msg) != sizeof "done") {
		fprintf(stderr, "Couldn't send/recv local address\n");
		free(rem_dest);
		rem_dest = NULL;
		goto out;
	}

out:
	close(connfd);
	return rem_dest;
}
static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
int rx_depth, int port,
int use_event)
{
struct pingpong_context *ctx;
ctx = malloc(sizeof *ctx);
if (!ctx)
return NULL;
ctx->size = size;
ctx->send_flags = IBV_SEND_SIGNALED;
ctx->rx_depth = rx_depth;
ctx->buf = memalign(page_size, size + 40);
if (!ctx->buf) {
fprintf(stderr, "Couldn't allocate work buf.\n");
goto clean_ctx;
}
/* FIXME memset(ctx->buf, 0, size + 40); */
memset(ctx->buf, 0x7b, size + 40);
ctx->context = ibv_open_device(ib_dev);
if (!ctx->context) {
fprintf(stderr, "Couldn't get context for %s\n",
ibv_get_device_name(ib_dev));
goto clean_buffer;
}
{
struct ibv_port_attr port_info = {};
int mtu;
if (ibv_query_port(ctx->context, port, &port_info)) {
fprintf(stderr, "Unable to query port info for port %d\n", port);
goto clean_device;
}
mtu = 1 << (port_info.active_mtu + 7);
if (size > mtu) {
fprintf(stderr, "Requested size larger than port MTU (%d)\n", mtu);
goto clean_device;
}
}
if (use_event) {
ctx->channel = ibv_create_comp_channel(ctx->context);
if (!ctx->channel) {
fprintf(stderr, "Couldn't create completion channel\n");
goto clean_device;
}
} else
ctx->channel = NULL;
ctx->pd = ibv_alloc_pd(ctx->context);
if (!ctx->pd) {
fprintf(stderr, "Couldn't allocate PD\n");
goto clean_comp_channel;
}
ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size + 40, IBV_ACCESS_LOCAL_WRITE);
if (!ctx->mr) {
fprintf(stderr, "Couldn't register MR\n");
goto clean_pd;
}
ctx->cq = ibv_create_cq(ctx->context, rx_depth + 1, NULL,
ctx->channel, 0);
if (!ctx->cq) {
fprintf(stderr, "Couldn't create CQ\n");
goto clean_mr;
}
{
struct ibv_qp_attr attr;
struct ibv_qp_init_attr init_attr = {
.send_cq = ctx->cq,
.recv_cq = ctx->cq,
.cap = {
.max_send_wr = 1,
.max_recv_wr = rx_depth,
.max_send_sge = 1,
.max_recv_sge = 1
},
.qp_type = IBV_QPT_UD,
};
ctx->qp = ibv_create_qp(ctx->pd, &init_attr);
if (!ctx->qp) {
fprintf(stderr, "Couldn't create QP\n");
goto clean_cq;
}
ibv_query_qp(ctx->qp, &attr, IBV_QP_CAP, &init_attr);
if (init_attr.cap.max_inline_data >= size) {
ctx->send_flags |= IBV_SEND_INLINE;
}
}
{
struct ibv_qp_attr attr = {
.qp_state = IBV_QPS_INIT,
.pkey_index = 0,
.port_num = port,
.qkey = 0x11111111
};
if (ibv_modify_qp(ctx->qp, &attr,
IBV_QP_STATE |
IBV_QP_PKEY_INDEX |
IBV_QP_PORT |
IBV_QP_QKEY)) {
fprintf(stderr, "Failed to modify QP to INIT\n");
goto clean_qp;
}
}
return ctx;
clean_qp:
ibv_destroy_qp(ctx->qp);
clean_cq:
ibv_destroy_cq(ctx->cq);
clean_mr:
ibv_dereg_mr(ctx->mr);
clean_pd:
ibv_dealloc_pd(ctx->pd);
clean_comp_channel:
if (ctx->channel)
ibv_destroy_comp_channel(ctx->channel);
clean_device:
ibv_close_device(ctx->context);
clean_buffer:
free(ctx->buf);
clean_ctx:
free(ctx);
return NULL;
}
/*
 * Release everything held by ctx: QP, CQ, MR, AH, PD, the completion
 * channel (if one exists), the device context, the work buffer and
 * finally ctx itself.
 *
 * Stops at the first verbs call that fails, prints a message to stderr and
 * returns 1; the remaining resources are then left untouched. Returns 0
 * when everything was released.
 */
static int pp_close_ctx(struct pingpong_context *ctx)
{
	if (ibv_destroy_qp(ctx->qp) != 0) {
		fprintf(stderr, "Couldn't destroy QP\n");
		return 1;
	}

	if (ibv_destroy_cq(ctx->cq) != 0) {
		fprintf(stderr, "Couldn't destroy CQ\n");
		return 1;
	}

	if (ibv_dereg_mr(ctx->mr) != 0) {
		fprintf(stderr, "Couldn't deregister MR\n");
		return 1;
	}

	if (ibv_destroy_ah(ctx->ah) != 0) {
		fprintf(stderr, "Couldn't destroy AH\n");
		return 1;
	}

	if (ibv_dealloc_pd(ctx->pd) != 0) {
		fprintf(stderr, "Couldn't deallocate PD\n");
		return 1;
	}

	/* The channel only exists when CQ events were requested. */
	if (ctx->channel && ibv_destroy_comp_channel(ctx->channel) != 0) {
		fprintf(stderr, "Couldn't destroy completion channel\n");
		return 1;
	}

	if (ibv_close_device(ctx->context) != 0) {
		fprintf(stderr, "Couldn't release context\n");
		return 1;
	}

	free(ctx->buf);
	free(ctx);

	return 0;
}
static int pp_post_recv(struct pingpong_context *ctx, int n)
{
struct ibv_sge list = {
.addr = (uintptr_t) ctx->buf,
.length = ctx->size + 40,
.lkey = ctx->mr->lkey
};
struct ibv_recv_wr wr = {
.wr_id = PINGPONG_RECV_WRID,
.sg_list = &list,
.num_sge = 1,
};
struct ibv_recv_wr *bad_wr;
int i;
for (i = 0; i < n; ++i)
if (ibv_post_recv(ctx->qp, &wr, &bad_wr))
break;
return i;
}
static int pp_post_send(struct pingpong_context *ctx, uint32_t qpn)
{
struct ibv_sge list = {
.addr = (uintptr_t) ctx->buf + 40,
.length = ctx->size,
.lkey = ctx->mr->lkey
};
struct ibv_send_wr wr = {
.wr_id = PINGPONG_SEND_WRID,
.sg_list = &list,
.num_sge = 1,
.opcode = IBV_WR_SEND,
.send_flags = ctx->send_flags,
.wr = {
.ud = {
.ah = ctx->ah,
.remote_qpn = qpn,
.remote_qkey = 0x11111111
}
}
};
struct ibv_send_wr *bad_wr;
return ibv_post_send(ctx->qp, &wr, &bad_wr);
}
/*
 * Print the command-line help to stdout. argv0 is the program name shown
 * in the two invocation examples.
 */
static void usage(const char *argv0)
{
	/* One entry per option, printed verbatim in this order. */
	static const char *const option_lines[] = {
		" -p, --port=<port> listen on/connect to port <port> (default 18515)\n",
		" -d, --ib-dev=<dev> use IB device <dev> (default first device found)\n",
		" -i, --ib-port=<port> use port <port> of IB device (default 1)\n",
		" -s, --size=<size> size of message to exchange (default 2048)\n",
		" -r, --rx-depth=<dep> number of receives to post at a time (default 500)\n",
		" -n, --iters=<iters> number of exchanges (default 1000)\n",
		" -l, --sl=<SL> send messages with service level <SL> (default 0)\n",
		" -e, --events sleep on CQ events (default poll)\n",
		" -g, --gid-idx=<gid index> local port gid index\n",
		" -c, --chk validate received buffer\n",
	};
	const size_t line_count = sizeof option_lines / sizeof option_lines[0];
	size_t k;

	printf("Usage:\n");
	printf(" %s start a server and wait for connection\n", argv0);
	printf(" %s <host> connect to server at <host>\n", argv0);
	printf("\n");
	printf("Options:\n");

	for (k = 0; k < line_count; ++k)
		fputs(option_lines[k], stdout);
}
int main(int argc, char *argv[])
{
struct ibv_device **dev_list;
struct ibv_device *ib_dev;
struct pingpong_context *ctx;
struct pingpong_dest my_dest;
struct pingpong_dest *rem_dest;
struct timeval start, end;
char *ib_devname = NULL;
char *servername = NULL;
unsigned int port = 18515;
int ib_port = 1;
unsigned int size = 2048;
unsigned int rx_depth = 500;
unsigned int iters = 1000;
int use_event = 0;
int routs;
int rcnt, scnt;
int num_cq_events = 0;
int sl = 0;
int gidx = -1;
char gid[33];
srand48(getpid() * time(NULL));
while (1) {
int c;
static struct option long_options[] = {
{ .name = "port", .has_arg = 1, .val = 'p' },
{ .name = "ib-dev", .has_arg = 1, .val = 'd' },
{ .name = "ib-port", .has_arg = 1, .val = 'i' },
{ .name = "size", .has_arg = 1, .val = 's' },
{ .name = "rx-depth", .has_arg = 1, .val = 'r' },
{ .name = "iters", .has_arg = 1, .val = 'n' },
{ .name = "sl", .has_arg = 1, .val = 'l' },
{ .name = "events", .has_arg = 0, .val = 'e' },
{ .name = "gid-idx", .has_arg = 1, .val = 'g' },
{ .name = "chk", .has_arg = 0, .val = 'c' },
{}
};
c = getopt_long(argc, argv, "p:d:i:s:r:n:l:eg:c:", long_options,
NULL);
if (c == -1)
break;
switch (c) {
case 'p':
port = strtol(optarg, NULL, 0);
if (port > 65535) {
usage(argv[0]);
return 1;
}
break;
case 'd':
ib_devname = strdupa(optarg);
break;
case 'i':
ib_port = strtol(optarg, NULL, 0);
if (ib_port < 1) {
usage(argv[0]);
return 1;
}
break;
case 's':
size = strtoul(optarg, NULL, 0);
break;
case 'r':
rx_depth = strtoul(optarg, NULL, 0);
break;
case 'n':
iters = strtoul(optarg, NULL, 0);
break;
case 'l':
sl = strtol(optarg, NULL, 0);
break;
case 'e':
++use_event;
break;
case 'g':
gidx = strtol(optarg, NULL, 0);
break;
case 'c':
validate_buf = 1;
break;
default:
usage(argv[0]);
return 1;
}
}
if (optind == argc - 1)
servername = strdupa(argv[optind]);
else if (optind < argc) {
usage(argv[0]);
return 1;
}
page_size = PAGE_SIZE;
dev_list = ibv_get_device_list(NULL);
if (!dev_list) {
perror("Failed to get IB devices list");
return 1;
}
if (!ib_devname) {
ib_dev = *dev_list;
if (!ib_dev) {
fprintf(stderr, "No IB devices found\n");
return 1;
}
} else {
int i;
for (i = 0; dev_list[i]; ++i)
if (!strcmp(ibv_get_device_name(dev_list[i]), ib_devname))
break;
ib_dev = dev_list[i];
if (!ib_dev) {
fprintf(stderr, "IB device %s not found\n", ib_devname);
return 1;
}
}
ctx = pp_init_ctx(ib_dev, size, rx_depth, ib_port, use_event);
if (!ctx)
return 1;
routs = pp_post_recv(ctx, ctx->rx_depth);
if (routs < ctx->rx_depth) {
fprintf(stderr, "Couldn't post receive (%d)\n", routs);
return 1;
}
if (use_event)
if (ibv_req_notify_cq(ctx->cq, 0)) {
fprintf(stderr, "Couldn't request CQ notification\n");
return 1;
}
if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) {
fprintf(stderr, "Couldn't get port info\n");
return 1;
}
my_dest.lid = ctx->portinfo.lid;
my_dest.qpn = ctx->qp->qp_num;
my_dest.psn = lrand48() & 0xffffff;
if (gidx >= 0) {
if (ibv_query_gid(ctx->context, ib_port, gidx, &my_dest.gid)) {
fprintf(stderr, "Could not get local gid for gid index "
"%d\n", gidx);
return 1;
}
} else
memset(&my_dest.gid, 0, sizeof my_dest.gid);
printf(" local address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x\n",
my_dest.lid, my_dest.qpn, my_dest.psn);
if (servername)
rem_dest = pp_client_exch_dest(servername, port, &my_dest);
else
rem_dest = pp_server_exch_dest(ctx, ib_port, port, sl,
&my_dest, gidx);
if (!rem_dest)
return 1;
printf(" remote address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x\n",
rem_dest->lid, rem_dest->qpn, rem_dest->psn);
if (servername)
if (pp_connect_ctx(ctx, ib_port, my_dest.psn, sl, rem_dest,
gidx))
return 1;
ctx->pending = PINGPONG_RECV_WRID;
if (servername) {
if (validate_buf)
for (int i = 0; i < size; i += page_size)
ctx->buf[i] = i / page_size % sizeof(char);
if (pp_post_send(ctx, rem_dest->qpn)) {
fprintf(stderr, "Couldn't post send\n");
return 1;
}
ctx->pending |= PINGPONG_SEND_WRID;
}
if (gettimeofday(&start, NULL)) {
perror("gettimeofday");
return 1;
}
rcnt = scnt = 0;
while (rcnt < iters || scnt < iters) {
if (use_event) {
struct ibv_cq *ev_cq;
void *ev_ctx;
if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) {
fprintf(stderr, "Failed to get cq_event\n");
return 1;
}
++num_cq_events;
if (ev_cq != ctx->cq) {
fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq);
return 1;
}
if (ibv_req_notify_cq(ctx->cq, 0)) {
fprintf(stderr, "Couldn't request CQ notification\n");
return 1;
}
}
{
struct ibv_wc wc[2];
int ne, i;
do {
ne = ibv_poll_cq(ctx->cq, 2, wc);
if (ne < 0) {
fprintf(stderr, "poll CQ failed %d\n", ne);
return 1;
}
} while (!use_event && ne < 1);
for (i = 0; i < ne; ++i) {
if (wc[i].status != IBV_WC_SUCCESS) {
fprintf(stderr, "Failed status: %s (%d) (for wr_id %d)\n",
ibv_wc_status_str(wc[i].status),
wc[i].status, (int) wc[i].wr_id);
return 1;
}
switch ((int) wc[i].wr_id) {
case PINGPONG_SEND_WRID:
++scnt;
break;
case PINGPONG_RECV_WRID:
if (--routs <= 1) {
routs += pp_post_recv(ctx, ctx->rx_depth - routs);
if (routs < ctx->rx_depth) {
fprintf(stderr,
"Couldn't post receive (%d)\n",
routs);
return 1;
}
}
++rcnt;
break;
default:
fprintf(stderr, "Completion for unknown wr_id %d\n",
(int) wc[i].wr_id);
return 1;
}
ctx->pending &= ~(int) wc[i].wr_id;
if (scnt < iters && !ctx->pending) {
if (pp_post_send(ctx, rem_dest->qpn)) {
fprintf(stderr, "Couldn't post send\n");
return 1;
}
ctx->pending = PINGPONG_RECV_WRID |
PINGPONG_SEND_WRID;
}
}
}
}
if (gettimeofday(&end, NULL)) {
perror("gettimeofday");
return 1;
}
{
float usec = (end.tv_sec - start.tv_sec) * 1000000 +
(end.tv_usec - start.tv_usec);
long long bytes = (long long) size * iters * 2;
printf("%lld bytes in %.2f seconds = %.2f Mbit/sec\n",
bytes, usec / 1000000., bytes * 8. / usec);
printf("%d iters in %.2f seconds = %.2f usec/iter\n",
iters, usec / 1000000., usec / iters);
if ((!servername) && (validate_buf)) {
for (int i = 0; i < size; i += page_size)
if (ctx->buf[i] != i / page_size % sizeof(char))
printf("invalid data in page %d\n",
i / page_size);
}
}
ibv_ack_cq_events(ctx->cq, num_cq_events);
if (pp_close_ctx(ctx))
return 1;
ibv_free_device_list(dev_list);
free(rem_dest);
return 0;
}

1023
usr/tests/ib/xsrq_pingpong.c Normal file

File diff suppressed because it is too large Load diff