diff --git a/include/hermit/stddef.h b/include/hermit/stddef.h index fa31b9f53..ee27cc799 100644 --- a/include/hermit/stddef.h +++ b/include/hermit/stddef.h @@ -1,6 +1,6 @@ /* * Copyright (c) 2010, Stefan Lankes, RWTH Aachen University - * Copyright (c) 2018, Annika Wierichs, RWTH Aachen University + * 2018, Annika Wierichs, RWTH Aachen University * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -69,99 +69,95 @@ extern const size_t image_size; // KVM I/O ports corresponding to verbs API functions #define UHYVE_PORT_SET_IB_POOL_ADDR 0x610 -#define UHYVE_PORT_IBV_WC_STATUS_STR 0x611 -#define UHYVE_PORT_IBV_RATE_TO_MULT 0x612 -#define UHYVE_PORT_MULT_TO_IBV_RATE 0x613 -#define UHYVE_PORT_IBV_RATE_TO_MBPS 0x614 -#define UHYVE_PORT_MBPS_TO_IBV_RATE 0x615 -#define UHYVE_PORT_IBV_CQ_EX_TO_CQ 0x616 -#define UHYVE_PORT_IBV_START_POLL 0x617 -#define UHYVE_PORT_IBV_NEXT_POLL 0x618 -#define UHYVE_PORT_IBV_END_POLL 0x619 -#define UHYVE_PORT_IBV_WC_READ_OPCODE 0x61A -#define UHYVE_PORT_IBV_WC_READ_VENDOR_ERR 0x61B -#define UHYVE_PORT_IBV_WC_READ_BYTE_LEN 0x61C -#define UHYVE_PORT_IBV_WC_READ_IMM_DATA 0x61D -#define UHYVE_PORT_IBV_WC_READ_INVALIDATED_RKEY 0x61E -#define UHYVE_PORT_IBV_WC_READ_QP_NUM 0x61F -#define UHYVE_PORT_IBV_WC_READ_SRC_QP 0x620 -#define UHYVE_PORT_IBV_WC_READ_WC_FLAGS 0x621 -#define UHYVE_PORT_IBV_WC_READ_SLID 0x622 -#define UHYVE_PORT_IBV_WC_READ_SL 0x623 -#define UHYVE_PORT_IBV_WC_READ_DLID_PATH_BITS 0x624 -#define UHYVE_PORT_IBV_WC_READ_COMPLETION_TS 0x625 -#define UHYVE_PORT_IBV_WC_READ_CVLAN 0x626 -#define UHYVE_PORT_IBV_WC_READ_FLOW_TAG 0x627 -#define UHYVE_PORT_IBV_POST_WQ_RECV 0x628 -#define UHYVE_PORT_IBV_GET_DEVICE_LIST 0x629 -#define UHYVE_PORT_IBV_FREE_DEVICE_LIST 0x62A -#define UHYVE_PORT_IBV_GET_DEVICE_NAME 0x62B -#define UHYVE_PORT_IBV_GET_DEVICE_GUID 0x62C -#define UHYVE_PORT_IBV_OPEN_DEVICE 0x62D -#define UHYVE_PORT_IBV_CLOSE_DEVICE 0x62E -#define UHYVE_PORT_IBV_GET_ASYNC_EVENT 0x62F -#define 
UHYVE_PORT_IBV_ACK_ASYNC_EVENT 0x630 -#define UHYVE_PORT_IBV_QUERY_DEVICE 0x631 -#define UHYVE_PORT_IBV_QUERY_PORT 0x632 -#define UHYVE_PORT_IBV_QUERY_GID 0x633 -#define UHYVE_PORT_IBV_QUERY_PKEY 0x634 -#define UHYVE_PORT_IBV_ALLOC_PD 0x635 -#define UHYVE_PORT_IBV_DEALLOC_PD 0x636 -#define UHYVE_PORT_IBV_CREATE_FLOW 0x637 -#define UHYVE_PORT_IBV_DESTROY_FLOW 0x638 -#define UHYVE_PORT_IBV_OPEN_XRCD 0x639 -#define UHYVE_PORT_IBV_CLOSE_XRCD 0x63A -#define UHYVE_PORT_IBV_REG_MR 0x63B -#define UHYVE_PORT_IBV_REREG_MR 0x63C -#define UHYVE_PORT_IBV_DEREG_MR 0x63D -#define UHYVE_PORT_IBV_ALLOC_MW 0x63E -#define UHYVE_PORT_IBV_DEALLOC_MW 0x63F -#define UHYVE_PORT_IBV_INC_RKEY 0x640 -#define UHYVE_PORT_IBV_BIND_MW 0x641 -#define UHYVE_PORT_IBV_CREATE_COMP_CHANNEL 0x642 -#define UHYVE_PORT_IBV_DESTROY_COMP_CHANNEL 0x643 -#define UHYVE_PORT_IBV_CREATE_CQ 0x644 -#define UHYVE_PORT_IBV_CREATE_CQ_EX 0x645 -#define UHYVE_PORT_IBV_RESIZE_CQ 0x646 -#define UHYVE_PORT_IBV_DESTROY_CQ 0x647 -#define UHYVE_PORT_IBV_GET_CQ_EVENT 0x648 -#define UHYVE_PORT_IBV_ACK_CQ_EVENTS 0x649 -#define UHYVE_PORT_IBV_POLL_CQ 0x64A -#define UHYVE_PORT_IBV_REQ_NOTIFY_CQ 0x64B -#define UHYVE_PORT_IBV_CREATE_SRQ 0x64C -#define UHYVE_PORT_IBV_CREATE_SRQ_EX 0x64D -#define UHYVE_PORT_IBV_MODIFY_SRQ 0x64E -#define UHYVE_PORT_IBV_QUERY_SRQ 0x64F -#define UHYVE_PORT_IBV_GET_SRQ_NUM 0x650 -#define UHYVE_PORT_IBV_DESTROY_SRQ 0x651 -#define UHYVE_PORT_IBV_POST_SRQ_RECV 0x652 -#define UHYVE_PORT_IBV_CREATE_QP 0x653 -#define UHYVE_PORT_IBV_CREATE_QP_EX 0x654 -#define UHYVE_PORT_IBV_QUERY_RT_VALUES_EX 0x655 -#define UHYVE_PORT_IBV_QUERY_DEVICE_EX 0x656 -#define UHYVE_PORT_IBV_OPEN_QP 0x657 -#define UHYVE_PORT_IBV_MODIFY_QP 0x658 -#define UHYVE_PORT_IBV_QUERY_QP 0x659 -#define UHYVE_PORT_IBV_DESTROY_QP 0x65A -#define UHYVE_PORT_IBV_CREATE_WQ 0x65B -#define UHYVE_PORT_IBV_MODIFY_WQ 0x65C -#define UHYVE_PORT_IBV_DESTROY_WQ 0x65D -#define UHYVE_PORT_IBV_CREATE_RWQ_IND_TABLE 0x65E -#define 
UHYVE_PORT_IBV_DESTROY_RWQ_IND_TABLE 0x65F -#define UHYVE_PORT_IBV_POST_SEND 0x660 -#define UHYVE_PORT_IBV_POST_RECV 0x661 -#define UHYVE_PORT_IBV_CREATE_AH 0x662 -#define UHYVE_PORT_IBV_INIT_AH_FROM_WC 0x663 -#define UHYVE_PORT_IBV_CREATE_AH_FROM_WC 0x664 -#define UHYVE_PORT_IBV_DESTROY_AH 0x665 -#define UHYVE_PORT_IBV_ATTACH_MCAST 0x666 -#define UHYVE_PORT_IBV_DETACH_MCAST 0x667 -#define UHYVE_PORT_IBV_FORK_INIT 0x668 -#define UHYVE_PORT_IBV_NODE_TYPE_STR 0x669 -#define UHYVE_PORT_IBV_PORT_STATE_STR 0x66A -#define UHYVE_PORT_IBV_EVENT_TYPE_STR 0x66B -// #define UHYVE_PORT_IBV_RESOLVE_ETH_L2_FROM_GID 0x66C -#define UHYVE_PORT_IBV_IS_QPT_SUPPORTED 0x66D +#define UHYVE_PORT_IBV_RATE_TO_MULT 0x611 +#define UHYVE_PORT_MULT_TO_IBV_RATE 0x612 +#define UHYVE_PORT_IBV_RATE_TO_MBPS 0x613 +#define UHYVE_PORT_MBPS_TO_IBV_RATE 0x614 +#define UHYVE_PORT_IBV_CQ_EX_TO_CQ 0x615 +#define UHYVE_PORT_IBV_START_POLL 0x616 +#define UHYVE_PORT_IBV_NEXT_POLL 0x617 +#define UHYVE_PORT_IBV_END_POLL 0x618 +#define UHYVE_PORT_IBV_WC_READ_OPCODE 0x619 +#define UHYVE_PORT_IBV_WC_READ_VENDOR_ERR 0x61A +#define UHYVE_PORT_IBV_WC_READ_BYTE_LEN 0x61B +#define UHYVE_PORT_IBV_WC_READ_IMM_DATA 0x61C +#define UHYVE_PORT_IBV_WC_READ_INVALIDATED_RKEY 0x61D +#define UHYVE_PORT_IBV_WC_READ_QP_NUM 0x61E +#define UHYVE_PORT_IBV_WC_READ_SRC_QP 0x61F +#define UHYVE_PORT_IBV_WC_READ_WC_FLAGS 0x620 +#define UHYVE_PORT_IBV_WC_READ_SLID 0x621 +#define UHYVE_PORT_IBV_WC_READ_SL 0x622 +#define UHYVE_PORT_IBV_WC_READ_DLID_PATH_BITS 0x623 +#define UHYVE_PORT_IBV_WC_READ_COMPLETION_TS 0x624 +#define UHYVE_PORT_IBV_WC_READ_CVLAN 0x625 +#define UHYVE_PORT_IBV_WC_READ_FLOW_TAG 0x626 +#define UHYVE_PORT_IBV_POST_WQ_RECV 0x627 +#define UHYVE_PORT_IBV_GET_DEVICE_LIST 0x628 +#define UHYVE_PORT_IBV_FREE_DEVICE_LIST 0x629 +#define UHYVE_PORT_IBV_GET_DEVICE_NAME 0x62A +#define UHYVE_PORT_IBV_GET_DEVICE_GUID 0x62B +#define UHYVE_PORT_IBV_OPEN_DEVICE 0x62C +#define UHYVE_PORT_IBV_CLOSE_DEVICE 0x62D +#define 
UHYVE_PORT_IBV_GET_ASYNC_EVENT 0x62E +#define UHYVE_PORT_IBV_ACK_ASYNC_EVENT 0x62F +#define UHYVE_PORT_IBV_QUERY_DEVICE 0x630 +#define UHYVE_PORT_IBV_QUERY_PORT 0x631 +#define UHYVE_PORT_IBV_QUERY_GID 0x632 +#define UHYVE_PORT_IBV_QUERY_PKEY 0x633 +#define UHYVE_PORT_IBV_ALLOC_PD 0x634 +#define UHYVE_PORT_IBV_DEALLOC_PD 0x635 +#define UHYVE_PORT_IBV_CREATE_FLOW 0x636 +#define UHYVE_PORT_IBV_DESTROY_FLOW 0x637 +#define UHYVE_PORT_IBV_OPEN_XRCD 0x638 +#define UHYVE_PORT_IBV_CLOSE_XRCD 0x639 +#define UHYVE_PORT_IBV_REG_MR 0x63A +#define UHYVE_PORT_IBV_REREG_MR 0x63B +#define UHYVE_PORT_IBV_DEREG_MR 0x63C +#define UHYVE_PORT_IBV_ALLOC_MW 0x63D +#define UHYVE_PORT_IBV_DEALLOC_MW 0x63E +#define UHYVE_PORT_IBV_INC_RKEY 0x63F +#define UHYVE_PORT_IBV_BIND_MW 0x640 +#define UHYVE_PORT_IBV_CREATE_COMP_CHANNEL 0x641 +#define UHYVE_PORT_IBV_DESTROY_COMP_CHANNEL 0x642 +#define UHYVE_PORT_IBV_CREATE_CQ 0x643 +#define UHYVE_PORT_IBV_CREATE_CQ_EX 0x644 +#define UHYVE_PORT_IBV_RESIZE_CQ 0x645 +#define UHYVE_PORT_IBV_DESTROY_CQ 0x646 +#define UHYVE_PORT_IBV_GET_CQ_EVENT 0x647 +#define UHYVE_PORT_IBV_ACK_CQ_EVENTS 0x648 +#define UHYVE_PORT_IBV_POLL_CQ 0x649 +#define UHYVE_PORT_IBV_REQ_NOTIFY_CQ 0x64A +#define UHYVE_PORT_IBV_CREATE_SRQ 0x64B +#define UHYVE_PORT_IBV_CREATE_SRQ_EX 0x64C +#define UHYVE_PORT_IBV_MODIFY_SRQ 0x64D +#define UHYVE_PORT_IBV_QUERY_SRQ 0x64E +#define UHYVE_PORT_IBV_GET_SRQ_NUM 0x64F +#define UHYVE_PORT_IBV_DESTROY_SRQ 0x650 +#define UHYVE_PORT_IBV_POST_SRQ_RECV 0x651 +#define UHYVE_PORT_IBV_CREATE_QP 0x652 +#define UHYVE_PORT_IBV_CREATE_QP_EX 0x653 +#define UHYVE_PORT_IBV_QUERY_RT_VALUES_EX 0x654 +#define UHYVE_PORT_IBV_QUERY_DEVICE_EX 0x655 +#define UHYVE_PORT_IBV_OPEN_QP 0x656 +#define UHYVE_PORT_IBV_MODIFY_QP 0x657 +#define UHYVE_PORT_IBV_QUERY_QP 0x658 +#define UHYVE_PORT_IBV_DESTROY_QP 0x659 +#define UHYVE_PORT_IBV_CREATE_WQ 0x65A +#define UHYVE_PORT_IBV_MODIFY_WQ 0x65B +#define UHYVE_PORT_IBV_DESTROY_WQ 0x65C +#define UHYVE_PORT_IBV_CREATE_RWQ_IND_TABLE 
0x65D +#define UHYVE_PORT_IBV_DESTROY_RWQ_IND_TABLE 0x65E +#define UHYVE_PORT_IBV_POST_SEND 0x65F +#define UHYVE_PORT_IBV_POST_RECV 0x660 +#define UHYVE_PORT_IBV_CREATE_AH 0x661 +#define UHYVE_PORT_IBV_INIT_AH_FROM_WC 0x662 +#define UHYVE_PORT_IBV_CREATE_AH_FROM_WC 0x663 +#define UHYVE_PORT_IBV_DESTROY_AH 0x664 +#define UHYVE_PORT_IBV_ATTACH_MCAST 0x665 +#define UHYVE_PORT_IBV_DETACH_MCAST 0x666 +#define UHYVE_PORT_IBV_FORK_INIT 0x667 +// #define UHYVE_PORT_IBV_RESOLVE_ETH_L2_FROM_GID 0x668 +#define UHYVE_PORT_IBV_IS_QPT_SUPPORTED 0x669 #define BUILTIN_EXPECT(exp, b) __builtin_expect((exp), (b)) //#define BUILTIN_EXPECT(exp, b) (exp) diff --git a/include/hermit/verbs.h b/include/hermit/verbs.h index b425d4226..e1934735b 100644 --- a/include/hermit/verbs.h +++ b/include/hermit/verbs.h @@ -425,7 +425,9 @@ enum ibv_wc_status { IBV_WC_INV_EEC_STATE_ERR, IBV_WC_FATAL_ERR, IBV_WC_RESP_TIMEOUT_ERR, - IBV_WC_GENERAL_ERR + IBV_WC_GENERAL_ERR, + IBV_WC_TM_ERR, + IBV_WC_TM_RNDV_INCOMPLETE, }; enum ibv_wc_opcode { diff --git a/kernel/ibv.c b/kernel/ibv.c index 7f4698131..079de4b2f 100644 --- a/kernel/ibv.c +++ b/kernel/ibv.c @@ -29,7 +29,22 @@ * This file implements the verbs API functions. Each function performs necessary * pointer conversions for its parameters, writes its arguments struct to uhyve's * KVM I/O port that belongs to the function, and reverts changes to the parameters - * before returning. Functions requiring non-trivial conversions are listed first. + * before returning. 
+ * + * Functions requiring non-trivial conversions are listed first: + * - ibv_post_send + * - ibv_post_wq_recv + * - ibv_post_srq_recv + * - ibv_post_recv + * - ibv_create_rwq_ind_table + * - ibv_open_xrcd + * + * A few trivial functions that match enum values with a const char* string are not + * forwarded to uhyve and entirely implemented in HermitCore; they are listed second: + * - ibv_wc_status_str + * - ibv_node_type_str + * - ibv_port_state_str + * - ibv_event_type_str */ @@ -52,7 +67,6 @@ /* } */ - /* * ibv_post_send */ @@ -478,20 +492,115 @@ struct ibv_xrcd * ibv_open_xrcd(struct ibv_context * context, struct ibv_xrcd_in * ibv_wc_status_str */ -typedef struct { - // Parameters: - enum ibv_wc_status status; - // Return value: - const char * ret; -} __attribute__((packed)) uhyve_ibv_wc_status_str_t; - const char * ibv_wc_status_str(enum ibv_wc_status status) { - uhyve_ibv_wc_status_str_t uhyve_args; - uhyve_args.status = status; + static const char *const wc_status_str[] = { + [IBV_WC_SUCCESS] = "success", + [IBV_WC_LOC_LEN_ERR] = "local length error", + [IBV_WC_LOC_QP_OP_ERR] = "local QP operation error", + [IBV_WC_LOC_EEC_OP_ERR] = "local EE context operation error", + [IBV_WC_LOC_PROT_ERR] = "local protection error", + [IBV_WC_WR_FLUSH_ERR] = "Work Request Flushed Error", + [IBV_WC_MW_BIND_ERR] = "memory management operation error", + [IBV_WC_BAD_RESP_ERR] = "bad response error", + [IBV_WC_LOC_ACCESS_ERR] = "local access error", + [IBV_WC_REM_INV_REQ_ERR] = "remote invalid request error", + [IBV_WC_REM_ACCESS_ERR] = "remote access error", + [IBV_WC_REM_OP_ERR] = "remote operation error", + [IBV_WC_RETRY_EXC_ERR] = "transport retry counter exceeded", + [IBV_WC_RNR_RETRY_EXC_ERR] = "RNR retry counter exceeded", + [IBV_WC_LOC_RDD_VIOL_ERR] = "local RDD violation error", + [IBV_WC_REM_INV_RD_REQ_ERR] = "remote invalid RD request", + [IBV_WC_REM_ABORT_ERR] = "aborted error", + [IBV_WC_INV_EECN_ERR] = "invalid EE context number", + [IBV_WC_INV_EEC_STATE_ERR] = 
"invalid EE context state", + [IBV_WC_FATAL_ERR] = "fatal error", + [IBV_WC_RESP_TIMEOUT_ERR] = "response timeout error", + [IBV_WC_GENERAL_ERR] = "general error", + [IBV_WC_TM_ERR] = "TM error", + [IBV_WC_TM_RNDV_INCOMPLETE] = "TM software rendezvous", + }; - uhyve_send(UHYVE_PORT_IBV_WC_STATUS_STR, (unsigned) virt_to_phys((size_t) &uhyve_args)); + if (status < IBV_WC_SUCCESS || status > IBV_WC_TM_RNDV_INCOMPLETE) + return "unknown"; - return uhyve_args.ret; + return wc_status_str[status]; +} + + +/* + * ibv_node_type_str + */ + +const char * ibv_node_type_str(enum ibv_node_type node_type) { + static const char *const node_type_str[] = { + [IBV_NODE_CA] = "InfiniBand channel adapter", + [IBV_NODE_SWITCH] = "InfiniBand switch", + [IBV_NODE_ROUTER] = "InfiniBand router", + [IBV_NODE_RNIC] = "iWARP NIC", + [IBV_NODE_USNIC] = "usNIC", + [IBV_NODE_USNIC_UDP] = "usNIC UDP", + }; + + if (node_type < IBV_NODE_CA || node_type > IBV_NODE_USNIC_UDP) + return "unknown"; + + return node_type_str[node_type]; +} + + +/* + * ibv_port_state_str + */ + +const char * ibv_port_state_str(enum ibv_port_state port_state) { + static const char *const port_state_str[] = { + [IBV_PORT_NOP] = "no state change (NOP)", + [IBV_PORT_DOWN] = "down", + [IBV_PORT_INIT] = "init", + [IBV_PORT_ARMED] = "armed", + [IBV_PORT_ACTIVE] = "active", + [IBV_PORT_ACTIVE_DEFER] = "active defer" + }; + + if (port_state < IBV_PORT_NOP || port_state > IBV_PORT_ACTIVE_DEFER) + return "unknown"; + + return port_state_str[port_state]; +} + + +/* + * ibv_event_type_str + */ + +const char * ibv_event_type_str(enum ibv_event_type event) { + static const char *const event_type_str[] = { + [IBV_EVENT_CQ_ERR] = "CQ error", + [IBV_EVENT_QP_FATAL] = "local work queue catastrophic error", + [IBV_EVENT_QP_REQ_ERR] = "invalid request local work queue error", + [IBV_EVENT_QP_ACCESS_ERR] = "local access violation work queue error", + [IBV_EVENT_COMM_EST] = "communication established", + [IBV_EVENT_SQ_DRAINED] = "send queue 
drained", + [IBV_EVENT_PATH_MIG] = "path migrated", + [IBV_EVENT_PATH_MIG_ERR] = "path migration request error", + [IBV_EVENT_DEVICE_FATAL] = "local catastrophic error", + [IBV_EVENT_PORT_ACTIVE] = "port active", + [IBV_EVENT_PORT_ERR] = "port error", + [IBV_EVENT_LID_CHANGE] = "LID change", + [IBV_EVENT_PKEY_CHANGE] = "P_Key change", + [IBV_EVENT_SM_CHANGE] = "SM change", + [IBV_EVENT_SRQ_ERR] = "SRQ catastrophic error", + [IBV_EVENT_SRQ_LIMIT_REACHED] = "SRQ limit reached", + [IBV_EVENT_QP_LAST_WQE_REACHED] = "last WQE reached", + [IBV_EVENT_CLIENT_REREGISTER] = "client reregistration", + [IBV_EVENT_GID_CHANGE] = "GID table change", + [IBV_EVENT_WQ_FATAL] = "WQ fatal" + }; + + if (event < IBV_EVENT_CQ_ERR || event > IBV_EVENT_WQ_FATAL) /* bound must include IBV_EVENT_WQ_FATAL, which has a table entry above */ + return "unknown"; + + return event_type_str[event]; } @@ -1840,7 +1949,7 @@ typedef struct { int ibv_get_srq_num(struct ibv_srq * srq, uint32_t * srq_num) { uhyve_ibv_get_srq_num_t uhyve_args; uhyve_args.srq = srq; - uhyve_args.srq_num = srq_num; + uhyve_args.srq_num = (uint32_t *) guest_to_host((size_t) srq_num); uhyve_send(UHYVE_PORT_IBV_GET_SRQ_NUM, (unsigned) virt_to_phys((size_t) &uhyve_args)); @@ -2320,69 +2429,6 @@ int ibv_fork_init() { } -/* - * ibv_node_type_str - */ - -typedef struct { - // Parameters: - enum ibv_node_type node_type; - // Return value: - const char * ret; -} __attribute__((packed)) uhyve_ibv_node_type_str_t; - -const char * ibv_node_type_str(enum ibv_node_type node_type) { - uhyve_ibv_node_type_str_t uhyve_args; - uhyve_args.node_type = node_type; - - uhyve_send(UHYVE_PORT_IBV_NODE_TYPE_STR, (unsigned) virt_to_phys((size_t) &uhyve_args)); - - return uhyve_args.ret; -} - - -/* - * ibv_port_state_str - */ - -typedef struct { - // Parameters: - enum ibv_port_state port_state; - // Return value: - const char * ret; -} __attribute__((packed)) uhyve_ibv_port_state_str_t; - -const char * ibv_port_state_str(enum ibv_port_state port_state) { - uhyve_ibv_port_state_str_t uhyve_args; - uhyve_args.port_state = 
port_state; - - uhyve_send(UHYVE_PORT_IBV_PORT_STATE_STR, (unsigned) virt_to_phys((size_t) &uhyve_args)); - - return uhyve_args.ret; -} - - -/* - * ibv_event_type_str - */ - -typedef struct { - // Parameters: - enum ibv_event_type event; - // Return value: - const char * ret; -} __attribute__((packed)) uhyve_ibv_event_type_str_t; - -const char * ibv_event_type_str(enum ibv_event_type event) { - uhyve_ibv_event_type_str_t uhyve_args; - uhyve_args.event = event; - - uhyve_send(UHYVE_PORT_IBV_EVENT_TYPE_STR, (unsigned) virt_to_phys((size_t) &uhyve_args)); - - return uhyve_args.ret; -} - - /* * ibv_resolve_eth_l2_from_gid */ diff --git a/tools/uhyve-ibv.c b/tools/uhyve-ibv.c index 5c91fd741..8df7730a2 100644 --- a/tools/uhyve-ibv.c +++ b/tools/uhyve-ibv.c @@ -49,20 +49,6 @@ static inline unsigned long long rdtsc() { return val; } -/* - * ibv_wc_status_str - */ - -void call_ibv_wc_status_str(struct kvm_run * run, uint8_t * guest_mem) { - printf("LOG: UHYVE - call_ibv_wc_status_str\n"); - unsigned data = *((unsigned*) ((size_t) run + run->io.data_offset)); - uhyve_ibv_wc_status_str_t * args = (uhyve_ibv_wc_status_str_t *) (guest_mem + data); - - use_ib_mem_pool = true; - args->ret = ibv_wc_status_str(args->status); - use_ib_mem_pool = false; -} - /* * ibv_rate_to_mult @@ -1386,51 +1372,6 @@ void call_ibv_fork_init(struct kvm_run * run, uint8_t * guest_mem) { } -/* - * ibv_node_type_str - */ - -void call_ibv_node_type_str(struct kvm_run * run, uint8_t * guest_mem) { - printf("LOG: UHYVE - call_ibv_node_type_str\n"); - unsigned data = *((unsigned*) ((size_t) run + run->io.data_offset)); - uhyve_ibv_node_type_str_t * args = (uhyve_ibv_node_type_str_t *) (guest_mem + data); - - use_ib_mem_pool = true; - args->ret = ibv_node_type_str(args->node_type); - use_ib_mem_pool = false; -} - - -/* - * ibv_port_state_str - */ - -void call_ibv_port_state_str(struct kvm_run * run, uint8_t * guest_mem) { - printf("LOG: UHYVE - call_ibv_port_state_str\n"); - unsigned data = *((unsigned*) 
((size_t) run + run->io.data_offset)); - uhyve_ibv_port_state_str_t * args = (uhyve_ibv_port_state_str_t *) (guest_mem + data); - - use_ib_mem_pool = true; - args->ret = ibv_port_state_str(args->port_state); - use_ib_mem_pool = false; -} - - -/* - * ibv_event_type_str - */ - -void call_ibv_event_type_str(struct kvm_run * run, uint8_t * guest_mem) { - printf("LOG: UHYVE - call_ibv_event_type_str\n"); - unsigned data = *((unsigned*) ((size_t) run + run->io.data_offset)); - uhyve_ibv_event_type_str_t * args = (uhyve_ibv_event_type_str_t *) (guest_mem + data); - - use_ib_mem_pool = true; - args->ret = ibv_event_type_str(args->event); - use_ib_mem_pool = false; -} - - /* * ibv_resolve_eth_l2_from_gid */ diff --git a/tools/uhyve-ibv.h b/tools/uhyve-ibv.h index 8460fdc32..8f034f12a 100644 --- a/tools/uhyve-ibv.h +++ b/tools/uhyve-ibv.h @@ -51,108 +51,98 @@ extern bool use_ib_mem_pool; typedef enum { UHYVE_PORT_SET_IB_POOL_ADDR = 0x610, - UHYVE_PORT_IBV_WC_STATUS_STR = 0x611, - UHYVE_PORT_IBV_RATE_TO_MULT = 0x612, - UHYVE_PORT_MULT_TO_IBV_RATE = 0x613, - UHYVE_PORT_IBV_RATE_TO_MBPS = 0x614, - UHYVE_PORT_MBPS_TO_IBV_RATE = 0x615, - UHYVE_PORT_IBV_CQ_EX_TO_CQ = 0x616, - UHYVE_PORT_IBV_START_POLL = 0x617, - UHYVE_PORT_IBV_NEXT_POLL = 0x618, - UHYVE_PORT_IBV_END_POLL = 0x619, - UHYVE_PORT_IBV_WC_READ_OPCODE = 0x61A, - UHYVE_PORT_IBV_WC_READ_VENDOR_ERR = 0x61B, - UHYVE_PORT_IBV_WC_READ_BYTE_LEN = 0x61C, - UHYVE_PORT_IBV_WC_READ_IMM_DATA = 0x61D, - UHYVE_PORT_IBV_WC_READ_INVALIDATED_RKEY = 0x61E, - UHYVE_PORT_IBV_WC_READ_QP_NUM = 0x61F, - UHYVE_PORT_IBV_WC_READ_SRC_QP = 0x620, - UHYVE_PORT_IBV_WC_READ_WC_FLAGS = 0x621, - UHYVE_PORT_IBV_WC_READ_SLID = 0x622, - UHYVE_PORT_IBV_WC_READ_SL = 0x623, - UHYVE_PORT_IBV_WC_READ_DLID_PATH_BITS = 0x624, - UHYVE_PORT_IBV_WC_READ_COMPLETION_TS = 0x625, - UHYVE_PORT_IBV_WC_READ_CVLAN = 0x626, - UHYVE_PORT_IBV_WC_READ_FLOW_TAG = 0x627, - UHYVE_PORT_IBV_POST_WQ_RECV = 0x628, - UHYVE_PORT_IBV_GET_DEVICE_LIST = 0x629, - 
UHYVE_PORT_IBV_FREE_DEVICE_LIST = 0x62A, - UHYVE_PORT_IBV_GET_DEVICE_NAME = 0x62B, - UHYVE_PORT_IBV_GET_DEVICE_GUID = 0x62C, - UHYVE_PORT_IBV_OPEN_DEVICE = 0x62D, - UHYVE_PORT_IBV_CLOSE_DEVICE = 0x62E, - UHYVE_PORT_IBV_GET_ASYNC_EVENT = 0x62F, - UHYVE_PORT_IBV_ACK_ASYNC_EVENT = 0x630, - UHYVE_PORT_IBV_QUERY_DEVICE = 0x631, - UHYVE_PORT_IBV_QUERY_PORT = 0x632, - UHYVE_PORT_IBV_QUERY_GID = 0x633, - UHYVE_PORT_IBV_QUERY_PKEY = 0x634, - UHYVE_PORT_IBV_ALLOC_PD = 0x635, - UHYVE_PORT_IBV_DEALLOC_PD = 0x636, - UHYVE_PORT_IBV_CREATE_FLOW = 0x637, - UHYVE_PORT_IBV_DESTROY_FLOW = 0x638, - UHYVE_PORT_IBV_OPEN_XRCD = 0x639, - UHYVE_PORT_IBV_CLOSE_XRCD = 0x63A, - UHYVE_PORT_IBV_REG_MR = 0x63B, - UHYVE_PORT_IBV_REREG_MR = 0x63C, - UHYVE_PORT_IBV_DEREG_MR = 0x63D, - UHYVE_PORT_IBV_ALLOC_MW = 0x63E, - UHYVE_PORT_IBV_DEALLOC_MW = 0x63F, - UHYVE_PORT_IBV_INC_RKEY = 0x640, - UHYVE_PORT_IBV_BIND_MW = 0x641, - UHYVE_PORT_IBV_CREATE_COMP_CHANNEL = 0x642, - UHYVE_PORT_IBV_DESTROY_COMP_CHANNEL = 0x643, - UHYVE_PORT_IBV_CREATE_CQ = 0x644, - UHYVE_PORT_IBV_CREATE_CQ_EX = 0x645, - UHYVE_PORT_IBV_RESIZE_CQ = 0x646, - UHYVE_PORT_IBV_DESTROY_CQ = 0x647, - UHYVE_PORT_IBV_GET_CQ_EVENT = 0x648, - UHYVE_PORT_IBV_ACK_CQ_EVENTS = 0x649, - UHYVE_PORT_IBV_POLL_CQ = 0x64A, - UHYVE_PORT_IBV_REQ_NOTIFY_CQ = 0x64B, - UHYVE_PORT_IBV_CREATE_SRQ = 0x64C, - UHYVE_PORT_IBV_CREATE_SRQ_EX = 0x64D, - UHYVE_PORT_IBV_MODIFY_SRQ = 0x64E, - UHYVE_PORT_IBV_QUERY_SRQ = 0x64F, - UHYVE_PORT_IBV_GET_SRQ_NUM = 0x650, - UHYVE_PORT_IBV_DESTROY_SRQ = 0x651, - UHYVE_PORT_IBV_POST_SRQ_RECV = 0x652, - UHYVE_PORT_IBV_CREATE_QP = 0x653, - UHYVE_PORT_IBV_CREATE_QP_EX = 0x654, - UHYVE_PORT_IBV_QUERY_RT_VALUES_EX = 0x655, - UHYVE_PORT_IBV_QUERY_DEVICE_EX = 0x656, - UHYVE_PORT_IBV_OPEN_QP = 0x657, - UHYVE_PORT_IBV_MODIFY_QP = 0x658, - UHYVE_PORT_IBV_QUERY_QP = 0x659, - UHYVE_PORT_IBV_DESTROY_QP = 0x65A, - UHYVE_PORT_IBV_CREATE_WQ = 0x65B, - UHYVE_PORT_IBV_MODIFY_WQ = 0x65C, - UHYVE_PORT_IBV_DESTROY_WQ = 0x65D, - 
UHYVE_PORT_IBV_CREATE_RWQ_IND_TABLE = 0x65E, - UHYVE_PORT_IBV_DESTROY_RWQ_IND_TABLE = 0x65F, - UHYVE_PORT_IBV_POST_SEND = 0x660, - UHYVE_PORT_IBV_POST_RECV = 0x661, - UHYVE_PORT_IBV_CREATE_AH = 0x662, - UHYVE_PORT_IBV_INIT_AH_FROM_WC = 0x663, - UHYVE_PORT_IBV_CREATE_AH_FROM_WC = 0x664, - UHYVE_PORT_IBV_DESTROY_AH = 0x665, - UHYVE_PORT_IBV_ATTACH_MCAST = 0x666, - UHYVE_PORT_IBV_DETACH_MCAST = 0x667, - UHYVE_PORT_IBV_FORK_INIT = 0x668, - UHYVE_PORT_IBV_NODE_TYPE_STR = 0x669, - UHYVE_PORT_IBV_PORT_STATE_STR = 0x66A, - UHYVE_PORT_IBV_EVENT_TYPE_STR = 0x66B, - // UHYVE_PORT_IBV_RESOLVE_ETH_L2_FROM_GID = 0x66C, - UHYVE_PORT_IBV_IS_QPT_SUPPORTED = 0x66D, + UHYVE_PORT_IBV_RATE_TO_MULT = 0x611, + UHYVE_PORT_MULT_TO_IBV_RATE = 0x612, + UHYVE_PORT_IBV_RATE_TO_MBPS = 0x613, + UHYVE_PORT_MBPS_TO_IBV_RATE = 0x614, + UHYVE_PORT_IBV_CQ_EX_TO_CQ = 0x615, + UHYVE_PORT_IBV_START_POLL = 0x616, + UHYVE_PORT_IBV_NEXT_POLL = 0x617, + UHYVE_PORT_IBV_END_POLL = 0x618, + UHYVE_PORT_IBV_WC_READ_OPCODE = 0x619, + UHYVE_PORT_IBV_WC_READ_VENDOR_ERR = 0x61A, + UHYVE_PORT_IBV_WC_READ_BYTE_LEN = 0x61B, + UHYVE_PORT_IBV_WC_READ_IMM_DATA = 0x61C, + UHYVE_PORT_IBV_WC_READ_INVALIDATED_RKEY = 0x61D, + UHYVE_PORT_IBV_WC_READ_QP_NUM = 0x61E, + UHYVE_PORT_IBV_WC_READ_SRC_QP = 0x61F, + UHYVE_PORT_IBV_WC_READ_WC_FLAGS = 0x620, + UHYVE_PORT_IBV_WC_READ_SLID = 0x621, + UHYVE_PORT_IBV_WC_READ_SL = 0x622, + UHYVE_PORT_IBV_WC_READ_DLID_PATH_BITS = 0x623, + UHYVE_PORT_IBV_WC_READ_COMPLETION_TS = 0x624, + UHYVE_PORT_IBV_WC_READ_CVLAN = 0x625, + UHYVE_PORT_IBV_WC_READ_FLOW_TAG = 0x626, + UHYVE_PORT_IBV_POST_WQ_RECV = 0x627, + UHYVE_PORT_IBV_GET_DEVICE_LIST = 0x628, + UHYVE_PORT_IBV_FREE_DEVICE_LIST = 0x629, + UHYVE_PORT_IBV_GET_DEVICE_NAME = 0x62A, + UHYVE_PORT_IBV_GET_DEVICE_GUID = 0x62B, + UHYVE_PORT_IBV_OPEN_DEVICE = 0x62C, + UHYVE_PORT_IBV_CLOSE_DEVICE = 0x62D, + UHYVE_PORT_IBV_GET_ASYNC_EVENT = 0x62E, + UHYVE_PORT_IBV_ACK_ASYNC_EVENT = 0x62F, + UHYVE_PORT_IBV_QUERY_DEVICE = 0x630, + UHYVE_PORT_IBV_QUERY_PORT = 
0x631, + UHYVE_PORT_IBV_QUERY_GID = 0x632, + UHYVE_PORT_IBV_QUERY_PKEY = 0x633, + UHYVE_PORT_IBV_ALLOC_PD = 0x634, + UHYVE_PORT_IBV_DEALLOC_PD = 0x635, + UHYVE_PORT_IBV_CREATE_FLOW = 0x636, + UHYVE_PORT_IBV_DESTROY_FLOW = 0x637, + UHYVE_PORT_IBV_OPEN_XRCD = 0x638, + UHYVE_PORT_IBV_CLOSE_XRCD = 0x639, + UHYVE_PORT_IBV_REG_MR = 0x63A, + UHYVE_PORT_IBV_REREG_MR = 0x63B, + UHYVE_PORT_IBV_DEREG_MR = 0x63C, + UHYVE_PORT_IBV_ALLOC_MW = 0x63D, + UHYVE_PORT_IBV_DEALLOC_MW = 0x63E, + UHYVE_PORT_IBV_INC_RKEY = 0x63F, + UHYVE_PORT_IBV_BIND_MW = 0x640, + UHYVE_PORT_IBV_CREATE_COMP_CHANNEL = 0x641, + UHYVE_PORT_IBV_DESTROY_COMP_CHANNEL = 0x642, + UHYVE_PORT_IBV_CREATE_CQ = 0x643, + UHYVE_PORT_IBV_CREATE_CQ_EX = 0x644, + UHYVE_PORT_IBV_RESIZE_CQ = 0x645, + UHYVE_PORT_IBV_DESTROY_CQ = 0x646, + UHYVE_PORT_IBV_GET_CQ_EVENT = 0x647, + UHYVE_PORT_IBV_ACK_CQ_EVENTS = 0x648, + UHYVE_PORT_IBV_POLL_CQ = 0x649, + UHYVE_PORT_IBV_REQ_NOTIFY_CQ = 0x64A, + UHYVE_PORT_IBV_CREATE_SRQ = 0x64B, + UHYVE_PORT_IBV_CREATE_SRQ_EX = 0x64C, + UHYVE_PORT_IBV_MODIFY_SRQ = 0x64D, + UHYVE_PORT_IBV_QUERY_SRQ = 0x64E, + UHYVE_PORT_IBV_GET_SRQ_NUM = 0x64F, + UHYVE_PORT_IBV_DESTROY_SRQ = 0x650, + UHYVE_PORT_IBV_POST_SRQ_RECV = 0x651, + UHYVE_PORT_IBV_CREATE_QP = 0x652, + UHYVE_PORT_IBV_CREATE_QP_EX = 0x653, + UHYVE_PORT_IBV_QUERY_RT_VALUES_EX = 0x654, + UHYVE_PORT_IBV_QUERY_DEVICE_EX = 0x655, + UHYVE_PORT_IBV_OPEN_QP = 0x656, + UHYVE_PORT_IBV_MODIFY_QP = 0x657, + UHYVE_PORT_IBV_QUERY_QP = 0x658, + UHYVE_PORT_IBV_DESTROY_QP = 0x659, + UHYVE_PORT_IBV_CREATE_WQ = 0x65A, + UHYVE_PORT_IBV_MODIFY_WQ = 0x65B, + UHYVE_PORT_IBV_DESTROY_WQ = 0x65C, + UHYVE_PORT_IBV_CREATE_RWQ_IND_TABLE = 0x65D, + UHYVE_PORT_IBV_DESTROY_RWQ_IND_TABLE = 0x65E, + UHYVE_PORT_IBV_POST_SEND = 0x65F, + UHYVE_PORT_IBV_POST_RECV = 0x660, + UHYVE_PORT_IBV_CREATE_AH = 0x661, + UHYVE_PORT_IBV_INIT_AH_FROM_WC = 0x662, + UHYVE_PORT_IBV_CREATE_AH_FROM_WC = 0x663, + UHYVE_PORT_IBV_DESTROY_AH = 0x664, + UHYVE_PORT_IBV_ATTACH_MCAST = 0x665, + 
UHYVE_PORT_IBV_DETACH_MCAST = 0x666, + UHYVE_PORT_IBV_FORK_INIT = 0x667, + // UHYVE_PORT_IBV_RESOLVE_ETH_L2_FROM_GID = 0x668, + UHYVE_PORT_IBV_IS_QPT_SUPPORTED = 0x669, } uhyve_ibv_t; -typedef struct { - // Parameters: - enum ibv_wc_status status; - // Return value: - const char * ret; -} __attribute__((packed)) uhyve_ibv_wc_status_str_t; typedef struct { // Parameters: @@ -831,27 +821,6 @@ typedef struct { int ret; } __attribute__((packed)) uhyve_ibv_fork_init_t; -typedef struct { - // Parameters: - enum ibv_node_type node_type; - // Return value: - const char * ret; -} __attribute__((packed)) uhyve_ibv_node_type_str_t; - -typedef struct { - // Parameters: - enum ibv_port_state port_state; - // Return value: - const char * ret; -} __attribute__((packed)) uhyve_ibv_port_state_str_t; - -typedef struct { - // Parameters: - enum ibv_event_type event; - // Return value: - const char * ret; -} __attribute__((packed)) uhyve_ibv_event_type_str_t; - // typedef struct { // // Parameters: // struct ibv_context * context; @@ -872,7 +841,6 @@ typedef struct { -void call_ibv_wc_status_str (struct kvm_run * run, uint8_t * guest_mem); void call_ibv_rate_to_mult (struct kvm_run * run, uint8_t * guest_mem); void call_mult_to_ibv_rate (struct kvm_run * run, uint8_t * guest_mem); void call_ibv_rate_to_mbps (struct kvm_run * run, uint8_t * guest_mem); @@ -960,9 +928,6 @@ void call_ibv_destroy_ah (struct kvm_run * run, uint8_t * guest_me void call_ibv_attach_mcast (struct kvm_run * run, uint8_t * guest_mem); void call_ibv_detach_mcast (struct kvm_run * run, uint8_t * guest_mem); void call_ibv_fork_init (struct kvm_run * run, uint8_t * guest_mem); -void call_ibv_node_type_str (struct kvm_run * run, uint8_t * guest_mem); -void call_ibv_port_state_str (struct kvm_run * run, uint8_t * guest_mem); -void call_ibv_event_type_str (struct kvm_run * run, uint8_t * guest_mem); // void call_ibv_resolve_eth_l2_from_gid(struct kvm_run * run, uint8_t * guest_mem); void call_ibv_is_qpt_supported 
(struct kvm_run * run, uint8_t * guest_mem); diff --git a/tools/uhyve.c b/tools/uhyve.c index 153648c93..df53c2bc9 100644 --- a/tools/uhyve.c +++ b/tools/uhyve.c @@ -1276,9 +1276,6 @@ static int vcpu_loop(void) call_ibv_post_srq_recv(run, guest_mem); break; - case UHYVE_PORT_IBV_WC_STATUS_STR: - call_ibv_wc_status_str(run, guest_mem); - break; case UHYVE_PORT_IBV_RATE_TO_MULT: call_ibv_rate_to_mult(run, guest_mem); break; @@ -1525,15 +1522,6 @@ static int vcpu_loop(void) case UHYVE_PORT_IBV_FORK_INIT: call_ibv_fork_init(run, guest_mem); break; - case UHYVE_PORT_IBV_NODE_TYPE_STR: - call_ibv_node_type_str(run, guest_mem); - break; - case UHYVE_PORT_IBV_PORT_STATE_STR: - call_ibv_port_state_str(run, guest_mem); - break; - case UHYVE_PORT_IBV_EVENT_TYPE_STR: - call_ibv_event_type_str(run, guest_mem); - break; /* case UHYVE_PORT_IBV_RESOLVE_ETH_L2_FROM_GID: */ /* call_ibv_resolve_eth_l2_from_gid(run, guest_mem); */ /* break; */ diff --git a/usr/tests/CMakeLists.txt b/usr/tests/CMakeLists.txt index 35aefb392..85156435a 100644 --- a/usr/tests/CMakeLists.txt +++ b/usr/tests/CMakeLists.txt @@ -10,8 +10,18 @@ add_executable(hello++ hello++.cpp) add_executable(hellof hellof.f90) add_executable(pi pi.go) -#add_executable(ib-pingpong ib/pingpong.c ib/pingpong.h ib/pingpong-ud.c) +# InfiniBand Pingpongs + +# Reliable Connected add_executable(ib-rc-pingpong ib/pingpong.c ib/rc_pingpong.c) +# Unreliable Datagram +add_executable(ib-ud-pingpong ib/pingpong.c ib/ud_pingpong.c) +# Unreliable Connected +add_executable(ib-uc-pingpong ib/pingpong.c ib/uc_pingpong.c) +# Shared Receive Queue +add_executable(ib-srq-pingpong ib/pingpong.c ib/srq_pingpong.c) +# Shared Receive Queue, eXtended Reliable Connected +add_executable(ib-xsrq-pingpong ib/pingpong.c ib/xsrq_pingpong.c) add_executable(test-malloc test-malloc.c) add_executable(test-malloc-mt test-malloc-mt.c) diff --git a/usr/tests/ib/pingpong.c b/usr/tests/ib/pingpong.c index 468eddcbd..673954e93 100644 --- a/usr/tests/ib/pingpong.c 
+++ b/usr/tests/ib/pingpong.c @@ -1,6 +1,7 @@ /* - * Copyright (c) 2006 Cisco Systems. All rights reserved. - * 2018 Annika Wierichs, RWTH Aachen. All rights reserved. + * Copyright (c) 2006 Cisco Systems. + * 2018 Annika Wierichs, RWTH Aachen. + * All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/usr/tests/ib/pingpong.h b/usr/tests/ib/pingpong.h index f9a8f0b8d..309a1ee68 100644 --- a/usr/tests/ib/pingpong.h +++ b/usr/tests/ib/pingpong.h @@ -1,6 +1,7 @@ /* - * Copyright (c) 2006 Cisco Systems. All rights reserved. - * 2018 Annika Wierichs, RWTH Aachen. All rights reserved. + * Copyright (c) 2006 Cisco Systems. + * 2018 Annika Wierichs, RWTH Aachen. + * All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/usr/tests/ib/rc_pingpong.c b/usr/tests/ib/rc_pingpong.c index 049df1eb2..84f5e4e86 100644 --- a/usr/tests/ib/rc_pingpong.c +++ b/usr/tests/ib/rc_pingpong.c @@ -1,6 +1,8 @@ /* - * Copyright (c) 2005 Topspin Communications. All rights reserved. - * 2018 Annika Wierichs, RWTH Aachen. All rights reserved. + * Copyright (c) 2005 Topspin Communications. + * 2018 Annika Wierichs, RWTH Aachen. + * All rights reserved. + * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file diff --git a/usr/tests/ib/srq_pingpong.c b/usr/tests/ib/srq_pingpong.c new file mode 100644 index 000000000..c5db6ade8 --- /dev/null +++ b/usr/tests/ib/srq_pingpong.c @@ -0,0 +1,995 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#define _GNU_SOURCE +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pingpong.h" + +enum { + PINGPONG_RECV_WRID = 1, + PINGPONG_SEND_WRID = 2, + + MAX_QP = 256, +}; + +static int page_size; +static int validate_buf; + +struct pingpong_context { + struct ibv_context *context; + struct ibv_comp_channel *channel; + struct ibv_pd *pd; + struct ibv_mr *mr; + struct ibv_cq *cq; + struct ibv_srq *srq; + struct ibv_qp *qp[MAX_QP]; + char *buf; + int size; + int send_flags; + int num_qp; + int rx_depth; + int pending[MAX_QP]; + struct ibv_port_attr portinfo; +}; + +struct pingpong_dest { + int lid; + int qpn; + int psn; + union ibv_gid gid; +}; + +static int pp_connect_ctx(struct pingpong_context *ctx, int port, enum ibv_mtu mtu, + int sl, const struct pingpong_dest *my_dest, + const struct pingpong_dest *dest, int sgid_idx) +{ + int i; + + for (i = 0; i < ctx->num_qp; ++i) { + struct ibv_qp_attr attr = { + .qp_state = IBV_QPS_RTR, + .path_mtu = mtu, + .dest_qp_num = dest[i].qpn, + .rq_psn = dest[i].psn, + .max_dest_rd_atomic = 1, + .min_rnr_timer = 12, + .ah_attr = { + .is_global = 0, + .dlid = dest[i].lid, + .sl = sl, + .src_path_bits = 0, + .port_num = port + } + }; + + if (dest->gid.global.interface_id) { + attr.ah_attr.is_global = 1; + attr.ah_attr.grh.hop_limit = 1; + attr.ah_attr.grh.dgid = dest->gid; + attr.ah_attr.grh.sgid_index = sgid_idx; + } + if (ibv_modify_qp(ctx->qp[i], &attr, + IBV_QP_STATE | + IBV_QP_AV | + IBV_QP_PATH_MTU | + IBV_QP_DEST_QPN | + IBV_QP_RQ_PSN | + IBV_QP_MAX_DEST_RD_ATOMIC | + IBV_QP_MIN_RNR_TIMER)) { + fprintf(stderr, "Failed to modify QP[%d] to RTR\n", i); + return 1; + } + + attr.qp_state = IBV_QPS_RTS; + attr.timeout = 14; + attr.retry_cnt = 7; + attr.rnr_retry = 7; + attr.sq_psn = my_dest[i].psn; + attr.max_rd_atomic = 1; + if (ibv_modify_qp(ctx->qp[i], &attr, + IBV_QP_STATE | + IBV_QP_TIMEOUT | + IBV_QP_RETRY_CNT | + IBV_QP_RNR_RETRY 
| + IBV_QP_SQ_PSN | + IBV_QP_MAX_QP_RD_ATOMIC)) { + fprintf(stderr, "Failed to modify QP[%d] to RTS\n", i); + return 1; + } + } + + return 0; +} + +static struct pingpong_dest *pp_client_exch_dest(const char *servername, int port, + const struct pingpong_dest *my_dest) +{ + struct addrinfo *res, *t; + struct addrinfo hints = { + .ai_family = AF_UNSPEC, + .ai_socktype = SOCK_STREAM + }; + char *service; + char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"]; + int n; + int r; + int i; + int sockfd = -1; + struct pingpong_dest *rem_dest = NULL; + char gid[33]; + + if (asprintf(&service, "%d", port) < 0) + return NULL; + + n = getaddrinfo(servername, service, &hints, &res); + + if (n < 0) { + fprintf(stderr, "Error for %s:%d\n", servername, port); + free(service); + return NULL; + } + + for (t = res; t; t = t->ai_next) { + sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol); + if (sockfd >= 0) { + if (!connect(sockfd, t->ai_addr, t->ai_addrlen)) + break; + close(sockfd); + sockfd = -1; + } + } + + freeaddrinfo(res); + free(service); + + if (sockfd < 0) { + fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port); + return NULL; + } + + for (i = 0; i < MAX_QP; ++i) { + gid_to_wire_gid(&my_dest[i].gid, gid); + sprintf(msg, "%04x:%06x:%06x:%s", my_dest[i].lid, + my_dest[i].qpn, my_dest[i].psn, gid); + if (write(sockfd, msg, sizeof msg) != sizeof msg) { + fprintf(stderr, "Couldn't send local address\n"); + goto out; + } + } + + rem_dest = malloc(MAX_QP * sizeof *rem_dest); + if (!rem_dest) + goto out; + + for (i = 0; i < MAX_QP; ++i) { + n = 0; + while (n < sizeof msg) { + r = read(sockfd, msg + n, sizeof msg - n); + if (r < 0) { + perror("client read"); + fprintf(stderr, "%d/%d: Couldn't read remote address [%d]\n", + n, (int) sizeof msg, i); + goto out; + } + n += r; + } + + sscanf(msg, "%x:%x:%x:%s", &rem_dest[i].lid, &rem_dest[i].qpn, + &rem_dest[i].psn, gid); + wire_gid_to_gid(gid, &rem_dest[i].gid); + } + + if (write(sockfd, 
"done", sizeof "done") != sizeof "done") { + perror("client write"); + goto out; + } +out: + close(sockfd); + return rem_dest; +} + +static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx, + int ib_port, enum ibv_mtu mtu, + int port, int sl, + const struct pingpong_dest *my_dest, + int sgid_idx) +{ + struct addrinfo *res, *t; + struct addrinfo hints = { + .ai_flags = AI_PASSIVE, + .ai_family = AF_UNSPEC, + .ai_socktype = SOCK_STREAM + }; + char *service; + char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"]; + int n; + int r; + int i; + int sockfd = -1, connfd; + struct pingpong_dest *rem_dest = NULL; + char gid[33]; + + if (asprintf(&service, "%d", port) < 0) + return NULL; + + n = getaddrinfo(NULL, service, &hints, &res); + + if (n < 0) { + fprintf(stderr, "Error for port %d\n", port); + free(service); + return NULL; + } + + for (t = res; t; t = t->ai_next) { + sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol); + if (sockfd >= 0) { + n = 1; + + setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n); + + if (!bind(sockfd, t->ai_addr, t->ai_addrlen)) + break; + close(sockfd); + sockfd = -1; + } + } + + freeaddrinfo(res); + free(service); + + if (sockfd < 0) { + fprintf(stderr, "Couldn't listen to port %d\n", port); + return NULL; + } + + listen(sockfd, 1); + connfd = accept(sockfd, NULL, NULL); + close(sockfd); + if (connfd < 0) { + fprintf(stderr, "accept() failed\n"); + return NULL; + } + + rem_dest = malloc(MAX_QP * sizeof *rem_dest); + if (!rem_dest) + goto out; + + for (i = 0; i < MAX_QP; ++i) { + n = 0; + while (n < sizeof msg) { + r = read(connfd, msg + n, sizeof msg - n); + if (r < 0) { + perror("server read"); + fprintf(stderr, "%d/%d: Couldn't read remote address [%d]\n", + n, (int) sizeof msg, i); + goto out; + } + n += r; + } + + sscanf(msg, "%x:%x:%x:%s", &rem_dest[i].lid, &rem_dest[i].qpn, + &rem_dest[i].psn, gid); + wire_gid_to_gid(gid, &rem_dest[i].gid); + } + + if (pp_connect_ctx(ctx, 
ib_port, mtu, sl, my_dest, rem_dest, + sgid_idx)) { + fprintf(stderr, "Couldn't connect to remote QP\n"); + free(rem_dest); + rem_dest = NULL; + goto out; + } + + for (i = 0; i < MAX_QP; ++i) { + gid_to_wire_gid(&my_dest[i].gid, gid); + sprintf(msg, "%04x:%06x:%06x:%s", my_dest[i].lid, + my_dest[i].qpn, my_dest[i].psn, gid); + if (write(connfd, msg, sizeof msg) != sizeof msg) { + fprintf(stderr, "Couldn't send local address\n"); + free(rem_dest); + rem_dest = NULL; + goto out; + } + } + + if (read(connfd, msg, sizeof msg) != sizeof "done") { + perror("client write"); + free(rem_dest); + rem_dest = NULL; + goto out; + } + +out: + close(connfd); + return rem_dest; +} + +static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size, + int num_qp, int rx_depth, int port, + int use_event) +{ + struct pingpong_context *ctx; + int i; + + ctx = calloc(1, sizeof *ctx); + if (!ctx) + return NULL; + + ctx->size = size; + ctx->send_flags = IBV_SEND_SIGNALED; + ctx->num_qp = num_qp; + ctx->rx_depth = rx_depth; + + ctx->buf = memalign(page_size, size); + if (!ctx->buf) { + fprintf(stderr, "Couldn't allocate work buf.\n"); + goto clean_ctx; + } + + memset(ctx->buf, 0, size); + + ctx->context = ibv_open_device(ib_dev); + if (!ctx->context) { + fprintf(stderr, "Couldn't get context for %s\n", + ibv_get_device_name(ib_dev)); + goto clean_buffer; + } + + if (use_event) { + ctx->channel = ibv_create_comp_channel(ctx->context); + if (!ctx->channel) { + fprintf(stderr, "Couldn't create completion channel\n"); + goto clean_device; + } + } else + ctx->channel = NULL; + + ctx->pd = ibv_alloc_pd(ctx->context); + if (!ctx->pd) { + fprintf(stderr, "Couldn't allocate PD\n"); + goto clean_comp_channel; + } + + ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size, IBV_ACCESS_LOCAL_WRITE); + if (!ctx->mr) { + fprintf(stderr, "Couldn't register MR\n"); + goto clean_pd; + } + + ctx->cq = ibv_create_cq(ctx->context, rx_depth + num_qp, NULL, + ctx->channel, 0); + if (!ctx->cq) { + 
fprintf(stderr, "Couldn't create CQ\n"); + goto clean_mr; + } + + { + struct ibv_srq_init_attr attr = { + .attr = { + .max_wr = rx_depth, + .max_sge = 1 + } + }; + + ctx->srq = ibv_create_srq(ctx->pd, &attr); + if (!ctx->srq) { + fprintf(stderr, "Couldn't create SRQ\n"); + goto clean_cq; + } + } + + for (i = 0; i < num_qp; ++i) { + struct ibv_qp_attr attr; + struct ibv_qp_init_attr init_attr = { + .send_cq = ctx->cq, + .recv_cq = ctx->cq, + .srq = ctx->srq, + .cap = { + .max_send_wr = 1, + .max_send_sge = 1, + }, + .qp_type = IBV_QPT_RC + }; + + ctx->qp[i] = ibv_create_qp(ctx->pd, &init_attr); + if (!ctx->qp[i]) { + fprintf(stderr, "Couldn't create QP[%d]\n", i); + goto clean_qps; + } + ibv_query_qp(ctx->qp[i], &attr, IBV_QP_CAP, &init_attr); + if (init_attr.cap.max_inline_data >= size) { + ctx->send_flags |= IBV_SEND_INLINE; + } + } + + for (i = 0; i < num_qp; ++i) { + struct ibv_qp_attr attr = { + .qp_state = IBV_QPS_INIT, + .pkey_index = 0, + .port_num = port, + .qp_access_flags = 0 + }; + + if (ibv_modify_qp(ctx->qp[i], &attr, + IBV_QP_STATE | + IBV_QP_PKEY_INDEX | + IBV_QP_PORT | + IBV_QP_ACCESS_FLAGS)) { + fprintf(stderr, "Failed to modify QP[%d] to INIT\n", i); + goto clean_qps_full; + } + } + + return ctx; + +clean_qps_full: + i = num_qp; + +clean_qps: + for (--i; i >= 0; --i) + ibv_destroy_qp(ctx->qp[i]); + + ibv_destroy_srq(ctx->srq); + +clean_cq: + ibv_destroy_cq(ctx->cq); + +clean_mr: + ibv_dereg_mr(ctx->mr); + +clean_pd: + ibv_dealloc_pd(ctx->pd); + +clean_comp_channel: + if (ctx->channel) + ibv_destroy_comp_channel(ctx->channel); + +clean_device: + ibv_close_device(ctx->context); + +clean_buffer: + free(ctx->buf); + +clean_ctx: + free(ctx); + + return NULL; +} + +static int pp_close_ctx(struct pingpong_context *ctx, int num_qp) +{ + int i; + + for (i = 0; i < num_qp; ++i) { + if (ibv_destroy_qp(ctx->qp[i])) { + fprintf(stderr, "Couldn't destroy QP[%d]\n", i); + return 1; + } + } + + if (ibv_destroy_srq(ctx->srq)) { + fprintf(stderr, "Couldn't destroy 
SRQ\n"); + return 1; + } + + if (ibv_destroy_cq(ctx->cq)) { + fprintf(stderr, "Couldn't destroy CQ\n"); + return 1; + } + + if (ibv_dereg_mr(ctx->mr)) { + fprintf(stderr, "Couldn't deregister MR\n"); + return 1; + } + + if (ibv_dealloc_pd(ctx->pd)) { + fprintf(stderr, "Couldn't deallocate PD\n"); + return 1; + } + + if (ctx->channel) { + if (ibv_destroy_comp_channel(ctx->channel)) { + fprintf(stderr, "Couldn't destroy completion channel\n"); + return 1; + } + } + + if (ibv_close_device(ctx->context)) { + fprintf(stderr, "Couldn't release context\n"); + return 1; + } + + free(ctx->buf); + free(ctx); + + return 0; +} + +static int pp_post_recv(struct pingpong_context *ctx, int n) +{ + struct ibv_sge list = { + .addr = (uintptr_t) ctx->buf, + .length = ctx->size, + .lkey = ctx->mr->lkey + }; + struct ibv_recv_wr wr = { + .wr_id = PINGPONG_RECV_WRID, + .sg_list = &list, + .num_sge = 1, + }; + struct ibv_recv_wr *bad_wr; + int i; + + for (i = 0; i < n; ++i) + if (ibv_post_srq_recv(ctx->srq, &wr, &bad_wr)) + break; + + return i; +} + +static int pp_post_send(struct pingpong_context *ctx, int qp_index) +{ + struct ibv_sge list = { + .addr = (uintptr_t) ctx->buf, + .length = ctx->size, + .lkey = ctx->mr->lkey + }; + struct ibv_send_wr wr = { + .wr_id = PINGPONG_SEND_WRID, + .sg_list = &list, + .num_sge = 1, + .opcode = IBV_WR_SEND, + .send_flags = ctx->send_flags, + }; + struct ibv_send_wr *bad_wr; + + return ibv_post_send(ctx->qp[qp_index], &wr, &bad_wr); +} + +static int find_qp(int qpn, struct pingpong_context *ctx, int num_qp) +{ + int i; + + for (i = 0; i < num_qp; ++i) + if (ctx->qp[i]->qp_num == qpn) + return i; + + return -1; +} + +static void usage(const char *argv0) +{ + printf("Usage:\n"); + printf(" %s start a server and wait for connection\n", argv0); + printf(" %s connect to server at \n", argv0); + printf("\n"); + printf("Options:\n"); + printf(" -p, --port= listen on/connect to port (default 18515)\n"); + printf(" -d, --ib-dev= use IB device (default first 
device found)\n"); + printf(" -i, --ib-port= use port of IB device (default 1)\n"); + printf(" -s, --size= size of message to exchange (default 4096)\n"); + printf(" -m, --mtu= path MTU (default 1024)\n"); + printf(" -q, --num-qp= number of QPs to use (default 16)\n"); + printf(" -r, --rx-depth= number of receives to post at a time (default 500)\n"); + printf(" -n, --iters= number of exchanges per QP(default 1000)\n"); + printf(" -l, --sl= service level value\n"); + printf(" -e, --events sleep on CQ events (default poll)\n"); + printf(" -g, --gid-idx= local port gid index\n"); + printf(" -c, --chk validate received buffer\n"); +} + +int main(int argc, char *argv[]) +{ + struct ibv_device **dev_list; + struct ibv_device *ib_dev; + struct ibv_wc *wc; + struct pingpong_context *ctx; + struct pingpong_dest my_dest[MAX_QP]; + struct pingpong_dest *rem_dest; + struct timeval start, end; + char *ib_devname = NULL; + char *servername = NULL; + unsigned int port = 18515; + int ib_port = 1; + unsigned int size = 4096; + enum ibv_mtu mtu = IBV_MTU_1024; + unsigned int num_qp = 16; + unsigned int rx_depth = 500; + unsigned int iters = 1000; + int use_event = 0; + int routs; + int rcnt, scnt; + int num_wc; + int i; + int num_cq_events = 0; + int sl = 0; + int gidx = -1; + char gid[33]; + + srand48(getpid() * time(NULL)); + + while (1) { + int c; + + static struct option long_options[] = { + { .name = "port", .has_arg = 1, .val = 'p' }, + { .name = "ib-dev", .has_arg = 1, .val = 'd' }, + { .name = "ib-port", .has_arg = 1, .val = 'i' }, + { .name = "size", .has_arg = 1, .val = 's' }, + { .name = "mtu", .has_arg = 1, .val = 'm' }, + { .name = "num-qp", .has_arg = 1, .val = 'q' }, + { .name = "rx-depth", .has_arg = 1, .val = 'r' }, + { .name = "iters", .has_arg = 1, .val = 'n' }, + { .name = "sl", .has_arg = 1, .val = 'l' }, + { .name = "events", .has_arg = 0, .val = 'e' }, + { .name = "gid-idx", .has_arg = 1, .val = 'g' }, + { .name = "chk", .has_arg = 0, .val = 'c' }, + {} + }; + 
+ c = getopt_long(argc, argv, "p:d:i:s:m:q:r:n:l:eg:c:", + long_options, NULL); + if (c == -1) + break; + + switch (c) { + case 'p': + port = strtoul(optarg, NULL, 0); + if (port > 65535) { + usage(argv[0]); + return 1; + } + break; + + case 'd': + ib_devname = strdupa(optarg); + break; + + case 'i': + ib_port = strtol(optarg, NULL, 0); + if (ib_port < 1) { + usage(argv[0]); + return 1; + } + break; + + case 's': + size = strtoul(optarg, NULL, 0); + if (size < 1) { + usage(argv[0]); + return 1; + } + break; + + case 'm': + mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0)); + if (mtu == 0) { + usage(argv[0]); + return 1; + } + break; + + case 'q': + num_qp = strtoul(optarg, NULL, 0); + break; + + case 'r': + rx_depth = strtoul(optarg, NULL, 0); + break; + + case 'n': + iters = strtoul(optarg, NULL, 0); + break; + + case 'l': + sl = strtol(optarg, NULL, 0); + break; + + case 'e': + ++use_event; + break; + + case 'g': + gidx = strtol(optarg, NULL, 0); + break; + + case 'c': + validate_buf = 1; + break; + + default: + usage(argv[0]); + return 1; + } + } + + if (optind == argc - 1) + servername = strdupa(argv[optind]); + else if (optind < argc) { + usage(argv[0]); + return 1; + } + + if (num_qp > rx_depth) { + fprintf(stderr, "rx_depth %d is too small for %d QPs -- " + "must have at least one receive per QP.\n", + rx_depth, num_qp); + return 1; + } + + num_wc = num_qp + rx_depth; + wc = alloca(num_wc * sizeof *wc); + + page_size = PAGE_SIZE; + + dev_list = ibv_get_device_list(NULL); + if (!dev_list) { + perror("Failed to get IB devices list"); + return 1; + } + + if (!ib_devname) { + ib_dev = *dev_list; + if (!ib_dev) { + fprintf(stderr, "No IB devices found\n"); + return 1; + } + } else { + for (i = 0; dev_list[i]; ++i) + if (!strcmp(ibv_get_device_name(dev_list[i]), ib_devname)) + break; + ib_dev = dev_list[i]; + if (!ib_dev) { + fprintf(stderr, "IB device %s not found\n", ib_devname); + return 1; + } + } + + ctx = pp_init_ctx(ib_dev, size, num_qp, rx_depth, ib_port, 
use_event); + if (!ctx) + return 1; + + routs = pp_post_recv(ctx, ctx->rx_depth); + if (routs < ctx->rx_depth) { + fprintf(stderr, "Couldn't post receive (%d)\n", routs); + return 1; + } + + if (use_event) + if (ibv_req_notify_cq(ctx->cq, 0)) { + fprintf(stderr, "Couldn't request CQ notification\n"); + return 1; + } + + memset(my_dest, 0, sizeof my_dest); + + if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) { + fprintf(stderr, "Couldn't get port info\n"); + return 1; + } + for (i = 0; i < num_qp; ++i) { + my_dest[i].qpn = ctx->qp[i]->qp_num; + my_dest[i].psn = lrand48() & 0xffffff; + my_dest[i].lid = ctx->portinfo.lid; + if (ctx->portinfo.link_layer != IBV_LINK_LAYER_ETHERNET + && !my_dest[i].lid) { + fprintf(stderr, "Couldn't get local LID\n"); + return 1; + } + + if (gidx >= 0) { + if (ibv_query_gid(ctx->context, ib_port, gidx, + &my_dest[i].gid)) { + fprintf(stderr, "Could not get local gid for " + "gid index %d\n", gidx); + return 1; + } + } else + memset(&my_dest[i].gid, 0, sizeof my_dest[i].gid); + + printf(" local address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x, \n", + my_dest[i].lid, my_dest[i].qpn, my_dest[i].psn); + } + + if (servername) + rem_dest = pp_client_exch_dest(servername, port, my_dest); + else + rem_dest = pp_server_exch_dest(ctx, ib_port, mtu, port, sl, + my_dest, gidx); + + if (!rem_dest) + return 1; + + for (i = 0; i < num_qp; ++i) { + printf(" remote address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x\n", + rem_dest[i].lid, rem_dest[i].qpn, rem_dest[i].psn); + } + + if (servername) + if (pp_connect_ctx(ctx, ib_port, mtu, sl, my_dest, rem_dest, gidx)) + return 1; + + if (servername) { + if (validate_buf) + for (i = 0; i < size; i += page_size) + ctx->buf[i] = i / page_size % sizeof(char); + + for (i = 0; i < num_qp; ++i) { + if (pp_post_send(ctx, i)) { + fprintf(stderr, "Couldn't post send\n"); + return 1; + } + ctx->pending[i] = PINGPONG_SEND_WRID | PINGPONG_RECV_WRID; + } + } else + for (i = 0; i < num_qp; ++i) + ctx->pending[i] = 
PINGPONG_RECV_WRID; + + if (gettimeofday(&start, NULL)) { + perror("gettimeofday"); + return 1; + } + + rcnt = scnt = 0; + while (rcnt < iters || scnt < iters) { + if (use_event) { + struct ibv_cq *ev_cq; + void *ev_ctx; + + if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) { + fprintf(stderr, "Failed to get cq_event\n"); + return 1; + } + + ++num_cq_events; + + if (ev_cq != ctx->cq) { + fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq); + return 1; + } + + if (ibv_req_notify_cq(ctx->cq, 0)) { + fprintf(stderr, "Couldn't request CQ notification\n"); + return 1; + } + } + + { + int ne, qp_ind; + + do { + ne = ibv_poll_cq(ctx->cq, num_wc, wc); + if (ne < 0) { + fprintf(stderr, "poll CQ failed %d\n", ne); + return 1; + } + } while (!use_event && ne < 1); + + for (i = 0; i < ne; ++i) { + if (wc[i].status != IBV_WC_SUCCESS) { + fprintf(stderr, "Failed status %s (%d) for wr_id %d\n", + ibv_wc_status_str(wc[i].status), wc[i].status, (int) wc[i].wr_id); + return 1; + } + + qp_ind = find_qp(wc[i].qp_num, ctx, num_qp); + if (qp_ind < 0) { + fprintf(stderr, "Couldn't find QPN %06x\n", + wc[i].qp_num); + return 1; + } + + switch ((int) wc[i].wr_id) { + case PINGPONG_SEND_WRID: + ++scnt; + break; + + case PINGPONG_RECV_WRID: + if (--routs <= num_qp) { + routs += pp_post_recv(ctx, ctx->rx_depth - routs); + if (routs < ctx->rx_depth) { + fprintf(stderr, + "Couldn't post receive (%d)\n", + routs); + return 1; + } + } + + ++rcnt; + break; + + default: + fprintf(stderr, "Completion for unknown wr_id %d\n", + (int) wc[i].wr_id); + return 1; + } + + ctx->pending[qp_ind] &= ~(int) wc[i].wr_id; + if (scnt < iters && !ctx->pending[qp_ind]) { + if (pp_post_send(ctx, qp_ind)) { + fprintf(stderr, "Couldn't post send\n"); + return 1; + } + ctx->pending[qp_ind] = PINGPONG_RECV_WRID | + PINGPONG_SEND_WRID; + } + + } + } + } + + if (gettimeofday(&end, NULL)) { + perror("gettimeofday"); + return 1; + } + + { + float usec = (end.tv_sec - start.tv_sec) * 1000000 + + (end.tv_usec - 
start.tv_usec); + long long bytes = (long long) size * iters * 2; + + printf("%lld bytes in %.2f seconds = %.2f Mbit/sec\n", + bytes, usec / 1000000., bytes * 8. / usec); + printf("%d iters in %.2f seconds = %.2f usec/iter\n", + iters, usec / 1000000., usec / iters); + + if ((!servername) && (validate_buf)) { + for (i = 0; i < size; i += page_size) + if (ctx->buf[i] != i / page_size % sizeof(char)) + printf("invalid data in page %d\n", + i / page_size); + } + } + + ibv_ack_cq_events(ctx->cq, num_cq_events); + + if (pp_close_ctx(ctx, num_qp)) + return 1; + + ibv_free_device_list(dev_list); + free(rem_dest); + + return 0; +} diff --git a/usr/tests/ib/uc_pingpong.c b/usr/tests/ib/uc_pingpong.c new file mode 100644 index 000000000..d33b6a9ea --- /dev/null +++ b/usr/tests/ib/uc_pingpong.c @@ -0,0 +1,872 @@ +/* + * Copyright (c) 2005 Topspin Communications. + * 2018 Annika Wierichs, RWTH Aachen. + * All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pingpong.h" + +enum { + PINGPONG_RECV_WRID = 1, + PINGPONG_SEND_WRID = 2, +}; + +static int page_size; +static int validate_buf; + +struct pingpong_context { + struct ibv_context *context; + struct ibv_comp_channel *channel; + struct ibv_pd *pd; + struct ibv_mr *mr; + struct ibv_cq *cq; + struct ibv_qp *qp; + char *buf; + int size; + int send_flags; + int rx_depth; + int pending; + struct ibv_port_attr portinfo; +}; + +struct pingpong_dest { + int lid; + int qpn; + int psn; + union ibv_gid gid; +}; + +static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn, + enum ibv_mtu mtu, int sl, + struct pingpong_dest *dest, int sgid_idx) +{ + struct ibv_qp_attr attr = { + .qp_state = IBV_QPS_RTR, + .path_mtu = mtu, + .dest_qp_num = dest->qpn, + .rq_psn = dest->psn, + .ah_attr = { + .is_global = 0, + .dlid = dest->lid, + .sl = sl, + .src_path_bits = 0, + .port_num = port + } + }; + + if (dest->gid.global.interface_id) { + attr.ah_attr.is_global = 1; + attr.ah_attr.grh.hop_limit = 1; + attr.ah_attr.grh.dgid = dest->gid; + attr.ah_attr.grh.sgid_index = sgid_idx; + } + if (ibv_modify_qp(ctx->qp, &attr, + IBV_QP_STATE | + IBV_QP_AV | + IBV_QP_PATH_MTU | + IBV_QP_DEST_QPN | + IBV_QP_RQ_PSN)) { + fprintf(stderr, "Failed to modify QP to RTR\n"); + return 1; + } + + attr.qp_state = IBV_QPS_RTS; + attr.sq_psn = my_psn; + if (ibv_modify_qp(ctx->qp, &attr, + IBV_QP_STATE | + IBV_QP_SQ_PSN)) { + fprintf(stderr, "Failed to modify QP to RTS\n"); + return 1; + } + + return 0; +} + +static struct pingpong_dest *pp_client_exch_dest(const char 
*servername, int port, + const struct pingpong_dest *my_dest) +{ + struct addrinfo *res, *t; + struct addrinfo hints = { + .ai_family = AF_UNSPEC, + .ai_socktype = SOCK_STREAM + }; + char *service; + char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"]; + int n; + int sockfd = -1; + struct pingpong_dest *rem_dest = NULL; + char gid[33]; + + if (asprintf(&service, "%d", port) < 0) + return NULL; + + n = getaddrinfo(servername, service, &hints, &res); + + if (n < 0) { + fprintf(stderr, "Error for %s:%d\n", servername, port); + free(service); + return NULL; + } + + for (t = res; t; t = t->ai_next) { + sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol); + if (sockfd >= 0) { + if (!connect(sockfd, t->ai_addr, t->ai_addrlen)) + break; + close(sockfd); + sockfd = -1; + } + } + + freeaddrinfo(res); + free(service); + + if (sockfd < 0) { + fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port); + return NULL; + } + + gid_to_wire_gid(&my_dest->gid, gid); + sprintf(msg, "%04x:%06x:%06x:%s", my_dest->lid, my_dest->qpn, + my_dest->psn, gid); + if (write(sockfd, msg, sizeof msg) != sizeof msg) { + fprintf(stderr, "Couldn't send local address\n"); + goto out; + } + + if (read(sockfd, msg, sizeof msg) != sizeof msg || + write(sockfd, "done", sizeof "done") != sizeof "done") { + perror("client read/write"); + fprintf(stderr, "Couldn't read/write remote address\n"); + goto out; + } + + + rem_dest = malloc(sizeof *rem_dest); + if (!rem_dest) + goto out; + + sscanf(msg, "%x:%x:%x:%s", &rem_dest->lid, &rem_dest->qpn, + &rem_dest->psn, gid); + wire_gid_to_gid(gid, &rem_dest->gid); + +out: + close(sockfd); + return rem_dest; +} + +static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx, + int ib_port, enum ibv_mtu mtu, + int port, int sl, + const struct pingpong_dest *my_dest, + int sgid_idx) +{ + struct addrinfo *res, *t; + struct addrinfo hints = { + .ai_flags = AI_PASSIVE, + .ai_family = AF_UNSPEC, + .ai_socktype = 
SOCK_STREAM + }; + char *service; + char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"]; + int n; + int sockfd = -1, connfd; + struct pingpong_dest *rem_dest = NULL; + char gid[33]; + + if (asprintf(&service, "%d", port) < 0) + return NULL; + + n = getaddrinfo(NULL, service, &hints, &res); + + if (n < 0) { + fprintf(stderr, "Error for port %d\n", port); + free(service); + return NULL; + } + + for (t = res; t; t = t->ai_next) { + sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol); + if (sockfd >= 0) { + n = 1; + + setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n); + + if (!bind(sockfd, t->ai_addr, t->ai_addrlen)) + break; + close(sockfd); + sockfd = -1; + } + } + + freeaddrinfo(res); + free(service); + + if (sockfd < 0) { + fprintf(stderr, "Couldn't listen to port %d\n", port); + return NULL; + } + + listen(sockfd, 1); + connfd = accept(sockfd, NULL, NULL); + close(sockfd); + if (connfd < 0) { + fprintf(stderr, "accept() failed\n"); + return NULL; + } + + n = read(connfd, msg, sizeof msg); + if (n != sizeof msg) { + perror("server read"); + fprintf(stderr, "%d/%d: Couldn't read remote address\n", n, (int) sizeof msg); + goto out; + } + + rem_dest = malloc(sizeof *rem_dest); + if (!rem_dest) + goto out; + + sscanf(msg, "%x:%x:%x:%s", &rem_dest->lid, &rem_dest->qpn, + &rem_dest->psn, gid); + wire_gid_to_gid(gid, &rem_dest->gid); + + if (pp_connect_ctx(ctx, ib_port, my_dest->psn, mtu, sl, rem_dest, + sgid_idx)) { + fprintf(stderr, "Couldn't connect to remote QP\n"); + free(rem_dest); + rem_dest = NULL; + goto out; + } + + + gid_to_wire_gid(&my_dest->gid, gid); + sprintf(msg, "%04x:%06x:%06x:%s", my_dest->lid, my_dest->qpn, + my_dest->psn, gid); + if (write(connfd, msg, sizeof msg) != sizeof msg || + read(connfd, msg, sizeof msg) != sizeof "done") { + fprintf(stderr, "Couldn't send/recv local address\n"); + free(rem_dest); + rem_dest = NULL; + goto out; + } + +out: + close(connfd); + return rem_dest; +} + +static struct 
pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size, + int rx_depth, int port, + int use_event) +{ + struct pingpong_context *ctx; + + ctx = calloc(1, sizeof *ctx); + if (!ctx) + return NULL; + + ctx->size = size; + ctx->send_flags = IBV_SEND_SIGNALED; + ctx->rx_depth = rx_depth; + + ctx->buf = memalign(page_size, size); + if (!ctx->buf) { + fprintf(stderr, "Couldn't allocate work buf.\n"); + goto clean_ctx; + } + + /* FIXME memset(ctx->buf, 0, size); */ + memset(ctx->buf, 0x7b, size); + + ctx->context = ibv_open_device(ib_dev); + if (!ctx->context) { + fprintf(stderr, "Couldn't get context for %s\n", + ibv_get_device_name(ib_dev)); + goto clean_buffer; + } + + if (use_event) { + ctx->channel = ibv_create_comp_channel(ctx->context); + if (!ctx->channel) { + fprintf(stderr, "Couldn't create completion channel\n"); + goto clean_device; + } + } else + ctx->channel = NULL; + + ctx->pd = ibv_alloc_pd(ctx->context); + if (!ctx->pd) { + fprintf(stderr, "Couldn't allocate PD\n"); + goto clean_comp_channel; + } + + ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size, IBV_ACCESS_LOCAL_WRITE); + if (!ctx->mr) { + fprintf(stderr, "Couldn't register MR\n"); + goto clean_pd; + } + + ctx->cq = ibv_create_cq(ctx->context, rx_depth + 1, NULL, + ctx->channel, 0); + if (!ctx->cq) { + fprintf(stderr, "Couldn't create CQ\n"); + goto clean_mr; + } + + { + struct ibv_qp_attr attr; + struct ibv_qp_init_attr init_attr = { + .send_cq = ctx->cq, + .recv_cq = ctx->cq, + .cap = { + .max_send_wr = 1, + .max_recv_wr = rx_depth, + .max_send_sge = 1, + .max_recv_sge = 1 + }, + .qp_type = IBV_QPT_UC + }; + + ctx->qp = ibv_create_qp(ctx->pd, &init_attr); + if (!ctx->qp) { + fprintf(stderr, "Couldn't create QP\n"); + goto clean_cq; + } + ibv_query_qp(ctx->qp, &attr, IBV_QP_CAP, &init_attr); + if (init_attr.cap.max_inline_data >= size) { + ctx->send_flags |= IBV_SEND_INLINE; + } + } + + { + struct ibv_qp_attr attr = { + .qp_state = IBV_QPS_INIT, + .pkey_index = 0, + .port_num = port, + 
.qp_access_flags = 0 + }; + + if (ibv_modify_qp(ctx->qp, &attr, + IBV_QP_STATE | + IBV_QP_PKEY_INDEX | + IBV_QP_PORT | + IBV_QP_ACCESS_FLAGS)) { + fprintf(stderr, "Failed to modify QP to INIT\n"); + goto clean_qp; + } + } + + return ctx; + +clean_qp: + ibv_destroy_qp(ctx->qp); + +clean_cq: + ibv_destroy_cq(ctx->cq); + +clean_mr: + ibv_dereg_mr(ctx->mr); + +clean_pd: + ibv_dealloc_pd(ctx->pd); + +clean_comp_channel: + if (ctx->channel) + ibv_destroy_comp_channel(ctx->channel); + +clean_device: + ibv_close_device(ctx->context); + +clean_buffer: + free(ctx->buf); + +clean_ctx: + free(ctx); + + return NULL; +} + +static int pp_close_ctx(struct pingpong_context *ctx) +{ + if (ibv_destroy_qp(ctx->qp)) { + fprintf(stderr, "Couldn't destroy QP\n"); + return 1; + } + + if (ibv_destroy_cq(ctx->cq)) { + fprintf(stderr, "Couldn't destroy CQ\n"); + return 1; + } + + if (ibv_dereg_mr(ctx->mr)) { + fprintf(stderr, "Couldn't deregister MR\n"); + return 1; + } + + if (ibv_dealloc_pd(ctx->pd)) { + fprintf(stderr, "Couldn't deallocate PD\n"); + return 1; + } + + if (ctx->channel) { + if (ibv_destroy_comp_channel(ctx->channel)) { + fprintf(stderr, "Couldn't destroy completion channel\n"); + return 1; + } + } + + if (ibv_close_device(ctx->context)) { + fprintf(stderr, "Couldn't release context\n"); + return 1; + } + + free(ctx->buf); + free(ctx); + + return 0; +} + +static int pp_post_recv(struct pingpong_context *ctx, int n) +{ + struct ibv_sge list = { + .addr = (uintptr_t) ctx->buf, + .length = ctx->size, + .lkey = ctx->mr->lkey + }; + struct ibv_recv_wr wr = { + .wr_id = PINGPONG_RECV_WRID, + .sg_list = &list, + .num_sge = 1, + }; + struct ibv_recv_wr *bad_wr; + int i; + + for (i = 0; i < n; ++i) + if (ibv_post_recv(ctx->qp, &wr, &bad_wr)) + break; + + return i; +} + +static int pp_post_send(struct pingpong_context *ctx) +{ + struct ibv_sge list = { + .addr = (uintptr_t) ctx->buf, + .length = ctx->size, + .lkey = ctx->mr->lkey + }; + struct ibv_send_wr wr = { + .wr_id = 
PINGPONG_SEND_WRID, + .sg_list = &list, + .num_sge = 1, + .opcode = IBV_WR_SEND, + .send_flags = ctx->send_flags, + }; + struct ibv_send_wr *bad_wr; + + return ibv_post_send(ctx->qp, &wr, &bad_wr); +} + +static void usage(const char *argv0) +{ + printf("Usage:\n"); + printf(" %s start a server and wait for connection\n", argv0); + printf(" %s connect to server at \n", argv0); + printf("\n"); + printf("Options:\n"); + printf(" -p, --port= listen on/connect to port (default 18515)\n"); + printf(" -d, --ib-dev= use IB device (default first device found)\n"); + printf(" -i, --ib-port= use port of IB device (default 1)\n"); + printf(" -s, --size= size of message to exchange (default 4096)\n"); + printf(" -m, --mtu= path MTU (default 1024)\n"); + printf(" -r, --rx-depth= number of receives to post at a time (default 500)\n"); + printf(" -n, --iters= number of exchanges (default 1000)\n"); + printf(" -l, --sl= service level value\n"); + printf(" -e, --events sleep on CQ events (default poll)\n"); + printf(" -g, --gid-idx= local port gid index\n"); + printf(" -c, --chk validate received buffer\n"); +} + +int main(int argc, char *argv[]) +{ + struct ibv_device **dev_list; + struct ibv_device *ib_dev; + struct pingpong_context *ctx; + struct pingpong_dest my_dest; + struct pingpong_dest *rem_dest; + struct timeval start, end; + char *ib_devname = NULL; + char *servername = NULL; + unsigned int port = 18515; + int ib_port = 1; + unsigned int size = 4096; + enum ibv_mtu mtu = IBV_MTU_1024; + unsigned int rx_depth = 500; + unsigned int iters = 1000; + int use_event = 0; + int routs; + int rcnt, scnt; + int num_cq_events = 0; + int sl = 0; + int gidx = -1; + char gid[33]; + + srand48(getpid() * time(NULL)); + + while (1) { + int c; + + static struct option long_options[] = { + { .name = "port", .has_arg = 1, .val = 'p' }, + { .name = "ib-dev", .has_arg = 1, .val = 'd' }, + { .name = "ib-port", .has_arg = 1, .val = 'i' }, + { .name = "size", .has_arg = 1, .val = 's' }, + { .name 
= "mtu", .has_arg = 1, .val = 'm' }, + { .name = "rx-depth", .has_arg = 1, .val = 'r' }, + { .name = "iters", .has_arg = 1, .val = 'n' }, + { .name = "sl", .has_arg = 1, .val = 'l' }, + { .name = "events", .has_arg = 0, .val = 'e' }, + { .name = "gid-idx", .has_arg = 1, .val = 'g' }, + { .name = "chk", .has_arg = 0, .val = 'c' }, + {} + }; + + /* -c/--chk is a plain flag (has_arg = 0 above), so 'c' must not be followed by ':' in the short-option string; otherwise -c swallows the next argument. */ + c = getopt_long(argc, argv, "p:d:i:s:m:r:n:l:eg:c", + long_options, NULL); + if (c == -1) + break; + + switch (c) { + case 'p': + port = strtoul(optarg, NULL, 0); + if (port > 65535) { + usage(argv[0]); + return 1; + } + break; + + case 'd': + ib_devname = strdupa(optarg); + break; + + case 'i': + ib_port = strtol(optarg, NULL, 0); + if (ib_port < 1) { + usage(argv[0]); + return 1; + } + break; + + case 's': + size = strtoul(optarg, NULL, 0); + break; + + case 'm': + mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0)); + if (mtu == 0) { + usage(argv[0]); + return 1; + } + break; + + case 'r': + rx_depth = strtoul(optarg, NULL, 0); + break; + + case 'n': + iters = strtoul(optarg, NULL, 0); + break; + + case 'l': + sl = strtol(optarg, NULL, 0); + break; + + case 'e': + ++use_event; + break; + + case 'g': + gidx = strtol(optarg, NULL, 0); + break; + + case 'c': + validate_buf = 1; + break; + + default: + usage(argv[0]); + return 1; + } + } + + if (optind == argc - 1) + servername = strdupa(argv[optind]); + else if (optind < argc) { + usage(argv[0]); + return 1; + } + + page_size = PAGE_SIZE; + + dev_list = ibv_get_device_list(NULL); + if (!dev_list) { + perror("Failed to get IB devices list"); + return 1; + } + + if (!ib_devname) { + ib_dev = *dev_list; + if (!ib_dev) { + fprintf(stderr, "No IB devices found\n"); + return 1; + } + } else { + int i; + for (i = 0; dev_list[i]; ++i) + if (!strcmp(ibv_get_device_name(dev_list[i]), ib_devname)) + break; + ib_dev = dev_list[i]; + if (!ib_dev) { + fprintf(stderr, "IB device %s not found\n", ib_devname); + return 1; + } + } + + ctx = pp_init_ctx(ib_dev, size, rx_depth, ib_port, use_event); + 
if (!ctx) + return 1; + + routs = pp_post_recv(ctx, ctx->rx_depth); + if (routs < ctx->rx_depth) { + fprintf(stderr, "Couldn't post receive (%d)\n", routs); + return 1; + } + + if (use_event) + if (ibv_req_notify_cq(ctx->cq, 0)) { + fprintf(stderr, "Couldn't request CQ notification\n"); + return 1; + } + + + if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) { + fprintf(stderr, "Couldn't get port info\n"); + return 1; + } + + my_dest.lid = ctx->portinfo.lid; + if (ctx->portinfo.link_layer != IBV_LINK_LAYER_ETHERNET && + !my_dest.lid) { + fprintf(stderr, "Couldn't get local LID\n"); + return 1; + } + + if (gidx >= 0) { + if (ibv_query_gid(ctx->context, ib_port, gidx, &my_dest.gid)) { + fprintf(stderr, "can't read sgid of index %d\n", gidx); + return 1; + } + } else + memset(&my_dest.gid, 0, sizeof my_dest.gid); + + my_dest.qpn = ctx->qp->qp_num; + my_dest.psn = lrand48() & 0xffffff; + printf(" local address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x\n", + my_dest.lid, my_dest.qpn, my_dest.psn); + + + if (servername) + rem_dest = pp_client_exch_dest(servername, port, &my_dest); + else + rem_dest = pp_server_exch_dest(ctx, ib_port, mtu, port, sl, + &my_dest, gidx); + + if (!rem_dest) + return 1; + + printf(" remote address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x\n", + rem_dest->lid, rem_dest->qpn, rem_dest->psn); + + if (servername) + if (pp_connect_ctx(ctx, ib_port, my_dest.psn, mtu, sl, rem_dest, + gidx)) + return 1; + + ctx->pending = PINGPONG_RECV_WRID; + + if (servername) { + if (validate_buf) + for (int i = 0; i < size; i += page_size) + ctx->buf[i] = i / page_size % sizeof(char); + + if (pp_post_send(ctx)) { + fprintf(stderr, "Couldn't post send\n"); + return 1; + } + ctx->pending |= PINGPONG_SEND_WRID; + } + + if (gettimeofday(&start, NULL)) { + perror("gettimeofday"); + return 1; + } + + rcnt = scnt = 0; + while (rcnt < iters || scnt < iters) { + if (use_event) { + struct ibv_cq *ev_cq; + void *ev_ctx; + + if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) { 
+ fprintf(stderr, "Failed to get cq_event\n"); + return 1; + } + + ++num_cq_events; + + if (ev_cq != ctx->cq) { + fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq); + return 1; + } + + if (ibv_req_notify_cq(ctx->cq, 0)) { + fprintf(stderr, "Couldn't request CQ notification\n"); + return 1; + } + } + + { + struct ibv_wc wc[2]; + int ne, i; + + do { + ne = ibv_poll_cq(ctx->cq, 2, wc); + if (ne < 0) { + fprintf(stderr, "poll CQ failed %d\n", ne); + return 1; + } + + } while (!use_event && ne < 1); + + for (i = 0; i < ne; ++i) { + if (wc[i].status != IBV_WC_SUCCESS) { + fprintf(stderr, "Failed status %s (%d) for wr_id %d\n", + ibv_wc_status_str(wc[i].status), + wc[i].status, (int) wc[i].wr_id); + return 1; + } + + switch ((int) wc[i].wr_id) { + case PINGPONG_SEND_WRID: + ++scnt; + break; + + case PINGPONG_RECV_WRID: + if (--routs <= 1) { + routs += pp_post_recv(ctx, ctx->rx_depth - routs); + if (routs < ctx->rx_depth) { + fprintf(stderr, + "Couldn't post receive (%d)\n", + routs); + return 1; + } + } + + ++rcnt; + break; + + default: + fprintf(stderr, "Completion for unknown wr_id %d\n", + (int) wc[i].wr_id); + return 1; + } + + ctx->pending &= ~(int) wc[i].wr_id; + if (scnt < iters && !ctx->pending) { + if (pp_post_send(ctx)) { + fprintf(stderr, "Couldn't post send\n"); + return 1; + } + ctx->pending = PINGPONG_RECV_WRID | + PINGPONG_SEND_WRID; + } + } + } + } + + if (gettimeofday(&end, NULL)) { + perror("gettimeofday"); + return 1; + } + + { + float usec = (end.tv_sec - start.tv_sec) * 1000000 + + (end.tv_usec - start.tv_usec); + long long bytes = (long long) size * iters * 2; + + printf("%lld bytes in %.2f seconds = %.2f Mbit/sec\n", + bytes, usec / 1000000., bytes * 8. 
/ usec); + printf("%d iters in %.2f seconds = %.2f usec/iter\n", + iters, usec / 1000000., usec / iters); + + if ((!servername) && (validate_buf)) { + for (int i = 0; i < size; i += page_size) + if (ctx->buf[i] != i / page_size % sizeof(char)) + printf("invalid data in page %d\n", + i / page_size); + } + } + + ibv_ack_cq_events(ctx->cq, num_cq_events); + + if (pp_close_ctx(ctx)) + return 1; + + ibv_free_device_list(dev_list); + free(rem_dest); + + return 0; +} diff --git a/usr/tests/ib/ud_pingpong.c b/usr/tests/ib/ud_pingpong.c new file mode 100644 index 000000000..2520f1f14 --- /dev/null +++ b/usr/tests/ib/ud_pingpong.c @@ -0,0 +1,879 @@ +/* + * Copyright (c) 2005 Topspin Communications. + * 2018 Annika Wierichs, RWTH Aachen. + * All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pingpong.h" + +enum { + PINGPONG_RECV_WRID = 1, + PINGPONG_SEND_WRID = 2, +}; + +static int page_size; +static int validate_buf; + +struct pingpong_context { + struct ibv_context *context; + struct ibv_comp_channel *channel; + struct ibv_pd *pd; + struct ibv_mr *mr; + struct ibv_cq *cq; + struct ibv_qp *qp; + struct ibv_ah *ah; + + char *buf; + int size; + int send_flags; + int rx_depth; + int pending; + struct ibv_port_attr portinfo; +}; + +struct pingpong_dest { + int lid; + int qpn; + int psn; + union ibv_gid gid; +}; + +static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn, + int sl, struct pingpong_dest *dest, int sgid_idx) +{ + struct ibv_ah_attr ah_attr = { + .is_global = 0, + .dlid = dest->lid, + .sl = sl, + .src_path_bits = 0, + .port_num = port + }; + struct ibv_qp_attr attr = { + .qp_state = IBV_QPS_RTR + }; + + if (ibv_modify_qp(ctx->qp, &attr, IBV_QP_STATE)) { + fprintf(stderr, "Failed to modify QP to RTR\n"); + return 1; + } + + attr.qp_state = IBV_QPS_RTS; + attr.sq_psn = my_psn; + + if (ibv_modify_qp(ctx->qp, &attr, + IBV_QP_STATE | + IBV_QP_SQ_PSN)) { + fprintf(stderr, "Failed to modify QP to RTS\n"); + return 1; + } + + if (dest->gid.global.interface_id) { + ah_attr.is_global = 1; + ah_attr.grh.hop_limit = 1; + ah_attr.grh.dgid = dest->gid; + ah_attr.grh.sgid_index = sgid_idx; + } + + ctx->ah = ibv_create_ah(ctx->pd, &ah_attr); + if (!ctx->ah) { + fprintf(stderr, "Failed to create AH\n"); + return 1; + } + + return 0; +} + +static struct pingpong_dest *pp_client_exch_dest(const char *servername, 
int port, + const struct pingpong_dest *my_dest) +{ + struct addrinfo *res, *t; + struct addrinfo hints = { + .ai_family = AF_UNSPEC, + .ai_socktype = SOCK_STREAM + }; + char *service; + char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"]; + int n; + int sockfd = -1; + struct pingpong_dest *rem_dest = NULL; + char gid[33]; + + if (asprintf(&service, "%d", port) < 0) + return NULL; + + n = getaddrinfo(servername, service, &hints, &res); + + /* getaddrinfo() returns 0 on success and a non-zero EAI_* code on failure; the sign of that code is not specified, and res is only valid on success. */ + if (n != 0) { + fprintf(stderr, "Error for %s:%d\n", servername, port); + free(service); + return NULL; + } + + for (t = res; t; t = t->ai_next) { + sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol); + if (sockfd >= 0) { + if (!connect(sockfd, t->ai_addr, t->ai_addrlen)) + break; + close(sockfd); + sockfd = -1; + } + } + + freeaddrinfo(res); + free(service); + + if (sockfd < 0) { + fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port); + return NULL; + } + + gid_to_wire_gid(&my_dest->gid, gid); + sprintf(msg, "%04x:%06x:%06x:%s", my_dest->lid, my_dest->qpn, + my_dest->psn, gid); + if (write(sockfd, msg, sizeof msg) != sizeof msg) { + fprintf(stderr, "Couldn't send local address\n"); + goto out; + } + + if (read(sockfd, msg, sizeof msg) != sizeof msg || + write(sockfd, "done", sizeof "done") != sizeof "done") { + perror("client read/write"); + fprintf(stderr, "Couldn't read/write remote address\n"); + goto out; + } + + rem_dest = malloc(sizeof *rem_dest); + if (!rem_dest) + goto out; + + /* msg was filled by the peer: force termination and bound the GID field to the 32 hex digits that gid[33] can hold. */ + msg[sizeof msg - 1] = '\0'; + sscanf(msg, "%x:%x:%x:%32s", &rem_dest->lid, &rem_dest->qpn, + &rem_dest->psn, gid); + wire_gid_to_gid(gid, &rem_dest->gid); + +out: + close(sockfd); + return rem_dest; +} + +static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx, + int ib_port, int port, int sl, + const struct pingpong_dest *my_dest, + int sgid_idx) +{ + struct addrinfo *res, *t; + struct addrinfo hints = { + .ai_flags = AI_PASSIVE, + .ai_family = AF_UNSPEC, + .ai_socktype = SOCK_STREAM + }; + char *service; + char 
msg[sizeof "0000:000000:000000:00000000000000000000000000000000"]; + int n; + int sockfd = -1, connfd; + struct pingpong_dest *rem_dest = NULL; + char gid[33]; + + if (asprintf(&service, "%d", port) < 0) + return NULL; + + n = getaddrinfo(NULL, service, &hints, &res); + + /* getaddrinfo() returns 0 on success and a non-zero EAI_* code on failure; the sign of that code is not specified, and res is only valid on success. */ + if (n != 0) { + fprintf(stderr, "Error for port %d\n", port); + free(service); + return NULL; + } + + for (t = res; t; t = t->ai_next) { + sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol); + if (sockfd >= 0) { + n = 1; + + setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n); + + if (!bind(sockfd, t->ai_addr, t->ai_addrlen)) + break; + close(sockfd); + sockfd = -1; + } + } + + freeaddrinfo(res); + free(service); + + if (sockfd < 0) { + fprintf(stderr, "Couldn't listen to port %d\n", port); + return NULL; + } + + listen(sockfd, 1); + connfd = accept(sockfd, NULL, NULL); + close(sockfd); + if (connfd < 0) { + fprintf(stderr, "accept() failed\n"); + return NULL; + } + + n = read(connfd, msg, sizeof msg); + if (n != sizeof msg) { + perror("server read"); + fprintf(stderr, "%d/%d: Couldn't read remote address\n", n, (int) sizeof msg); + goto out; + } + + rem_dest = malloc(sizeof *rem_dest); + if (!rem_dest) + goto out; + + /* msg was filled by the peer: force termination and bound the GID field to the 32 hex digits that gid[33] can hold. */ + msg[sizeof msg - 1] = '\0'; + sscanf(msg, "%x:%x:%x:%32s", &rem_dest->lid, &rem_dest->qpn, + &rem_dest->psn, gid); + wire_gid_to_gid(gid, &rem_dest->gid); + + if (pp_connect_ctx(ctx, ib_port, my_dest->psn, sl, rem_dest, + sgid_idx)) { + fprintf(stderr, "Couldn't connect to remote QP\n"); + free(rem_dest); + rem_dest = NULL; + goto out; + } + + gid_to_wire_gid(&my_dest->gid, gid); + sprintf(msg, "%04x:%06x:%06x:%s", my_dest->lid, my_dest->qpn, + my_dest->psn, gid); + if (write(connfd, msg, sizeof msg) != sizeof msg || + read(connfd, msg, sizeof msg) != sizeof "done") { + fprintf(stderr, "Couldn't send/recv local address\n"); + free(rem_dest); + rem_dest = NULL; + goto out; + } +out: + close(connfd); + return rem_dest; +} + +static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int 
size, + int rx_depth, int port, + int use_event) +{ + struct pingpong_context *ctx; + + ctx = malloc(sizeof *ctx); + if (!ctx) + return NULL; + + ctx->size = size; + ctx->send_flags = IBV_SEND_SIGNALED; + ctx->rx_depth = rx_depth; + + ctx->buf = memalign(page_size, size + 40); + if (!ctx->buf) { + fprintf(stderr, "Couldn't allocate work buf.\n"); + goto clean_ctx; + } + + /* FIXME memset(ctx->buf, 0, size + 40); */ + memset(ctx->buf, 0x7b, size + 40); + + ctx->context = ibv_open_device(ib_dev); + if (!ctx->context) { + fprintf(stderr, "Couldn't get context for %s\n", + ibv_get_device_name(ib_dev)); + goto clean_buffer; + } + + { + struct ibv_port_attr port_info = {}; + int mtu; + + if (ibv_query_port(ctx->context, port, &port_info)) { + fprintf(stderr, "Unable to query port info for port %d\n", port); + goto clean_device; + } + mtu = 1 << (port_info.active_mtu + 7); + if (size > mtu) { + fprintf(stderr, "Requested size larger than port MTU (%d)\n", mtu); + goto clean_device; + } + } + + if (use_event) { + ctx->channel = ibv_create_comp_channel(ctx->context); + if (!ctx->channel) { + fprintf(stderr, "Couldn't create completion channel\n"); + goto clean_device; + } + } else + ctx->channel = NULL; + + ctx->pd = ibv_alloc_pd(ctx->context); + if (!ctx->pd) { + fprintf(stderr, "Couldn't allocate PD\n"); + goto clean_comp_channel; + } + + ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size + 40, IBV_ACCESS_LOCAL_WRITE); + if (!ctx->mr) { + fprintf(stderr, "Couldn't register MR\n"); + goto clean_pd; + } + + ctx->cq = ibv_create_cq(ctx->context, rx_depth + 1, NULL, + ctx->channel, 0); + if (!ctx->cq) { + fprintf(stderr, "Couldn't create CQ\n"); + goto clean_mr; + } + + { + struct ibv_qp_attr attr; + struct ibv_qp_init_attr init_attr = { + .send_cq = ctx->cq, + .recv_cq = ctx->cq, + .cap = { + .max_send_wr = 1, + .max_recv_wr = rx_depth, + .max_send_sge = 1, + .max_recv_sge = 1 + }, + .qp_type = IBV_QPT_UD, + }; + + ctx->qp = ibv_create_qp(ctx->pd, &init_attr); + if (!ctx->qp) { + 
fprintf(stderr, "Couldn't create QP\n"); + goto clean_cq; + } + + ibv_query_qp(ctx->qp, &attr, IBV_QP_CAP, &init_attr); + if (init_attr.cap.max_inline_data >= size) { + ctx->send_flags |= IBV_SEND_INLINE; + } + } + + { + struct ibv_qp_attr attr = { + .qp_state = IBV_QPS_INIT, + .pkey_index = 0, + .port_num = port, + .qkey = 0x11111111 + }; + + if (ibv_modify_qp(ctx->qp, &attr, + IBV_QP_STATE | + IBV_QP_PKEY_INDEX | + IBV_QP_PORT | + IBV_QP_QKEY)) { + fprintf(stderr, "Failed to modify QP to INIT\n"); + goto clean_qp; + } + } + + return ctx; + +clean_qp: + ibv_destroy_qp(ctx->qp); + +clean_cq: + ibv_destroy_cq(ctx->cq); + +clean_mr: + ibv_dereg_mr(ctx->mr); + +clean_pd: + ibv_dealloc_pd(ctx->pd); + +clean_comp_channel: + if (ctx->channel) + ibv_destroy_comp_channel(ctx->channel); + +clean_device: + ibv_close_device(ctx->context); + +clean_buffer: + free(ctx->buf); + +clean_ctx: + free(ctx); + + return NULL; +} + +static int pp_close_ctx(struct pingpong_context *ctx) +{ + if (ibv_destroy_qp(ctx->qp)) { + fprintf(stderr, "Couldn't destroy QP\n"); + return 1; + } + + if (ibv_destroy_cq(ctx->cq)) { + fprintf(stderr, "Couldn't destroy CQ\n"); + return 1; + } + + if (ibv_dereg_mr(ctx->mr)) { + fprintf(stderr, "Couldn't deregister MR\n"); + return 1; + } + + if (ibv_destroy_ah(ctx->ah)) { + fprintf(stderr, "Couldn't destroy AH\n"); + return 1; + } + + if (ibv_dealloc_pd(ctx->pd)) { + fprintf(stderr, "Couldn't deallocate PD\n"); + return 1; + } + + if (ctx->channel) { + if (ibv_destroy_comp_channel(ctx->channel)) { + fprintf(stderr, "Couldn't destroy completion channel\n"); + return 1; + } + } + + if (ibv_close_device(ctx->context)) { + fprintf(stderr, "Couldn't release context\n"); + return 1; + } + + free(ctx->buf); + free(ctx); + + return 0; +} + +static int pp_post_recv(struct pingpong_context *ctx, int n) +{ + struct ibv_sge list = { + .addr = (uintptr_t) ctx->buf, + .length = ctx->size + 40, + .lkey = ctx->mr->lkey + }; + struct ibv_recv_wr wr = { + .wr_id = 
PINGPONG_RECV_WRID, + .sg_list = &list, + .num_sge = 1, + }; + struct ibv_recv_wr *bad_wr; + int i; + + for (i = 0; i < n; ++i) + if (ibv_post_recv(ctx->qp, &wr, &bad_wr)) + break; + + return i; +} + +static int pp_post_send(struct pingpong_context *ctx, uint32_t qpn) +{ + struct ibv_sge list = { + .addr = (uintptr_t) ctx->buf + 40, + .length = ctx->size, + .lkey = ctx->mr->lkey + }; + struct ibv_send_wr wr = { + .wr_id = PINGPONG_SEND_WRID, + .sg_list = &list, + .num_sge = 1, + .opcode = IBV_WR_SEND, + .send_flags = ctx->send_flags, + .wr = { + .ud = { + .ah = ctx->ah, + .remote_qpn = qpn, + .remote_qkey = 0x11111111 + } + } + }; + struct ibv_send_wr *bad_wr; + + return ibv_post_send(ctx->qp, &wr, &bad_wr); +} + +static void usage(const char *argv0) +{ + printf("Usage:\n"); + printf(" %s start a server and wait for connection\n", argv0); + printf(" %s connect to server at \n", argv0); + printf("\n"); + printf("Options:\n"); + printf(" -p, --port= listen on/connect to port (default 18515)\n"); + printf(" -d, --ib-dev= use IB device (default first device found)\n"); + printf(" -i, --ib-port= use port of IB device (default 1)\n"); + printf(" -s, --size= size of message to exchange (default 2048)\n"); + printf(" -r, --rx-depth= number of receives to post at a time (default 500)\n"); + printf(" -n, --iters= number of exchanges (default 1000)\n"); + printf(" -l, --sl= send messages with service level (default 0)\n"); + printf(" -e, --events sleep on CQ events (default poll)\n"); + printf(" -g, --gid-idx= local port gid index\n"); + printf(" -c, --chk validate received buffer\n"); +} + +int main(int argc, char *argv[]) +{ + struct ibv_device **dev_list; + struct ibv_device *ib_dev; + struct pingpong_context *ctx; + struct pingpong_dest my_dest; + struct pingpong_dest *rem_dest; + struct timeval start, end; + char *ib_devname = NULL; + char *servername = NULL; + unsigned int port = 18515; + int ib_port = 1; + unsigned int size = 2048; + unsigned int rx_depth = 500; + 
unsigned int iters = 1000; + int use_event = 0; + int routs; + int rcnt, scnt; + int num_cq_events = 0; + int sl = 0; + int gidx = -1; + char gid[33]; + + srand48(getpid() * time(NULL)); + + while (1) { + int c; + + static struct option long_options[] = { + { .name = "port", .has_arg = 1, .val = 'p' }, + { .name = "ib-dev", .has_arg = 1, .val = 'd' }, + { .name = "ib-port", .has_arg = 1, .val = 'i' }, + { .name = "size", .has_arg = 1, .val = 's' }, + { .name = "rx-depth", .has_arg = 1, .val = 'r' }, + { .name = "iters", .has_arg = 1, .val = 'n' }, + { .name = "sl", .has_arg = 1, .val = 'l' }, + { .name = "events", .has_arg = 0, .val = 'e' }, + { .name = "gid-idx", .has_arg = 1, .val = 'g' }, + { .name = "chk", .has_arg = 0, .val = 'c' }, + {} + }; + + /* -c/--chk is a plain flag (has_arg = 0 above), so 'c' must not be followed by ':' in the short-option string; otherwise -c swallows the next argument. */ + c = getopt_long(argc, argv, "p:d:i:s:r:n:l:eg:c", long_options, + NULL); + if (c == -1) + break; + + switch (c) { + case 'p': + port = strtol(optarg, NULL, 0); + if (port > 65535) { + usage(argv[0]); + return 1; + } + break; + + case 'd': + ib_devname = strdupa(optarg); + break; + + case 'i': + ib_port = strtol(optarg, NULL, 0); + if (ib_port < 1) { + usage(argv[0]); + return 1; + } + break; + + case 's': + size = strtoul(optarg, NULL, 0); + break; + + case 'r': + rx_depth = strtoul(optarg, NULL, 0); + break; + + case 'n': + iters = strtoul(optarg, NULL, 0); + break; + + case 'l': + sl = strtol(optarg, NULL, 0); + break; + + case 'e': + ++use_event; + break; + + case 'g': + gidx = strtol(optarg, NULL, 0); + break; + + case 'c': + validate_buf = 1; + break; + + default: + usage(argv[0]); + return 1; + } + } + + if (optind == argc - 1) + servername = strdupa(argv[optind]); + else if (optind < argc) { + usage(argv[0]); + return 1; + } + + page_size = PAGE_SIZE; + + dev_list = ibv_get_device_list(NULL); + if (!dev_list) { + perror("Failed to get IB devices list"); + return 1; + } + + if (!ib_devname) { + ib_dev = *dev_list; + if (!ib_dev) { + fprintf(stderr, "No IB devices found\n"); + return 1; + } + } else { + int i; + 
for (i = 0; dev_list[i]; ++i) + if (!strcmp(ibv_get_device_name(dev_list[i]), ib_devname)) + break; + ib_dev = dev_list[i]; + if (!ib_dev) { + fprintf(stderr, "IB device %s not found\n", ib_devname); + return 1; + } + } + + ctx = pp_init_ctx(ib_dev, size, rx_depth, ib_port, use_event); + if (!ctx) + return 1; + + routs = pp_post_recv(ctx, ctx->rx_depth); + if (routs < ctx->rx_depth) { + fprintf(stderr, "Couldn't post receive (%d)\n", routs); + return 1; + } + + if (use_event) + if (ibv_req_notify_cq(ctx->cq, 0)) { + fprintf(stderr, "Couldn't request CQ notification\n"); + return 1; + } + + if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) { + fprintf(stderr, "Couldn't get port info\n"); + return 1; + } + my_dest.lid = ctx->portinfo.lid; + + my_dest.qpn = ctx->qp->qp_num; + my_dest.psn = lrand48() & 0xffffff; + + if (gidx >= 0) { + if (ibv_query_gid(ctx->context, ib_port, gidx, &my_dest.gid)) { + fprintf(stderr, "Could not get local gid for gid index " + "%d\n", gidx); + return 1; + } + } else + memset(&my_dest.gid, 0, sizeof my_dest.gid); + + printf(" local address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x\n", + my_dest.lid, my_dest.qpn, my_dest.psn); + + if (servername) + rem_dest = pp_client_exch_dest(servername, port, &my_dest); + else + rem_dest = pp_server_exch_dest(ctx, ib_port, port, sl, + &my_dest, gidx); + + if (!rem_dest) + return 1; + + printf(" remote address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x\n", + rem_dest->lid, rem_dest->qpn, rem_dest->psn); + + if (servername) + if (pp_connect_ctx(ctx, ib_port, my_dest.psn, sl, rem_dest, + gidx)) + return 1; + + ctx->pending = PINGPONG_RECV_WRID; + + if (servername) { + if (validate_buf) + for (int i = 0; i < size; i += page_size) + ctx->buf[i] = i / page_size % sizeof(char); + + if (pp_post_send(ctx, rem_dest->qpn)) { + fprintf(stderr, "Couldn't post send\n"); + return 1; + } + ctx->pending |= PINGPONG_SEND_WRID; + } + + if (gettimeofday(&start, NULL)) { + perror("gettimeofday"); + return 1; + } + + rcnt = 
scnt = 0; + while (rcnt < iters || scnt < iters) { + if (use_event) { + struct ibv_cq *ev_cq; + void *ev_ctx; + + if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) { + fprintf(stderr, "Failed to get cq_event\n"); + return 1; + } + + ++num_cq_events; + + if (ev_cq != ctx->cq) { + fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq); + return 1; + } + + if (ibv_req_notify_cq(ctx->cq, 0)) { + fprintf(stderr, "Couldn't request CQ notification\n"); + return 1; + } + } + + { + struct ibv_wc wc[2]; + int ne, i; + + do { + ne = ibv_poll_cq(ctx->cq, 2, wc); + if (ne < 0) { + fprintf(stderr, "poll CQ failed %d\n", ne); + return 1; + } + } while (!use_event && ne < 1); + + for (i = 0; i < ne; ++i) { + if (wc[i].status != IBV_WC_SUCCESS) { + fprintf(stderr, "Failed status: %s (%d) (for wr_id %d)\n", + ibv_wc_status_str(wc[i].status), + wc[i].status, (int) wc[i].wr_id); + return 1; + } + + switch ((int) wc[i].wr_id) { + case PINGPONG_SEND_WRID: + ++scnt; + break; + + case PINGPONG_RECV_WRID: + if (--routs <= 1) { + routs += pp_post_recv(ctx, ctx->rx_depth - routs); + if (routs < ctx->rx_depth) { + fprintf(stderr, + "Couldn't post receive (%d)\n", + routs); + return 1; + } + } + + ++rcnt; + break; + + default: + fprintf(stderr, "Completion for unknown wr_id %d\n", + (int) wc[i].wr_id); + return 1; + } + + ctx->pending &= ~(int) wc[i].wr_id; + if (scnt < iters && !ctx->pending) { + if (pp_post_send(ctx, rem_dest->qpn)) { + fprintf(stderr, "Couldn't post send\n"); + return 1; + } + ctx->pending = PINGPONG_RECV_WRID | + PINGPONG_SEND_WRID; + } + } + } + } + + if (gettimeofday(&end, NULL)) { + perror("gettimeofday"); + return 1; + } + + { + float usec = (end.tv_sec - start.tv_sec) * 1000000 + + (end.tv_usec - start.tv_usec); + long long bytes = (long long) size * iters * 2; + + printf("%lld bytes in %.2f seconds = %.2f Mbit/sec\n", + bytes, usec / 1000000., bytes * 8. 
/ usec); + printf("%d iters in %.2f seconds = %.2f usec/iter\n", + iters, usec / 1000000., usec / iters); + + if ((!servername) && (validate_buf)) { + for (int i = 0; i < size; i += page_size) + if (ctx->buf[i] != i / page_size % sizeof(char)) + printf("invalid data in page %d\n", + i / page_size); + } + } + + ibv_ack_cq_events(ctx->cq, num_cq_events); + + if (pp_close_ctx(ctx)) + return 1; + + ibv_free_device_list(dev_list); + free(rem_dest); + + return 0; +} diff --git a/usr/tests/ib/xsrq_pingpong.c b/usr/tests/ib/xsrq_pingpong.c new file mode 100644 index 000000000..0e5f33ff9 --- /dev/null +++ b/usr/tests/ib/xsrq_pingpong.c @@ -0,0 +1,1023 @@ +/* + * Copyright (c) 2005 Topspin Communications. + * 2011 Intel Corporation, Inc. + * 2018 Annika Wierichs, RWTH Aachen. + * All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#define _GNU_SOURCE +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pingpong.h" + +// TODO: These are not availabe in sys/stat.h. Adding here temporarily. +#define S_IRUSR 0000400 /* read permission, owner */ +#define S_IRGRP 0000040 /* read permission, group */ + +#define MSG_FORMAT "%04x:%06x:%06x:%06x:%06x:%32s" +#define MSG_SIZE 66 +#define MSG_SSCAN "%x:%x:%x:%x:%x:%s" +#define ADDR_FORMAT \ + "%8s: LID %04x, QPN RECV %06x SEND %06x, PSN %06x, SRQN %06x\n" + /* "%8s: LID %04x, QPN RECV %06x SEND %06x, PSN %06x, SRQN %06x, GID %s\n" */ +#define TERMINATION_FORMAT "%s" +#define TERMINATION_MSG_SIZE 4 +#define TERMINATION_MSG "END" +static int page_size; + +struct pingpong_dest { + union ibv_gid gid; + int lid; + int recv_qpn; + int send_qpn; + int recv_psn; + int send_psn; + int srqn; + int pp_cnt; + int sockfd; +}; + +struct pingpong_context { + struct ibv_context *context; + struct ibv_comp_channel *channel; + struct ibv_pd *pd; + struct ibv_mr *mr; + struct ibv_cq *send_cq; + struct ibv_cq *recv_cq; + struct ibv_srq *srq; + struct ibv_xrcd *xrcd; + struct ibv_qp **recv_qp; + struct ibv_qp **send_qp; + + struct pingpong_dest *rem_dest; + void *buf; + int lid; + int sl; + enum ibv_mtu mtu; + int ib_port; + int fd; + int size; + int num_clients; + int num_tests; + int use_event; + int gidx; +}; + +static struct pingpong_context ctx; + + +static int open_device(char *ib_devname) +{ + struct ibv_device **dev_list; + int i = 0; + + dev_list = ibv_get_device_list(NULL); + if (!dev_list) { + fprintf(stderr, "Failed to get IB devices list"); + return -1; + } + + if 
(ib_devname) { + for (; dev_list[i]; ++i) { + if (!strcmp(ibv_get_device_name(dev_list[i]), ib_devname)) + break; + } + } + if (!dev_list[i]) { + fprintf(stderr, "IB device %s not found\n", + ib_devname ? ib_devname : ""); + return -1; + } + + ctx.context = ibv_open_device(dev_list[i]); + if (!ctx.context) { + fprintf(stderr, "Couldn't get context for %s\n", + ibv_get_device_name(dev_list[i])); + return -1; + } + + ibv_free_device_list(dev_list); + return 0; +} + +static int create_qps(void) +{ + struct ibv_qp_init_attr_ex init; + struct ibv_qp_attr mod; + int i; + + for (i = 0; i < ctx.num_clients; ++i) { + + memset(&init, 0, sizeof init); + init.qp_type = IBV_QPT_XRC_RECV; + init.comp_mask = IBV_QP_INIT_ATTR_XRCD; + init.xrcd = ctx.xrcd; + + ctx.recv_qp[i] = ibv_create_qp_ex(ctx.context, &init); + if (!ctx.recv_qp[i]) { + fprintf(stderr, "Couldn't create recv QP[%d] errno %d\n", + i, errno); + return 1; + } + + mod.qp_state = IBV_QPS_INIT; + mod.pkey_index = 0; + mod.port_num = ctx.ib_port; + mod.qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ; + + if (ibv_modify_qp(ctx.recv_qp[i], &mod, + IBV_QP_STATE | IBV_QP_PKEY_INDEX | + IBV_QP_PORT | IBV_QP_ACCESS_FLAGS)) { + fprintf(stderr, "Failed to modify recv QP[%d] to INIT\n", i); + return 1; + } + + memset(&init, 0, sizeof init); + init.qp_type = IBV_QPT_XRC_SEND; + init.send_cq = ctx.send_cq; + init.cap.max_send_wr = ctx.num_clients * ctx.num_tests; + init.cap.max_send_sge = 1; + init.comp_mask = IBV_QP_INIT_ATTR_PD; + init.pd = ctx.pd; + + ctx.send_qp[i] = ibv_create_qp_ex(ctx.context, &init); + if (!ctx.send_qp[i]) { + fprintf(stderr, "Couldn't create send QP[%d] errno %d\n", i, errno); + return 1; + } + + mod.qp_state = IBV_QPS_INIT; + mod.pkey_index = 0; + mod.port_num = ctx.ib_port; + mod.qp_access_flags = 0; + + if (ibv_modify_qp(ctx.send_qp[i], &mod, + IBV_QP_STATE | IBV_QP_PKEY_INDEX | + IBV_QP_PORT | IBV_QP_ACCESS_FLAGS)) { + fprintf(stderr, "Failed to modify send QP[%d] to INIT\n", i); + 
return 1; + } + } + + return 0; +} + +static int pp_init_ctx(char *ib_devname) +{ + struct ibv_srq_init_attr_ex attr; + struct ibv_xrcd_init_attr xrcd_attr; + struct ibv_port_attr port_attr; + + ctx.recv_qp = calloc(ctx.num_clients, sizeof *ctx.recv_qp); + ctx.send_qp = calloc(ctx.num_clients, sizeof *ctx.send_qp); + ctx.rem_dest = calloc(ctx.num_clients, sizeof *ctx.rem_dest); + if (!ctx.recv_qp || !ctx.send_qp || !ctx.rem_dest) + return 1; + + if (open_device(ib_devname)) { + fprintf(stderr, "Failed to open device\n"); + return 1; + } + + if (pp_get_port_info(ctx.context, ctx.ib_port, &port_attr)) { + fprintf(stderr, "Failed to get port info\n"); + return 1; + } + + ctx.lid = port_attr.lid; + if (port_attr.link_layer != IBV_LINK_LAYER_ETHERNET && !ctx.lid) { + fprintf(stderr, "Couldn't get local LID\n"); + return 1; + } + + ctx.buf = memalign(page_size, ctx.size); + if (!ctx.buf) { + fprintf(stderr, "Couldn't allocate work buf.\n"); + return 1; + } + + memset(ctx.buf, 0, ctx.size); + + if (ctx.use_event) { + ctx.channel = ibv_create_comp_channel(ctx.context); + if (!ctx.channel) { + fprintf(stderr, "Couldn't create completion channel\n"); + return 1; + } + } + + ctx.pd = ibv_alloc_pd(ctx.context); + if (!ctx.pd) { + fprintf(stderr, "Couldn't allocate PD\n"); + return 1; + } + + ctx.mr = ibv_reg_mr(ctx.pd, ctx.buf, ctx.size, IBV_ACCESS_LOCAL_WRITE); + if (!ctx.mr) { + fprintf(stderr, "Couldn't register MR\n"); + return 1; + } + + ctx.fd = open("/tmp/xrc_domain", O_RDONLY | O_CREAT, S_IRUSR | S_IRGRP); + if (ctx.fd < 0) { + fprintf(stderr, "Couldn't create the file for the XRC Domain " + "but not stopping %d\n", errno); + ctx.fd = -1; + } + + memset(&xrcd_attr, 0, sizeof xrcd_attr); + xrcd_attr.comp_mask = IBV_XRCD_INIT_ATTR_FD | IBV_XRCD_INIT_ATTR_OFLAGS; + xrcd_attr.fd = ctx.fd; + xrcd_attr.oflags = O_CREAT; + ctx.xrcd = ibv_open_xrcd(ctx.context, &xrcd_attr); + if (!ctx.xrcd) { + fprintf(stderr, "Couldn't Open the XRC Domain %d\n", errno); + return 1; + } + + 
ctx.recv_cq = ibv_create_cq(ctx.context, ctx.num_clients, &ctx.recv_cq, ctx.channel, 0); + if (!ctx.recv_cq) { + fprintf(stderr, "Couldn't create recv CQ\n"); + return 1; + } + + if (ctx.use_event) { + if (ibv_req_notify_cq(ctx.recv_cq, 0)) { + fprintf(stderr, "Couldn't request CQ notification\n"); + return 1; + } + } + + ctx.send_cq = ibv_create_cq(ctx.context, ctx.num_clients, NULL, NULL, 0); + if (!ctx.send_cq) { + fprintf(stderr, "Couldn't create send CQ\n"); + return 1; + } + + memset(&attr, 0, sizeof attr); + attr.attr.max_wr = ctx.num_clients; + attr.attr.max_sge = 1; + attr.comp_mask = IBV_SRQ_INIT_ATTR_TYPE | IBV_SRQ_INIT_ATTR_XRCD | + IBV_SRQ_INIT_ATTR_CQ | IBV_SRQ_INIT_ATTR_PD; + attr.srq_type = IBV_SRQT_XRC; + attr.xrcd = ctx.xrcd; + attr.cq = ctx.recv_cq; + attr.pd = ctx.pd; + + ctx.srq = ibv_create_srq_ex(ctx.context, &attr); + if (!ctx.srq) { + fprintf(stderr, "Couldn't create SRQ\n"); + return 1; + } + + if (create_qps()) + return 1; + + return 0; +} + +static int recv_termination_ack(int index) +{ + char msg[TERMINATION_MSG_SIZE]; + int n = 0, r; + int sockfd = ctx.rem_dest[index].sockfd; + + while (n < TERMINATION_MSG_SIZE) { + r = read(sockfd, msg + n, TERMINATION_MSG_SIZE - n); + if (r < 0) { + perror("client read"); + fprintf(stderr, "%d/%d: Couldn't read remote termination ack\n", + n, TERMINATION_MSG_SIZE); + return 1; + } + n += r; + } + + if (strcmp(msg, TERMINATION_MSG)) { + fprintf(stderr, "Invalid termination ack was accepted\n"); + return 1; + } + + return 0; +} + +static int send_termination_ack(int index) +{ + char msg[TERMINATION_MSG_SIZE]; + int sockfd = ctx.rem_dest[index].sockfd; + + sprintf(msg, TERMINATION_FORMAT, TERMINATION_MSG); + + if (write(sockfd, msg, TERMINATION_MSG_SIZE) != TERMINATION_MSG_SIZE) { + fprintf(stderr, "Couldn't send termination ack\n"); + return 1; + } + + return 0; +} + +static int pp_client_termination(void) +{ + if (send_termination_ack(0)) + return 1; + if (recv_termination_ack(0)) + return 1; + + 
return 0; +} + +static int pp_server_termination(void) +{ + int i; + + for (i = 0; i < ctx.num_clients; i++) { + if (recv_termination_ack(i)) + return 1; + } + + for (i = 0; i < ctx.num_clients; i++) { + if (send_termination_ack(i)) + return 1; + } + + return 0; +} + +static int send_local_dest(int sockfd, int index) +{ + char msg[MSG_SIZE]; + char gid[33]; + uint32_t srq_num; + union ibv_gid local_gid; + + if (ctx.gidx >= 0) { + if (ibv_query_gid(ctx.context, ctx.ib_port, ctx.gidx, + &local_gid)) { + fprintf(stderr, "can't read sgid of index %d\n", + ctx.gidx); + return -1; + } + } else { + memset(&local_gid, 0, sizeof(local_gid)); + } + + ctx.rem_dest[index].recv_psn = lrand48() & 0xffffff; + if (ibv_get_srq_num(ctx.srq, &srq_num)) { + fprintf(stderr, "Couldn't get SRQ num\n"); + return -1; + } + + printf(ADDR_FORMAT, "local", ctx.lid, ctx.recv_qp[index]->qp_num, + ctx.send_qp[index]->qp_num, ctx.rem_dest[index].recv_psn, srq_num); + + gid_to_wire_gid(&local_gid, gid); + sprintf(msg, MSG_FORMAT, ctx.lid, ctx.recv_qp[index]->qp_num, + ctx.send_qp[index]->qp_num, ctx.rem_dest[index].recv_psn, + srq_num, gid); + + if (write(sockfd, msg, MSG_SIZE) != MSG_SIZE) { + fprintf(stderr, "Couldn't send local address\n"); + return -1; + } + + return 0; +} + +static int recv_remote_dest(int sockfd, int index) +{ + struct pingpong_dest *rem_dest; + char msg[MSG_SIZE]; + char gid[33]; + int n = 0, r; + + while (n < MSG_SIZE) { + r = read(sockfd, msg + n, MSG_SIZE - n); + if (r < 0) { + perror("client read"); + fprintf(stderr, + "%d/%d: Couldn't read remote address [%d]\n", + n, MSG_SIZE, index); + return -1; + } + n += r; + } + + rem_dest = &ctx.rem_dest[index]; + sscanf(msg, MSG_SSCAN, &rem_dest->lid, &rem_dest->recv_qpn, + &rem_dest->send_qpn, &rem_dest->send_psn, &rem_dest->srqn, gid); + + wire_gid_to_gid(gid, &rem_dest->gid); + printf(ADDR_FORMAT, "remote", rem_dest->lid, rem_dest->recv_qpn, + rem_dest->send_qpn, rem_dest->send_psn, rem_dest->srqn); + + rem_dest->sockfd = 
sockfd; + return 0; +} + +static void set_ah_attr(struct ibv_ah_attr *attr, struct pingpong_context *myctx, + int index) +{ + attr->is_global = 1; + attr->grh.hop_limit = 5; + attr->grh.dgid = myctx->rem_dest[index].gid; + attr->grh.sgid_index = myctx->gidx; +} + +static int connect_qps(int index) +{ + struct ibv_qp_attr attr; + + memset(&attr, 0, sizeof attr); + attr.qp_state = IBV_QPS_RTR; + attr.dest_qp_num = ctx.rem_dest[index].send_qpn; + attr.path_mtu = ctx.mtu; + attr.rq_psn = ctx.rem_dest[index].send_psn; + attr.min_rnr_timer = 12; + attr.ah_attr.dlid = ctx.rem_dest[index].lid; + attr.ah_attr.sl = ctx.sl; + attr.ah_attr.port_num = ctx.ib_port; + + if (ctx.rem_dest[index].gid.global.interface_id) + set_ah_attr(&attr.ah_attr, &ctx, index); + + if (ibv_modify_qp(ctx.recv_qp[index], &attr, + IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | + IBV_QP_DEST_QPN | IBV_QP_RQ_PSN | + IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER)) { + fprintf(stderr, "Failed to modify recv QP[%d] to RTR\n", index); + return 1; + } + + memset(&attr, 0, sizeof attr); + attr.qp_state = IBV_QPS_RTS; + attr.timeout = 14; + attr.sq_psn = ctx.rem_dest[index].recv_psn; + + if (ibv_modify_qp(ctx.recv_qp[index], &attr, + IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_SQ_PSN)) { + fprintf(stderr, "Failed to modify recv QP[%d] to RTS\n", index); + return 1; + } + + memset(&attr, 0, sizeof attr); + attr.qp_state = IBV_QPS_RTR; + attr.dest_qp_num = ctx.rem_dest[index].recv_qpn; + attr.path_mtu = ctx.mtu; + attr.rq_psn = ctx.rem_dest[index].send_psn; + attr.ah_attr.dlid = ctx.rem_dest[index].lid; + attr.ah_attr.sl = ctx.sl; + attr.ah_attr.port_num = ctx.ib_port; + + if (ctx.rem_dest[index].gid.global.interface_id) + set_ah_attr(&attr.ah_attr, &ctx, index); + + if (ibv_modify_qp(ctx.send_qp[index], &attr, + IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | + IBV_QP_DEST_QPN | IBV_QP_RQ_PSN)) { + fprintf(stderr, "Failed to modify send QP[%d] to RTR\n", index); + return 1; + } + + memset(&attr, 0, sizeof attr); + 
attr.qp_state = IBV_QPS_RTS; + attr.timeout = 14; + attr.retry_cnt = 7; + attr.rnr_retry = 7; + attr.sq_psn = ctx.rem_dest[index].recv_psn; + + if (ibv_modify_qp(ctx.send_qp[index], &attr, + IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_SQ_PSN | + IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY | IBV_QP_MAX_QP_RD_ATOMIC)) { + fprintf(stderr, "Failed to modify send QP[%d] to RTS\n", index); + return 1; + } + + return 0; +} + +static int pp_client_connect(const char *servername, int port) +{ + struct addrinfo *res, *t; + char *service; + int ret; + int sockfd = -1; + struct addrinfo hints = { + .ai_family = AF_UNSPEC, + .ai_socktype = SOCK_STREAM + }; + + if (asprintf(&service, "%d", port) < 0) + return 1; + + ret = getaddrinfo(servername, service, &hints, &res); + if (ret < 0) { + fprintf(stderr, "Error for %s:%d\n", servername, port); + free(service); + return 1; + } + + for (t = res; t; t = t->ai_next) { + sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol); + if (sockfd >= 0) { + if (!connect(sockfd, t->ai_addr, t->ai_addrlen)) + break; + close(sockfd); + sockfd = -1; + } + } + + freeaddrinfo(res); + free(service); + + if (sockfd < 0) { + fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port); + return 1; + } + + if (send_local_dest(sockfd, 0)) + return 1; + + if (recv_remote_dest(sockfd, 0)) + return 1; + + if (connect_qps(0)) + return 1; + + return 0; +} + +static int pp_server_connect(int port) +{ + struct addrinfo *res, *t; + char *service; + int ret, i, n; + int sockfd = -1, connfd; + struct addrinfo hints = { + .ai_flags = AI_PASSIVE, + .ai_family = AF_UNSPEC, + .ai_socktype = SOCK_STREAM + }; + + if (asprintf(&service, "%d", port) < 0) + return 1; + + ret = getaddrinfo(NULL, service, &hints, &res); + if (ret < 0) { + fprintf(stderr, "Error for port %d\n", port); + free(service); + return 1; + } + + for (t = res; t; t = t->ai_next) { + sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol); + if (sockfd >= 0) { + n = 1; + setsockopt(sockfd, 
SOL_SOCKET, SO_REUSEADDR, &n, sizeof n); + if (!bind(sockfd, t->ai_addr, t->ai_addrlen)) + break; + close(sockfd); + sockfd = -1; + } + } + + freeaddrinfo(res); + free(service); + + if (sockfd < 0) { + fprintf(stderr, "Couldn't listen to port %d\n", port); + return 1; + } + + listen(sockfd, ctx.num_clients); + + for (i = 0; i < ctx.num_clients; i++) { + connfd = accept(sockfd, NULL, NULL); + if (connfd < 0) { + fprintf(stderr, "accept() failed for client %d\n", i); + return 1; + } + + if (recv_remote_dest(connfd, i)) + return 1; + + if (send_local_dest(connfd, i)) + return 1; + + if (connect_qps(i)) + return 1; + } + + close(sockfd); + return 0; +} + + +static int pp_close_ctx(void) +{ + int i; + + for (i = 0; i < ctx.num_clients; ++i) { + + if (ibv_destroy_qp(ctx.send_qp[i])) { + fprintf(stderr, "Couldn't destroy INI QP[%d]\n", i); + return 1; + } + + if (ibv_destroy_qp(ctx.recv_qp[i])) { + fprintf(stderr, "Couldn't destroy TGT QP[%d]\n", i); + return 1; + } + + if (ctx.rem_dest[i].sockfd) + close(ctx.rem_dest[i].sockfd); + } + + if (ibv_destroy_srq(ctx.srq)) { + fprintf(stderr, "Couldn't destroy SRQ\n"); + return 1; + } + + if (ctx.xrcd && ibv_close_xrcd(ctx.xrcd)) { + fprintf(stderr, "Couldn't close the XRC Domain\n"); + return 1; + } + if (ctx.fd >= 0 && close(ctx.fd)) { + fprintf(stderr, "Couldn't close the file for the XRC Domain\n"); + return 1; + } + + if (ibv_destroy_cq(ctx.send_cq)) { + fprintf(stderr, "Couldn't destroy send CQ\n"); + return 1; + } + + if (ibv_destroy_cq(ctx.recv_cq)) { + fprintf(stderr, "Couldn't destroy recv CQ\n"); + return 1; + } + + if (ibv_dereg_mr(ctx.mr)) { + fprintf(stderr, "Couldn't deregister MR\n"); + return 1; + } + + if (ibv_dealloc_pd(ctx.pd)) { + fprintf(stderr, "Couldn't deallocate PD\n"); + return 1; + } + + if (ctx.channel) { + if (ibv_destroy_comp_channel(ctx.channel)) { + fprintf(stderr, + "Couldn't destroy completion channel\n"); + return 1; + } + } + + if (ibv_close_device(ctx.context)) { + fprintf(stderr, "Couldn't 
release context\n"); + return 1; + } + + free(ctx.buf); + free(ctx.rem_dest); + free(ctx.send_qp); + free(ctx.recv_qp); + return 0; +} + +static int pp_post_recv(int cnt) +{ + struct ibv_sge sge; + struct ibv_recv_wr wr, *bad_wr; + + sge.addr = (uintptr_t) ctx.buf; + sge.length = ctx.size; + sge.lkey = ctx.mr->lkey; + + wr.next = NULL; + wr.wr_id = (uintptr_t) &ctx; + wr.sg_list = &sge; + wr.num_sge = 1; + + while (cnt--) { + if (ibv_post_srq_recv(ctx.srq, &wr, &bad_wr)) { + fprintf(stderr, "Failed to post receive to SRQ\n"); + return 1; + } + } + return 0; +} + +/* + * Send to each client round robin on each set of xrc send/recv qp. + * Generate a completion on the last send. + */ +static int pp_post_send(int index) +{ + struct ibv_sge sge; + struct ibv_send_wr wr, *bad_wr; + int qpi; + + sge.addr = (uintptr_t) ctx.buf; + sge.length = ctx.size; + sge.lkey = ctx.mr->lkey; + + wr.wr_id = (uintptr_t) index; + wr.next = NULL; + wr.sg_list = &sge; + wr.num_sge = 1; + wr.opcode = IBV_WR_SEND; + wr.qp_type.xrc.remote_srqn = ctx.rem_dest[index].srqn; + + qpi = (index + ctx.rem_dest[index].pp_cnt) % ctx.num_clients; + wr.send_flags = (++ctx.rem_dest[index].pp_cnt >= ctx.num_tests) ? 
+ IBV_SEND_SIGNALED : 0; + + return ibv_post_send(ctx.send_qp[qpi], &wr, &bad_wr); +} + +static int find_qp(int qpn) +{ + int i; + + if (ctx.num_clients == 1) + return 0; + + for (i = 0; i < ctx.num_clients; ++i) + if (ctx.recv_qp[i]->qp_num == qpn) + return i; + + fprintf(stderr, "Unable to find qp %x\n", qpn); + return 0; +} + +static int get_cq_event(void) +{ + struct ibv_cq *ev_cq; + void *ev_ctx; + + if (ibv_get_cq_event(ctx.channel, &ev_cq, &ev_ctx)) { + fprintf(stderr, "Failed to get cq_event\n"); + return 1; + } + + if (ev_cq != ctx.recv_cq) { + fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq); + return 1; + } + + if (ibv_req_notify_cq(ctx.recv_cq, 0)) { + fprintf(stderr, "Couldn't request CQ notification\n"); + return 1; + } + + return 0; +} + +static void init(void) +{ + srand48(getpid() * time(NULL)); + + ctx.size = 4096; + ctx.ib_port = 1; + ctx.num_clients = 1; + ctx.num_tests = 5; + ctx.mtu = IBV_MTU_1024; + ctx.sl = 0; + ctx.gidx = -1; +} + +static void usage(const char *argv0) +{ + printf("Usage:\n"); + printf(" %s start a server and wait for connection\n", argv0); + printf(" %s connect to server at \n", argv0); + printf("\n"); + printf("Options:\n"); + printf(" -p, --port= listen on/connect to port (default 18515)\n"); + printf(" -d, --ib-dev= use IB device (default first device found)\n"); + printf(" -i, --ib-port= use port of IB device (default 1)\n"); + printf(" -s, --size= size of message to exchange (default 4096)\n"); + printf(" -m, --mtu= path MTU (default 2048)\n"); + printf(" -c, --clients= number of clients (on server only, default 1)\n"); + printf(" -n, --num_tests= number of tests per client (default 5)\n"); + printf(" -l, --sl= service level value\n"); + printf(" -e, --events sleep on CQ events (default poll)\n"); + printf(" -g, --gid-idx= local port gid index\n"); +} + +int main(int argc, char *argv[]) +{ + char *ib_devname = NULL; + char *servername = NULL; + int port = 18515; + int i, total, cnt = 0; + int ne, qpi, 
num_cq_events = 0; + struct ibv_wc wc; + + init(); + while (1) { + int c; + + static struct option long_options[] = { + { .name = "port", .has_arg = 1, .val = 'p' }, + { .name = "ib-dev", .has_arg = 1, .val = 'd' }, + { .name = "ib-port", .has_arg = 1, .val = 'i' }, + { .name = "size", .has_arg = 1, .val = 's' }, + { .name = "mtu", .has_arg = 1, .val = 'm' }, + { .name = "clients", .has_arg = 1, .val = 'c' }, + { .name = "num_tests", .has_arg = 1, .val = 'n' }, + { .name = "sl", .has_arg = 1, .val = 'l' }, + { .name = "events", .has_arg = 0, .val = 'e' }, + { .name = "gid-idx", .has_arg = 1, .val = 'g' }, + {} + }; + + c = getopt_long(argc, argv, "p:d:i:s:m:c:n:l:eg:", long_options, + NULL); + if (c == -1) + break; + + switch (c) { + case 'p': + port = strtol(optarg, NULL, 0); + if (port < 0 || port > 65535) { + usage(argv[0]); + return 1; + } + break; + case 'd': + ib_devname = strdupa(optarg); + break; + case 'i': + ctx.ib_port = strtol(optarg, NULL, 0); + if (ctx.ib_port < 0) { + usage(argv[0]); + return 1; + } + break; + case 's': + ctx.size = strtol(optarg, NULL, 0); + break; + case 'm': + ctx.mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0)); + if (ctx.mtu == 0) { + usage(argv[0]); + return 1; + } + break; + + case 'c': + ctx.num_clients = strtol(optarg, NULL, 0); + break; + case 'n': + ctx.num_tests = strtol(optarg, NULL, 0); + break; + case 'l': + ctx.sl = strtol(optarg, NULL, 0); + break; + case 'g': + ctx.gidx = strtol(optarg, NULL, 0); + break; + case 'e': + ctx.use_event = 1; + break; + default: + usage(argv[0]); + return 1; + } + } + + if (optind == argc - 1) { + servername = strdupa(argv[optind]); + ctx.num_clients = 1; + } else if (optind < argc) { + usage(argv[0]); + return 1; + } + + page_size = PAGE_SIZE; + + if (pp_init_ctx(ib_devname)) + return 1; + + if (pp_post_recv(ctx.num_clients)) { + fprintf(stderr, "Couldn't post receives\n"); + return 1; + } + + if (servername) { + if (pp_client_connect(servername, port)) + return 1; + } else { + if 
(pp_server_connect(port)) + return 1; + + for (i = 0; i < ctx.num_clients; i++) + pp_post_send(i); + } + + total = ctx.num_clients * ctx.num_tests; + while (cnt < total) { + if (ctx.use_event) { + if (get_cq_event()) + return 1; + + ++num_cq_events; + } + + do { + ne = ibv_poll_cq(ctx.recv_cq, 1, &wc); + if (ne < 0) { + fprintf(stderr, "Error polling cq %d\n", ne); + return 1; + } else if (ne == 0) { + break; + } + + if (wc.status) { + fprintf(stderr, "Work completion error %d\n", wc.status); + return 1; + } + + pp_post_recv(ne); + qpi = find_qp(wc.qp_num); + if (ctx.rem_dest[qpi].pp_cnt < ctx.num_tests) pp_post_send(qpi); + cnt += ne; + } while (ne > 0); + } + + for (cnt = 0; cnt < ctx.num_clients; cnt += ne) { + ne = ibv_poll_cq(ctx.send_cq, 1, &wc); + if (ne < 0) { + fprintf(stderr, "Error polling cq %d\n", ne); + return 1; + } + } + + if (ctx.use_event) + ibv_ack_cq_events(ctx.recv_cq, num_cq_events); + + /* Process should get an ack from the daemon to close its resources to + * make sure latest daemon's response sent via its target QP destined + * to an XSRQ created by another client won't be lost. + * Failure to do so may cause the other client to wait for that sent + * message forever. See comment on pp_post_send. + */ + if (servername) { + if (pp_client_termination()) + return 1; + } else if (pp_server_termination()) { + return 1; + } + + if (pp_close_ctx()) + return 1; + + printf("success\n"); + return 0; +}