#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <malloc.h>
#include <getopt.h>
#include <limits.h>
#include <errno.h>
#include <signal.h>
#include <ctype.h>
#include <time.h>	/* time() seeds srand() in create_single_mr() */
/* #include <sys/mman.h> */
#include <sys/ipc.h>
#include <sys/shm.h>
#include <sys/time.h>	/* struct timeval for the select()-based wait in destroy_ctx() */
#include <sys/select.h>
#include <pthread.h>

#include "perftest_resources_write_bw.h"
#include "perftest_parameters_write_bw.h"

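/*
 * Everything below is written against the libibverbs API (ibv_*). Helper
 * macros such as ALLOCATE(), BUFF_SIZE(), INC(), SUCCESS/FAILURE and
 * get_cycles() are assumed to come from the two perftest headers included
 * above; link with -libverbs (and -lpthread).
 */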
static enum ibv_wr_opcode opcode_verbs_array[] = {IBV_WR_SEND, IBV_WR_RDMA_WRITE, IBV_WR_RDMA_READ};
static enum ibv_wr_opcode opcode_atomic_array[] = {IBV_WR_ATOMIC_CMP_AND_SWP, IBV_WR_ATOMIC_FETCH_AND_ADD};

#define CPU_UTILITY "/proc/stat"

struct perftest_parameters* duration_param;
struct check_alive_data check_alive_data;

struct ibv_qp* ctx_qp_create(struct pingpong_context *ctx,
		struct perftest_parameters *user_param)
{
	struct ibv_qp_init_attr attr;
	struct ibv_qp* qp = NULL;

	memset(&attr, 0, sizeof(struct ibv_qp_init_attr));
	attr.send_cq = ctx->send_cq;
	attr.recv_cq = (user_param->verb == SEND) ? ctx->recv_cq : ctx->send_cq;
	attr.cap.max_send_wr = user_param->tx_depth;
	attr.cap.max_send_sge = MAX_SEND_SGE;
	attr.cap.max_inline_data = user_param->inline_size;

	if (user_param->use_srq && (user_param->tst == LAT || user_param->machine == SERVER || user_param->duplex == ON)) {
		attr.srq = ctx->srq;
		attr.cap.max_recv_wr = 0;
		attr.cap.max_recv_sge = 0;
	} else {
		attr.srq = NULL;
		attr.cap.max_recv_wr = user_param->rx_depth;
		attr.cap.max_recv_sge = MAX_RECV_SGE;
	}

	switch (user_param->connection_type) {
		case RC : attr.qp_type = IBV_QPT_RC; break;
		case UC : attr.qp_type = IBV_QPT_UC; break;
		case UD : attr.qp_type = IBV_QPT_UD; break;
#ifdef HAVE_RAW_ETH
		case RawEth : attr.qp_type = IBV_QPT_RAW_PACKET; break;
#endif
		default: fprintf(stderr, "Unknown connection type \n");
			 return NULL;
	}

	qp = ibv_create_qp(ctx->pd, &attr);
	return qp;
}

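/*
 * Usage sketch (assuming ctx and user_param were prepared by ctx_init() and
 * the parameter parser): a QP returned by ctx_qp_create() is still in the
 * RESET state and must be walked through INIT -> RTR -> RTS, roughly:
 *
 *	struct ibv_qp *qp = ctx_qp_create(ctx, user_param);
 *	ctx_modify_qp_to_init(qp, user_param, 0);
 *	// ...exchange pingpong_dest info with the peer out of band...
 *	ctx_modify_qp_to_rtr(qp, &attr, user_param, &dest, &my_dest, 0);
 *	ctx_modify_qp_to_rts(qp, &attr, user_param, &dest, &my_dest);
 */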
#if defined HAVE_EX_ODP || defined HAVE_EXP_ODP
static int check_odp_support(struct pingpong_context *ctx)
{
	#ifdef HAVE_EX_ODP
	struct ibv_device_attr_ex dattr;
	int odp_support_send = IBV_ODP_SUPPORT_SEND;
	int odp_support_recv = IBV_ODP_SUPPORT_RECV;
	int ret = ibv_query_device_ex(ctx->context, NULL, &dattr);
	#elif defined HAVE_EXP_ODP
	/* Experimental-verbs path, restored so the function still compiles when
	 * only HAVE_EXP_ODP is defined (the enclosing #if allows that case). */
	struct ibv_exp_device_attr dattr;
	int ret = ibv_exp_query_device(ctx->context, &dattr);
	int odp_support_send = IBV_EXP_ODP_SUPPORT_SEND;
	int odp_support_recv = IBV_EXP_ODP_SUPPORT_RECV;
	#endif
	if (ret) {
		fprintf(stderr, " Couldn't query device for on-demand paging capabilities.\n");
		return 0;
	} else if (!(dattr.odp_caps.per_transport_caps.rc_odp_caps & odp_support_send)) {
		fprintf(stderr, " Send is not supported for RC transport.\n");
		return 0;
	} else if (!(dattr.odp_caps.per_transport_caps.rc_odp_caps & odp_support_recv)) {
		fprintf(stderr, " Receive is not supported for RC transport.\n");
		return 0;
	}
	return 1;
}
#endif

// -----------------------------------------------------------------------------

int create_single_mr(struct pingpong_context *ctx, struct perftest_parameters *user_param, int qp_index)
{
	int i;
	int flags = IBV_ACCESS_LOCAL_WRITE;

	/* ODP */
	#if defined HAVE_EX_ODP || defined HAVE_EXP_ODP
	if (user_param->use_odp) {
		if (!check_odp_support(ctx))
			return FAILURE;

		/* ODP does not support contig pages */
		ctx->is_contig_supported = FAILURE;
		#ifdef HAVE_EX_ODP
		flags |= IBV_ACCESS_ON_DEMAND;
		#elif defined HAVE_EXP_ODP
		/* The exp_flags variable belongs to the experimental registration
		 * path that was stripped from this variant, so the assignment is
		 * kept commented out like the rest of that path. */
		/* exp_flags |= IBV_EXP_ACCESS_ON_DEMAND; */
		#endif
	}
	#endif

	/* #ifdef HAVE_CUDA */
	/* if (user_param->use_cuda) { */
	/*	ctx->is_contig_supported = FAILURE; */
	/*	if(pp_init_gpu(ctx, ctx->buff_size)) { */
	/*		fprintf(stderr, "Couldn't allocate work buf.\n"); */
	/*		return FAILURE; */
	/*	} */
	/* } */
	/* #endif */

	/* if (user_param->mmap_file != NULL) { */
	/*	ctx->buf = memalign(user_param->cycle_buffer, ctx->buff_size); */
	/*	if (pp_init_mmap(ctx, ctx->buff_size, user_param->mmap_file, */
	/*			user_param->mmap_offset)) */
	/*	{ */
	/*		fprintf(stderr, "Couldn't allocate work buf.\n"); */
	/*		return FAILURE; */
	/*	} */
	/* } else { */

	/* Allocate the data buffer ourselves when the driver does not support contig pages. */
	if (ctx->is_contig_supported == FAILURE) {
		/* if (user_param->use_hugepages) { */
		/*	if (alloc_hugepage_region(ctx) != SUCCESS){ */
		/*		fprintf(stderr, "Failed to allocate hugepage region.\n"); */
		/*		return FAILURE; */
		/*	} */
		/*	memset(ctx->buf[qp_index], 0, ctx->buff_size); */
		/* } else if (ctx->is_contig_supported == FAILURE) { */
		/*	ctx->buf[qp_index] = memalign(user_param->cycle_buffer, ctx->buff_size); */
		/* } */

		ctx->buf[qp_index] = memalign(user_param->cycle_buffer, ctx->buff_size);
		if (!ctx->buf[qp_index]) {
			fprintf(stderr, "Couldn't allocate work buf.\n");
			return FAILURE;
		}
		memset(ctx->buf[qp_index], 0, ctx->buff_size);

	} else {
		ctx->buf[qp_index] = NULL;
		flags |= (1 << 5);	/* IBV_EXP_ACCESS_ALLOCATE_MR: let the driver allocate a contiguous buffer */
	}
	/* } */

	if (user_param->verb == WRITE) {
		flags |= IBV_ACCESS_REMOTE_WRITE;
	} else if (user_param->verb == READ) {
		flags |= IBV_ACCESS_REMOTE_READ;
		if (user_param->transport_type == IBV_TRANSPORT_IWARP)
			flags |= IBV_ACCESS_REMOTE_WRITE;
	/* } else if (user_param->verb == ATOMIC) { */
	/*	flags |= IBV_ACCESS_REMOTE_ATOMIC; */
	}

	/* Allocate the memory region and assign our buffer to it. */
	ctx->mr[qp_index] = ibv_reg_mr(ctx->pd, ctx->buf[qp_index], ctx->buff_size, flags);

	if (!ctx->mr[qp_index]) {
		fprintf(stderr, "Couldn't allocate MR\n");
		return FAILURE;
	}

	if (ctx->is_contig_supported == SUCCESS)
		ctx->buf[qp_index] = ctx->mr[qp_index]->addr;

	/* Initialize the buffer with random bytes, except in a WRITE latency test where it stays zeroed. */
	if (!user_param->use_cuda) {
		srand(time(NULL));
		if (user_param->verb == WRITE && user_param->tst == LAT) {
			memset(ctx->buf[qp_index], 0, ctx->buff_size);
		} else {
			for (i = 0; i < ctx->buff_size; i++) {
				((char*)ctx->buf[qp_index])[i] = (char)rand();
			}
		}
	}

	return SUCCESS;
}

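/*
 * Worked example of the flag math above (standard verbs flag values, stated
 * only for illustration): a READ test over iWARP registers the MR with
 *	IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE
 * -- iWARP needs the extra remote-write permission on MRs that are RDMA read
 * targets, which is why the transport check above adds it.
 */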
int create_reg_cqs(struct pingpong_context *ctx,
		struct perftest_parameters *user_param,
		int tx_buffer_depth, int need_recv_cq)
{
	ctx->send_cq = ibv_create_cq(ctx->context, tx_buffer_depth *
			user_param->num_of_qps, NULL, ctx->channel, user_param->eq_num);
	if (!ctx->send_cq) {
		fprintf(stderr, "Couldn't create CQ\n");
		return FAILURE;
	}

	if (need_recv_cq) {
		ctx->recv_cq = ibv_create_cq(ctx->context, user_param->rx_depth *
				user_param->num_of_qps, NULL, ctx->channel, user_param->eq_num);
		if (!ctx->recv_cq) {
			fprintf(stderr, "Couldn't create a receiver CQ\n");
			return FAILURE;
		}
	}

	return SUCCESS;
}

int create_reg_qp_main(struct pingpong_context *ctx,
		struct perftest_parameters *user_param,
		int i, int num_of_qps)
{
	if (user_param->use_xrc) {
		/* #ifdef HAVE_XRCD */
		/* ctx->qp[i] = ctx_xrc_qp_create(ctx, user_param, i); */
		/* #endif */
		ctx->qp[i] = NULL;	/* XRC path is stubbed out in this variant;
					 * without this the NULL check below would
					 * test an uninitialized pointer. */
	} else {
		ctx->qp[i] = ctx_qp_create(ctx, user_param);
	}

	if (ctx->qp[i] == NULL) {
		fprintf(stderr, "Unable to create QP.\n");
		return FAILURE;
	}

	return SUCCESS;
}

int ctx_modify_qp_to_init(struct ibv_qp *qp, struct perftest_parameters *user_param, uint64_t init_flag)
{
	int num_of_qps = user_param->num_of_qps;
	int num_of_qps_per_port = user_param->num_of_qps / 2;

	struct ibv_qp_attr attr;
	int flags = IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT;

	static int portindex = 0;	/* for dual-port support */
	int ret = 0;

	memset(&attr, 0, sizeof(struct ibv_qp_attr));
	attr.qp_state = IBV_QPS_INIT;
	attr.pkey_index = user_param->pkey_index;

	/* In XRC with bidirectional traffic there are send QPs and recv QPs;
	 * the actual number of send/recv QPs is num_of_qps / 2. */
	if (user_param->use_xrc && (user_param->duplex || user_param->tst == LAT)) {
		num_of_qps /= 2;
		num_of_qps_per_port = num_of_qps / 2;
	}

	if (user_param->dualport == ON) {
		if (portindex % num_of_qps < num_of_qps_per_port) {
			attr.port_num = user_param->ib_port;
			user_param->port_by_qp[portindex] = 0;
		} else {
			attr.port_num = user_param->ib_port2;
			user_param->port_by_qp[portindex] = 1;
		}
		portindex++;

	} else {

		attr.port_num = user_param->ib_port;
	}

	if (user_param->connection_type == RawEth) {
		flags = IBV_QP_STATE | IBV_QP_PORT;
	} else if (user_param->connection_type == UD) {
		attr.qkey = DEFF_QKEY;
		flags |= IBV_QP_QKEY;

	} else {
		switch (user_param->verb) {
			case ATOMIC: attr.qp_access_flags = IBV_ACCESS_REMOTE_ATOMIC; break;
			case READ  : attr.qp_access_flags = IBV_ACCESS_REMOTE_READ; break;
			case WRITE : attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE; break;
			case SEND  : attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE; break;
		}
		flags |= IBV_QP_ACCESS_FLAGS;
	}

#ifdef HAVE_MASKED_ATOMICS
	/* The experimental attributes (exp_attr/exp_flags) were stripped from
	 * this variant, so the masked-atomics tweak is kept commented out; as
	 * previously written it referenced undeclared variables. */
	/* if (user_param->masked_atomics) { */
	/*	exp_attr.qp_access_flags = IBV_ACCESS_REMOTE_ATOMIC | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ; */
	/*	exp_flags = IBV_EXP_QP_STATE | IBV_EXP_QP_PKEY_INDEX | IBV_EXP_QP_PORT | IBV_EXP_QP_ACCESS_FLAGS; */
	/* } */
#endif

	ret = ibv_modify_qp(qp, &attr, flags);

	if (ret) {
		fprintf(stderr, "Failed to modify QP to INIT, ret=%d\n", ret);
		return 1;
	}
	return 0;
}

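/*
 * Worked example of the dual-port round-robin above: with --dualport and
 * num_of_qps = 4, successive calls see portindex % 4 = 0, 1, 2, 3, so QPs 0
 * and 1 (< num_of_qps_per_port = 2) land on ib_port and QPs 2 and 3 on
 * ib_port2.
 */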
// -----------------------------------------------------------------------------

int create_mr(struct pingpong_context *ctx, struct perftest_parameters *user_param)
{
	int i;

	/* create the first MR */
	if (create_single_mr(ctx, user_param, 0)) {
		fprintf(stderr, "failed to create mr\n");
		return 1;
	}

	/* create the rest if needed, or share the first one */
	for (i = 1; i < user_param->num_of_qps; i++) {
		if (user_param->mr_per_qp) {
			if (create_single_mr(ctx, user_param, i)) {
				fprintf(stderr, "failed to create mr\n");
				return 1;
			}
		} else {
			/* All QPs share MR 0; allocating a fresh ibv_mr here and
			 * then overwriting the pointer (as the code previously did)
			 * would only leak it. Each QP still gets its own slice of
			 * the shared buffer. */
			ctx->mr[i] = ctx->mr[0];
			ctx->buf[i] = (char*)ctx->buf[0] + (i*BUFF_SIZE(ctx->size, ctx->cycle_buffer));
		}
	}

	return 0;
}

int create_cqs(struct pingpong_context *ctx, struct perftest_parameters *user_param)
{
	int ret;
	int dct_only = 0, need_recv_cq = 0;
	int tx_buffer_depth = user_param->tx_depth;

	if (user_param->connection_type == DC) {
		dct_only = (user_param->machine == SERVER && !(user_param->duplex || user_param->tst == LAT));
	}

	if (dct_only)
		tx_buffer_depth = user_param->rx_depth;

	if ((user_param->connection_type == DC && !dct_only) || (user_param->verb == SEND))
		need_recv_cq = 1;

	ret = create_reg_cqs(ctx, user_param, tx_buffer_depth, need_recv_cq);

	return ret;
}

int create_qp_main(struct pingpong_context *ctx,
		struct perftest_parameters *user_param, int i, int num_of_qps)
{
	int ret;
	ret = create_reg_qp_main(ctx, user_param, i, num_of_qps);
	return ret;
}

int modify_qp_to_init(struct pingpong_context *ctx,
		struct perftest_parameters *user_param, int qp_index, int num_of_qps)
{
	uint64_t init_flag = 0;

#ifdef HAVE_RSS_EXP
	if (qp_index == 0 && user_param->use_rss) {
		init_flag = IBV_EXP_QP_GROUP_RSS;
	}
	else
#endif
		init_flag = 0;

	if (user_param->connection_type == DC) {
		if (!((!(user_param->duplex || user_param->tst == LAT) && (user_param->machine == SERVER))
					|| ((user_param->duplex || user_param->tst == LAT) && (qp_index >= num_of_qps)))) {
#ifdef HAVE_DC
			if (ctx_modify_dc_qp_to_init(ctx->qp[qp_index], user_param)) {
				fprintf(stderr, " Unable to create DC QP.\n");
				return FAILURE;
			}
#endif
		}
	} else {
		if (ctx_modify_qp_to_init(ctx->qp[qp_index], user_param, init_flag)) {
			fprintf(stderr, "Failed to modify QP to INIT\n");
			return FAILURE;
		}
	}

	return SUCCESS;
}

#ifdef HAVE_PACKET_PACING
int check_packet_pacing_support(struct pingpong_context *ctx)
{
	struct ibv_device_attr_ex attr;
	memset(&attr, 0, sizeof(struct ibv_device_attr_ex));

	if (ibv_query_device_ex(ctx->context, NULL, &attr)) {
		fprintf(stderr, "ibv_query_device_ex failed\n");
		return FAILURE;
	}

	/* qp_rate_limit_max > 0 if packet pacing is supported */
	return attr.packet_pacing_caps.qp_rate_limit_max > 0 ? SUCCESS : FAILURE;
}
#endif

static int ctx_modify_qp_to_rtr(struct ibv_qp *qp,
		struct ibv_qp_attr *attr,
		struct perftest_parameters *user_param,
		struct pingpong_dest *dest,
		struct pingpong_dest *my_dest,
		int qpindex)
{
	int num_of_qps = user_param->num_of_qps;
	int num_of_qps_per_port = user_param->num_of_qps / 2;

	int flags = IBV_QP_STATE;
	int ooo_flags = 0;

	attr->qp_state = IBV_QPS_RTR;
	attr->ah_attr.src_path_bits = 0;

	/* In XRC with bidirectional traffic there are send QPs and recv QPs;
	 * the actual number of send/recv QPs is num_of_qps / 2. */
	if (user_param->use_xrc && (user_param->duplex || user_param->tst == LAT)) {
		num_of_qps /= 2;
		num_of_qps_per_port = num_of_qps / 2;
	}

	/* The first half of the QPs belongs to ib_port and the second half to
	 * ib_port2. In XRC with bidirectional traffic the first half of the QPs
	 * are xrc_send QPs and the second half xrc_recv QPs; within each half,
	 * the first part goes to ib_port and the second to ib_port2. */
	if (user_param->dualport == ON && (qpindex % num_of_qps >= num_of_qps_per_port))
		attr->ah_attr.port_num = user_param->ib_port2;
	else
		attr->ah_attr.port_num = user_param->ib_port;

	if (user_param->connection_type != RawEth) {

		attr->ah_attr.dlid = (user_param->dlid) ? user_param->dlid : dest->lid;
		attr->ah_attr.sl = user_param->sl;

		if (((attr->ah_attr.port_num == user_param->ib_port) && (user_param->gid_index == DEF_GID_INDEX))
				|| ((attr->ah_attr.port_num == user_param->ib_port2) && (user_param->gid_index2 == DEF_GID_INDEX) && user_param->dualport)) {

			attr->ah_attr.is_global = 0;
		} else {

			attr->ah_attr.is_global = 1;
			attr->ah_attr.grh.dgid = dest->gid;
			attr->ah_attr.grh.sgid_index = (attr->ah_attr.port_num == user_param->ib_port) ? user_param->gid_index : user_param->gid_index2;
			attr->ah_attr.grh.hop_limit = 0xFF;
			attr->ah_attr.grh.traffic_class = user_param->traffic_class;
		}
		if (user_param->connection_type != UD) {

			attr->path_mtu = user_param->curr_mtu;
			attr->dest_qp_num = dest->qpn;
			attr->rq_psn = dest->psn;

			flags |= (IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN | IBV_QP_RQ_PSN);

			if (user_param->connection_type == RC || user_param->connection_type == XRC) {

				attr->max_dest_rd_atomic = my_dest->out_reads;
				attr->min_rnr_timer = 12;
				flags |= (IBV_QP_MIN_RNR_TIMER | IBV_QP_MAX_DEST_RD_ATOMIC);
			}
		}
	}
	else if (user_param->raw_qos) {
		attr->ah_attr.sl = user_param->sl;
		flags |= IBV_QP_AV;
	}

#ifdef HAVE_OOO_ATTR
	ooo_flags |= IBV_QP_OOO_RW_DATA_PLACEMENT;
#elif defined HAVE_EXP_OOO_ATTR
	ooo_flags |= IBV_EXP_QP_OOO_RW_DATA_PLACEMENT;
#endif

	if (user_param->use_ooo)
		flags |= ooo_flags;
	return ibv_modify_qp(qp, attr, flags);
}

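/*
 * For reference (a standard verbs requirement, not specific to this file): a
 * connected RC QP must receive at least IBV_QP_STATE, IBV_QP_AV,
 * IBV_QP_PATH_MTU, IBV_QP_DEST_QPN, IBV_QP_RQ_PSN, IBV_QP_MAX_DEST_RD_ATOMIC
 * and IBV_QP_MIN_RNR_TIMER on the INIT->RTR transition, which is exactly the
 * flag set accumulated above for the RC/XRC case.
 */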
void ctx_set_send_reg_wqes(struct pingpong_context *ctx,
		struct perftest_parameters *user_param,
		struct pingpong_dest *rem_dest)
{
	int i, j;
	int num_of_qps = user_param->num_of_qps;
	int xrc_offset = 0;

	if ((user_param->use_xrc || user_param->connection_type == DC) && (user_param->duplex || user_param->tst == LAT)) {
		num_of_qps /= 2;
		xrc_offset = num_of_qps;
	}

	for (i = 0; i < num_of_qps; i++) {
		memset(&ctx->wr[i*user_param->post_list], 0, sizeof(struct ibv_send_wr));
		ctx->sge_list[i*user_param->post_list].addr = (uintptr_t)ctx->buf[i];

		if (user_param->mac_fwd) {
			if (user_param->mr_per_qp) {
				ctx->sge_list[i*user_param->post_list].addr =
					(uintptr_t)ctx->buf[0] + (num_of_qps + i)*BUFF_SIZE(ctx->size, ctx->cycle_buffer);
			} else {
				ctx->sge_list[i*user_param->post_list].addr = (uintptr_t)ctx->buf[i];
			}
		}

		if (user_param->verb == WRITE || user_param->verb == READ)
			ctx->wr[i*user_param->post_list].wr.rdma.remote_addr = rem_dest[xrc_offset + i].vaddr;

		else if (user_param->verb == ATOMIC)
			ctx->wr[i*user_param->post_list].wr.atomic.remote_addr = rem_dest[xrc_offset + i].vaddr;

		if (user_param->tst == BW || user_param->tst == LAT_BY_BW) {

			ctx->scnt[i] = 0;
			ctx->ccnt[i] = 0;
			ctx->my_addr[i] = (uintptr_t)ctx->buf[i];
			if (user_param->verb != SEND)
				ctx->rem_addr[i] = rem_dest[xrc_offset + i].vaddr;
		}

		for (j = 0; j < user_param->post_list; j++) {

			ctx->sge_list[i*user_param->post_list + j].length =
				(user_param->connection_type == RawEth) ? (user_param->size - HW_CRC_ADDITION) : user_param->size;

			ctx->sge_list[i*user_param->post_list + j].lkey = ctx->mr[i]->lkey;

			if (j > 0) {

				ctx->sge_list[i*user_param->post_list + j].addr = ctx->sge_list[i*user_param->post_list + (j-1)].addr;

				if ((user_param->tst == BW || user_param->tst == LAT_BY_BW) && user_param->size <= (ctx->cycle_buffer / 2))
					increase_loc_addr(&ctx->sge_list[i*user_param->post_list + j], user_param->size,
							j-1, ctx->my_addr[i], 0, ctx->cache_line_size, ctx->cycle_buffer);
			}

			ctx->wr[i*user_param->post_list + j].sg_list = &ctx->sge_list[i*user_param->post_list + j];
			ctx->wr[i*user_param->post_list + j].num_sge = MAX_SEND_SGE;
			ctx->wr[i*user_param->post_list + j].wr_id = i;

			if (j == (user_param->post_list - 1)) {
				ctx->wr[i*user_param->post_list + j].send_flags = IBV_SEND_SIGNALED;
				ctx->wr[i*user_param->post_list + j].next = NULL;
			}

			else {
				ctx->wr[i*user_param->post_list + j].next = &ctx->wr[i*user_param->post_list+j+1];
				ctx->wr[i*user_param->post_list + j].send_flags = 0;
			}

			/* The atomic branch is restored: opcode_verbs_array has only
			 * SEND/WRITE/READ entries, so indexing it with ATOMIC would
			 * read past the end of the array. */
			if (user_param->verb == ATOMIC) {
				ctx->wr[i*user_param->post_list + j].opcode = opcode_atomic_array[user_param->atomicType];
			} else {
				ctx->wr[i*user_param->post_list + j].opcode = opcode_verbs_array[user_param->verb];
			}

			if (user_param->verb == WRITE || user_param->verb == READ) {

				ctx->wr[i*user_param->post_list + j].wr.rdma.rkey = rem_dest[xrc_offset + i].rkey;

				if (j > 0) {

					ctx->wr[i*user_param->post_list + j].wr.rdma.remote_addr =
						ctx->wr[i*user_param->post_list + (j-1)].wr.rdma.remote_addr;

					if ((user_param->tst == BW || user_param->tst == LAT_BY_BW) && user_param->size <= (ctx->cycle_buffer / 2))
						increase_rem_addr(&ctx->wr[i*user_param->post_list + j], user_param->size,
								j-1, ctx->rem_addr[i], WRITE, ctx->cache_line_size, ctx->cycle_buffer);
				}

			} else if (user_param->verb == ATOMIC) {

				ctx->wr[i*user_param->post_list + j].wr.atomic.rkey = rem_dest[xrc_offset + i].rkey;

				if (j > 0) {

					ctx->wr[i*user_param->post_list + j].wr.atomic.remote_addr =
						ctx->wr[i*user_param->post_list + j-1].wr.atomic.remote_addr;
					if (user_param->tst == BW || user_param->tst == LAT_BY_BW)
						increase_rem_addr(&ctx->wr[i*user_param->post_list + j], user_param->size,
								j-1, ctx->rem_addr[i], ATOMIC, ctx->cache_line_size, ctx->cycle_buffer);
				}

				if (user_param->atomicType == FETCH_AND_ADD)
					ctx->wr[i*user_param->post_list + j].wr.atomic.compare_add = ATOMIC_ADD_VALUE;

				else
					ctx->wr[i*user_param->post_list + j].wr.atomic.swap = ATOMIC_SWAP_VALUE;

			} else if (user_param->verb == SEND) {

				if (user_param->connection_type == UD) {

					ctx->wr[i*user_param->post_list + j].wr.ud.ah = ctx->ah[i];
					ctx->wr[i*user_param->post_list + j].wr.ud.remote_qkey = DEF_QKEY;
					ctx->wr[i*user_param->post_list + j].wr.ud.remote_qpn = rem_dest[xrc_offset + i].qpn;
				}
			}

			if ((user_param->verb == SEND || user_param->verb == WRITE) && user_param->size <= user_param->inline_size)
				ctx->wr[i*user_param->post_list + j].send_flags |= IBV_SEND_INLINE;

#ifdef HAVE_XRCD
			if (user_param->use_xrc)
				ctx->wr[i*user_param->post_list + j].qp_type.xrc.remote_srqn = rem_dest[xrc_offset + i].srqn;
#endif
		}
	}
}

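/*
 * Layout sketch: for post_list = 4, the WRs of QP i form one chained list
 *	wr[4i] -> wr[4i+1] -> wr[4i+2] -> wr[4i+3] -> NULL
 * with only the last entry flagged IBV_SEND_SIGNALED, so a single
 * ibv_post_send() per burst posts the whole chain and yields one completion.
 */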
static int ctx_modify_qp_to_rts(struct ibv_qp *qp,
		void *_attr,
		struct perftest_parameters *user_param,
		struct pingpong_dest *dest,
		struct pingpong_dest *my_dest)
{
	/* #ifdef HAVE_PACKET_PACING_EXP */
	/* uint64_t flags = IBV_QP_STATE; */
	/* #else */
	int flags = IBV_QP_STATE;
	/* #endif */
	struct ibv_qp_attr *attr = (struct ibv_qp_attr*)_attr;

	attr->qp_state = IBV_QPS_RTS;

	if (user_param->connection_type != RawEth) {

		flags |= IBV_QP_SQ_PSN;
		attr->sq_psn = my_dest->psn;

		if (user_param->connection_type == RC || user_param->connection_type == XRC) {

			attr->timeout = user_param->qp_timeout;
			attr->retry_cnt = 7;
			attr->rnr_retry = 7;
			attr->max_rd_atomic = dest->out_reads;
			flags |= (IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY | IBV_QP_MAX_QP_RD_ATOMIC);
		}
	}

	/* #ifdef HAVE_PACKET_PACING_EXP */
	/* if (user_param->rate_limit_type == PP_RATE_LIMIT) { */
	/*	((struct ibv_exp_qp_attr*)_attr)->rate_limit = user_param->rate_limit; */
	/*	flags |= IBV_EXP_QP_RATE_LIMIT; */
	/*	return ibv_exp_modify_qp(qp, (struct ibv_exp_qp_attr*)_attr, flags); */
	/* } */
	/* #elif defined(HAVE_PACKET_PACING) */
	/* if (user_param->rate_limit_type == PP_RATE_LIMIT) { */
	/*	attr->rate_limit = user_param->rate_limit; */
	/*	flags |= IBV_QP_RATE_LIMIT; */
	/* } */
	/* #endif */

#ifdef HAVE_PACKET_PACING
	if (user_param->rate_limit_type == PP_RATE_LIMIT) {
		attr->rate_limit = user_param->rate_limit;
		flags |= IBV_QP_RATE_LIMIT;
	}
#endif

	return ibv_modify_qp(qp, attr, flags);
}

int verify_params_with_device_context(struct ibv_context *context,
		struct perftest_parameters *user_param)
{
	if (user_param->use_event) {
		/* Valid completion vectors are 0 .. num_comp_vectors-1, so
		 * eq_num == num_comp_vectors must be rejected as well; the
		 * message below already states the correct maximum. */
		if (user_param->eq_num >= context->num_comp_vectors) {
			fprintf(stderr, " Completion vector specified is invalid\n");
			fprintf(stderr, " Max completion vector = %d\n",
					context->num_comp_vectors - 1);
			return FAILURE;
		}
	}

	return SUCCESS;
}

// -----------------------------------------------------------------------------

struct ibv_device* ctx_find_dev(const char *ib_devname)
{
	int num_of_device;
	struct ibv_device **dev_list;
	struct ibv_device *ib_dev = NULL;

	dev_list = ibv_get_device_list(&num_of_device);

	if (num_of_device <= 0) {
		fprintf(stderr, " Did not detect devices\n");
		fprintf(stderr, " If device exists, check if driver is up\n");
		return NULL;
	}

	if (!ib_devname) {
		ib_dev = dev_list[0];
		if (!ib_dev) {
			fprintf(stderr, "No IB devices found\n");
			exit(1);
		}
	} else {
		for (; (ib_dev = *dev_list); ++dev_list)
			if (!strcmp(ibv_get_device_name(ib_dev), ib_devname))
				break;
		if (!ib_dev)
			fprintf(stderr, "IB device %s not found\n", ib_devname);
	}
	return ib_dev;
}

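/*
 * Usage sketch: ctx_find_dev(NULL) picks the first device reported by
 * ibv_get_device_list(); passing a name (e.g. "mlx5_0", a hypothetical
 * device name used only for illustration) selects a specific HCA. The
 * returned ibv_device is then opened with ibv_open_device().
 */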
void alloc_ctx(struct pingpong_context *ctx, struct perftest_parameters *user_param)
{
	uint64_t tarr_size;
	int num_of_qps_factor;
	ctx->cycle_buffer = user_param->cycle_buffer;
	ctx->cache_line_size = user_param->cache_line_size;

	ALLOCATE(user_param->port_by_qp, uint64_t, user_param->num_of_qps);

	tarr_size = (user_param->noPeak) ? 1 : user_param->iters*user_param->num_of_qps;
	ALLOCATE(user_param->tposted, cycles_t, tarr_size);
	memset(user_param->tposted, 0, sizeof(cycles_t)*tarr_size);
	if ((user_param->tst == LAT || user_param->tst == FS_RATE) && user_param->test_type == DURATION)
		ALLOCATE(user_param->tcompleted, cycles_t, 1);

	ALLOCATE(ctx->qp, struct ibv_qp*, user_param->num_of_qps);
	ALLOCATE(ctx->mr, struct ibv_mr*, user_param->num_of_qps);
	ALLOCATE(ctx->buf, void*, user_param->num_of_qps);

	/* #ifdef HAVE_ACCL_VERBS */
	/* ALLOCATE(ctx->qp_burst_family, struct ibv_exp_qp_burst_family*, user_param->num_of_qps); */
	/* #endif */

	/* #ifdef HAVE_DC */
	/* if (user_param->connection_type == DC) { */
	/*	ALLOCATE(ctx->dct, struct ibv_dct*, user_param->num_of_qps); */
	/* } */
	/* #endif */

	if ((user_param->tst == BW || user_param->tst == LAT_BY_BW) && (user_param->machine == CLIENT || user_param->duplex)) {
		ALLOCATE(user_param->tcompleted, cycles_t, tarr_size);
		memset(user_param->tcompleted, 0, sizeof(cycles_t)*tarr_size);
		ALLOCATE(ctx->my_addr, uint64_t, user_param->num_of_qps);
		ALLOCATE(ctx->rem_addr, uint64_t, user_param->num_of_qps);
		ALLOCATE(ctx->scnt, uint64_t, user_param->num_of_qps);
		ALLOCATE(ctx->ccnt, uint64_t, user_param->num_of_qps);
		memset(ctx->scnt, 0, user_param->num_of_qps * sizeof(uint64_t));
		memset(ctx->ccnt, 0, user_param->num_of_qps * sizeof(uint64_t));

	} else if ((user_param->tst == BW || user_param->tst == LAT_BY_BW)
			&& user_param->verb == SEND && user_param->machine == SERVER) {

		ALLOCATE(ctx->my_addr, uint64_t, user_param->num_of_qps);
		ALLOCATE(user_param->tcompleted, cycles_t, 1);

	} else if (user_param->tst == FS_RATE && user_param->test_type == ITERATIONS) {
		ALLOCATE(user_param->tcompleted, cycles_t, tarr_size);
		memset(user_param->tcompleted, 0, sizeof(cycles_t) * tarr_size);
	}

	if (user_param->machine == CLIENT || user_param->tst == LAT || user_param->duplex) {
		ALLOCATE(ctx->sge_list, struct ibv_sge, user_param->num_of_qps * user_param->post_list);
		ALLOCATE(ctx->wr, struct ibv_send_wr, user_param->num_of_qps * user_param->post_list);

		if ((user_param->verb == SEND && user_param->connection_type == UD) || user_param->connection_type == DC) {
			ALLOCATE(ctx->ah, struct ibv_ah*, user_param->num_of_qps);
		}
	}

	if (user_param->verb == SEND && (user_param->tst == LAT || user_param->machine == SERVER || user_param->duplex)) {
		ALLOCATE(ctx->recv_sge_list, struct ibv_sge, user_param->num_of_qps);
		ALLOCATE(ctx->rwr, struct ibv_recv_wr, user_param->num_of_qps);
		ALLOCATE(ctx->rx_buffer_addr, uint64_t, user_param->num_of_qps);
	}

	if (user_param->mac_fwd == ON)
		ctx->cycle_buffer = user_param->size * user_param->rx_depth;

	ctx->size = user_param->size;

	num_of_qps_factor = (user_param->mr_per_qp) ? 1 : user_param->num_of_qps;

	/* Holds the maximum of the message size and the cycle buffer, aligned to
	 * a cache line; it is multiplied by 2 for send and receive, and scaled
	 * by the number of flows and (if not sharing one MR) the number of QPs. */
	ctx->buff_size = INC(BUFF_SIZE(ctx->size, ctx->cycle_buffer),
			ctx->cache_line_size) * 2 * num_of_qps_factor * user_param->flows;
	ctx->send_qp_buff_size = ctx->buff_size / num_of_qps_factor / 2;
	ctx->flow_buff_size = ctx->send_qp_buff_size / user_param->flows;
	user_param->buff_size = ctx->buff_size;
	if (user_param->connection_type == UD)
		ctx->buff_size += ctx->cache_line_size;
}

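/*
 * Worked example of the buff_size formula (assuming the usual perftest macro
 * definitions, i.e. BUFF_SIZE() returns max(size, cycle_buffer) and INC()
 * rounds up to a cache line): size = 65536, cycle_buffer = 4096, one shared
 * MR (num_of_qps_factor = 1), flows = 1 gives
 *	buff_size = 65536 * 2 = 131072 bytes,
 * i.e. one 64 KiB half for send and one for receive.
 */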
int ctx_init(struct pingpong_context *ctx, struct perftest_parameters *user_param)
{
	int i;
	int num_of_qps = user_param->num_of_qps / 2;

	ctx->is_contig_supported = FAILURE;

	/* #ifdef HAVE_ACCL_VERBS */
	/* enum ibv_exp_query_intf_status intf_status; */
	/* struct ibv_exp_query_intf_params intf_params; */
	/* #endif */

	/* #if defined HAVE_OOO_ATTR || defined HAVE_EXP_OOO_ATTR */
	/* if (user_param->use_ooo) { */
	/*	if (verify_ooo_settings(ctx, user_param) != SUCCESS) { */
	/*		fprintf(stderr, "Incompatible OOO settings\n"); */
	/*		return FAILURE; */
	/*	} */
	/* } */
	/* #endif */

	if (user_param->use_hugepages)
		ctx->is_contig_supported = FAILURE;

	/* Allocate an event channel if requested. */
	if (user_param->use_event) {
		ctx->channel = ibv_create_comp_channel(ctx->context);
		if (!ctx->channel) {
			fprintf(stderr, "Couldn't create completion channel\n");
			return FAILURE;
		}
	}

	/* Allocate the protection domain. */
	ctx->pd = ibv_alloc_pd(ctx->context);
	if (!ctx->pd) {
		fprintf(stderr, "Couldn't allocate PD\n");
		return FAILURE;
	}

	/* #ifdef HAVE_ACCL_VERBS */
	/* if (user_param->use_res_domain) { */
	/*	ctx->res_domain = create_res_domain(ctx, user_param); */
	/*	if (!ctx->res_domain) { */
	/*		fprintf(stderr, "Couldn't create resource domain\n"); */
	/*		return FAILURE; */
	/*	} */
	/* } */
	/* #endif */

	if (create_mr(ctx, user_param)) {
		fprintf(stderr, "Failed to create MR\n");
		return FAILURE;
	}

	if (create_cqs(ctx, user_param)) {
		fprintf(stderr, "Failed to create CQs\n");
		return FAILURE;
	}

	/* #ifdef HAVE_XRCD */
	/* if (user_param->use_xrc) { */
	/*	if (ctx_xrcd_create(ctx, user_param)) { */
	/*		fprintf(stderr, "Couldn't create XRC resources\n"); */
	/*		return FAILURE; */
	/*	} */
	/*	if (ctx_xrc_srq_create(ctx, user_param)) { */
	/*		fprintf(stderr, "Couldn't create SRQ XRC resources\n"); */
	/*		return FAILURE; */
	/*	} */
	/* } */
	/* #endif */

	if (user_param->use_srq && !user_param->use_xrc && (user_param->tst == LAT ||
				user_param->machine == SERVER || user_param->duplex == ON)) {

		struct ibv_srq_init_attr attr = {
			.attr = {
				/* when using an SRQ, rx_depth sets the max_wr */
				.max_wr = user_param->rx_depth,
				.max_sge = 1
			}
		};

		ctx->srq = ibv_create_srq(ctx->pd, &attr);
		if (!ctx->srq) {
			fprintf(stderr, "Couldn't create SRQ\n");
			return FAILURE;
		}
	}

	/* #ifdef HAVE_RSS_EXP */
	/* if (user_param->use_rss) { */
	/*	struct ibv_exp_device_attr attr; */
	/*	attr.comp_mask = IBV_EXP_DEVICE_ATTR_EXP_CAP_FLAGS | */
	/*		IBV_EXP_DEVICE_ATTR_RSS_TBL_SZ; */
	/*	if (ibv_exp_query_device(ctx->context, &attr)) { */
	/*		fprintf(stderr, "Experimental ibv_exp_query_device.\n"); */
	/*		exit(1); */
	/*	} */
	/*	if (!((attr.exp_device_cap_flags & IBV_EXP_DEVICE_QPG) && */
	/*		(attr.exp_device_cap_flags & IBV_EXP_DEVICE_UD_RSS) && */
	/*		(attr.comp_mask & IBV_EXP_DEVICE_ATTR_RSS_TBL_SZ) && */
	/*		(attr.max_rss_tbl_sz > 0))) { */
	/*		fprintf(stderr, "RSS not supported .\n"); */
	/*		exit(1); */
	/*	} */
	/*	[> num of qps includes the parent <] */
	/*	if (user_param->num_of_qps > attr.max_rss_tbl_sz + 1) { */
	/*		fprintf(stderr, "RSS limit is %d .\n", */
	/*			attr.max_rss_tbl_sz); */
	/*		exit(1); */
	/*	} */
	/* } */
	/* #endif */

	for (i = 0; i < user_param->num_of_qps; i++) {
		if (create_qp_main(ctx, user_param, i, num_of_qps)) {
			fprintf(stderr, "Failed to create QP.\n");
			return FAILURE;
		}

		modify_qp_to_init(ctx, user_param, i, num_of_qps);
		/* #ifdef HAVE_ACCL_VERBS */
		/* if (user_param->verb_type == ACCL_INTF) { */
		/*	memset(&intf_params, 0, sizeof(intf_params)); */
		/*	intf_params.intf_scope = IBV_EXP_INTF_GLOBAL; */
		/*	intf_params.intf = IBV_EXP_INTF_QP_BURST; */
		/*	intf_params.obj = ctx->qp[i]; */
		/*	ctx->qp_burst_family[i] = ibv_exp_query_intf(ctx->context, &intf_params, &intf_status); */
		/*	if (!ctx->qp_burst_family[i]) { */
		/*		fprintf(stderr, "Couldn't get QP burst family.\n"); */
		/*		return FAILURE; */
		/*	} */
		/* } */
		/* #endif */
	}

	return SUCCESS;
}

int ctx_connect(struct pingpong_context *ctx,
		struct pingpong_dest *dest,
		struct perftest_parameters *user_param,
		struct pingpong_dest *my_dest)
{
	int i;

	/* #ifdef HAVE_DC */
	/* struct ibv_qp_attr_ex attr_ex; */
	/* #elif HAVE_PACKET_PACING_EXP */
	/* struct ibv_exp_qp_attr attr_ex; */
	/* #endif */

	struct ibv_qp_attr attr;
	int xrc_offset = 0;

	if ((user_param->use_xrc || user_param->connection_type == DC) &&
			(user_param->duplex || user_param->tst == LAT)) {
		xrc_offset = user_param->num_of_qps / 2;
	}

	for (i = 0; i < user_param->num_of_qps; i++) {
		if (user_param->connection_type == DC) {
			if (((!(user_param->duplex || user_param->tst == LAT) && (user_param->machine == SERVER))
						|| ((user_param->duplex || user_param->tst == LAT) && (i >= user_param->num_of_qps/2)))) {
				continue;
			}
		}
		/* #if defined (HAVE_DC) || defined (HAVE_PACKET_PACING_EXP) */
		/* memset(&attr_ex, 0, sizeof attr_ex); */
		/* #endif */
		memset(&attr, 0, sizeof attr);

		if (user_param->rate_limit_type == HW_RATE_LIMIT)
			attr.ah_attr.static_rate = user_param->valid_hw_rate_limit;

#if defined (HAVE_PACKET_PACING_EXP) || defined (HAVE_PACKET_PACING)
		if (user_param->rate_limit_type == PP_RATE_LIMIT && (check_packet_pacing_support(ctx) == FAILURE)) {
			fprintf(stderr, "Packet Pacing isn't supported.\n");
			return FAILURE;
		}
#endif

		if ((i >= xrc_offset) && (user_param->use_xrc || user_param->connection_type == DC) &&
				(user_param->duplex || user_param->tst == LAT))
			xrc_offset = -1*xrc_offset;

		/* if(user_param->connection_type == DC) { */
		/* #ifdef HAVE_DC */
		/*	if(ctx_modify_dc_qp_to_rtr(ctx->qp[i], &attr_ex, user_param, &dest[xrc_offset + i], &my_dest[i], i)) { */
		/*		fprintf(stderr, "Failed to modify QP %d to RTR\n", ctx->qp[i]->qp_num); */
		/*		return FAILURE; */
		/*	} */
		/* #endif */
		/* } else { */

		if (ctx_modify_qp_to_rtr(ctx->qp[i], &attr, user_param, &dest[xrc_offset + i], &my_dest[i], i)) {
			fprintf(stderr, "Failed to modify QP %d to RTR\n", ctx->qp[i]->qp_num);
			return FAILURE;
		}

		/* } */

		if (user_param->tst == LAT || user_param->machine == CLIENT || user_param->duplex) {
			if (ctx_modify_qp_to_rts(ctx->qp[i], &attr, user_param, &dest[xrc_offset + i], &my_dest[i])) {
				fprintf(stderr, "Failed to modify QP to RTS\n");
				return FAILURE;
			}

			/* if(user_param->connection_type == DC) { */
			/* #ifdef HAVE_DC */
			/*	if(ctx_modify_dc_qp_to_rts(ctx->qp[i], &attr_ex, user_param, &dest[xrc_offset + i], &my_dest[i])) { */
			/*		fprintf(stderr, "Failed to modify QP to RTS\n"); */
			/*		return FAILURE; */
			/*	} */
			/* #endif */
			/* } else { */
			/* #ifdef HAVE_PACKET_PACING_EXP */
			/*	if (user_param->rate_limit_type == PP_RATE_LIMIT) { */
			/*		if(ctx_modify_qp_to_rts(ctx->qp[i], &attr_ex, user_param, &dest[xrc_offset + i], &my_dest[i])) { */
			/*			fprintf(stderr, "Failed to modify QP %x to RTS\n", ctx->qp[i]->qp_num); */
			/*			return FAILURE; */
			/*		} */
			/*	} else { */
			/* #endif */
			/*		if(ctx_modify_qp_to_rts(ctx->qp[i], &attr, user_param, &dest[xrc_offset + i], &my_dest[i])) { */
			/*			fprintf(stderr, "Failed to modify QP to RTS\n"); */
			/*			return FAILURE; */
			/*		} */
			/* #ifdef HAVE_PACKET_PACING_EXP */
			/*	} */
			/* #endif */
		}

		if ((user_param->connection_type == UD || user_param->connection_type == DC) &&
				(user_param->tst == LAT || user_param->machine == CLIENT || user_param->duplex)) {
			/* #ifdef HAVE_DC */
			/* if(user_param->connection_type == DC) */
			/*	ctx->ah[i] = ibv_create_ah(ctx->pd, &(attr_ex.ah_attr)); */
			/* else */
			/* #endif */
			ctx->ah[i] = ibv_create_ah(ctx->pd, &(attr.ah_attr));

			if (!ctx->ah[i]) {
				fprintf(stderr, "Failed to create AH for UD\n");
				return FAILURE;
			}
		}

		if (user_param->rate_limit_type == HW_RATE_LIMIT) {
			struct ibv_qp_attr qp_attr;
			struct ibv_qp_init_attr init_attr;
			int err, qp_static_rate = 0;

			memset(&qp_attr, 0, sizeof(struct ibv_qp_attr));
			memset(&init_attr, 0, sizeof(struct ibv_qp_init_attr));

			err = ibv_query_qp(ctx->qp[i], &qp_attr, IBV_QP_AV, &init_attr);
			if (err)
				fprintf(stderr, "ibv_query_qp failed to get ah_attr\n");
			else
				qp_static_rate = (int)(qp_attr.ah_attr.static_rate);

			/* Fall back to the SW rate limit only if the limit type was not forced by the user. */
			if (err || (qp_static_rate != user_param->valid_hw_rate_limit)) {
				if (!user_param->is_rate_limit_type) {
					user_param->rate_limit_type = SW_RATE_LIMIT;
					fprintf(stderr, "\x1b[31mThe QP failed to accept HW rate limit, providing SW rate limit \x1b[0m\n");
				} else {
					fprintf(stderr, "\x1b[31mThe QP failed to accept HW rate limit \x1b[0m\n");
					return FAILURE;
				}
			}
		}

		if ((user_param->use_xrc || user_param->connection_type == DC) && (user_param->duplex || user_param->tst == LAT))
			xrc_offset = user_param->num_of_qps / 2;

	}
	return SUCCESS;
}

int destroy_ctx(struct pingpong_context *ctx,
		struct perftest_parameters *user_param)
{
	int i, first, dereg_counter;
	int test_result = 0;
	int num_of_qps = user_param->num_of_qps;

	if (user_param->wait_destroy) {
		printf(" Waiting %u seconds before releasing resources...\n",
				user_param->wait_destroy);

		/* select()-based stand-in for sleep(user_param->wait_destroy). */
		struct timeval tv;
		tv.tv_sec = user_param->wait_destroy;
		tv.tv_usec = 0;
		select(0, NULL, NULL, NULL, &tv);
	}

	dereg_counter = (user_param->mr_per_qp) ? user_param->num_of_qps : 1;

	/* In DC with bidirectional traffic there are send QPs and recv QPs; the
	 * actual number of send/recv QPs is num_of_qps / 2. */
	if (user_param->duplex || user_param->tst == LAT) {
		num_of_qps /= 2;
	}

	/* RSS parent should be last */
	if (user_param->use_rss)
		first = 1;
	else
		first = 0;

	for (i = first; i < user_param->num_of_qps; i++) {
		if (((user_param->connection_type == DC && !((!(user_param->duplex || user_param->tst == LAT) && (user_param->machine == SERVER))
							|| ((user_param->duplex || user_param->tst == LAT) && (i >= num_of_qps)))) ||
					user_param->connection_type == UD) && (user_param->tst == LAT || user_param->machine == CLIENT || user_param->duplex)) {
			if (ibv_destroy_ah(ctx->ah[i])) {
				fprintf(stderr, "Failed to destroy AH\n");
				test_result = 1;
			}
		}
		/* #ifdef HAVE_DC */
		/* if (user_param->connection_type == DC && ((!(user_param->duplex || user_param->tst == LAT) */
		/*	&& (user_param->machine == SERVER)) || ((user_param->duplex || user_param->tst == LAT) && (i >= num_of_qps)))) { */
		/*	if (ibv_exp_destroy_dct(ctx->dct[i])) { */
		/*		fprintf(stderr, "Failed to destroy dct\n"); */
		/*		test_result = 1; */
		/*	} */
		/*	if ( i == user_param->num_of_qps -1 ) */
		/*		return test_result; */
		/* } else */
		/* #endif */

		if (ibv_destroy_qp(ctx->qp[i])) {
			fprintf(stderr, "Couldn't destroy QP - %s\n", strerror(errno));
			test_result = 1;
		}
	}

	if (user_param->use_rss) {
		if (user_param->connection_type == UD && (user_param->tst == LAT || user_param->machine == CLIENT || user_param->duplex)) {
			if (ibv_destroy_ah(ctx->ah[0])) {
				fprintf(stderr, "Failed to destroy AH\n");
				test_result = 1;
			}
		}

		if (ibv_destroy_qp(ctx->qp[0])) {
			fprintf(stderr, "Couldn't destroy QP - %s\n", strerror(errno));
			test_result = 1;
		}
	}
	if (user_param->srq_exists) {
		if (ibv_destroy_srq(ctx->srq)) {
			fprintf(stderr, "Couldn't destroy SRQ\n");
			test_result = 1;
		}
	}

	/* #ifdef HAVE_XRCD */
	/* if (user_param->use_xrc) { */
	/*	if (ibv_close_xrcd(ctx->xrc_domain)) { */
	/*		fprintf(stderr, "Couldn't destroy XRC domain\n"); */
	/*		test_result = 1; */
	/*	} */
	/*	if (ctx->fd >= 0 && close(ctx->fd)) { */
	/*		fprintf(stderr, "Couldn't close the file for the XRC Domain\n"); */
	/*		test_result = 1; */
	/*	} */
	/* } */
	/* #endif */

	if (ibv_destroy_cq(ctx->send_cq)) {
		fprintf(stderr, "Failed to destroy CQ - %s\n", strerror(errno));
		test_result = 1;
	}

	if (user_param->verb == SEND && (user_param->tst == LAT || user_param->machine == SERVER || user_param->duplex || (ctx->channel))) {
		if (!(user_param->connection_type == DC && user_param->machine == SERVER)) {
			if (ibv_destroy_cq(ctx->recv_cq)) {
				fprintf(stderr, "Failed to destroy CQ - %s\n", strerror(errno));
				test_result = 1;
			}
		}
	}

	for (i = 0; i < dereg_counter; i++) {
		if (ibv_dereg_mr(ctx->mr[i])) {
			fprintf(stderr, "Failed to deregister MR #%d\n", i+1);
			test_result = 1;
		}
	}

	if (ibv_dealloc_pd(ctx->pd)) {
		fprintf(stderr, "Failed to deallocate PD - %s\n", strerror(errno));
		test_result = 1;
	}

	if (ctx->channel) {
		if (ibv_destroy_comp_channel(ctx->channel)) {
			fprintf(stderr, "Failed to close event channel\n");
			test_result = 1;
		}
	}

	if (ibv_close_device(ctx->context)) {
		fprintf(stderr, "Failed to close device context\n");
		test_result = 1;
	}

	/* #ifdef HAVE_CUDA */
	/* if (user_param->use_cuda) { */
	/*	pp_free_gpu(ctx); */
	/* } */
	/* else */
	/* #endif */

	/* if (user_param->mmap_file != NULL) { */
	/*	pp_free_mmap(ctx); */
	/* } else if (ctx->is_contig_supported == FAILURE) { */

	if (ctx->is_contig_supported == FAILURE) {
		for (i = 0; i < dereg_counter; i++) {
			/* if (user_param->use_hugepages) { */
			/*	shmdt(ctx->buf[i]); */
			/* } else { */
			/*	free(ctx->buf[i]); */
			/* } */

			free(ctx->buf[i]);
		}
	}
	free(ctx->qp);

	if ((user_param->tst == BW || user_param->tst == LAT_BY_BW) &&
			(user_param->machine == CLIENT || user_param->duplex)) {
		free(user_param->tposted);
		free(user_param->tcompleted);
		free(ctx->my_addr);
		free(ctx->rem_addr);
		free(ctx->scnt);
		free(ctx->ccnt);
	}

	else if ((user_param->tst == BW || user_param->tst == LAT_BY_BW) &&
			user_param->verb == SEND && user_param->machine == SERVER) {
		free(user_param->tposted);
		free(user_param->tcompleted);
		free(ctx->my_addr);
	}

	if (user_param->machine == CLIENT || user_param->tst == LAT || user_param->duplex) {
		free(ctx->sge_list);
		free(ctx->wr);
	}

	if (user_param->verb == SEND && (user_param->tst == LAT || user_param->machine == SERVER || user_param->duplex)) {
		free(ctx->rx_buffer_addr);
		free(ctx->recv_sge_list);
		free(ctx->rwr);
	}

	return test_result;
}

void ctx_set_send_wqes(struct pingpong_context *ctx,
		struct perftest_parameters *user_param,
		struct pingpong_dest *rem_dest)
{
	ctx_set_send_reg_wqes(ctx, user_param, rem_dest);
}

int perform_warm_up(struct pingpong_context *ctx, struct perftest_parameters *user_param)
{
	int ne, index, warmindex, warmupsession;
	int err = 0;

	struct ibv_send_wr *bad_wr = NULL;
	struct ibv_wc wc;
	struct ibv_wc *wc_for_cleaning = NULL;

	int num_of_qps = user_param->num_of_qps;
	int return_value = 0;

	if (user_param->duplex && (user_param->use_xrc || user_param->connection_type == DC))
		num_of_qps /= 2;

	warmupsession = (user_param->post_list == 1) ? user_param->tx_depth : user_param->post_list;
	ALLOCATE(wc_for_cleaning, struct ibv_wc, user_param->tx_depth);

	/* Clean up the pipe */
	ne = ibv_poll_cq(ctx->send_cq, user_param->tx_depth, wc_for_cleaning);

	for (index = 0; index < num_of_qps; index++) {
		for (warmindex = 0; warmindex < warmupsession; warmindex += user_param->post_list) {
			err = ibv_post_send(ctx->qp[index], &ctx->wr[index*user_param->post_list], &bad_wr);
			if (err) {
				fprintf(stderr, "Couldn't post send during warm up: qp %d scnt=%d \n", index, warmindex);
				return_value = FAILURE;
				goto cleaning;
			}
		}

		do {
			ne = ibv_poll_cq(ctx->send_cq, 1, &wc);
			if (ne > 0) {
				if (wc.status != IBV_WC_SUCCESS) {
					return_value = FAILURE;
					goto cleaning;
				}
				warmindex -= user_param->post_list;
			} else if (ne < 0) {
				return_value = FAILURE;
				goto cleaning;
			}
		} while (warmindex);
	}

cleaning:
	free(wc_for_cleaning);

	return return_value;
}

int run_iter_bw(struct pingpong_context *ctx, struct perftest_parameters *user_param)
{
	uint64_t totscnt = 0;
	uint64_t totccnt = 0;
	int i = 0;
	int index, ne;
	uint64_t tot_iters;
	int err = 0;

	struct ibv_send_wr *bad_wr = NULL;
	struct ibv_wc *wc = NULL;
	int num_of_qps = user_param->num_of_qps;

	/* Rate limiter */
	int rate_limit_pps = 0;
	double gap_time = 0;		/* in usec */
	cycles_t gap_cycles = 0;	/* in cycles */
	cycles_t gap_deadline = 0;
	unsigned int number_of_bursts = 0;
	int burst_iter = 0;
	int is_sending_burst = 0;
	int cpu_mhz = 0;
	int return_value = 0;
	int wc_id;
	int send_flows_index = 0;
	uintptr_t primary_send_addr = ctx->sge_list[0].addr;
	int address_offset = 0;
	int flows_burst_iter = 0;

	ALLOCATE(wc, struct ibv_wc, CTX_POLL_BATCH);

	if (user_param->duplex && (user_param->use_xrc || user_param->connection_type == DC))
		num_of_qps /= 2;

	/* Will be 0 in the DURATION case (see force_dependencies or the expression above). */
	tot_iters = (uint64_t)user_param->iters*num_of_qps;

	if (user_param->test_type == ITERATIONS && user_param->noPeak == ON)
		user_param->tposted[0] = get_cycles();

	/* If using the rate limiter, calculate the gap time between bursts. */
	if (user_param->rate_limit_type == SW_RATE_LIMIT) {
		/* Calculate the rate limit in pps */
		switch (user_param->rate_units) {

			case MEGA_BYTE_PS:
				rate_limit_pps = ((double)(user_param->rate_limit) / user_param->size) * 1048576;
				break;
			case GIGA_BIT_PS:
				rate_limit_pps = ((double)(user_param->rate_limit) / (user_param->size * 8)) * 1000000000;
				break;
			case PACKET_PS:
				rate_limit_pps = user_param->rate_limit;
				break;

			default:
				fprintf(stderr, " Failed: Unknown rate limit units\n");
				return_value = FAILURE;
				goto cleaning;
		}

		cpu_mhz = get_cpu_mhz(user_param->cpu_freq_f);
		if (cpu_mhz <= 0) {
			fprintf(stderr, "Failed: couldn't acquire cpu frequency for rate limiter.\n");
			/* Without a CPU frequency gap_cycles would be 0 and the
			 * limiter would silently do nothing, so treat this as fatal. */
			return_value = FAILURE;
			goto cleaning;
		}
		number_of_bursts = rate_limit_pps / user_param->burst_size;
		gap_time = 1000000 * (1.0 / number_of_bursts);
		gap_cycles = cpu_mhz * gap_time;
	}

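	/*
	 * Worked example of the gap computation above: --rate_limit=100 (MB/s),
	 * --size=4096, --burst_size=100 gives
	 *	rate_limit_pps = 100/4096 * 1048576 = 25600 packets/s,
	 * i.e. number_of_bursts = 256 bursts/s, so
	 *	gap_time = 1000000/256 ~= 3906 usec between burst starts.
	 */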
	/* main loop for posting */
	while (totscnt < tot_iters || totccnt < tot_iters ||
			(user_param->test_type == DURATION && user_param->state != END_STATE)) {

		/* main loop to run over all the qps and post n messages each time */
		for (index = 0; index < num_of_qps; index++) {
			if (user_param->rate_limit_type == SW_RATE_LIMIT && is_sending_burst == 0) {
				if (gap_deadline > get_cycles()) {
					/* Go right to cq polling until the gap time is over. */
					continue;
				}

				gap_deadline = get_cycles() + gap_cycles;
				is_sending_burst = 1;
				burst_iter = 0;
			}

			while ((ctx->scnt[index] < user_param->iters || user_param->test_type == DURATION) &&
					((ctx->scnt[index] - ctx->ccnt[index]) < (user_param->tx_depth)) &&
					!((user_param->rate_limit_type == SW_RATE_LIMIT) && is_sending_burst == 0)) {

				if (ctx->send_rcredit) {
					uint32_t swindow = ctx->scnt[index] + user_param->post_list - ctx->credit_buf[index];
					if (swindow >= user_param->rx_depth)
						break;
				}

				if (user_param->post_list == 1 && (ctx->scnt[index] % user_param->cq_mod == 0 && user_param->cq_mod > 1) &&
						!(ctx->scnt[index] == (user_param->iters - 1) && user_param->test_type == ITERATIONS)) {
					ctx->wr[index].send_flags &= ~IBV_SEND_SIGNALED;
				}

				if (user_param->noPeak == OFF)
					user_param->tposted[totscnt] = get_cycles();

				/* if (user_param->test_type == DURATION && user_param->state == END_STATE) */
				/*	break; */

				err = ibv_post_send(ctx->qp[index], &ctx->wr[index*user_param->post_list], &bad_wr);
				if (err) {
					fprintf(stderr, "Couldn't post send: qp %d scnt=%lu \n", index, ctx->scnt[index]);
					return_value = FAILURE;
					goto cleaning;
				}

				/* if we have more than a single flow and the burst iter is the last one */
				if (user_param->flows != DEF_FLOWS) {
					if (++flows_burst_iter == user_param->flows_burst) {
						flows_burst_iter = 0;
						/* advance send_flows_index and update the address */
						if (++send_flows_index == user_param->flows)
							send_flows_index = 0;
						address_offset = send_flows_index * ctx->flow_buff_size;
						ctx->sge_list[0].addr = primary_send_addr + address_offset;
					}
				}

				/* in multiple-flow scenarios we advance to the next cycle-buffer slot in the main buffer */
				if (user_param->post_list == 1 && user_param->size <= (ctx->cycle_buffer / 2)) {
					increase_loc_addr(ctx->wr[index].sg_list, user_param->size,
							ctx->scnt[index], ctx->my_addr[index] + address_offset, 0,
							ctx->cache_line_size, ctx->cycle_buffer);

					if (user_param->verb != SEND) {
						increase_rem_addr(&ctx->wr[index], user_param->size,
								ctx->scnt[index], ctx->rem_addr[index], user_param->verb,
								ctx->cache_line_size, ctx->cycle_buffer);
					}
				}

				ctx->scnt[index] += user_param->post_list;
				totscnt += user_param->post_list;

				/* ask for a completion on this wr */
				if (user_param->post_list == 1 &&
						(ctx->scnt[index]%user_param->cq_mod == user_param->cq_mod - 1 ||
						 (user_param->test_type == ITERATIONS && ctx->scnt[index] == user_param->iters - 1))) {
					ctx->wr[index].send_flags |= IBV_SEND_SIGNALED;
				}

				/* Check if a full burst was sent. */
				if (user_param->rate_limit_type == SW_RATE_LIMIT) {
					burst_iter += user_param->post_list;
					if (burst_iter >= user_param->burst_size) {
						is_sending_burst = 0;
					}
				}
			}
		}

		if (totccnt < tot_iters || (user_param->test_type == DURATION && totccnt < totscnt)) {
			if (user_param->use_event) {
				if (ctx_notify_events(ctx->channel)) {
					fprintf(stderr, "Couldn't request CQ notification\n");
					return_value = FAILURE;
					goto cleaning;
				}
			}

			/* #ifdef HAVE_ACCL_VERBS */
			/* if (user_param->verb_type == ACCL_INTF) */
			/*	ne = ctx->send_cq_family->poll_cnt(ctx->send_cq, CTX_POLL_BATCH); */
			/* else */
			/* #endif */
			ne = ibv_poll_cq(ctx->send_cq, CTX_POLL_BATCH, wc);

			if (ne > 0) {
				for (i = 0; i < ne; i++) {
					wc_id = (user_param->verb_type == ACCL_INTF) ?
						0 : (int)wc[i].wr_id;

					if (user_param->verb_type != ACCL_INTF) {
						if (wc[i].status != IBV_WC_SUCCESS) {
							NOTIFY_COMP_ERROR_SEND(wc[i], totscnt, totccnt);
							return_value = FAILURE;
							goto cleaning;
						}
					}

					ctx->ccnt[wc_id] += user_param->cq_mod;
					totccnt += user_param->cq_mod;

					if (user_param->noPeak == OFF) {
						if (totccnt >= tot_iters - 1)
							user_param->tcompleted[user_param->iters*num_of_qps - 1] = get_cycles();
						else
							user_param->tcompleted[totccnt-1] = get_cycles();
					}

					if (user_param->test_type == DURATION && user_param->state == SAMPLE_STATE) {
						if (user_param->report_per_port) {
							user_param->iters_per_port[user_param->port_by_qp[wc_id]] += user_param->cq_mod;
						}
						user_param->iters += user_param->cq_mod;
					}
				}

			} else if (ne < 0) {
				fprintf(stderr, "poll CQ failed %d\n", ne);
				return_value = FAILURE;
				goto cleaning;
			}
		}
	}

	if (user_param->noPeak == ON && user_param->test_type == ITERATIONS)
		user_param->tcompleted[0] = get_cycles();

cleaning:

	free(wc);
	return return_value;
}

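/*
 * Note on the accounting above: with --cq_mod=N only every N-th send is
 * posted IBV_SEND_SIGNALED, so each polled completion stands for N finished
 * sends; that is why ccnt/totccnt advance by user_param->cq_mod per WC.
 */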
int run_iter_lat(struct pingpong_context *ctx, struct perftest_parameters *user_param)
{
	uint64_t scnt = 0;
	int ne;
	int err = 0;
	struct ibv_send_wr *bad_wr = NULL;
	struct ibv_wc wc;

	int cpu_mhz = get_cpu_mhz(user_param->cpu_freq_f);
	int total_gap_cycles = user_param->latency_gap * cpu_mhz;
	cycles_t end_cycle, start_gap = 0;

	ctx->wr[0].sg_list->length = user_param->size;
	ctx->wr[0].send_flags = IBV_SEND_SIGNALED;

	/* Duration support in latency tests. */
	/* if (user_param->test_type == DURATION) { */
	/*	duration_param = user_param; */
	/*	duration_param->state = START_STATE; */
	/*	signal(SIGALRM, catch_alarm); */
	/*	user_param->iters = 0; */
	/*	if (user_param->margin > 0) */
	/*		alarm(user_param->margin); */
	/*	else */
	/*		catch_alarm(0); */
	/* } */

	while (scnt < user_param->iters || (user_param->test_type == DURATION && user_param->state != END_STATE)) {
		if (user_param->latency_gap) {
			start_gap = get_cycles();
			end_cycle = start_gap + total_gap_cycles;
			while (get_cycles() < end_cycle) {
				continue;
			}
		}
		if (user_param->test_type == ITERATIONS)
			user_param->tposted[scnt++] = get_cycles();

		err = ibv_post_send(ctx->qp[0], &ctx->wr[0], &bad_wr);
		if (err) {
			fprintf(stderr, "Couldn't post send: scnt=%lu\n", scnt);
			return 1;
		}

		if (user_param->test_type == DURATION && user_param->state == END_STATE)
			break;

		if (user_param->use_event) {
			if (ctx_notify_events(ctx->channel)) {
				fprintf(stderr, "Couldn't request CQ notification\n");
				return 1;
			}
		}

		do {
			ne = ibv_poll_cq(ctx->send_cq, 1, &wc);
			if (ne > 0) {
				if (wc.status != IBV_WC_SUCCESS) {
					NOTIFY_COMP_ERROR_SEND(wc, scnt, scnt);
					return 1;
				}
				if (user_param->test_type == DURATION && user_param->state == SAMPLE_STATE)
					user_param->iters++;

			} else if (ne < 0) {
				fprintf(stderr, "poll CQ failed %d\n", ne);
				return FAILURE;
			}

		} while (!user_param->use_event && ne == 0);
	}

	return 0;
}

int run_iter_lat_write(struct pingpong_context *ctx, struct perftest_parameters *user_param)
{
	uint64_t scnt = 0;
	uint64_t ccnt = 0;
	uint64_t rcnt = 0;
	int ne;
	int err = 0;
	int poll_buf_offset = 0;
	volatile char *poll_buf = NULL;
	volatile char *post_buf = NULL;
	struct ibv_send_wr *bad_wr = NULL;
	struct ibv_wc wc;

	int cpu_mhz = get_cpu_mhz(user_param->cpu_freq_f);
	int total_gap_cycles = user_param->latency_gap * cpu_mhz;
	cycles_t end_cycle, start_gap = 0;

	ctx->wr[0].sg_list->length = user_param->size;
	ctx->wr[0].send_flags = IBV_SEND_SIGNALED;
	if (user_param->size <= user_param->inline_size)
		ctx->wr[0].send_flags |= IBV_SEND_INLINE;

	if ((user_param->use_xrc || user_param->connection_type == DC))
		poll_buf_offset = 1;

	/* Ping-pong via RDMA WRITE: each side stamps the last byte of its
	 * outgoing message with the send counter and spins on the last byte of
	 * the region the peer writes into; that byte arriving closes one
	 * round trip. */
	post_buf = (char*)ctx->buf[0] + user_param->size - 1;
	poll_buf = (char*)ctx->buf[0] + (user_param->num_of_qps + poll_buf_offset)*BUFF_SIZE(ctx->size, ctx->cycle_buffer) + user_param->size - 1;

	/* Duration support in latency tests. */
	/* if (user_param->test_type == DURATION) { */
	/*	duration_param = user_param; */
	/*	duration_param->state = START_STATE; */
	/*	signal(SIGALRM, catch_alarm); */
	/*	user_param->iters = 0; */
	/*	if (user_param->margin > 0) */
	/*		alarm(user_param->margin); */
	/*	else */
	/*		catch_alarm(0); */
	/* } */

	/* Done with setup. Start the test. */
	while (scnt < user_param->iters || ccnt < user_param->iters || rcnt < user_param->iters
			|| ((user_param->test_type == DURATION && user_param->state != END_STATE))) {

		if ((rcnt < user_param->iters || user_param->test_type == DURATION) &&
				!(scnt < 1 && user_param->machine == SERVER)) {
			rcnt++;
			while (*poll_buf != (char)rcnt && user_param->state != END_STATE)
				;
		}

		if (scnt < user_param->iters || user_param->test_type == DURATION) {
			if (user_param->latency_gap) {
				start_gap = get_cycles();
				end_cycle = start_gap + total_gap_cycles;
				while (get_cycles() < end_cycle) {
					continue;
				}
			}

			if (user_param->test_type == ITERATIONS)
				user_param->tposted[scnt] = get_cycles();

			*post_buf = (char)++scnt;
			err = ibv_post_send(ctx->qp[0], &ctx->wr[0], &bad_wr);
			if (err) {
				fprintf(stderr, "Couldn't post send: scnt=%lu\n", scnt);
				return 1;
			}
		}

		if (user_param->test_type == DURATION && user_param->state == END_STATE)
			break;

		if (ccnt < user_param->iters || user_param->test_type == DURATION) {
			do {
				ne = ibv_poll_cq(ctx->send_cq, 1, &wc);
			} while (ne == 0);

			if (ne > 0) {
				if (wc.status != IBV_WC_SUCCESS) {
					NOTIFY_COMP_ERROR_SEND(wc, scnt, ccnt);
					return 1;
				}
				ccnt++;
				if (user_param->test_type == DURATION && user_param->state == SAMPLE_STATE) {
					user_param->iters++;
				}

			} else if (ne < 0) {
				fprintf(stderr, "poll CQ failed %d\n", ne);
				return FAILURE;
			}
		}
	}

	return 0;
}