#include #include #include #include #include #include #include #include #include #include #include /* #include */ #include #include #include #include "perftest_resources.h" #include "perftest_parameters.h" /* #include "config.h" */ #ifdef HAVE_VERBS_EXP static enum ibv_exp_wr_opcode exp_opcode_verbs_array[] = {IBV_EXP_WR_SEND,IBV_EXP_WR_RDMA_WRITE,IBV_EXP_WR_RDMA_READ}; static enum ibv_exp_wr_opcode exp_opcode_atomic_array[] = {IBV_EXP_WR_ATOMIC_CMP_AND_SWP,IBV_EXP_WR_ATOMIC_FETCH_AND_ADD}; #endif static enum ibv_wr_opcode opcode_verbs_array[] = {IBV_WR_SEND,IBV_WR_RDMA_WRITE,IBV_WR_RDMA_READ}; static enum ibv_wr_opcode opcode_atomic_array[] = {IBV_WR_ATOMIC_CMP_AND_SWP,IBV_WR_ATOMIC_FETCH_AND_ADD}; #define CPU_UTILITY "/proc/stat" struct perftest_parameters* duration_param; struct check_alive_data check_alive_data; /****************************************************************************** * Beginning ******************************************************************************/ #ifdef HAVE_CUDA #define ASSERT(x) \ do { \ if (!(x)) { \ fprintf(stdout, "Assertion \"%s\" failed at %s:%d\n", #x, __FILE__, __LINE__); \ } \ } while (0) #define CUCHECK(stmt) \ do { \ CUresult result = (stmt); \ ASSERT(CUDA_SUCCESS == result); \ } while (0) /*----------------------------------------------------------------------------*/ static CUdevice cuDevice; static CUcontext cuContext; static int pp_init_gpu(struct pingpong_context *ctx, size_t _size) { const size_t gpu_page_size = 64*1024; size_t size = (_size + gpu_page_size - 1) & ~(gpu_page_size - 1); printf("initializing CUDA\n"); CUresult error = cuInit(0); if (error != CUDA_SUCCESS) { printf("cuInit(0) returned %d\n", error); exit(1); } int deviceCount = 0; error = cuDeviceGetCount(&deviceCount); if (error != CUDA_SUCCESS) { printf("cuDeviceGetCount() returned %d\n", error); exit(1); } /* This function call returns 0 if there are no CUDA capable devices. 
*/ if (deviceCount == 0) { printf("There are no available device(s) that support CUDA\n"); return 1; } else if (deviceCount == 1) printf("There is 1 device supporting CUDA\n"); else printf("There are %d devices supporting CUDA, picking first...\n", deviceCount); int devID = 0; /* pick up device with zero ordinal (default, or devID) */ CUCHECK(cuDeviceGet(&cuDevice, devID)); char name[128]; CUCHECK(cuDeviceGetName(name, sizeof(name), devID)); printf("[pid = %d, dev = %d] device name = [%s]\n", getpid(), cuDevice, name); printf("creating CUDA Ctx\n"); /* Create context */ error = cuCtxCreate(&cuContext, CU_CTX_MAP_HOST, cuDevice); if (error != CUDA_SUCCESS) { printf("cuCtxCreate() error=%d\n", error); return 1; } printf("making it the current CUDA Ctx\n"); error = cuCtxSetCurrent(cuContext); if (error != CUDA_SUCCESS) { printf("cuCtxSetCurrent() error=%d\n", error); return 1; } printf("cuMemAlloc() of a %zd bytes GPU buffer\n", size); CUdeviceptr d_A; error = cuMemAlloc(&d_A, size); if (error != CUDA_SUCCESS) { printf("cuMemAlloc error=%d\n", error); return 1; } printf("allocated GPU buffer address at %016llx pointer=%p\n", d_A, (void *) d_A); ctx->buf[0] = (void*)d_A; return 0; } static int pp_free_gpu(struct pingpong_context *ctx) { int ret = 0; CUdeviceptr d_A = (CUdeviceptr) ctx->buf[0]; printf("deallocating RX GPU buffer\n"); cuMemFree(d_A); d_A = 0; printf("destroying current CUDA Ctx\n"); CUCHECK(cuCtxDestroy(cuContext)); return ret; } #endif /* static int pp_init_mmap(struct pingpong_context *ctx, size_t size, */ /* const char *fname, unsigned long offset) */ /* { */ /* int fd = open(fname, O_RDWR); */ /* if (fd < 0) { */ /* printf("Unable to open '%s': %s\n", fname, strerror(errno)); */ /* return 1; */ /* } */ /* ctx->buf[0] = mmap(NULL, size, PROT_WRITE | PROT_READ, MAP_SHARED, fd, offset); */ /* close(fd); */ /* if (ctx->buf[0] == MAP_FAILED) { */ /* printf("Unable to mmap '%s': %s\n", fname, strerror(errno)); */ /* return 1; */ /* } */ /* 
printf("allocated mmap buffer of size %zd at %p\n", size, ctx->buf[0]); */ /* return 0; */ /* } */ /* static int pp_free_mmap(struct pingpong_context *ctx) */ /* { */ /* munmap(ctx->buf[0], ctx->buff_size); */ /* return 0; */ /* } */ #ifdef HAVE_VERBS_EXP static void get_verbs_pointers(struct pingpong_context *ctx) { ctx->exp_post_send_func_pointer = ibv_exp_get_provider_func(ctx->context,IBV_EXP_POST_SEND_FUNC); if (!ctx->exp_post_send_func_pointer) { fprintf(stderr, "Couldn't get ibv_exp_post_send pointer\n"); ctx->exp_post_send_func_pointer = &ibv_exp_post_send; } ctx->post_send_func_pointer = ibv_exp_get_provider_func(ctx->context,IBV_POST_SEND_FUNC); if (!ctx->post_send_func_pointer) { fprintf(stderr, "Couldn't get ibv_post_send pointer\n"); ctx->post_send_func_pointer = &ibv_post_send; } ctx->poll_cq_func_pointer = ibv_exp_get_provider_func(ctx->context,IBV_POLL_CQ_FUNC); if (!ctx->poll_cq_func_pointer) { fprintf(stderr, "Couldn't get ibv_poll_cq pointer\n"); } } #endif static int next_word_string(char* input, char* output, int from_index) { int i = from_index; int j = 0; while (input[i] != ' ') { output[j] = input[i]; j++; i++; } output[j]=0; return i+1; } static int get_n_word_string(char *input, char *output,int from_index, int iters) { for (;iters > 0; iters--) { from_index = next_word_string(input,output,from_index); } return from_index; } static void compress_spaces(char *str, char *dst) { for (; *str; ++str) { *dst++ = *str; if (isspace(*str)) { do ++str; while (isspace(*str)); --str; } } *dst = 0; } static void get_cpu_stats(struct perftest_parameters *duration_param,int stat_index) { char* file_name = CPU_UTILITY; FILE *fp; char line[100]; char tmp[100]; int index=0; fp = fopen(file_name, "r"); if (fp != NULL) { if (fgets(line,100,fp) != NULL) { compress_spaces(line,line); index=get_n_word_string(line,tmp,index,2); /* skip first word */ duration_param->cpu_util_data.ustat[stat_index-1] = atoll(tmp); index=get_n_word_string(line,tmp,index,3); /* skip 
2 stats */ duration_param->cpu_util_data.idle[stat_index-1] = atoll(tmp); fclose(fp); } } } #ifdef HAVE_VERBS_EXP static int check_for_contig_pages_support(struct ibv_context *context) { int answer; struct ibv_exp_device_attr attr; memset(&attr,0,sizeof attr); if (ibv_exp_query_device(context,&attr)) { fprintf(stderr, "Couldn't get device attributes\n"); return FAILURE; } answer = ( attr.exp_device_cap_flags &= IBV_EXP_DEVICE_MR_ALLOCATE) ? SUCCESS : FAILURE; return answer; } #endif #ifdef HAVE_XRCD /****************************************************************************** * ******************************************************************************/ static int ctx_xrcd_create(struct pingpong_context *ctx,struct perftest_parameters *user_param) { char *tmp_file_name; struct ibv_xrcd_init_attr xrcd_init_attr; memset(&xrcd_init_attr , 0 , sizeof xrcd_init_attr); tmp_file_name = (user_param->machine == SERVER) ? SERVER_FD : CLIENT_FD; ctx->fd = open(tmp_file_name, O_RDONLY | O_CREAT, S_IRUSR | S_IRGRP); if (ctx->fd < 0) { fprintf(stderr,"Error opening file %s errno: %s", tmp_file_name,strerror(errno)); return FAILURE; } xrcd_init_attr.comp_mask = IBV_XRCD_INIT_ATTR_FD | IBV_XRCD_INIT_ATTR_OFLAGS; xrcd_init_attr.fd = ctx->fd; xrcd_init_attr.oflags = O_CREAT ; ctx->xrc_domain = ibv_open_xrcd(ctx->context,&xrcd_init_attr); if (ctx->xrc_domain == NULL) { fprintf(stderr,"Error opening XRC domain\n"); return FAILURE; } return 0; } /****************************************************************************** * ******************************************************************************/ static int ctx_xrc_srq_create(struct pingpong_context *ctx, struct perftest_parameters *user_param) { struct ibv_srq_init_attr_ex srq_init_attr; memset(&srq_init_attr, 0, sizeof(srq_init_attr)); srq_init_attr.attr.max_wr = user_param->rx_depth; srq_init_attr.attr.max_sge = 1; srq_init_attr.comp_mask = IBV_SRQ_INIT_ATTR_TYPE | IBV_SRQ_INIT_ATTR_XRCD | IBV_SRQ_INIT_ATTR_CQ | 
IBV_SRQ_INIT_ATTR_PD; srq_init_attr.srq_type = IBV_SRQT_XRC; srq_init_attr.xrcd = ctx->xrc_domain; if(user_param->verb == SEND) srq_init_attr.cq = ctx->recv_cq; else srq_init_attr.cq = ctx->send_cq; srq_init_attr.pd = ctx->pd; ctx->srq = ibv_create_srq_ex(ctx->context, &srq_init_attr); if (ctx->srq == NULL) { fprintf(stderr, "Couldn't open XRC SRQ\n"); return FAILURE; } return 0; } /****************************************************************************** * ******************************************************************************/ static struct ibv_qp *ctx_xrc_qp_create(struct pingpong_context *ctx, struct perftest_parameters *user_param, int qp_index) { struct ibv_qp* qp = NULL; int num_of_qps = user_param->num_of_qps / 2; #ifdef HAVE_VERBS_EXP struct ibv_exp_qp_init_attr qp_init_attr; #else struct ibv_qp_init_attr_ex qp_init_attr; #endif memset(&qp_init_attr, 0, sizeof(qp_init_attr)); if ( (!(user_param->duplex || user_param->tst == LAT) && (user_param->machine == SERVER) ) || ((user_param->duplex || user_param->tst == LAT) && (qp_index >= num_of_qps))) { qp_init_attr.qp_type = IBV_QPT_XRC_RECV; qp_init_attr.comp_mask = IBV_QP_INIT_ATTR_XRCD; qp_init_attr.xrcd = ctx->xrc_domain; qp_init_attr.cap.max_recv_wr = user_param->rx_depth; qp_init_attr.cap.max_recv_sge = 1; qp_init_attr.cap.max_inline_data = user_param->inline_size; } else { qp_init_attr.qp_type = IBV_QPT_XRC_SEND; qp_init_attr.send_cq = ctx->send_cq; qp_init_attr.cap.max_send_wr = user_param->tx_depth; qp_init_attr.cap.max_send_sge = 1; qp_init_attr.comp_mask = IBV_QP_INIT_ATTR_PD; qp_init_attr.pd = ctx->pd; qp_init_attr.cap.max_inline_data = user_param->inline_size; } #ifdef HAVE_ACCL_VERBS if (user_param->use_res_domain) { qp_init_attr.comp_mask |= IBV_EXP_QP_INIT_ATTR_RES_DOMAIN; qp_init_attr.res_domain = ctx->res_domain; } #endif #ifdef HAVE_VERBS_EXP qp = ibv_exp_create_qp(ctx->context, &qp_init_attr); #else qp = ibv_create_qp_ex(ctx->context, &qp_init_attr); #endif return qp; } #endif 
#ifdef HAVE_DC /****************************************************************************** * ******************************************************************************/ static int ctx_dc_tgt_create(struct pingpong_context *ctx,struct perftest_parameters *user_param,int dct_index) { struct ibv_exp_device_attr dattr; int err; int num_of_qps = user_param->num_of_qps; int num_of_qps_per_port = user_param->num_of_qps / 2; int port_num; memset(&dattr,0,sizeof(struct ibv_exp_device_attr)); /* in dc with bidirectional, * there are send qps and recv qps. the actual number of send/recv qps * is num_of_qps / 2. */ if (user_param->duplex || user_param->tst == LAT) { num_of_qps /= 2; num_of_qps_per_port = num_of_qps / 2; } /* first half of qps are for ib_port and second half are for ib_port2 * in dc with bidirectional, the first half of qps are dc_ini qps and * the second half are dc_tgts . the first half of the send/recv qps * are for ib_port1 and the second half are for ib_port2 */ if (user_param->dualport == ON && (dct_index % num_of_qps >= num_of_qps_per_port)) port_num = user_param->ib_port2; else port_num = user_param->ib_port; dattr.comp_mask = IBV_EXP_DEVICE_ATTR_EXP_CAP_FLAGS | IBV_EXP_DEVICE_DC_RD_REQ | IBV_EXP_DEVICE_DC_RD_RES; err = ibv_exp_query_device(ctx->context, &dattr); if (err) { printf("couldn't query device extended attributes\n"); return -1; } else { if (!(dattr.comp_mask & IBV_EXP_DEVICE_ATTR_EXP_CAP_FLAGS)) { printf("no extended capability flgas\n"); return -1; } if (!(dattr.exp_device_cap_flags & IBV_EXP_DEVICE_DC_TRANSPORT)) { printf("DC transport not enabled\n"); return -1; } if (!(dattr.comp_mask & IBV_EXP_DEVICE_DC_RD_REQ)) { printf("no report on max requestor rdma/atomic resources\n"); return -1; } if (!(dattr.comp_mask & IBV_EXP_DEVICE_DC_RD_RES)) { printf("no report on max responder rdma/atomic resources\n"); return -1; } } struct ibv_exp_dct_init_attr dctattr = { .pd = ctx->pd, .cq = (user_param->verb == SEND && (user_param->duplex || 
user_param->tst == LAT)) ? ctx->recv_cq : ctx->send_cq, .srq = ctx->srq, .dc_key = user_param->dct_key, .port = port_num, .access_flags = IBV_ACCESS_REMOTE_WRITE, .min_rnr_timer = 2, .tclass = 0, .flow_label = 0, .mtu = user_param->curr_mtu, .pkey_index = user_param->pkey_index, .gid_index = user_param->gid_index, .hop_limit = 1, .inline_size = user_param->inline_size, }; ctx->dct[dct_index] = ibv_exp_create_dct(ctx->context, &dctattr); if (!ctx->dct[dct_index]) { printf("create dct failed\n"); return FAILURE; } struct ibv_exp_dct_attr dcqattr; memset(&dcqattr,0,sizeof(struct ibv_exp_dct_attr)); err = ibv_exp_query_dct(ctx->dct[dct_index], &dcqattr); if (err) { printf("query dct failed\n"); return FAILURE; } else if (dcqattr.dc_key != user_param->dct_key) { printf("queried dckry (0x%llx) is different then provided at create (0x%llx)\n", (unsigned long long)dcqattr.dc_key, (unsigned long long)user_param->dct_key); return FAILURE; } else if (dcqattr.state != IBV_EXP_DCT_STATE_ACTIVE) { printf("state is not active %d\n", dcqattr.state); return FAILURE; } return 0; } #endif #ifdef HAVE_RSS_EXP static struct ibv_qp *ctx_rss_eth_qp_create(struct pingpong_context *ctx,struct perftest_parameters *user_param,int qp_index) { struct ibv_exp_qp_init_attr attr; struct ibv_qp* qp = NULL; memset(&attr, 0, sizeof(struct ibv_exp_qp_init_attr)); attr.send_cq = ctx->send_cq; attr.recv_cq = ctx->recv_cq; attr.cap.max_send_wr = user_param->tx_depth; attr.cap.max_send_sge = MAX_SEND_SGE; attr.cap.max_inline_data = user_param->inline_size; attr.cap.max_recv_wr = user_param->rx_depth; attr.cap.max_recv_sge = MAX_RECV_SGE; attr.qp_type = IBV_QPT_RAW_PACKET; attr.comp_mask = IBV_EXP_QP_INIT_ATTR_PD | IBV_EXP_QP_INIT_ATTR_QPG; attr.pd = ctx->pd; if (qp_index == 0) { /* rss parent */ #ifdef HAVE_VERBS_EXP attr.qpg.qpg_type = IBV_EXP_QPG_PARENT; #else attr.qpg.qpg_type = IBV_QPG_PARENT; #endif attr.qpg.qpg_parent = NULL; attr.qpg.parent_attrib.tss_child_count = 0; 
attr.qpg.parent_attrib.rss_child_count = user_param->num_of_qps - 1; } else { /* rss childs */ #ifdef HAVE_VERBS_EXP attr.qpg.qpg_type = IBV_EXP_QPG_CHILD_RX; #else attr.qpg.qpg_type = IBV_QPG_CHILD_RX; #endif attr.qpg.qpg_parent = ctx->qp[0]; } qp = ibv_exp_create_qp(ctx->context,&attr); return qp; } #endif /****************************************************************************** * ******************************************************************************/ /* int check_add_port(char **service,int port, */ /* const char *servername, */ /* struct addrinfo *hints, */ /* struct addrinfo **res) */ /* { */ /* int number; */ /* if (asprintf(service,"%d", port) < 0) { */ /* return FAILURE; */ /* } */ /* number = getaddrinfo(servername,*service,hints,res); */ /* if (number < 0) { */ /* fprintf(stderr, "%s for %s:%d\n", gai_strerror(number), servername, port); */ /* return FAILURE; */ /* } */ /* return SUCCESS; */ /* } */ /****************************************************************************** + * + ******************************************************************************/ struct ibv_device* ctx_find_dev(const char *ib_devname) { int num_of_device; struct ibv_device **dev_list; struct ibv_device *ib_dev = NULL; dev_list = ibv_get_device_list(&num_of_device); if (num_of_device <= 0) { fprintf(stderr," Did not detect devices \n"); fprintf(stderr," If device exists, check if driver is up\n"); return NULL; } if (!ib_devname) { ib_dev = dev_list[0]; if (!ib_dev) { fprintf(stderr, "No IB devices found\n"); exit (1); } } else { for (; (ib_dev = *dev_list); ++dev_list) if (!strcmp(ibv_get_device_name(ib_dev), ib_devname)) break; if (!ib_dev) fprintf(stderr, "IB device %s not found\n", ib_devname); } return ib_dev; } /****************************************************************************** * ******************************************************************************/ void alloc_ctx(struct pingpong_context *ctx,struct perftest_parameters *user_param) { 
uint64_t tarr_size; int num_of_qps_factor; ctx->cycle_buffer = user_param->cycle_buffer; ctx->cache_line_size = user_param->cache_line_size; ALLOCATE(user_param->port_by_qp, uint64_t, user_param->num_of_qps); tarr_size = (user_param->noPeak) ? 1 : user_param->iters*user_param->num_of_qps; ALLOCATE(user_param->tposted, cycles_t, tarr_size); memset(user_param->tposted, 0, sizeof(cycles_t)*tarr_size); if ((user_param->tst == LAT || user_param->tst == FS_RATE) && user_param->test_type == DURATION) ALLOCATE(user_param->tcompleted, cycles_t, 1); ALLOCATE(ctx->qp, struct ibv_qp*, user_param->num_of_qps); ALLOCATE(ctx->mr, struct ibv_mr*, user_param->num_of_qps); ALLOCATE(ctx->buf, void* , user_param->num_of_qps); #ifdef HAVE_ACCL_VERBS ALLOCATE(ctx->qp_burst_family, struct ibv_exp_qp_burst_family*, user_param->num_of_qps); #endif #ifdef HAVE_DC if (user_param->connection_type == DC) { #ifdef HAVE_VERBS_EXP ALLOCATE(ctx->dct, struct ibv_exp_dct*, user_param->num_of_qps); #else ALLOCATE(ctx->dct, struct ibv_dct*, user_param->num_of_qps); #endif } #endif if ((user_param->tst == BW || user_param->tst == LAT_BY_BW) && (user_param->machine == CLIENT || user_param->duplex)) { ALLOCATE(user_param->tcompleted,cycles_t,tarr_size); memset(user_param->tcompleted, 0, sizeof(cycles_t)*tarr_size); ALLOCATE(ctx->my_addr,uint64_t,user_param->num_of_qps); ALLOCATE(ctx->rem_addr,uint64_t,user_param->num_of_qps); ALLOCATE(ctx->scnt,uint64_t,user_param->num_of_qps); ALLOCATE(ctx->ccnt,uint64_t,user_param->num_of_qps); memset(ctx->scnt, 0, user_param->num_of_qps * sizeof (uint64_t)); memset(ctx->ccnt, 0, user_param->num_of_qps * sizeof (uint64_t)); } else if ((user_param->tst == BW || user_param->tst == LAT_BY_BW) && user_param->verb == SEND && user_param->machine == SERVER) { ALLOCATE(ctx->my_addr, uint64_t, user_param->num_of_qps); ALLOCATE(user_param->tcompleted, cycles_t, 1); } else if (user_param->tst == FS_RATE && user_param->test_type == ITERATIONS) { ALLOCATE(user_param->tcompleted, 
cycles_t, tarr_size); memset(user_param->tcompleted, 0, sizeof(cycles_t) * tarr_size); } if (user_param->machine == CLIENT || user_param->tst == LAT || user_param->duplex) { ALLOCATE(ctx->sge_list, struct ibv_sge,user_param->num_of_qps * user_param->post_list); #ifdef HAVE_VERBS_EXP ALLOCATE(ctx->exp_wr, struct ibv_exp_send_wr, user_param->num_of_qps * user_param->post_list); #endif ALLOCATE(ctx->wr, struct ibv_send_wr, user_param->num_of_qps * user_param->post_list); if ((user_param->verb == SEND && user_param->connection_type == UD ) || user_param->connection_type == DC) { ALLOCATE(ctx->ah, struct ibv_ah*, user_param->num_of_qps); } } if (user_param->verb == SEND && (user_param->tst == LAT || user_param->machine == SERVER || user_param->duplex)) { ALLOCATE(ctx->recv_sge_list,struct ibv_sge,user_param->num_of_qps); ALLOCATE(ctx->rwr,struct ibv_recv_wr,user_param->num_of_qps); ALLOCATE(ctx->rx_buffer_addr,uint64_t,user_param->num_of_qps); } if (user_param->mac_fwd == ON ) ctx->cycle_buffer = user_param->size * user_param->rx_depth; ctx->size = user_param->size; num_of_qps_factor = (user_param->mr_per_qp) ? 
1 : user_param->num_of_qps; /* holds the size of maximum between msg size and cycle buffer, * aligned to cache line, * it is multiply by 2 for send and receive * with reference to number of flows and number of QPs */ ctx->buff_size = INC(BUFF_SIZE(ctx->size, ctx->cycle_buffer), ctx->cache_line_size) * 2 * num_of_qps_factor * user_param->flows; ctx->send_qp_buff_size = ctx->buff_size / num_of_qps_factor / 2; ctx->flow_buff_size = ctx->send_qp_buff_size / user_param->flows; user_param->buff_size = ctx->buff_size; if (user_param->connection_type == UD) ctx->buff_size += ctx->cache_line_size; } /****************************************************************************** * ******************************************************************************/ int destroy_ctx(struct pingpong_context *ctx, struct perftest_parameters *user_param) { int i, first, dereg_counter; int test_result = 0; int num_of_qps = user_param->num_of_qps; if (user_param->wait_destroy) { printf(" Waiting %u seconds before releasing resources...\n", user_param->wait_destroy); sleep(user_param->wait_destroy); } dereg_counter = (user_param->mr_per_qp) ? user_param->num_of_qps : 1; /* in dc with bidirectional, * there are send qps and recv qps. the actual number of send/recv qps * is num_of_qps / 2. 
*/ if (user_param->duplex || user_param->tst == LAT) { num_of_qps /= 2; } /* RSS parent should be last */ if (user_param->use_rss) first = 1; else first = 0; for (i = first; i < user_param->num_of_qps; i++) { if (( (user_param->connection_type == DC && !((!(user_param->duplex || user_param->tst == LAT) && (user_param->machine == SERVER) ) || ((user_param->duplex || user_param->tst == LAT) && (i >= num_of_qps)))) || user_param->connection_type == UD) && (user_param->tst == LAT || user_param->machine == CLIENT || user_param->duplex)) { if (ibv_destroy_ah(ctx->ah[i])) { fprintf(stderr, "Failed to destroy AH\n"); test_result = 1; } } #ifdef HAVE_DC if (user_param->connection_type == DC && ((!(user_param->duplex || user_param->tst == LAT) && (user_param->machine == SERVER)) || ((user_param->duplex || user_param->tst == LAT) && (i >= num_of_qps)))) { if (ibv_exp_destroy_dct(ctx->dct[i])) { fprintf(stderr, "Failed to destroy dct\n"); test_result = 1; } if ( i == user_param->num_of_qps -1 ) return test_result; } else #endif if (ibv_destroy_qp(ctx->qp[i])) { fprintf(stderr, "Couldn't destroy QP - %s\n", strerror(errno)); test_result = 1; } } if (user_param->use_rss) { if (user_param->connection_type == UD && (user_param->tst == LAT || user_param->machine == CLIENT || user_param->duplex)) { if (ibv_destroy_ah(ctx->ah[0])) { fprintf(stderr, "Failed to destroy AH\n"); test_result = 1; } } if (ibv_destroy_qp(ctx->qp[0])) { fprintf(stderr, "Couldn't destroy QP - %s\n", strerror(errno)); test_result = 1; } } if (user_param->srq_exists) { if (ibv_destroy_srq(ctx->srq)) { fprintf(stderr, "Couldn't destroy SRQ\n"); test_result = 1; } } #ifdef HAVE_XRCD if (user_param->use_xrc) { if (ibv_close_xrcd(ctx->xrc_domain)) { fprintf(stderr, "Couldn't destroy XRC domain\n"); test_result = 1; } if (ctx->fd >= 0 && close(ctx->fd)) { fprintf(stderr, "Couldn't close the file for the XRC Domain\n"); test_result = 1; } } #endif if (ibv_destroy_cq(ctx->send_cq)) { fprintf(stderr, "Failed to destroy 
CQ - %s\n", strerror(errno)); test_result = 1; } if (user_param->verb == SEND && (user_param->tst == LAT || user_param->machine == SERVER || user_param->duplex || (ctx->channel)) ) { if (!(user_param->connection_type == DC && user_param->machine == SERVER)) { if (ibv_destroy_cq(ctx->recv_cq)) { fprintf(stderr, "Failed to destroy CQ - %s\n", strerror(errno)); test_result = 1; } } } for (i = 0; i < dereg_counter; i++) { if (ibv_dereg_mr(ctx->mr[i])) { fprintf(stderr, "Failed to deregister MR #%d\n", i+1); test_result = 1; } } if (ibv_dealloc_pd(ctx->pd)) { fprintf(stderr, "Failed to deallocate PD - %s\n", strerror(errno)); test_result = 1; } if (ctx->channel) { if (ibv_destroy_comp_channel(ctx->channel)) { fprintf(stderr, "Failed to close event channel\n"); test_result = 1; } } if (ibv_close_device(ctx->context)) { fprintf(stderr, "Failed to close device context\n"); test_result = 1; } #ifdef HAVE_CUDA if (user_param->use_cuda) { pp_free_gpu(ctx); } else #endif /* if (user_param->mmap_file != NULL) { */ /* pp_free_mmap(ctx); */ /* } else if (ctx->is_contig_supported == FAILURE) { */ if (ctx->is_contig_supported == FAILURE) { for (i = 0; i < dereg_counter; i++) { if (user_param->use_hugepages) { shmdt(ctx->buf[i]); } else { free(ctx->buf[i]); } } } free(ctx->qp); if ((user_param->tst == BW || user_param->tst == LAT_BY_BW ) && (user_param->machine == CLIENT || user_param->duplex)) { free(user_param->tposted); free(user_param->tcompleted); free(ctx->my_addr); free(ctx->rem_addr); free(ctx->scnt); free(ctx->ccnt); } else if ((user_param->tst == BW || user_param->tst == LAT_BY_BW ) && user_param->verb == SEND && user_param->machine == SERVER) { free(user_param->tposted); free(user_param->tcompleted); free(ctx->my_addr); } if (user_param->machine == CLIENT || user_param->tst == LAT || user_param->duplex) { free(ctx->sge_list); free(ctx->wr); #ifdef HAVE_VERBS_EXP free(ctx->exp_wr); #endif } if (user_param->verb == SEND && (user_param->tst == LAT || user_param->machine == 
SERVER || user_param->duplex)) { free(ctx->rx_buffer_addr); free(ctx->recv_sge_list); free(ctx->rwr); } return test_result; } /****************************************************************************** * ******************************************************************************/ #ifdef HAVE_VERBS_EXP static int check_inline_recv_support(struct pingpong_context *ctx, struct perftest_parameters *user_param) { struct ibv_exp_device_attr dattr; int ret = 0; memset(&dattr, 0, sizeof(dattr)); dattr.comp_mask |= IBV_EXP_DEVICE_ATTR_INLINE_RECV_SZ; ret = ibv_exp_query_device(ctx->context, &dattr); if (ret) { printf(" Couldn't query device for inline-receive capabilities.\n"); } else if (!(dattr.comp_mask & IBV_EXP_DEVICE_ATTR_INLINE_RECV_SZ)) { printf(" Inline-receive not supported by driver.\n"); ret = 1; } else if (dattr.inline_recv_sz < user_param->inline_recv_size) { printf(" Max inline-receive(%d) < Requested inline-receive(%d).\n", dattr.inline_recv_sz, user_param->inline_recv_size); } return ret; } #endif /****************************************************************************** * ******************************************************************************/ #if defined HAVE_EX_ODP || defined HAVE_EXP_ODP static int check_odp_support(struct pingpong_context *ctx) { #ifdef HAVE_EX_ODP struct ibv_device_attr_ex dattr; int odp_support_send = IBV_ODP_SUPPORT_SEND; int odp_support_recv = IBV_ODP_SUPPORT_RECV; int ret = ibv_query_device_ex(ctx->context, NULL, &dattr); #elif defined HAVE_EXP_ODP struct ibv_exp_device_attr dattr; int ret = ibv_exp_query_device(ctx->context, &dattr); int odp_support_send = IBV_EXP_ODP_SUPPORT_SEND; int odp_support_recv = IBV_EXP_ODP_SUPPORT_RECV; #endif if (ret) { fprintf(stderr, " Couldn't query device for on-demand paging capabilities.\n"); return 0; } else if (!(dattr.odp_caps.per_transport_caps.rc_odp_caps & odp_support_send)) { fprintf(stderr, " Send is not supported for RC transport.\n"); return 0; } else if 
(!(dattr.odp_caps.per_transport_caps.rc_odp_caps & odp_support_recv)) { fprintf(stderr, " Receive is not supported for RC transport.\n"); return 0; } return 1; } #endif /****************************************************************************** * ******************************************************************************/ int create_reg_cqs(struct pingpong_context *ctx, struct perftest_parameters *user_param, int tx_buffer_depth, int need_recv_cq) { ctx->send_cq = ibv_create_cq(ctx->context,tx_buffer_depth * user_param->num_of_qps, NULL, ctx->channel, user_param->eq_num); if (!ctx->send_cq) { fprintf(stderr, "Couldn't create CQ\n"); return FAILURE; } if (need_recv_cq) { ctx->recv_cq = ibv_create_cq(ctx->context,user_param->rx_depth * user_param->num_of_qps, NULL, ctx->channel, user_param->eq_num); if (!ctx->recv_cq) { fprintf(stderr, "Couldn't create a receiver CQ\n"); return FAILURE; } } return SUCCESS; } /****************************************************************************** * ******************************************************************************/ #ifdef HAVE_VERBS_EXP int create_exp_cqs(struct pingpong_context *ctx, struct perftest_parameters *user_param, int tx_buffer_depth, int need_recv_cq) { struct ibv_exp_cq_init_attr attr; #ifdef HAVE_ACCL_VERBS enum ibv_exp_query_intf_status intf_status; struct ibv_exp_query_intf_params intf_params; #endif memset(&attr, 0, sizeof(attr)); #ifdef HAVE_ACCL_VERBS if (user_param->use_res_domain) { attr.res_domain = ctx->res_domain; } if (user_param->verb_type == ACCL_INTF) { memset(&intf_params, 0, sizeof(intf_params)); intf_params.intf_scope = IBV_EXP_INTF_GLOBAL; intf_params.intf = IBV_EXP_INTF_CQ; } #endif ctx->send_cq = ibv_exp_create_cq(ctx->context, tx_buffer_depth * user_param->num_of_qps, NULL, ctx->channel, 0, &attr); if (!ctx->send_cq) { fprintf(stderr, "Couldn't create exp CQ\n"); return FAILURE; } if (need_recv_cq) { ctx->recv_cq = ibv_create_cq(ctx->context,user_param->rx_depth * 
user_param->num_of_qps,NULL,ctx->channel,0); if (!ctx->recv_cq) { fprintf(stderr, "Couldn't create a receiver CQ\n"); return FAILURE; } } #ifdef HAVE_ACCL_VERBS if (user_param->verb_type == ACCL_INTF) { /* Check CQ family */ intf_params.obj = ctx->send_cq; ctx->send_cq_family = ibv_exp_query_intf(ctx->context, &intf_params, &intf_status); intf_params.obj = ctx->recv_cq; ctx->recv_cq_family = ibv_exp_query_intf(ctx->context, &intf_params, &intf_status); if (!ctx->send_cq_family || (!ctx->recv_cq_family && need_recv_cq)) { fprintf(stderr, "Couldn't create CQ family.\n"); return FAILURE; } } #endif return SUCCESS; } #endif /****************************************************************************** * ******************************************************************************/ int create_cqs(struct pingpong_context *ctx, struct perftest_parameters *user_param) { int ret; int dct_only = 0, need_recv_cq = 0; int tx_buffer_depth = user_param->tx_depth; if (user_param->connection_type == DC) { dct_only = (user_param->machine == SERVER && !(user_param->duplex || user_param->tst == LAT)); } if (dct_only) tx_buffer_depth = user_param->rx_depth; if ((user_param->connection_type == DC && !dct_only) || (user_param->verb == SEND)) need_recv_cq = 1; #ifdef HAVE_VERBS_EXP if (user_param->is_exp_cq) ret = create_exp_cqs(ctx, user_param, tx_buffer_depth, need_recv_cq); else #endif ret = create_reg_cqs(ctx, user_param, tx_buffer_depth, need_recv_cq); return ret; } /****************************************************************************** * ******************************************************************************/ #ifdef HAVE_ACCL_VERBS struct ibv_exp_res_domain* create_res_domain(struct pingpong_context *ctx, struct perftest_parameters *user_param) { struct ibv_exp_res_domain_init_attr res_domain_attr; struct ibv_exp_device_attr dattr; uint32_t req_comp_mask; /* Query device */ req_comp_mask = IBV_EXP_DEVICE_ATTR_CALC_CAP | IBV_EXP_DEVICE_ATTR_EXP_CAP_FLAGS | 
IBV_EXP_DEVICE_ATTR_MAX_CTX_RES_DOMAIN; dattr.comp_mask = req_comp_mask; if (ibv_exp_query_device(ctx->context, &dattr)) { fprintf(stderr, "Couldn't query device capabilities.\n"); return NULL; } else if (dattr.comp_mask != req_comp_mask) { fprintf(stderr, "This device does not support resource domain / accelerated verbs.\n"); return NULL; } /* Allocate resource domain */ res_domain_attr.comp_mask = IBV_EXP_RES_DOMAIN_THREAD_MODEL | IBV_EXP_RES_DOMAIN_MSG_MODEL; res_domain_attr.thread_model = IBV_EXP_THREAD_SINGLE; if (user_param->tst == BW) res_domain_attr.msg_model = IBV_EXP_MSG_HIGH_BW; else if (user_param->tst == LAT) res_domain_attr.msg_model = IBV_EXP_MSG_LOW_LATENCY; else res_domain_attr.msg_model = IBV_EXP_MSG_DEFAULT; return ibv_exp_create_res_domain(ctx->context, &res_domain_attr); } #endif /****************************************************************************** * ******************************************************************************/ int create_single_mr(struct pingpong_context *ctx, struct perftest_parameters *user_param, int qp_index) { int i; int flags = IBV_ACCESS_LOCAL_WRITE; #ifdef HAVE_VERBS_EXP struct ibv_exp_reg_mr_in reg_mr_exp_in; uint64_t exp_flags = IBV_EXP_ACCESS_LOCAL_WRITE; #endif /* ODP */ #if defined HAVE_EX_ODP || defined HAVE_EXP_ODP if (user_param->use_odp) { if ( !check_odp_support(ctx) ) return FAILURE; /* ODP does not support contig pages */ ctx->is_contig_supported = FAILURE; #ifdef HAVE_EX_ODP flags |= IBV_ACCESS_ON_DEMAND; #elif defined HAVE_EXP_ODP exp_flags |= IBV_EXP_ACCESS_ON_DEMAND; #endif } #endif #ifdef HAVE_CUDA if (user_param->use_cuda) { ctx->is_contig_supported = FAILURE; if(pp_init_gpu(ctx, ctx->buff_size)) { fprintf(stderr, "Couldn't allocate work buf.\n"); return FAILURE; } } #endif /* if (user_param->mmap_file != NULL) { */ /* ctx->buf = memalign(user_param->cycle_buffer, ctx->buff_size); */ /* if (pp_init_mmap(ctx, ctx->buff_size, user_param->mmap_file, */ /* user_param->mmap_offset)) */ /* { */ 
/* fprintf(stderr, "Couldn't allocate work buf.\n"); */ /* return FAILURE; */ /* } */ /* } else { */ /* Allocating buffer for data, in case driver not support contig pages. */ if (ctx->is_contig_supported == FAILURE) { if (user_param->use_hugepages) { if (alloc_hugepage_region(ctx) != SUCCESS){ fprintf(stderr, "Failed to allocate hugepage region.\n"); return FAILURE; } memset(ctx->buf[qp_index], 0, ctx->buff_size); } else if (ctx->is_contig_supported == FAILURE) { ctx->buf[qp_index] = memalign(user_param->cycle_buffer, ctx->buff_size); } if (!ctx->buf[qp_index]) { fprintf(stderr, "Couldn't allocate work buf.\n"); return FAILURE; } memset(ctx->buf[qp_index], 0, ctx->buff_size); } else { ctx->buf[qp_index] = NULL; #ifdef HAVE_VERBS_EXP exp_flags |= IBV_EXP_ACCESS_ALLOCATE_MR; #else flags |= (1 << 5); #endif } /* } */ if (user_param->verb == WRITE) { flags |= IBV_ACCESS_REMOTE_WRITE; #ifdef HAVE_VERBS_EXP exp_flags |= IBV_EXP_ACCESS_REMOTE_WRITE; #endif } else if (user_param->verb == READ) { flags |= IBV_ACCESS_REMOTE_READ; #ifdef HAVE_VERBS_EXP exp_flags |= IBV_EXP_ACCESS_REMOTE_READ; #endif if (user_param->transport_type == IBV_TRANSPORT_IWARP) flags |= IBV_ACCESS_REMOTE_WRITE; #ifdef HAVE_VERBS_EXP exp_flags |= IBV_EXP_ACCESS_REMOTE_WRITE; #endif } else if (user_param->verb == ATOMIC) { flags |= IBV_ACCESS_REMOTE_ATOMIC; #ifdef HAVE_VERBS_EXP exp_flags |= IBV_EXP_ACCESS_REMOTE_ATOMIC; #endif } /* Allocating Memory region and assigning our buffer to it. 
*/ #ifdef HAVE_VERBS_EXP if (ctx->is_contig_supported == SUCCESS || user_param->use_odp) { reg_mr_exp_in.pd = ctx->pd; reg_mr_exp_in.addr = ctx->buf[qp_index]; reg_mr_exp_in.length = ctx->buff_size; reg_mr_exp_in.exp_access = exp_flags; reg_mr_exp_in.comp_mask = 0; ctx->mr[qp_index] = ibv_exp_reg_mr(®_mr_exp_in); } else ctx->mr[qp_index] = ibv_reg_mr(ctx->pd, ctx->buf[qp_index], ctx->buff_size, flags); #else ctx->mr[qp_index] = ibv_reg_mr(ctx->pd, ctx->buf[qp_index], ctx->buff_size, flags); #endif if (!ctx->mr[qp_index]) { fprintf(stderr, "Couldn't allocate MR\n"); return FAILURE; } if (ctx->is_contig_supported == SUCCESS) ctx->buf[qp_index] = ctx->mr[qp_index]->addr; /* Initialize buffer with random numbers except in WRITE_LAT test that it 0's */ if (!user_param->use_cuda) { srand(time(NULL)); if (user_param->verb == WRITE && user_param->tst == LAT) { memset(ctx->buf[qp_index], 0, ctx->buff_size); } else { for (i = 0; i < ctx->buff_size; i++) { ((char*)ctx->buf[qp_index])[i] = (char)rand(); } } } return SUCCESS; } /****************************************************************************** * ******************************************************************************/ int create_mr(struct pingpong_context *ctx, struct perftest_parameters *user_param) { int i; /* create first MR */ if (create_single_mr(ctx, user_param, 0)) { fprintf(stderr, "failed to create mr\n"); return 1; } /* create the rest if needed, or copy the first one */ for (i = 1; i < user_param->num_of_qps; i++) { if (user_param->mr_per_qp) { if (create_single_mr(ctx, user_param, i)) { fprintf(stderr, "failed to create mr\n"); return 1; } } else { ALLOCATE(ctx->mr[i], struct ibv_mr, 1); memset(ctx->mr[i], 0, sizeof(struct ibv_mr)); ctx->mr[i] = ctx->mr[0]; ctx->buf[i] = ctx->buf[0] + (i*BUFF_SIZE(ctx->size, ctx->cycle_buffer)); } } return 0; } /****************************************************************************** * 
******************************************************************************/ #define HUGEPAGE_ALIGN (2*1024*1024) #define SHMAT_ADDR (void *)(0x0UL) #define SHMAT_FLAGS (0) #define SHM_HUGETLB (2048) // ! int alloc_hugepage_region (struct pingpong_context *ctx) { int buf_size; int alignment = (((ctx->cycle_buffer + HUGEPAGE_ALIGN -1) / HUGEPAGE_ALIGN) * HUGEPAGE_ALIGN); buf_size = (((ctx->buff_size + alignment -1 ) / alignment ) * alignment); /* create hugepage shared region */ ctx->huge_shmid = shmget(IPC_PRIVATE, buf_size, SHM_HUGETLB | IPC_CREAT /* | IPC_R | IPC_W */); // ! if (ctx->huge_shmid < 0) { fprintf(stderr, "Failed to allocate hugepages. Please configure hugepages\n"); return FAILURE; } /* attach shared memory */ ctx->buf = (void *) shmat(ctx->huge_shmid, SHMAT_ADDR, SHMAT_FLAGS); if (ctx->buf == (void *) -1) { fprintf(stderr, "Failed to attach shared memory region\n"); return FAILURE; } /* Mark shmem for removal */ if (shmctl(ctx->huge_shmid, IPC_RMID, 0) != 0) { fprintf(stderr, "Failed to mark shm for removal\n"); return FAILURE; } return SUCCESS; } int verify_params_with_device_context(struct ibv_context *context, struct perftest_parameters *user_param) { if(user_param->use_event) { if(user_param->eq_num > context->num_comp_vectors) { fprintf(stderr, " Completion vector specified is invalid\n"); fprintf(stderr, " Max completion vector = %d\n", context->num_comp_vectors - 1); return FAILURE; } } return SUCCESS; } #if defined HAVE_OOO_ATTR || defined HAVE_EXP_OOO_ATTR static int verify_ooo_settings(struct pingpong_context *ctx, struct perftest_parameters *user_param) { #ifdef HAVE_OOO_ATTR struct ibv_device_attr_ex attr = { }; if (ibv_query_device_ex(ctx->context, NULL, &attr)) #elif HAVE_EXP_OOO_ATTR struct ibv_exp_device_attr attr = { }; attr.comp_mask = IBV_EXP_DEVICE_ATTR_RESERVED - 1; if (ibv_exp_query_device(ctx->context, &attr)) #endif return FAILURE; if (user_param->connection_type == RC) { if (attr.ooo_caps.rc_caps == 0) { fprintf(stderr, " 
OOO unsupported by HCA on RC QP\n"); return FAILURE; } else { return SUCCESS; } } else if (user_param->connection_type == XRC) { if (attr.ooo_caps.xrc_caps == 0) { fprintf(stderr, " OOO unsupported by HCA on XRC QP\n"); return FAILURE; } else { return SUCCESS; } } else if (user_param->connection_type == UD) { if (attr.ooo_caps.ud_caps == 0) { fprintf(stderr, " OOO unsupported by HCA on UD QP\n"); return FAILURE; } else { return SUCCESS; } #if HAVE_OOO_ATTR } else if (user_param->connection_type == UC) { if (attr.ooo_caps.uc_caps == 0) { fprintf(stderr, " OOO unsupported by HCA on UC QP\n"); return FAILURE; } else { return SUCCESS; } #elif HAVE_EXP_OOO_ATTR } else if (user_param->connection_type == DC) { if (attr.ooo_caps.dc_caps == 0) { fprintf(stderr, " OOO unsupported by HCA on DC QP\n"); return FAILURE; } else { return SUCCESS; } #endif } else { return FAILURE; } } #endif int ctx_init(struct pingpong_context *ctx, struct perftest_parameters *user_param) { int i; int num_of_qps = user_param->num_of_qps / 2; ctx->is_contig_supported = FAILURE; #ifdef HAVE_ACCL_VERBS enum ibv_exp_query_intf_status intf_status; struct ibv_exp_query_intf_params intf_params; #endif #ifdef HAVE_VERBS_EXP struct ibv_exp_device_attr dattr; memset(&dattr, 0, sizeof(dattr)); get_verbs_pointers(ctx); #endif #if defined HAVE_OOO_ATTR || defined HAVE_EXP_OOO_ATTR if (user_param->use_ooo) { if (verify_ooo_settings(ctx, user_param) != SUCCESS) { fprintf(stderr, "Incompatible OOO settings\n"); return FAILURE; } } #endif #ifdef HAVE_VERBS_EXP ctx->is_contig_supported = check_for_contig_pages_support(ctx->context); #endif if (user_param->use_hugepages) ctx->is_contig_supported = FAILURE; /* Allocating an event channel if requested. */ if (user_param->use_event) { ctx->channel = ibv_create_comp_channel(ctx->context); if (!ctx->channel) { fprintf(stderr, "Couldn't create completion channel\n"); return FAILURE; } } /* Allocating the Protection domain. 
*/ ctx->pd = ibv_alloc_pd(ctx->context); if (!ctx->pd) { fprintf(stderr, "Couldn't allocate PD\n"); return FAILURE; } #ifdef HAVE_ACCL_VERBS if (user_param->use_res_domain) { ctx->res_domain = create_res_domain(ctx, user_param); if (!ctx->res_domain) { fprintf(stderr, "Couldn't create resource domain\n"); return FAILURE; } } #endif if (create_mr(ctx, user_param)) { fprintf(stderr, "Failed to create MR\n"); return FAILURE; } if (create_cqs(ctx, user_param)) { fprintf(stderr, "Failed to create CQs\n"); return FAILURE; } #ifdef HAVE_XRCD if (user_param->use_xrc) { if (ctx_xrcd_create(ctx,user_param)) { fprintf(stderr, "Couldn't create XRC resources\n"); return FAILURE; } if (ctx_xrc_srq_create(ctx,user_param)) { fprintf(stderr, "Couldn't create SRQ XRC resources\n"); return FAILURE; } } #endif if (user_param->use_srq && !user_param->use_xrc && (user_param->tst == LAT || user_param->machine == SERVER || user_param->duplex == ON)) { struct ibv_srq_init_attr attr = { .attr = { /* when using sreq, rx_depth sets the max_wr */ .max_wr = user_param->rx_depth, .max_sge = 1 } }; ctx->srq = ibv_create_srq(ctx->pd, &attr); if (!ctx->srq) { fprintf(stderr, "Couldn't create SRQ\n"); return FAILURE; } } #ifdef HAVE_RSS_EXP if (user_param->use_rss) { struct ibv_exp_device_attr attr; attr.comp_mask = IBV_EXP_DEVICE_ATTR_EXP_CAP_FLAGS | IBV_EXP_DEVICE_ATTR_RSS_TBL_SZ; if (ibv_exp_query_device(ctx->context, &attr)) { fprintf(stderr, "Experimental ibv_exp_query_device.\n"); exit(1); } if (!((attr.exp_device_cap_flags & IBV_EXP_DEVICE_QPG) && (attr.exp_device_cap_flags & IBV_EXP_DEVICE_UD_RSS) && (attr.comp_mask & IBV_EXP_DEVICE_ATTR_RSS_TBL_SZ) && (attr.max_rss_tbl_sz > 0))) { fprintf(stderr, "RSS not supported .\n"); exit(1); } /* num of qps includes the parent */ if (user_param->num_of_qps > attr.max_rss_tbl_sz + 1) { fprintf(stderr, "RSS limit is %d .\n", attr.max_rss_tbl_sz); exit(1); } } #endif for (i=0; i < user_param->num_of_qps; i++) { if (create_qp_main(ctx, user_param, i, 
num_of_qps)) { fprintf(stderr, "Failed to create QP.\n"); return FAILURE; } modify_qp_to_init(ctx, user_param, i, num_of_qps); #ifdef HAVE_ACCL_VERBS if (user_param->verb_type == ACCL_INTF) { memset(&intf_params, 0, sizeof(intf_params)); intf_params.intf_scope = IBV_EXP_INTF_GLOBAL; intf_params.intf = IBV_EXP_INTF_QP_BURST; intf_params.obj = ctx->qp[i]; ctx->qp_burst_family[i] = ibv_exp_query_intf(ctx->context, &intf_params, &intf_status); if (!ctx->qp_burst_family[i]) { fprintf(stderr, "Couldn't get QP burst family.\n"); return FAILURE; } } #endif } return SUCCESS; } int modify_qp_to_init(struct pingpong_context *ctx, struct perftest_parameters *user_param, int qp_index, int num_of_qps) { uint64_t init_flag = 0; #ifdef HAVE_RSS_EXP if (qp_index == 0 && user_param->use_rss) { init_flag = IBV_EXP_QP_GROUP_RSS; } else #endif init_flag = 0; if(user_param->connection_type == DC) { if ( !((!(user_param->duplex || user_param->tst == LAT) && (user_param->machine == SERVER) ) || ((user_param->duplex || user_param->tst == LAT) && (qp_index >= num_of_qps)))) { #ifdef HAVE_DC if (ctx_modify_dc_qp_to_init(ctx->qp[qp_index],user_param)) { fprintf(stderr," Unable to create DC QP.\n"); return FAILURE; } #endif } } else { if (ctx_modify_qp_to_init(ctx->qp[qp_index],user_param,init_flag)) { fprintf(stderr, "Failed to modify QP to INIT\n"); return FAILURE; } } return SUCCESS; } /****************************************************************************** * ******************************************************************************/ int create_reg_qp_main(struct pingpong_context *ctx, struct perftest_parameters *user_param, int i, int num_of_qps) { if (user_param->use_xrc) { #ifdef HAVE_XRCD ctx->qp[i] = ctx_xrc_qp_create(ctx, user_param, i); #endif } else { ctx->qp[i] = ctx_qp_create(ctx, user_param); } if (ctx->qp[i] == NULL) { fprintf(stderr, "Unable to create QP.\n"); return FAILURE; } return SUCCESS; } int create_exp_qp_main(struct pingpong_context *ctx, struct 
perftest_parameters *user_param, int i, int num_of_qps) { int is_dc_tgt_query = 0; is_dc_tgt_query |= !(user_param->duplex || user_param->tst == LAT) && (user_param->machine == SERVER); is_dc_tgt_query |= (user_param->duplex || user_param->tst == LAT) && (i >= num_of_qps); is_dc_tgt_query &= user_param->connection_type == DC; if (is_dc_tgt_query) { #ifdef HAVE_DC if(ctx_dc_tgt_create(ctx,user_param,i)) { return FAILURE; } /* in order to not change anything in the test */ ALLOCATE(ctx->qp[i], struct ibv_qp, 1); ctx->qp[i]->qp_num = ctx->dct[i]->dct_num; #else fprintf(stderr, "DC is not supported.\n"); return FAILURE; #endif } else if (user_param->use_rss && user_param->connection_type == RawEth) { #ifdef HAVE_RSS_EXP ctx->qp[i] = ctx_rss_eth_qp_create(ctx, user_param, i); #endif if (ctx->qp[i] == NULL) { fprintf(stderr," Unable to create RSS QP.\n"); return FAILURE; } } else if (user_param->use_xrc) { #ifdef HAVE_XRCD ctx->qp[i] = ctx_xrc_qp_create(ctx, user_param, i); #endif if (ctx->qp[i] == NULL) { fprintf(stderr," Unable to create XRC QP.\n"); return FAILURE; } } else { #ifdef HAVE_VERBS_EXP ctx->qp[i] = ctx_exp_qp_create(ctx, user_param, i); #endif if (ctx->qp[i] == NULL) { fprintf(stderr, " Unable to create exp QP.\n"); return FAILURE; } } return SUCCESS; } int create_qp_main(struct pingpong_context *ctx, struct perftest_parameters *user_param, int i, int num_of_qps) { int ret; #ifdef HAVE_VERBS_EXP int query; /* flag that indicates that we are going to use exp QP */ query = (user_param->connection_type == DC); query |= (user_param->use_rss && user_param->connection_type == RawEth); query |= user_param->use_xrc; query |= user_param->inline_recv_size != 0; query |= user_param->masked_atomics; query |= user_param->verb_type != NORMAL_INTF; query |= user_param->use_res_domain; query |= user_param->use_exp; if (query == 1) user_param->is_exp_qp = 1; if (user_param->is_exp_qp) ret = create_exp_qp_main(ctx, user_param, i, num_of_qps); else #endif ret = 
create_reg_qp_main(ctx, user_param, i, num_of_qps); return ret; } #ifdef HAVE_VERBS_EXP #ifdef HAVE_SCATTER_FCS static int check_scatter_fcs_support(struct pingpong_context *ctx, struct perftest_parameters *user_param) { struct ibv_exp_device_attr dev_attr; memset(&dev_attr, 0, sizeof(dev_attr)); dev_attr.comp_mask = IBV_EXP_DEVICE_ATTR_EXP_CAP_FLAGS; dev_attr.exp_device_cap_flags = IBV_EXP_DEVICE_SCATTER_FCS; if (ibv_exp_query_device(ctx->context, &dev_attr)) { fprintf(stderr, "ibv_exp_query_device failed\n"); return 1; } return MASK_IS_SET(IBV_EXP_DEVICE_SCATTER_FCS, dev_attr.exp_device_cap_flags); } #endif #endif #ifdef HAVE_VERBS_EXP struct ibv_qp* ctx_exp_qp_create(struct pingpong_context *ctx, struct perftest_parameters *user_param, int qp_index) { struct ibv_exp_qp_init_attr attr; struct ibv_qp* qp = NULL; struct ibv_exp_device_attr dev_attr; memset(&attr, 0, sizeof(attr)); memset(&dev_attr, 0, sizeof(dev_attr)); attr.comp_mask = IBV_EXP_QP_INIT_ATTR_PD | IBV_EXP_QP_INIT_ATTR_CREATE_FLAGS; if (user_param->masked_atomics) { #ifdef HAVE_MASKED_ATOMICS dev_attr.comp_mask = IBV_EXP_DEVICE_ATTR_EXT_ATOMIC_ARGS | IBV_EXP_DEVICE_ATTR_EXP_CAP_FLAGS; if (ibv_exp_query_device(ctx->context, &dev_attr)) { fprintf(stderr, "ibv_exp_query_device failed\n"); return NULL; } attr.max_atomic_arg = pow(2,dev_attr.ext_atom.log_max_atomic_inline); attr.exp_create_flags |= IBV_EXP_QP_CREATE_ATOMIC_BE_REPLY; attr.comp_mask |= IBV_EXP_QP_INIT_ATTR_ATOMICS_ARG; #else fprintf(stderr, "Can't create masked atomic QP.\n"); return NULL; #endif } if (user_param->inline_recv_size) { if (check_inline_recv_support(ctx, user_param)) { fprintf(stderr, "Failed to create QP with inline receive.\n"); return NULL; } attr.comp_mask |= IBV_EXP_QP_INIT_ATTR_INL_RECV; attr.max_inl_recv = user_param->inline_recv_size; attr.sq_sig_all = (1 == user_param->cq_mod) ? 
1 : 0; //inline receive on requestor must QP's sq_sig_all to be applied } #ifdef HAVE_ACCL_VERBS if (user_param->use_res_domain) { attr.comp_mask |= IBV_EXP_QP_INIT_ATTR_RES_DOMAIN; attr.res_domain = ctx->res_domain; } #endif attr.pd = ctx->pd; attr.send_cq = ctx->send_cq; attr.recv_cq = (user_param->verb == SEND) ? ctx->recv_cq : ctx->send_cq; attr.cap.max_send_wr = user_param->tx_depth; attr.cap.max_send_sge = MAX_SEND_SGE; attr.cap.max_inline_data = user_param->inline_size; if (user_param->use_srq && (user_param->tst == LAT || user_param->machine == SERVER || user_param->duplex == ON)) { attr.srq = ctx->srq; attr.cap.max_recv_wr = 0; attr.cap.max_recv_sge = 0; } else { attr.srq = NULL; attr.cap.max_recv_wr = user_param->rx_depth; attr.cap.max_recv_sge = MAX_RECV_SGE; } switch (user_param->connection_type) { case RC : attr.qp_type = IBV_QPT_RC; break; case UC : attr.qp_type = IBV_QPT_UC; break; case UD : attr.qp_type = IBV_QPT_UD; break; #ifdef HAVE_RAW_ETH case RawEth : attr.qp_type = IBV_QPT_RAW_PACKET; break; #endif case DC : attr.qp_type = IBV_EXP_QPT_DC_INI; break; default: fprintf(stderr, "Unknown connection type \n"); return NULL; } #ifdef HAVE_SCATTER_FCS if (!user_param->disable_fcs && (user_param->connection_type == RawEth)) { if(check_scatter_fcs_support(ctx, user_param)) { attr.exp_create_flags |= IBV_EXP_QP_CREATE_SCATTER_FCS; } } #endif qp = ibv_exp_create_qp(ctx->context, &attr); if (!qp) return NULL; if (user_param->inline_recv_size > attr.max_inl_recv) printf(" Actual inline-receive(%d) < requested inline-receive(%d)\n", attr.max_inl_recv, user_param->inline_recv_size); return qp; } #endif struct ibv_qp* ctx_qp_create(struct pingpong_context *ctx, struct perftest_parameters *user_param) { struct ibv_qp_init_attr attr; struct ibv_qp* qp = NULL; memset(&attr, 0, sizeof(struct ibv_qp_init_attr)); attr.send_cq = ctx->send_cq; attr.recv_cq = (user_param->verb == SEND) ? 
ctx->recv_cq : ctx->send_cq; attr.cap.max_send_wr = user_param->tx_depth; attr.cap.max_send_sge = MAX_SEND_SGE; attr.cap.max_inline_data = user_param->inline_size; if (user_param->use_srq && (user_param->tst == LAT || user_param->machine == SERVER || user_param->duplex == ON)) { attr.srq = ctx->srq; attr.cap.max_recv_wr = 0; attr.cap.max_recv_sge = 0; } else { attr.srq = NULL; attr.cap.max_recv_wr = user_param->rx_depth; attr.cap.max_recv_sge = MAX_RECV_SGE; } switch (user_param->connection_type) { case RC : attr.qp_type = IBV_QPT_RC; break; case UC : attr.qp_type = IBV_QPT_UC; break; case UD : attr.qp_type = IBV_QPT_UD; break; #ifdef HAVE_RAW_ETH case RawEth : attr.qp_type = IBV_QPT_RAW_PACKET; break; #endif default: fprintf(stderr, "Unknown connection type \n"); return NULL; } qp = ibv_create_qp(ctx->pd,&attr); return qp; } #ifdef HAVE_MASKED_ATOMICS /****************************************************************************** * ******************************************************************************/ struct ibv_qp* ctx_atomic_qp_create(struct pingpong_context *ctx, struct perftest_parameters *user_param) { struct ibv_exp_qp_init_attr attr; struct ibv_qp* qp = NULL; struct ibv_exp_device_attr dev_attr; memset(&dev_attr, 0, sizeof(dev_attr)); dev_attr.comp_mask |= IBV_EXP_DEVICE_ATTR_EXT_ATOMIC_ARGS | IBV_EXP_DEVICE_ATTR_EXP_CAP_FLAGS; if (ibv_exp_query_device(ctx->context, &dev_attr)) { fprintf(stderr, "ibv_exp_query_device failed\n"); exit(1); } memset(&attr, 0, sizeof(struct ibv_exp_qp_init_attr)); attr.pd = ctx->pd; attr.send_cq = ctx->send_cq; attr.recv_cq = (user_param->verb == SEND) ? 
ctx->recv_cq : ctx->send_cq; attr.cap.max_send_wr = user_param->tx_depth; attr.cap.max_send_sge = MAX_SEND_SGE; attr.cap.max_inline_data = user_param->inline_size; attr.max_atomic_arg = pow(2,dev_attr.ext_atom.log_max_atomic_inline); attr.exp_create_flags = IBV_EXP_QP_CREATE_ATOMIC_BE_REPLY; attr.comp_mask = IBV_EXP_QP_INIT_ATTR_CREATE_FLAGS | IBV_EXP_QP_INIT_ATTR_PD; attr.comp_mask |= IBV_EXP_QP_INIT_ATTR_ATOMICS_ARG; if (user_param->use_srq && (user_param->tst == LAT || user_param->machine == SERVER || user_param->duplex == ON)) { attr.srq = ctx->srq; attr.cap.max_recv_wr = 0; attr.cap.max_recv_sge = 0; } else { attr.srq = NULL; attr.cap.max_recv_wr = user_param->rx_depth; attr.cap.max_recv_sge = MAX_RECV_SGE; } switch (user_param->connection_type) { case RC : attr.qp_type = IBV_QPT_RC; break; case UC : attr.qp_type = IBV_QPT_UC; break; case UD : attr.qp_type = IBV_QPT_UD; break; #ifdef HAVE_XRCD case XRC : attr.qp_type = IBV_QPT_XRC; break; #endif #ifdef HAVE_RAW_ETH case RawEth : attr.qp_type = IBV_QPT_RAW_PACKET; break; #endif default: fprintf(stderr, "Unknown connection type \n"); return NULL; } qp = ibv_exp_create_qp(ctx->context, &attr); return qp; } #endif #ifdef HAVE_DC /****************************************************************************** * ******************************************************************************/ int ctx_modify_dc_qp_to_init(struct ibv_qp *qp,struct perftest_parameters *user_param) { int num_of_qps = user_param->num_of_qps; int num_of_qps_per_port = user_param->num_of_qps / 2; int err; uint64_t flags; #ifdef HAVE_VERBS_EXP struct ibv_exp_qp_attr attr; memset(&attr, 0, sizeof(struct ibv_exp_qp_attr)); flags = IBV_EXP_QP_STATE | IBV_EXP_QP_PKEY_INDEX | IBV_EXP_QP_PORT; #else struct ibv_qp_attr attr; memset(&attr, 0, sizeof(struct ibv_qp_attr)); flags = IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT; #endif static int portindex=0; /* for dual-port support */ attr.qp_state = IBV_QPS_INIT; attr.pkey_index = 
user_param->pkey_index; attr.qp_access_flags = 0; attr.dct_key = user_param->dct_key; if (user_param->duplex || user_param->tst == LAT) { num_of_qps /= 2; num_of_qps_per_port = num_of_qps / 2; } if (user_param->dualport==ON) { if (portindex % num_of_qps < num_of_qps_per_port) { attr.port_num = user_param->ib_port; user_param->port_by_qp[portindex] = 0; } else { attr.port_num = user_param->ib_port2; user_param->port_by_qp[portindex] = 1; } portindex++; } else { attr.port_num = user_param->ib_port; } #ifdef HAVE_VERBS_EXP flags |= IBV_EXP_QP_DC_KEY; err = ibv_exp_modify_qp(qp,&attr,flags); #else flags |= IBV_QP_DC_KEY; attr.comp_mask = IBV_QP_ATTR_DCT_KEY; err = ibv_modify_qp(qp,&attr,flags); #endif if (err) { fprintf(stderr, "Failed to modify QP to INIT\n"); return 1; } return 0; } #endif /****************************************************************************** * ******************************************************************************/ int ctx_modify_qp_to_init(struct ibv_qp *qp,struct perftest_parameters *user_param, uint64_t init_flag) { int num_of_qps = user_param->num_of_qps; int num_of_qps_per_port = user_param->num_of_qps / 2; struct ibv_qp_attr attr; int flags = IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT; #ifdef HAVE_VERBS_EXP struct ibv_exp_qp_attr exp_attr; uint64_t exp_flags = 0; #endif static int portindex=0; /* for dual-port support */ int ret = 0; memset(&attr, 0, sizeof(struct ibv_qp_attr)); attr.qp_state = IBV_QPS_INIT; attr.pkey_index = user_param->pkey_index; #ifdef HAVE_VERBS_EXP memset(&exp_attr, 0, sizeof(struct ibv_exp_qp_attr)); exp_attr.qp_state = attr.qp_state; exp_attr.pkey_index = attr.pkey_index; #endif if ( user_param->use_xrc && (user_param->duplex || user_param->tst == LAT)) { num_of_qps /= 2; num_of_qps_per_port = num_of_qps / 2; } if (user_param->dualport==ON) { if (portindex % num_of_qps < num_of_qps_per_port) { attr.port_num = user_param->ib_port; user_param->port_by_qp[portindex] = 0; } else { attr.port_num = 
user_param->ib_port2; user_param->port_by_qp[portindex] = 1; } portindex++; } else { attr.port_num = user_param->ib_port; } #ifdef HAVE_VERBS_EXP exp_attr.port_num = attr.port_num; #endif if (user_param->connection_type == RawEth) { flags = IBV_QP_STATE | IBV_QP_PORT; #ifdef HAVE_VERBS_EXP exp_flags = init_flag | IBV_EXP_QP_STATE | IBV_EXP_QP_PORT; #endif } else if (user_param->connection_type == UD) { attr.qkey = DEFF_QKEY; flags |= IBV_QP_QKEY; } else { switch (user_param->verb) { case ATOMIC: attr.qp_access_flags = IBV_ACCESS_REMOTE_ATOMIC; break; case READ : attr.qp_access_flags = IBV_ACCESS_REMOTE_READ; break; case WRITE : attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE; break; case SEND : attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE; } flags |= IBV_QP_ACCESS_FLAGS; } #ifdef HAVE_MASKED_ATOMICS if (user_param->masked_atomics) { exp_attr.qp_access_flags = IBV_ACCESS_REMOTE_ATOMIC | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ; exp_flags = IBV_EXP_QP_STATE | IBV_EXP_QP_PKEY_INDEX | IBV_EXP_QP_PORT | IBV_EXP_QP_ACCESS_FLAGS; } #endif #ifdef HAVE_VERBS_EXP if ( (init_flag != 0 && user_param->use_rss) || user_param->masked_atomics ) { ret = ibv_exp_modify_qp(qp,&exp_attr,exp_flags); } else #endif ret = ibv_modify_qp(qp,&attr,flags); if (ret) { fprintf(stderr, "Failed to modify QP to INIT, ret=%d\n",ret); return 1; } return 0; } #ifdef HAVE_DC /****************************************************************************** * ******************************************************************************/ static int ctx_modify_dc_qp_to_rtr(struct ibv_qp *qp, struct ibv_exp_qp_attr *attr, struct perftest_parameters *user_param, struct pingpong_dest *dest, struct pingpong_dest *my_dest, int qpindex) { int num_of_qps = user_param->num_of_qps; int num_of_qps_per_port = user_param->num_of_qps / 2; int flags = IBV_EXP_QP_STATE | IBV_EXP_QP_PATH_MTU | IBV_EXP_QP_AV; attr->qp_state = IBV_QPS_RTR; attr->ah_attr.src_path_bits = 0; /* in DC with 
bidirectional, * there are send qps and recv qps. the actual number of send/recv qps * is num_of_qps / 2. */ if (user_param->duplex || user_param->tst == LAT) { num_of_qps /= 2; num_of_qps_per_port = num_of_qps / 2; } /* first half of qps are for ib_port and second half are for ib_port2 * in DC with bidirectional, the first half of qps are DC_INI qps and * the second half are DC_TGT qps. the first half of the send/recv qps * are for ib_port1 and the second half are for ib_port2 */ if (user_param->dualport == ON && (qpindex % num_of_qps >= num_of_qps_per_port)) attr->ah_attr.port_num = user_param->ib_port2; else attr->ah_attr.port_num = user_param->ib_port; attr->ah_attr.dlid = (user_param->dlid) ? user_param->dlid : dest->lid; if (user_param->gid_index == DEF_GID_INDEX) { attr->ah_attr.is_global = 0; attr->ah_attr.sl = user_param->sl; } else { attr->ah_attr.is_global = 1; attr->ah_attr.grh.dgid = dest->gid; attr->ah_attr.grh.sgid_index = user_param->gid_index; attr->ah_attr.grh.hop_limit = 1; attr->ah_attr.grh.traffic_class = user_param->traffic_class; attr->ah_attr.sl = 0; } attr->max_dest_rd_atomic = 0; attr->min_rnr_timer = 0; attr->dct_key = user_param->dct_key; attr->path_mtu = user_param->curr_mtu; return ibv_exp_modify_qp(qp,attr,flags); } #endif /****************************************************************************** * ******************************************************************************/ static int ctx_modify_qp_to_rtr(struct ibv_qp *qp, struct ibv_qp_attr *attr, struct perftest_parameters *user_param, struct pingpong_dest *dest, struct pingpong_dest *my_dest, int qpindex) { int num_of_qps = user_param->num_of_qps; int num_of_qps_per_port = user_param->num_of_qps / 2; int flags = IBV_QP_STATE; int ooo_flags = 0; attr->qp_state = IBV_QPS_RTR; attr->ah_attr.src_path_bits = 0; /* in xrc with bidirectional, * there are send qps and recv qps. the actual number of send/recv qps * is num_of_qps / 2. 
*/ if ( user_param->use_xrc && (user_param->duplex || user_param->tst == LAT)) { num_of_qps /= 2; num_of_qps_per_port = num_of_qps / 2; } /* first half of qps are for ib_port and second half are for ib_port2 * in xrc with bidirectional, the first half of qps are xrc_send qps and * the second half are xrc_recv qps. the first half of the send/recv qps * are for ib_port1 and the second half are for ib_port2 */ if (user_param->dualport == ON && (qpindex % num_of_qps >= num_of_qps_per_port)) attr->ah_attr.port_num = user_param->ib_port2; else attr->ah_attr.port_num = user_param->ib_port; if (user_param->connection_type != RawEth) { attr->ah_attr.dlid = (user_param->dlid) ? user_param->dlid : dest->lid; attr->ah_attr.sl = user_param->sl; if (((attr->ah_attr.port_num == user_param->ib_port) && (user_param->gid_index == DEF_GID_INDEX)) || ((attr->ah_attr.port_num == user_param->ib_port2) && (user_param->gid_index2 == DEF_GID_INDEX) && user_param->dualport)) { attr->ah_attr.is_global = 0; } else { attr->ah_attr.is_global = 1; attr->ah_attr.grh.dgid = dest->gid; attr->ah_attr.grh.sgid_index = (attr->ah_attr.port_num == user_param->ib_port) ? 
user_param->gid_index : user_param->gid_index2; attr->ah_attr.grh.hop_limit = 0xFF; attr->ah_attr.grh.traffic_class = user_param->traffic_class; } if (user_param->connection_type != UD) { attr->path_mtu = user_param->curr_mtu; attr->dest_qp_num = dest->qpn; attr->rq_psn = dest->psn; flags |= (IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN | IBV_QP_RQ_PSN); if (user_param->connection_type == RC || user_param->connection_type == XRC) { attr->max_dest_rd_atomic = my_dest->out_reads; attr->min_rnr_timer = 12; flags |= (IBV_QP_MIN_RNR_TIMER | IBV_QP_MAX_DEST_RD_ATOMIC); } } } else if (user_param->raw_qos) { attr->ah_attr.sl = user_param->sl; flags |= IBV_QP_AV; } #ifdef HAVE_OOO_ATTR ooo_flags |= IBV_QP_OOO_RW_DATA_PLACEMENT; #elif HAVE_EXP_OOO_ATTR ooo_flags |= IBV_EXP_QP_OOO_RW_DATA_PLACEMENT; #endif if (user_param->use_ooo) flags |= ooo_flags; return ibv_modify_qp(qp, attr, flags); } #ifdef HAVE_DC /****************************************************************************** * ******************************************************************************/ static int ctx_modify_dc_qp_to_rts(struct ibv_qp *qp, #ifdef HAVE_VERBS_EXP struct ibv_exp_qp_attr *attr, #else struct ibv_qp_attr_ex *attr, #endif struct perftest_parameters *user_param, struct pingpong_dest *dest, struct pingpong_dest *my_dest) { #ifdef HAVE_VERBS_EXP int flags = IBV_EXP_QP_STATE | IBV_EXP_QP_TIMEOUT | IBV_EXP_QP_RETRY_CNT | IBV_EXP_QP_RNR_RETRY | IBV_EXP_QP_MAX_QP_RD_ATOMIC; #else int flags = IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY | IBV_QP_MAX_QP_RD_ATOMIC; #endif attr->qp_state = IBV_QPS_RTS; attr->timeout = user_param->qp_timeout; attr->retry_cnt = 7; attr->rnr_retry = 7; attr->max_rd_atomic = dest->out_reads; #ifdef HAVE_VERBS_EXP return ibv_exp_modify_qp(qp,attr,flags); #else return ibv_modify_qp_ex(qp,attr,flags); #endif } #endif /****************************************************************************** * 
******************************************************************************/
/*
 * ctx_modify_qp_to_rts - move a QP to the RTS (ready-to-send) state.
 *
 * qp         - queue pair to modify.
 * _attr      - attribute storage; written through as struct ibv_qp_attr and,
 *              when built with HAVE_PACKET_PACING_EXP and packet pacing is
 *              requested, also as struct ibv_exp_qp_attr.
 * user_param - test parameters (connection type, QP timeout, rate limit).
 * dest       - remote side info; supplies out_reads for RC/XRC.
 * my_dest    - local side info; supplies the send PSN.
 *
 * Returns the result of ibv_modify_qp() / ibv_exp_modify_qp().
 */
static int ctx_modify_qp_to_rts(struct ibv_qp *qp,
		void *_attr,
		struct perftest_parameters *user_param,
		struct pingpong_dest *dest,
		struct pingpong_dest *my_dest)
{
	/* The experimental modify call is passed a 64-bit attribute mask. */
	#ifdef HAVE_PACKET_PACING_EXP
	uint64_t flags = IBV_QP_STATE;
	#else
	int flags = IBV_QP_STATE;
	#endif
	struct ibv_qp_attr *attr = (struct ibv_qp_attr*)_attr;

	attr->qp_state = IBV_QPS_RTS;

	/* Raw Ethernet QPs only get the state change; everything else needs a PSN. */
	if (user_param->connection_type != RawEth) {
		flags |= IBV_QP_SQ_PSN;
		attr->sq_psn = my_dest->psn;

		/* Timeout/retry/atomic-depth attributes are set for RC and XRC only. */
		if (user_param->connection_type == RC || user_param->connection_type == XRC) {
			attr->timeout = user_param->qp_timeout;
			attr->retry_cnt = 7;
			attr->rnr_retry = 7;
			attr->max_rd_atomic = dest->out_reads;
			flags |= (IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY | IBV_QP_MAX_QP_RD_ATOMIC);
		}
	}

	#ifdef HAVE_PACKET_PACING_EXP
	/* Packet pacing via the experimental API: returns directly from here. */
	if (user_param->rate_limit_type == PP_RATE_LIMIT) {
		((struct ibv_exp_qp_attr*)_attr)->rate_limit = user_param->rate_limit;
		flags |= IBV_EXP_QP_RATE_LIMIT;
		return ibv_exp_modify_qp(qp, (struct ibv_exp_qp_attr*)_attr, flags);
	}
	#elif defined(HAVE_PACKET_PACING)
	/* Packet pacing via the regular API: just add the attribute and fall through. */
	if (user_param->rate_limit_type == PP_RATE_LIMIT) {
		attr->rate_limit = user_param->rate_limit;
		flags |= IBV_QP_RATE_LIMIT;
	}
	#endif

	return ibv_modify_qp(qp, attr, flags);
}

/******************************************************************************
 * ctx_connect - bring every local QP up against its remote peer.
 *
 * For each QP: modify it to RTR; if this side sends (latency test, CLIENT
 * machine or duplex) also modify it to RTS; create an address handle for
 * UD/DC senders; and, when a HW rate limit was requested, query the QP to
 * confirm the static rate was accepted.
 *
 * dest / my_dest are arrays indexed per QP.
 * Returns SUCCESS, or FAILURE on the first step that fails.
 ******************************************************************************/
int ctx_connect(struct pingpong_context *ctx,
		struct pingpong_dest *dest,
		struct perftest_parameters *user_param,
		struct pingpong_dest *my_dest)
{
	int i;
	#ifdef HAVE_DC
	#ifdef HAVE_VERBS_EXP
	struct ibv_exp_qp_attr attr_ex;
	#else
	struct ibv_qp_attr_ex attr_ex;
	#endif
	#elif HAVE_PACKET_PACING_EXP
	struct ibv_exp_qp_attr attr_ex;
	#endif
	struct ibv_qp_attr attr;
	int xrc_offset = 0;

	/*
	 * XRC/DC in duplex or latency mode: local QP i is paired with remote
	 * entry i + num_of_qps/2 (first half) or i - num_of_qps/2 (second half).
	 * xrc_offset holds that signed shift.
	 */
	if((user_param->use_xrc || user_param->connection_type == DC) && (user_param->duplex || user_param->tst == LAT)) {
		xrc_offset = user_param->num_of_qps / 2;
	}

	for (i=0; i < user_param->num_of_qps; i++) {

		/*
		 * DC: skip all QPs on a non-duplex, non-latency SERVER, and skip the
		 * second half of the QPs in duplex/latency mode.
		 */
		if (user_param->connection_type == DC) {
			if ( ((!(user_param->duplex || user_param->tst == LAT) && (user_param->machine == SERVER) ) || ((user_param->duplex || user_param->tst == LAT) && (i >= user_param->num_of_qps/2)))) {
				continue;
			}
		}

		#if defined (HAVE_DC) || defined (HAVE_PACKET_PACING_EXP)
		memset(&attr_ex, 0, sizeof attr_ex);
		#endif
		memset(&attr, 0, sizeof attr);

		if (user_param->rate_limit_type == HW_RATE_LIMIT)
			attr.ah_attr.static_rate = user_param->valid_hw_rate_limit;

		#if defined (HAVE_PACKET_PACING_EXP) || defined (HAVE_PACKET_PACING)
		if (user_param->rate_limit_type == PP_RATE_LIMIT && (check_packet_pacing_support(ctx) == FAILURE)) {
			fprintf(stderr, "Packet Pacing isn't supported.\n");
			return FAILURE;
		}
		#endif

		/* Second half of the QPs: flip the shift so it points back at the first half. */
		if ((i >= xrc_offset) && (user_param->use_xrc || user_param->connection_type == DC) && (user_param->duplex || user_param->tst == LAT))
			xrc_offset = -1*xrc_offset;

		/* Step 1: RTR. With DC but without HAVE_DC nothing is done here. */
		if(user_param->connection_type == DC) {
			#ifdef HAVE_DC
			if(ctx_modify_dc_qp_to_rtr(ctx->qp[i],&attr_ex,user_param,&dest[xrc_offset + i],&my_dest[i],i)) {
				fprintf(stderr, "Failed to modify QP %d to RTR\n",ctx->qp[i]->qp_num);
				return FAILURE;
			}
			#endif
		} else {
			if(ctx_modify_qp_to_rtr(ctx->qp[i], &attr, user_param, &dest[xrc_offset + i], &my_dest[i], i)) {
				fprintf(stderr, "Failed to modify QP %d to RTR\n",ctx->qp[i]->qp_num);
				return FAILURE;
			}
		}

		/* Step 2: RTS, only on sides that actually post sends. */
		if (user_param->tst == LAT || user_param->machine == CLIENT || user_param->duplex) {
			if(user_param->connection_type == DC) {
				#ifdef HAVE_DC
				if(ctx_modify_dc_qp_to_rts(ctx->qp[i], &attr_ex, user_param, &dest[xrc_offset + i], &my_dest[i])) {
					fprintf(stderr, "Failed to modify QP to RTS\n");
					return FAILURE;
				}
				#endif
			} else {
				#ifdef HAVE_PACKET_PACING_EXP
				/* Experimental packet pacing passes the experimental attr struct. */
				if (user_param->rate_limit_type == PP_RATE_LIMIT) {
					if(ctx_modify_qp_to_rts(ctx->qp[i], &attr_ex, user_param, &dest[xrc_offset + i], &my_dest[i])) {
						fprintf(stderr, "Failed to modify QP %x to RTS\n", ctx->qp[i]->qp_num);
						return FAILURE;
					}
				} else {
				#endif
					if(ctx_modify_qp_to_rts(ctx->qp[i], &attr, user_param, &dest[xrc_offset + i], &my_dest[i])) {
						fprintf(stderr, "Failed to modify QP to RTS\n");
						return FAILURE;
					}
				#ifdef HAVE_PACKET_PACING_EXP
				}
				#endif
			}
		}

		/* Step 3: UD and DC senders need an address handle per QP. */
		if ((user_param->connection_type == UD || user_param->connection_type == DC) && (user_param->tst == LAT || user_param->machine == CLIENT || user_param->duplex)) {
			#ifdef HAVE_DC
			if(user_param->connection_type == DC)
				ctx->ah[i] = ibv_create_ah(ctx->pd,&(attr_ex.ah_attr));
			else
			#endif
				ctx->ah[i] = ibv_create_ah(ctx->pd,&(attr.ah_attr));

			if (!ctx->ah[i]) {
				fprintf(stderr, "Failed to create AH for UD\n");
				return FAILURE;
			}
		}

		/* Step 4: read the QP back and check the HW static rate took effect. */
		if (user_param->rate_limit_type == HW_RATE_LIMIT) {
			struct ibv_qp_attr qp_attr;
			struct ibv_qp_init_attr init_attr;
			int err, qp_static_rate=0;

			memset(&qp_attr,0,sizeof(struct ibv_qp_attr));
			memset(&init_attr,0,sizeof(struct ibv_qp_init_attr));

			err = ibv_query_qp(ctx->qp[i], &qp_attr, IBV_QP_AV, &init_attr);
			if (err)
				fprintf(stderr, "ibv_query_qp failed to get ah_attr\n");
			else
				qp_static_rate = (int)(qp_attr.ah_attr.static_rate);

			//- Fall back to SW Limit only if flag undefined
			if(err || (qp_static_rate != user_param->valid_hw_rate_limit)) {
				if(!user_param->is_rate_limit_type) {
					user_param->rate_limit_type = SW_RATE_LIMIT;
					fprintf(stderr, "\x1b[31mThe QP failed to accept HW rate limit, providing SW rate limit \x1b[0m\n");
				} else {
					fprintf(stderr, "\x1b[31mThe QP failed to accept HW rate limit \x1b[0m\n");
					return FAILURE;
				}
			}
		}

		/* Restore the positive shift for the next iteration. */
		if((user_param->use_xrc || user_param->connection_type == DC) && (user_param->duplex || user_param->tst == LAT))
			xrc_offset = user_param->num_of_qps / 2;
	}
	return SUCCESS;
}

/******************************************************************************
 * ctx_set_send_wqes - prepare the send work requests.
 *
 * Dispatches to the experimental-verbs variant when built with
 * HAVE_VERBS_EXP and user_param->use_exp == 1, otherwise to the regular one.
 ******************************************************************************/
void ctx_set_send_wqes(struct pingpong_context *ctx,
		struct perftest_parameters *user_param,
		struct pingpong_dest *rem_dest)
{
	#ifdef HAVE_VERBS_EXP
	if (user_param->use_exp == 1) {
		ctx_set_send_exp_wqes(ctx,user_param,rem_dest);
	} else {
	#endif
		ctx_set_send_reg_wqes(ctx,user_param,rem_dest);
	#ifdef HAVE_VERBS_EXP
	}
	#endif
}

#ifdef HAVE_VERBS_EXP
/******************************************************************************
 * ctx_set_send_exp_wqes - fill ctx->exp_wr[] and ctx->sge_list[] for sending
 * with the experimental verbs API.
 *
 * Each sending QP i owns post_list consecutive entries starting at
 * i*post_list; they are chained through ->next and only the last one of the
 * chain is created signaled. rem_dest is indexed with the same XRC/DC shift
 * that is used when connecting the QPs.
 ******************************************************************************/
void ctx_set_send_exp_wqes(struct pingpong_context *ctx,
		struct perftest_parameters *user_param,
		struct pingpong_dest *rem_dest)
{
	int i,j;
	int num_of_qps = user_param->num_of_qps;
	int xrc_offset = 0;

	/* XRC/DC duplex or latency: only the first half of the QPs send. */
	if((user_param->use_xrc || user_param->connection_type == DC) && (user_param->duplex || user_param->tst == LAT)) {
		num_of_qps /= 2;
		xrc_offset = num_of_qps;
	}

	for (i = 0; i < num_of_qps ; i++) {
		/* Only the first WR of the chain is zeroed here. */
		memset(&ctx->exp_wr[i*user_param->post_list],0,sizeof(struct ibv_exp_send_wr));
		ctx->sge_list[i*user_param->post_list].addr = (uintptr_t)ctx->buf[i];

		if (user_param->mac_fwd) {
			if (user_param->mr_per_qp) {
				ctx->sge_list[i*user_param->post_list].addr = (uintptr_t)ctx->buf[0] + (num_of_qps + i)*BUFF_SIZE(ctx->size,ctx->cycle_buffer);
			} else {
				ctx->sge_list[i*user_param->post_list].addr = (uintptr_t)ctx->buf[i];
			}
		}

		/* Remote address of the first WR in the chain. */
		if (user_param->verb == WRITE || user_param->verb == READ)
			ctx->exp_wr[i*user_param->post_list].wr.rdma.remote_addr = rem_dest[xrc_offset + i].vaddr;
		else if (user_param->verb == ATOMIC)
			ctx->exp_wr[i*user_param->post_list].wr.atomic.remote_addr = rem_dest[xrc_offset + i].vaddr;

		/* Bandwidth-style tests: reset counters and remember base addresses. */
		if (user_param->tst == BW || user_param->tst == LAT_BY_BW ) {
			ctx->scnt[i] = 0;
			ctx->ccnt[i] = 0;
			ctx->my_addr[i] = (uintptr_t)ctx->buf[i];
			if (user_param->verb != SEND)
				ctx->rem_addr[i] = rem_dest[xrc_offset + i].vaddr;
		}

		for (j = 0; j < user_param->post_list; j++) {
			ctx->sge_list[i*user_param->post_list + j].length = (user_param->connection_type == RawEth) ? (user_param->size - HW_CRC_ADDITION) : user_param->size;
			ctx->sge_list[i*user_param->post_list + j].lkey = ctx->mr[i]->lkey;

			/* Later entries start from the previous entry's local address. */
			if (j > 0) {
				ctx->sge_list[i*user_param->post_list +j].addr = ctx->sge_list[i*user_param->post_list + (j-1)].addr;
				if ((user_param->tst == BW || user_param->tst == LAT_BY_BW) && user_param->size <= (ctx->cycle_buffer / 2))
					increase_loc_addr(&ctx->sge_list[i*user_param->post_list +j],user_param->size, j-1,ctx->my_addr[i],0,ctx->cache_line_size,ctx->cycle_buffer);
			}

			ctx->exp_wr[i*user_param->post_list + j].sg_list = &ctx->sge_list[i*user_param->post_list + j];
			ctx->exp_wr[i*user_param->post_list + j].num_sge = MAX_SEND_SGE;
			ctx->exp_wr[i*user_param->post_list + j].wr_id = i;

			/* Last WR of the chain: signaled and terminates the list. */
			if (j == (user_param->post_list - 1)) {
				#ifdef HAVE_ACCL_VERBS
				if (user_param->verb_type == ACCL_INTF)
					ctx->exp_wr[i*user_param->post_list + j].exp_send_flags = IBV_EXP_QP_BURST_SIGNALED;
				else
				#endif
					ctx->exp_wr[i*user_param->post_list + j].exp_send_flags = IBV_EXP_SEND_SIGNALED;
				ctx->exp_wr[i*user_param->post_list + j].next = NULL;
			} else {
				ctx->exp_wr[i*user_param->post_list + j].next = &ctx->exp_wr[i*user_param->post_list+j+1];
				ctx->exp_wr[i*user_param->post_list + j].exp_send_flags = 0;
			}

			if (user_param->verb == ATOMIC) {
				ctx->exp_wr[i*user_param->post_list + j].exp_opcode = exp_opcode_atomic_array[user_param->atomicType];
			} else {
				ctx->exp_wr[i*user_param->post_list + j].exp_opcode = exp_opcode_verbs_array[user_param->verb];
			}

			if (user_param->verb == WRITE || user_param->verb == READ) {
				ctx->exp_wr[i*user_param->post_list + j].wr.rdma.rkey = rem_dest[xrc_offset + i].rkey;
				if (j > 0) {
					ctx->exp_wr[i*user_param->post_list + j].wr.rdma.remote_addr = ctx->exp_wr[i*user_param->post_list + (j-1)].wr.rdma.remote_addr;
					if ((user_param->tst == BW || user_param->tst == LAT_BY_BW) && user_param->size <= (ctx->cycle_buffer / 2))
						increase_exp_rem_addr(&ctx->exp_wr[i*user_param->post_list + j],user_param->size, j-1,ctx->rem_addr[i],WRITE,ctx->cache_line_size,ctx->cycle_buffer);
				}
			} else if (user_param->verb == ATOMIC) {
				ctx->exp_wr[i*user_param->post_list + j].wr.atomic.rkey = rem_dest[xrc_offset + i].rkey;
				if (j > 0) {
					ctx->exp_wr[i*user_param->post_list + j].wr.atomic.remote_addr = ctx->exp_wr[i*user_param->post_list + j-1].wr.atomic.remote_addr;
					if (user_param->tst == BW || user_param->tst == LAT_BY_BW)
						increase_exp_rem_addr(&ctx->exp_wr[i*user_param->post_list + j],user_param->size, j-1,ctx->rem_addr[i],ATOMIC,ctx->cache_line_size,ctx->cycle_buffer);
				}
				if (user_param->atomicType == FETCH_AND_ADD)
					ctx->exp_wr[i*user_param->post_list + j].wr.atomic.compare_add = ATOMIC_ADD_VALUE;
				else
					ctx->exp_wr[i*user_param->post_list + j].wr.atomic.swap = ATOMIC_SWAP_VALUE;
			} else if (user_param->verb == SEND) {
				if (user_param->connection_type == UD) {
					ctx->exp_wr[i*user_param->post_list + j].wr.ud.ah = ctx->ah[i];
					ctx->exp_wr[i*user_param->post_list + j].wr.ud.remote_qkey = DEF_QKEY;
					ctx->exp_wr[i*user_param->post_list + j].wr.ud.remote_qpn = rem_dest[xrc_offset + i].qpn;
				#ifdef HAVE_DC
				} else if (user_param->connection_type == DC) {
					ctx->exp_wr[i*user_param->post_list + j].dc.ah = ctx->ah[i];
					ctx->exp_wr[i*user_param->post_list + j].dc.dct_access_key = user_param->dct_key;
					ctx->exp_wr[i*user_param->post_list + j].dc.dct_number = rem_dest[xrc_offset + i].qpn;
				#endif
				}
			}

			#ifdef HAVE_DC
			/* DC addressing is filled in for every verb (repeats the SEND case above). */
			if (user_param->connection_type == DC) {
				ctx->exp_wr[i*user_param->post_list + j].dc.ah = ctx->ah[i];
				ctx->exp_wr[i*user_param->post_list + j].dc.dct_access_key = user_param->dct_key;
				ctx->exp_wr[i*user_param->post_list + j].dc.dct_number = rem_dest[xrc_offset + i].qpn;
			}
			#endif

			/* Small SEND/WRITE payloads are sent inline. */
			if ((user_param->verb == SEND || user_param->verb == WRITE) && user_param->size <= user_param->inline_size)
				ctx->exp_wr[i*user_param->post_list + j].exp_send_flags |= IBV_EXP_SEND_INLINE;

			#ifdef HAVE_XRCD
			if (user_param->use_xrc)
				ctx->exp_wr[i*user_param->post_list + j].qp_type.xrc.remote_srqn = rem_dest[xrc_offset + i].srqn;
			#endif
		}
	}
}
#endif
/****************************************************************************** * ******************************************************************************/ void ctx_set_send_reg_wqes(struct pingpong_context *ctx, struct perftest_parameters *user_param, struct pingpong_dest *rem_dest) { int i,j; int num_of_qps = user_param->num_of_qps; int xrc_offset = 0; if((user_param->use_xrc || user_param->connection_type == DC) && (user_param->duplex || user_param->tst == LAT)) { num_of_qps /= 2; xrc_offset = num_of_qps; } for (i = 0; i < num_of_qps ; i++) { memset(&ctx->wr[i*user_param->post_list],0,sizeof(struct ibv_send_wr)); ctx->sge_list[i*user_param->post_list].addr = (uintptr_t)ctx->buf[i]; if (user_param->mac_fwd) { if (user_param->mr_per_qp) { ctx->sge_list[i*user_param->post_list].addr = (uintptr_t)ctx->buf[0] + (num_of_qps + i)*BUFF_SIZE(ctx->size,ctx->cycle_buffer); } else { ctx->sge_list[i*user_param->post_list].addr = (uintptr_t)ctx->buf[i]; } } if (user_param->verb == WRITE || user_param->verb == READ) ctx->wr[i*user_param->post_list].wr.rdma.remote_addr = rem_dest[xrc_offset + i].vaddr; else if (user_param->verb == ATOMIC) ctx->wr[i*user_param->post_list].wr.atomic.remote_addr = rem_dest[xrc_offset + i].vaddr; if (user_param->tst == BW || user_param->tst == LAT_BY_BW) { ctx->scnt[i] = 0; ctx->ccnt[i] = 0; ctx->my_addr[i] = (uintptr_t)ctx->buf[i]; if (user_param->verb != SEND) ctx->rem_addr[i] = rem_dest[xrc_offset + i].vaddr; } for (j = 0; j < user_param->post_list; j++) { ctx->sge_list[i*user_param->post_list + j].length = (user_param->connection_type == RawEth) ? 
(user_param->size - HW_CRC_ADDITION) : user_param->size; ctx->sge_list[i*user_param->post_list + j].lkey = ctx->mr[i]->lkey; if (j > 0) { ctx->sge_list[i*user_param->post_list +j].addr = ctx->sge_list[i*user_param->post_list + (j-1)].addr; if ((user_param->tst == BW || user_param->tst == LAT_BY_BW) && user_param->size <= (ctx->cycle_buffer / 2)) increase_loc_addr(&ctx->sge_list[i*user_param->post_list +j],user_param->size, j-1,ctx->my_addr[i],0,ctx->cache_line_size,ctx->cycle_buffer); } ctx->wr[i*user_param->post_list + j].sg_list = &ctx->sge_list[i*user_param->post_list + j]; ctx->wr[i*user_param->post_list + j].num_sge = MAX_SEND_SGE; ctx->wr[i*user_param->post_list + j].wr_id = i; if (j == (user_param->post_list - 1)) { ctx->wr[i*user_param->post_list + j].send_flags = IBV_SEND_SIGNALED; ctx->wr[i*user_param->post_list + j].next = NULL; } else { ctx->wr[i*user_param->post_list + j].next = &ctx->wr[i*user_param->post_list+j+1]; ctx->wr[i*user_param->post_list + j].send_flags = 0; } if (user_param->verb == ATOMIC) { ctx->wr[i*user_param->post_list + j].opcode = opcode_atomic_array[user_param->atomicType]; } else { ctx->wr[i*user_param->post_list + j].opcode = opcode_verbs_array[user_param->verb]; } if (user_param->verb == WRITE || user_param->verb == READ) { ctx->wr[i*user_param->post_list + j].wr.rdma.rkey = rem_dest[xrc_offset + i].rkey; if (j > 0) { ctx->wr[i*user_param->post_list + j].wr.rdma.remote_addr = ctx->wr[i*user_param->post_list + (j-1)].wr.rdma.remote_addr; if ((user_param->tst == BW || user_param->tst == LAT_BY_BW ) && user_param->size <= (ctx->cycle_buffer / 2)) increase_rem_addr(&ctx->wr[i*user_param->post_list + j],user_param->size, j-1,ctx->rem_addr[i],WRITE,ctx->cache_line_size,ctx->cycle_buffer); } } else if (user_param->verb == ATOMIC) { ctx->wr[i*user_param->post_list + j].wr.atomic.rkey = rem_dest[xrc_offset + i].rkey; if (j > 0) { ctx->wr[i*user_param->post_list + j].wr.atomic.remote_addr = ctx->wr[i*user_param->post_list + 
j-1].wr.atomic.remote_addr; if (user_param->tst == BW || user_param->tst == LAT_BY_BW) increase_rem_addr(&ctx->wr[i*user_param->post_list + j],user_param->size, j-1,ctx->rem_addr[i],ATOMIC,ctx->cache_line_size,ctx->cycle_buffer); } if (user_param->atomicType == FETCH_AND_ADD) ctx->wr[i*user_param->post_list + j].wr.atomic.compare_add = ATOMIC_ADD_VALUE; else ctx->wr[i*user_param->post_list + j].wr.atomic.swap = ATOMIC_SWAP_VALUE; } else if (user_param->verb == SEND) { if (user_param->connection_type == UD) { ctx->wr[i*user_param->post_list + j].wr.ud.ah = ctx->ah[i]; ctx->wr[i*user_param->post_list + j].wr.ud.remote_qkey = DEF_QKEY; ctx->wr[i*user_param->post_list + j].wr.ud.remote_qpn = rem_dest[xrc_offset + i].qpn; } } if ((user_param->verb == SEND || user_param->verb == WRITE) && user_param->size <= user_param->inline_size) ctx->wr[i*user_param->post_list + j].send_flags |= IBV_SEND_INLINE; #ifdef HAVE_XRCD if (user_param->use_xrc) ctx->wr[i*user_param->post_list + j].qp_type.xrc.remote_srqn = rem_dest[xrc_offset + i].srqn; #endif } } } /****************************************************************************** * ******************************************************************************/ int ctx_set_recv_wqes(struct pingpong_context *ctx,struct perftest_parameters *user_param) { int i = 0,j,k; int num_of_qps = user_param->num_of_qps; struct ibv_recv_wr *bad_wr_recv; int size_per_qp = user_param->rx_depth; if((user_param->use_xrc || user_param->connection_type == DC) && (user_param->duplex || user_param->tst == LAT)) { i = user_param->num_of_qps / 2; num_of_qps /= 2; } if (user_param->use_srq) size_per_qp /= user_param->num_of_qps; if (user_param->use_rss) { i = 1; num_of_qps = 1; } for (k = 0; i < user_param->num_of_qps; i++,k++) { if (!user_param->mr_per_qp) { ctx->recv_sge_list[i].addr = (uintptr_t)ctx->buf[0] + (num_of_qps + k) * ctx->send_qp_buff_size; } else { ctx->recv_sge_list[i].addr = (uintptr_t)ctx->buf[i]; } if (user_param->connection_type == 
UD) ctx->recv_sge_list[i].addr += (ctx->cache_line_size - UD_ADDITION); ctx->recv_sge_list[i].length = SIZE(user_param->connection_type,user_param->size,1); ctx->recv_sge_list[i].lkey = ctx->mr[i]->lkey; ctx->rwr[i].sg_list = &ctx->recv_sge_list[i]; ctx->rwr[i].wr_id = i; ctx->rwr[i].next = NULL; ctx->rwr[i].num_sge = MAX_RECV_SGE; ctx->rx_buffer_addr[i] = ctx->recv_sge_list[i].addr; for (j = 0; j < size_per_qp ; ++j) { if (user_param->use_srq) { if (ibv_post_srq_recv(ctx->srq,&ctx->rwr[i], &bad_wr_recv)) { fprintf(stderr, "Couldn't post recv SRQ = %d: counter=%d\n",i,j); return 1; } } else { if (ibv_post_recv(ctx->qp[i],&ctx->rwr[i],&bad_wr_recv)) { fprintf(stderr, "Couldn't post recv Qp = %d: counter=%d\n",i,j); return 1; } } if ((user_param->tst == BW || user_param->tst == LAT_BY_BW) && user_param->size <= (ctx->cycle_buffer / 2)) { increase_loc_addr(&ctx->recv_sge_list[i], user_param->size, j, ctx->rx_buffer_addr[i], user_param->connection_type,ctx->cache_line_size,ctx->cycle_buffer); } } ctx->recv_sge_list[i].addr = ctx->rx_buffer_addr[i]; } return 0; } int ctx_alloc_credit(struct pingpong_context *ctx, struct perftest_parameters *user_param, struct pingpong_dest *my_dest) { int buf_size = 2*user_param->num_of_qps*sizeof(uint32_t); int flags = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE; int i; ALLOCATE(ctx->ctrl_buf,uint32_t,user_param->num_of_qps); memset(&ctx->ctrl_buf[0],0,buf_size); ctx->credit_buf = (uint32_t *)ctx->ctrl_buf + user_param->num_of_qps; ctx->credit_cnt = user_param->rx_depth/3; ctx->credit_mr = ibv_reg_mr(ctx->pd,ctx->ctrl_buf,buf_size,flags); if (!ctx->credit_mr) { fprintf(stderr, "Couldn't allocate MR\n"); return FAILURE; } for (i = 0; i < user_param->num_of_qps; i++) { my_dest[i].rkey = ctx->credit_mr->rkey; my_dest[i].vaddr = (uintptr_t)ctx->credit_buf + i*sizeof(uint32_t); } return 0; } /* Should be called after the remote keys have been exchanged */ int ctx_set_credit_wqes(struct pingpong_context *ctx, struct perftest_parameters 
*user_param, struct pingpong_dest *rem_dest) { int i; ALLOCATE(ctx->ctrl_wr,struct ibv_send_wr,user_param->num_of_qps); ALLOCATE(ctx->ctrl_sge_list,struct ibv_sge,user_param->num_of_qps); for (i = 0; i < user_param->num_of_qps; i++) { memset(&ctx->ctrl_wr[i],0,sizeof(struct ibv_send_wr)); ctx->ctrl_sge_list[i].addr = (uintptr_t)ctx->ctrl_buf + (i*sizeof(uint32_t)); ctx->ctrl_sge_list[i].length = sizeof(uint32_t); ctx->ctrl_sge_list[i].lkey = ctx->credit_mr->lkey; ctx->ctrl_wr[i].opcode = IBV_WR_RDMA_WRITE; ctx->ctrl_wr[i].sg_list = &ctx->ctrl_sge_list[i]; ctx->ctrl_wr[i].num_sge = 1; ctx->ctrl_wr[i].wr_id = i; ctx->ctrl_wr[i].send_flags = IBV_SEND_SIGNALED; ctx->ctrl_wr[i].next = NULL; ctx->ctrl_wr[i].wr.rdma.remote_addr = rem_dest[i].vaddr; ctx->ctrl_wr[i].wr.rdma.rkey = rem_dest[i].rkey; } return 0; } static int clean_scq_credit(int send_cnt,struct pingpong_context *ctx,struct perftest_parameters *user_param) { int i= 0, sne = 0; struct ibv_wc *swc = NULL; int return_value = 0; if (!send_cnt) return 0; ALLOCATE(swc,struct ibv_wc,user_param->tx_depth); do { sne = ibv_poll_cq(ctx->send_cq,user_param->tx_depth,swc); if (sne > 0) { for (i = 0; i < sne; i++) { if (swc[i].status != IBV_WC_SUCCESS) { fprintf(stderr, "Poll send CQ error status=%u qp %d\n", swc[i].status,(int)swc[i].wr_id); return_value = FAILURE; goto cleaning; } send_cnt--; } } else if (sne < 0) { fprintf(stderr, "Poll send CQ to clean credit failed ne=%d\n",sne); return_value = FAILURE; goto cleaning; } } while(send_cnt > 0); cleaning: free(swc); return return_value; } /****************************************************************************** * ******************************************************************************/ int perform_warm_up(struct pingpong_context *ctx,struct perftest_parameters *user_param) { int ne,index,warmindex,warmupsession; int err = 0; #ifdef HAVE_VERBS_EXP struct ibv_exp_send_wr *bad_exp_wr = NULL; #endif struct ibv_send_wr *bad_wr = NULL; struct ibv_wc wc; struct 
ibv_wc *wc_for_cleaning = NULL; int num_of_qps = user_param->num_of_qps; int return_value = 0; if(user_param->duplex && (user_param->use_xrc || user_param->connection_type == DC)) num_of_qps /= 2; warmupsession = (user_param->post_list == 1) ? user_param->tx_depth : user_param->post_list; ALLOCATE(wc_for_cleaning,struct ibv_wc,user_param->tx_depth); /* Clean up the pipe */ ne = ibv_poll_cq(ctx->send_cq,user_param->tx_depth,wc_for_cleaning); for (index=0 ; index < num_of_qps ; index++) { for (warmindex = 0 ;warmindex < warmupsession ;warmindex += user_param->post_list) { #ifdef HAVE_VERBS_EXP if (user_param->use_exp == 1) err = (ctx->exp_post_send_func_pointer)(ctx->qp[index], &ctx->exp_wr[index*user_param->post_list], &bad_exp_wr); else err = (ctx->post_send_func_pointer)(ctx->qp[index],&ctx->wr[index*user_param->post_list],&bad_wr); #else err = ibv_post_send(ctx->qp[index],&ctx->wr[index*user_param->post_list],&bad_wr); #endif if (err) { fprintf(stderr,"Couldn't post send during warm up: qp %d scnt=%d \n",index,warmindex); return_value = FAILURE; goto cleaning; } } do { ne = ibv_poll_cq(ctx->send_cq,1,&wc); if (ne > 0) { if (wc.status != IBV_WC_SUCCESS) { return_value = FAILURE; goto cleaning; } warmindex -= user_param->post_list; } else if (ne < 0) { return_value = FAILURE; goto cleaning; } } while (warmindex); } cleaning: free(wc_for_cleaning); return return_value; } /****************************************************************************** * ******************************************************************************/ int run_iter_bw(struct pingpong_context *ctx,struct perftest_parameters *user_param) { uint64_t totscnt = 0; uint64_t totccnt = 0; int i = 0; int index,ne; uint64_t tot_iters; int err = 0; #ifdef HAVE_VERBS_EXP struct ibv_exp_send_wr *bad_exp_wr = NULL; #ifdef HAVE_ACCL_VERBS int pl_index; struct ibv_sge *sg_l; #endif #endif struct ibv_send_wr *bad_wr = NULL; struct ibv_wc *wc = NULL; int num_of_qps = user_param->num_of_qps; /* Rate 
Limiter*/ int rate_limit_pps = 0; double gap_time = 0; /* in usec */ cycles_t gap_cycles = 0; /* in cycles */ cycles_t gap_deadline = 0; unsigned int number_of_bursts = 0; int burst_iter = 0; int is_sending_burst = 0; int cpu_mhz = 0; int return_value = 0; int wc_id; int send_flows_index = 0; uintptr_t primary_send_addr = ctx->sge_list[0].addr; int address_offset = 0; int flows_burst_iter = 0; ALLOCATE(wc ,struct ibv_wc ,CTX_POLL_BATCH); /* if (user_param->test_type == DURATION) { */ /* duration_param=user_param; */ /* duration_param->state = START_STATE; */ /* signal(SIGALRM, catch_alarm); */ /* if (user_param->margin > 0 ) */ /* alarm(user_param->margin); */ /* else */ /* catch_alarm(0); [> move to next state <] */ /* user_param->iters = 0; */ /* } */ if (user_param->duplex && (user_param->use_xrc || user_param->connection_type == DC)) num_of_qps /= 2; /* Will be 0, in case of Duration (look at force_dependencies or in the exp above). */ tot_iters = (uint64_t)user_param->iters*num_of_qps; if (user_param->test_type == DURATION && user_param->state != START_STATE && user_param->margin > 0) { fprintf(stderr, "Failed: margin is not long enough (taking samples before warmup ends)\n"); fprintf(stderr, "Please increase margin or decrease tx_depth\n"); return_value = FAILURE; goto cleaning; } if (user_param->test_type == ITERATIONS && user_param->noPeak == ON) user_param->tposted[0] = get_cycles(); /* If using rate limiter, calculate gap time between bursts */ if (user_param->rate_limit_type == SW_RATE_LIMIT ) { /* Calculate rate limit in pps */ switch (user_param->rate_units) { case MEGA_BYTE_PS: rate_limit_pps = ((double)(user_param->rate_limit) / user_param->size) * 1048576; break; case GIGA_BIT_PS: rate_limit_pps = ((double)(user_param->rate_limit) / (user_param->size * 8)) * 1000000000; break; case PACKET_PS: rate_limit_pps = user_param->rate_limit; break; default: fprintf(stderr, " Failed: Unknown rate limit units\n"); return_value = FAILURE; goto cleaning; } 
cpu_mhz = get_cpu_mhz(user_param->cpu_freq_f); if (cpu_mhz <= 0) { fprintf(stderr, "Failed: couldn't acquire cpu frequency for rate limiter.\n"); } number_of_bursts = rate_limit_pps / user_param->burst_size; gap_time = 1000000 * (1.0 / number_of_bursts); gap_cycles = cpu_mhz * gap_time; } /* main loop for posting */ while (totscnt < tot_iters || totccnt < tot_iters || (user_param->test_type == DURATION && user_param->state != END_STATE) ) { /* main loop to run over all the qps and post each time n messages */ for (index =0 ; index < num_of_qps ; index++) { if (user_param->rate_limit_type == SW_RATE_LIMIT && is_sending_burst == 0) { if (gap_deadline > get_cycles()) { /* Go right to cq polling until gap time is over. */ continue; } gap_deadline = get_cycles() + gap_cycles; is_sending_burst = 1; burst_iter = 0; } while ((ctx->scnt[index] < user_param->iters || user_param->test_type == DURATION) && (ctx->scnt[index] - ctx->ccnt[index]) < (user_param->tx_depth) && !((user_param->rate_limit_type == SW_RATE_LIMIT ) && is_sending_burst == 0)) { if (ctx->send_rcredit) { uint32_t swindow = ctx->scnt[index] + user_param->post_list - ctx->credit_buf[index]; if (swindow >= user_param->rx_depth) break; } if (user_param->post_list == 1 && (ctx->scnt[index] % user_param->cq_mod == 0 && user_param->cq_mod > 1) && !(ctx->scnt[index] == (user_param->iters - 1) && user_param->test_type == ITERATIONS)) { #ifdef HAVE_VERBS_EXP #ifdef HAVE_ACCL_VERBS if (user_param->verb_type == ACCL_INTF) ctx->exp_wr[index].exp_send_flags &= ~IBV_EXP_QP_BURST_SIGNALED; else { #endif if (user_param->use_exp == 1) ctx->exp_wr[index].exp_send_flags &= ~IBV_EXP_SEND_SIGNALED; else #endif ctx->wr[index].send_flags &= ~IBV_SEND_SIGNALED; #ifdef HAVE_ACCL_VERBS } #endif } if (user_param->noPeak == OFF) user_param->tposted[totscnt] = get_cycles(); if (user_param->test_type == DURATION && user_param->state == END_STATE) break; #ifdef HAVE_VERBS_EXP #ifdef HAVE_ACCL_VERBS if (user_param->verb_type == ACCL_INTF) { 
for (pl_index = 0; pl_index < user_param->post_list; pl_index++) { sg_l = ctx->exp_wr[index*user_param->post_list + pl_index].sg_list; ctx->qp_burst_family[index]->send_pending(ctx->qp[index], sg_l->addr, sg_l->length, sg_l->lkey, ctx->exp_wr[index*user_param->post_list + pl_index].exp_send_flags); } ctx->qp_burst_family[index]->send_flush(ctx->qp[index]); } else { #endif if (user_param->use_exp == 1) { err = (ctx->exp_post_send_func_pointer)(ctx->qp[index], &ctx->exp_wr[index*user_param->post_list],&bad_exp_wr); } else { err = (ctx->post_send_func_pointer)(ctx->qp[index], &ctx->wr[index*user_param->post_list],&bad_wr); } #ifdef HAVE_ACCL_VERBS } #endif #else err = ibv_post_send(ctx->qp[index],&ctx->wr[index*user_param->post_list],&bad_wr); #endif if (err) { fprintf(stderr,"Couldn't post send: qp %d scnt=%lu \n",index,ctx->scnt[index]); return_value = FAILURE; goto cleaning; } /* if we have more than single flow and the burst iter is the last one */ if (user_param->flows != DEF_FLOWS) { if (++flows_burst_iter == user_param->flows_burst) { flows_burst_iter = 0; /* inc the send_flows_index and update the address */ if (++send_flows_index == user_param->flows) send_flows_index = 0; address_offset = send_flows_index * ctx->flow_buff_size; ctx->sge_list[0].addr = primary_send_addr + address_offset; } } /* in multiple flow scenarios we will go to next cycle buffer address in the main buffer*/ if (user_param->post_list == 1 && user_param->size <= (ctx->cycle_buffer / 2)) { #ifdef HAVE_VERBS_EXP if (user_param->use_exp == 1) increase_loc_addr(ctx->exp_wr[index].sg_list,user_param->size, ctx->scnt[index], ctx->my_addr[index] + address_offset, 0, ctx->cache_line_size, ctx->cycle_buffer); else #endif increase_loc_addr(ctx->wr[index].sg_list,user_param->size, ctx->scnt[index], ctx->my_addr[index] + address_offset , 0, ctx->cache_line_size, ctx->cycle_buffer); if (user_param->verb != SEND) { #ifdef HAVE_VERBS_EXP if (user_param->use_exp == 1) 
increase_exp_rem_addr(&ctx->exp_wr[index], user_param->size, ctx->scnt[index], ctx->rem_addr[index], user_param->verb, ctx->cache_line_size, ctx->cycle_buffer); else #endif increase_rem_addr(&ctx->wr[index], user_param->size, ctx->scnt[index], ctx->rem_addr[index], user_param->verb, ctx->cache_line_size, ctx->cycle_buffer); } } ctx->scnt[index] += user_param->post_list; totscnt += user_param->post_list; /* ask for completion on this wr */ if (user_param->post_list == 1 && (ctx->scnt[index]%user_param->cq_mod == user_param->cq_mod - 1 || (user_param->test_type == ITERATIONS && ctx->scnt[index] == user_param->iters - 1))) { #ifdef HAVE_VERBS_EXP #ifdef HAVE_ACCL_VERBS if (user_param->verb_type == ACCL_INTF) ctx->exp_wr[index].exp_send_flags |= IBV_EXP_QP_BURST_SIGNALED; else { #endif if (user_param->use_exp == 1) ctx->exp_wr[index].exp_send_flags |= IBV_EXP_SEND_SIGNALED; else #endif ctx->wr[index].send_flags |= IBV_SEND_SIGNALED; #ifdef HAVE_ACCL_VERBS } #endif } /* Check if a full burst was sent. */ if (user_param->rate_limit_type == SW_RATE_LIMIT) { burst_iter += user_param->post_list; if (burst_iter >= user_param->burst_size) { is_sending_burst = 0; } } } } if (totccnt < tot_iters || (user_param->test_type == DURATION && totccnt < totscnt)) { if (user_param->use_event) { if (ctx_notify_events(ctx->channel)) { fprintf(stderr, "Couldn't request CQ notification\n"); return_value = FAILURE; goto cleaning; } } #ifdef HAVE_ACCL_VERBS if (user_param->verb_type == ACCL_INTF) ne = ctx->send_cq_family->poll_cnt(ctx->send_cq, CTX_POLL_BATCH); else #endif ne = ibv_poll_cq(ctx->send_cq,CTX_POLL_BATCH,wc); if (ne > 0) { for (i = 0; i < ne; i++) { wc_id = (user_param->verb_type == ACCL_INTF) ? 
0 : (int)wc[i].wr_id; if (user_param->verb_type != ACCL_INTF) { if (wc[i].status != IBV_WC_SUCCESS) { NOTIFY_COMP_ERROR_SEND(wc[i],totscnt,totccnt); return_value = FAILURE; goto cleaning; } } ctx->ccnt[wc_id] += user_param->cq_mod; totccnt += user_param->cq_mod; if (user_param->noPeak == OFF) { if (totccnt >= tot_iters - 1) user_param->tcompleted[user_param->iters*num_of_qps - 1] = get_cycles(); else user_param->tcompleted[totccnt-1] = get_cycles(); } if (user_param->test_type==DURATION && user_param->state == SAMPLE_STATE) { if (user_param->report_per_port) { user_param->iters_per_port[user_param->port_by_qp[wc_id]] += user_param->cq_mod; } user_param->iters += user_param->cq_mod; } } } else if (ne < 0) { fprintf(stderr, "poll CQ failed %d\n",ne); return_value = FAILURE; goto cleaning; } } } if (user_param->noPeak == ON && user_param->test_type == ITERATIONS) user_param->tcompleted[0] = get_cycles(); cleaning: free(wc); return return_value; } /****************************************************************************** * ******************************************************************************/ static inline void set_on_first_rx_packet(struct perftest_parameters *user_param) { /* if (user_param->test_type == DURATION) { */ /* duration_param=user_param; */ /* user_param->iters=0; */ /* duration_param->state = START_STATE; */ /* signal(SIGALRM, catch_alarm); */ /* if (user_param->margin > 0) */ /* alarm(user_param->margin); */ /* else */ /* catch_alarm(0); */ /* } else if (user_param->tst == BW) { */ if (user_param->tst == BW) { user_param->tposted[0] = get_cycles(); } } /****************************************************************************** * ******************************************************************************/ int run_iter_bw_server(struct pingpong_context *ctx, struct perftest_parameters *user_param) { uint64_t rcnt = 0; int ne = 0; int i; uint64_t tot_iters; uint64_t *rcnt_for_qp = NULL; struct ibv_wc *wc = NULL; struct ibv_recv_wr 
*bad_wr_recv = NULL; struct ibv_wc *swc = NULL; long *scredit_for_qp = NULL; int tot_scredit = 0; int firstRx = 1; int size_per_qp = (user_param->use_srq) ? user_param->rx_depth/user_param->num_of_qps : user_param->rx_depth; int return_value = 0; int wc_id; int recv_flows_index = 0; uintptr_t primary_recv_addr = ctx->recv_sge_list[0].addr; int recv_flows_burst = 0; int address_flows_offset =0; ALLOCATE(wc ,struct ibv_wc ,CTX_POLL_BATCH); ALLOCATE(swc ,struct ibv_wc ,user_param->tx_depth); ALLOCATE(rcnt_for_qp,uint64_t,user_param->num_of_qps); memset(rcnt_for_qp,0,sizeof(uint64_t)*user_param->num_of_qps); ALLOCATE(scredit_for_qp,long,user_param->num_of_qps); memset(scredit_for_qp,0,sizeof(long)*user_param->num_of_qps); if (user_param->use_rss) tot_iters = (uint64_t)user_param->iters*(user_param->num_of_qps-1); else tot_iters = (uint64_t)user_param->iters*user_param->num_of_qps; if (user_param->test_type == ITERATIONS) { check_alive_data.is_events = user_param->use_event; signal(SIGALRM, check_alive); alarm(60); // TODO } check_alive_data.g_total_iters = tot_iters; while (rcnt < tot_iters || (user_param->test_type == DURATION && user_param->state != END_STATE)) { if (user_param->use_event) { if (ctx_notify_events(ctx->channel)) { fprintf(stderr ," Failed to notify events to CQ"); return_value = FAILURE; goto cleaning; } } do { if (user_param->test_type == DURATION && user_param->state == END_STATE) break; #ifdef HAVE_ACCL_VERBS if (user_param->verb_type == ACCL_INTF) ne = ctx->recv_cq_family->poll_cnt(ctx->recv_cq, CTX_POLL_BATCH); else { #endif if (user_param->connection_type == DC) ne = ibv_poll_cq(ctx->send_cq,CTX_POLL_BATCH,wc); else ne = ibv_poll_cq(ctx->recv_cq,CTX_POLL_BATCH,wc); #ifdef HAVE_ACCL_VERBS } #endif if (ne > 0) { if (firstRx) { set_on_first_rx_packet(user_param); firstRx = 0; } for (i = 0; i < ne; i++) { wc_id = (user_param->verb_type == ACCL_INTF) ? 
0 : (int)wc[i].wr_id; if (user_param->verb_type != ACCL_INTF) { if (wc[i].status != IBV_WC_SUCCESS) { NOTIFY_COMP_ERROR_RECV(wc[i],rcnt_for_qp[wc_id]); return_value = FAILURE; goto cleaning; } } rcnt_for_qp[wc_id]++; rcnt++; check_alive_data.current_totrcnt = rcnt; if (user_param->test_type==DURATION && user_param->state == SAMPLE_STATE) { if (user_param->report_per_port) { user_param->iters_per_port[user_param->port_by_qp[wc_id]]++; } user_param->iters++; } if (user_param->test_type==DURATION || rcnt_for_qp[wc_id] + size_per_qp <= user_param->iters) { #ifdef HAVE_ACCL_VERBS if (user_param->verb_type == ACCL_INTF) { if (ctx->qp_burst_family[wc_id]->recv_burst(ctx->qp[wc_id], ctx->rwr[wc_id].sg_list, 1)) { fprintf(stderr, "Couldn't post recv burst (accelerated verbs).\n"); return_value = FAILURE; goto cleaning; } } else { #endif if (user_param->use_srq) { if (ibv_post_srq_recv(ctx->srq, &ctx->rwr[wc_id],&bad_wr_recv)) { fprintf(stderr, "Couldn't post recv SRQ. QP = %d: counter=%lu\n", wc_id,rcnt); return_value = FAILURE; goto cleaning; } } else { if (ibv_post_recv(ctx->qp[wc_id],&ctx->rwr[wc_id],&bad_wr_recv)) { fprintf(stderr, "Couldn't post recv Qp=%d rcnt=%ld\n",wc_id,rcnt_for_qp[wc_id]); return_value = 15; goto cleaning; } } if (user_param->flows != DEF_FLOWS) { if (++recv_flows_burst == user_param->flows_burst) { recv_flows_burst = 0; if (++recv_flows_index == user_param->flows) recv_flows_index = 0; address_flows_offset = recv_flows_index * ctx->cycle_buffer; ctx->recv_sge_list[0].addr = primary_recv_addr + address_flows_offset; } } #ifdef HAVE_ACCL_VERBS } #endif if (SIZE(user_param->connection_type,user_param->size,!(int)user_param->machine) <= (ctx->cycle_buffer / 2)) { increase_loc_addr(ctx->rwr[wc_id].sg_list, user_param->size, rcnt_for_qp[wc_id] + size_per_qp, ctx->rx_buffer_addr[wc_id] + address_flows_offset, user_param->connection_type,ctx->cache_line_size,ctx->cycle_buffer); } } if (ctx->send_rcredit) { int credit_cnt = 
rcnt_for_qp[wc_id]%user_param->rx_depth; if (credit_cnt%ctx->credit_cnt == 0) { struct ibv_send_wr *bad_wr = NULL; int sne = 0, j = 0; ctx->ctrl_buf[wc_id] = rcnt_for_qp[wc_id]; while (scredit_for_qp[wc_id] == user_param->tx_depth) { sne = ibv_poll_cq(ctx->send_cq,user_param->tx_depth,swc); if (sne > 0) { for (j = 0; j < sne; j++) { if (swc[j].status != IBV_WC_SUCCESS) { fprintf(stderr, "Poll send CQ error status=%u qp %d credit=%lu scredit=%lu\n", swc[j].status,(int)swc[j].wr_id, rcnt_for_qp[swc[j].wr_id],scredit_for_qp[swc[j].wr_id]); return_value = FAILURE; goto cleaning; } scredit_for_qp[swc[j].wr_id]--; tot_scredit--; } } else if (sne < 0) { fprintf(stderr, "Poll send CQ failed ne=%d\n",sne); return_value = FAILURE; goto cleaning; } } if (ibv_post_send(ctx->qp[wc_id],&ctx->ctrl_wr[wc_id],&bad_wr)) { fprintf(stderr,"Couldn't post send qp %d credit = %lu\n", wc_id,rcnt_for_qp[wc_id]); return_value = FAILURE; goto cleaning; } scredit_for_qp[wc_id]++; tot_scredit++; } } } } } while (ne > 0); if (ne < 0) { fprintf(stderr, "Poll Receive CQ failed %d\n", ne); return_value = FAILURE; goto cleaning; } else if (ne == 0) { if (check_alive_data.to_exit) { user_param->check_alive_exited = 1; return_value = FAILURE; goto cleaning; } } } if (user_param->test_type == ITERATIONS) user_param->tcompleted[0] = get_cycles(); cleaning: if (ctx->send_rcredit) { if (clean_scq_credit(tot_scredit, ctx, user_param)) return_value = FAILURE; } check_alive_data.last_totrcnt=0; free(wc); free(rcnt_for_qp); free(swc); free(scredit_for_qp); return return_value; } /****************************************************************************** * ******************************************************************************/ /* int run_iter_bw_infinitely(struct pingpong_context *ctx,struct perftest_parameters *user_param) */ /* { */ /* uint64_t totscnt = 0; */ /* uint64_t totccnt = 0; */ /* int i = 0; */ /* int index = 0,ne; */ /* int err = 0; */ /* int wc_id; */ /* #ifdef HAVE_VERBS_EXP */ /* 
struct ibv_exp_send_wr *bad_exp_wr = NULL; */ /* #endif */ /* uint64_t *scnt_for_qp = NULL; */ /* struct ibv_send_wr *bad_wr = NULL; */ /* struct ibv_wc *wc = NULL; */ /* int num_of_qps = user_param->num_of_qps; */ /* int return_value = 0; */ /* int single_thread_handler; */ /* ALLOCATE(wc ,struct ibv_wc ,CTX_POLL_BATCH); */ /* ALLOCATE(scnt_for_qp,uint64_t,user_param->num_of_qps); */ /* memset(scnt_for_qp,0,sizeof(uint64_t)*user_param->num_of_qps); */ /* duration_param=user_param; */ /* sigset_t set; */ /* sigemptyset(&set); */ /* sigaddset(&set, SIGALRM); */ /* single_thread_handler = pthread_sigmask(SIG_BLOCK, &set, NULL); */ /* if (single_thread_handler != 0){ */ /* printf("error when try to mask alram for signal to thread\n"); */ /* return FAILURE; */ /* } */ /* pthread_t print_thread; */ /* if (pthread_create(&print_thread, NULL, &handle_signal_print_thread,(void *)&set) != 0){ */ /* printf("Fail to create thread \n"); */ /* return FAILURE; */ /* } */ /* alarm(user_param->duration); */ /* user_param->iters = 0; */ /* [> Will be 0, in case of Duration (look at force_dependencies or in the exp above) <] */ /* if (user_param->duplex && (user_param->use_xrc || user_param->connection_type == DC)) */ /* num_of_qps /= 2; */ /* user_param->tposted[0] = get_cycles(); */ /* [> main loop for posting <] */ /* while (1) { */ /* [> main loop to run over all the qps and post each time n messages <] */ /* for (index =0 ; index < num_of_qps ; index++) { */ /* while ((ctx->scnt[index] - ctx->ccnt[index]) < user_param->tx_depth) { */ /* if (ctx->send_rcredit) { */ /* uint32_t swindow = scnt_for_qp[index] + user_param->post_list - ctx->credit_buf[index]; */ /* if (swindow >= user_param->rx_depth) */ /* break; */ /* } */ /* if (user_param->post_list == 1 && (ctx->scnt[index] % user_param->cq_mod == 0 && user_param->cq_mod > 1)) { */ /* #ifdef HAVE_VERBS_EXP */ /* #ifdef HAVE_ACCL_VERBS */ /* if (user_param->verb_type == ACCL_INTF) */ /* ctx->exp_wr[index].exp_send_flags &= 
~IBV_EXP_QP_BURST_SIGNALED; */ /* else { */ /* #endif */ /* if (user_param->use_exp == 1) */ /* ctx->exp_wr[index].exp_send_flags &= ~IBV_EXP_SEND_SIGNALED; */ /* else */ /* #endif */ /* ctx->wr[index].send_flags &= ~IBV_SEND_SIGNALED; */ /* #ifdef HAVE_ACCL_VERBS */ /* } */ /* #endif */ /* } */ /* #ifdef HAVE_VERBS_EXP */ /* if (user_param->use_exp == 1) */ /* err = (ctx->exp_post_send_func_pointer)(ctx->qp[index],&ctx->exp_wr[index*user_param->post_list],&bad_exp_wr); */ /* else */ /* err = (ctx->post_send_func_pointer)(ctx->qp[index],&ctx->wr[index*user_param->post_list],&bad_wr); */ /* #else */ /* err = ibv_post_send(ctx->qp[index],&ctx->wr[index*user_param->post_list],&bad_wr); */ /* #endif */ /* if (err) { */ /* fprintf(stderr,"Couldn't post send: %d scnt=%lu \n",index,ctx->scnt[index]); */ /* return_value = FAILURE; */ /* goto cleaning; */ /* } */ /* ctx->scnt[index] += user_param->post_list; */ /* scnt_for_qp[index] += user_param->post_list; */ /* totscnt += user_param->post_list; */ /* [> ask for completion on this wr <] */ /* if (user_param->post_list == 1 && */ /* (ctx->scnt[index]%user_param->cq_mod == user_param->cq_mod - 1 || */ /* (user_param->test_type == ITERATIONS && ctx->scnt[index] == user_param->iters - 1))) { */ /* #ifdef HAVE_VERBS_EXP */ /* #ifdef HAVE_ACCL_VERBS */ /* if (user_param->verb_type == ACCL_INTF) */ /* ctx->exp_wr[index].exp_send_flags |= IBV_EXP_QP_BURST_SIGNALED; */ /* else { */ /* #endif */ /* if (user_param->use_exp == 1) */ /* ctx->exp_wr[index].exp_send_flags |= IBV_EXP_SEND_SIGNALED; */ /* else */ /* #endif */ /* ctx->wr[index].send_flags |= IBV_SEND_SIGNALED; */ /* #ifdef HAVE_ACCL_VERBS */ /* } */ /* #endif */ /* } */ /* } */ /* } */ /* if (totccnt < totscnt) { */ /* ne = ibv_poll_cq(ctx->send_cq,CTX_POLL_BATCH,wc); */ /* if (ne > 0) { */ /* for (i = 0; i < ne; i++) { */ /* if (wc[i].status != IBV_WC_SUCCESS) { */ /* NOTIFY_COMP_ERROR_SEND(wc[i],ctx->scnt[(int)wc[i].wr_id],ctx->scnt[(int)wc[i].wr_id]); */ /* return_value 
= FAILURE; */ /* goto cleaning; */ /* } */ /* wc_id = (user_param->verb_type == ACCL_INTF) ? */ /* 0 : (int)wc[i].wr_id; */ /* user_param->iters += user_param->cq_mod; */ /* totccnt += user_param->cq_mod; */ /* ctx->ccnt[wc_id] += user_param->cq_mod; */ /* } */ /* } else if (ne < 0) { */ /* fprintf(stderr, "poll CQ failed %d\n",ne); */ /* return_value = FAILURE; */ /* goto cleaning; */ /* } */ /* } */ /* } */ /* cleaning: */ /* free(scnt_for_qp); */ /* free(wc); */ /* return return_value; */ /* } */ /****************************************************************************** * ******************************************************************************/ int run_iter_bw_infinitely_server(struct pingpong_context *ctx, struct perftest_parameters *user_param) { int i,ne; struct ibv_wc *wc = NULL; struct ibv_wc *swc = NULL; struct ibv_recv_wr *bad_wr_recv = NULL; uint64_t *rcnt_for_qp = NULL; uint64_t *ccnt_for_qp = NULL; int *scredit_for_qp = NULL; int return_value = 0; ALLOCATE(wc ,struct ibv_wc ,CTX_POLL_BATCH); ALLOCATE(swc ,struct ibv_wc ,user_param->tx_depth); ALLOCATE(rcnt_for_qp,uint64_t,user_param->num_of_qps); memset(rcnt_for_qp,0,sizeof(uint64_t)*user_param->num_of_qps); ALLOCATE(ccnt_for_qp,uint64_t,user_param->num_of_qps); memset(ccnt_for_qp,0,sizeof(uint64_t)*user_param->num_of_qps); ALLOCATE(scredit_for_qp,int,user_param->num_of_qps); memset(scredit_for_qp,0,sizeof(int)*user_param->num_of_qps); while (1) { ne = ibv_poll_cq(ctx->recv_cq,CTX_POLL_BATCH,wc); if (ne > 0) { for (i = 0; i < ne; i++) { if (wc[i].status != IBV_WC_SUCCESS) { fprintf(stderr,"A completion with Error in run_infinitely_bw_server function"); return_value = FAILURE; goto cleaning; } if (user_param->use_srq) { if (ibv_post_srq_recv(ctx->srq, &ctx->rwr[wc[i].wr_id],&bad_wr_recv)) { fprintf(stderr, "Couldn't post recv SRQ. 
QP = %d:\n",(int)wc[i].wr_id); return_value = FAILURE; goto cleaning; } } else { if (ibv_post_recv(ctx->qp[wc[i].wr_id],&ctx->rwr[wc[i].wr_id],&bad_wr_recv)) { fprintf(stderr, "Couldn't post recv Qp=%d\n",(int)wc[i].wr_id); return_value = 15; goto cleaning; } if (ctx->send_rcredit) { rcnt_for_qp[wc[i].wr_id]++; scredit_for_qp[wc[i].wr_id]++; if (scredit_for_qp[wc[i].wr_id] == ctx->credit_cnt) { struct ibv_send_wr *bad_wr = NULL; ctx->ctrl_buf[wc[i].wr_id] = rcnt_for_qp[wc[i].wr_id]; while (ccnt_for_qp[wc[i].wr_id] == user_param->tx_depth) { int sne, j = 0; sne = ibv_poll_cq(ctx->send_cq,user_param->tx_depth,swc); if (sne > 0) { for (j = 0; j < sne; j++) { if (swc[j].status != IBV_WC_SUCCESS) { fprintf(stderr, "Poll send CQ error status=%u qp %d credit=%lu scredit=%lu\n", swc[j].status,(int)swc[j].wr_id, rcnt_for_qp[swc[j].wr_id],ccnt_for_qp[swc[j].wr_id]); return_value = FAILURE; goto cleaning; } ccnt_for_qp[swc[j].wr_id]--; } } else if (sne < 0) { fprintf(stderr, "Poll send CQ failed ne=%d\n",sne); return_value = FAILURE; goto cleaning; } } if (ibv_post_send(ctx->qp[wc[i].wr_id],&ctx->ctrl_wr[wc[i].wr_id],&bad_wr)) { fprintf(stderr,"Couldn't post send qp %d credit=%lu\n", (int)wc[i].wr_id,rcnt_for_qp[wc[i].wr_id]); return_value = FAILURE; goto cleaning; } ccnt_for_qp[wc[i].wr_id]++; scredit_for_qp[wc[i].wr_id] = 0; } } } } } else if (ne < 0) { fprintf(stderr, "Poll Receive CQ failed %d\n", ne); return_value = FAILURE; goto cleaning; } } cleaning: free(wc); free(swc); free(rcnt_for_qp); free(ccnt_for_qp); free(scredit_for_qp); return return_value; } /****************************************************************************** * ******************************************************************************/ int run_iter_bi(struct pingpong_context *ctx, struct perftest_parameters *user_param) { uint64_t totscnt = 0; uint64_t totccnt = 0; uint64_t totrcnt = 0; int i,index = 0; int ne = 0; int err = 0; uint64_t *rcnt_for_qp = NULL; uint64_t tot_iters = 0; uint64_t 
iters = 0; int tot_scredit = 0; int *scredit_for_qp = NULL; struct ibv_wc *wc = NULL; struct ibv_wc *wc_tx = NULL; struct ibv_recv_wr *bad_wr_recv = NULL; #ifdef HAVE_VERBS_EXP struct ibv_exp_send_wr *bad_exp_wr = NULL; #endif struct ibv_send_wr *bad_wr = NULL; int num_of_qps = user_param->num_of_qps; /* This is to ensure SERVER will not start to send packets before CLIENT start the test. */ int before_first_rx = ON; int size_per_qp = (user_param->use_srq) ? user_param->rx_depth/user_param->num_of_qps : user_param->rx_depth; int return_value = 0; ALLOCATE(wc_tx,struct ibv_wc,CTX_POLL_BATCH); ALLOCATE(rcnt_for_qp,uint64_t,user_param->num_of_qps); ALLOCATE(scredit_for_qp,int,user_param->num_of_qps); ALLOCATE(wc,struct ibv_wc,user_param->rx_depth); memset(rcnt_for_qp,0,sizeof(uint64_t)*user_param->num_of_qps); memset(scredit_for_qp,0,sizeof(int)*user_param->num_of_qps); if (user_param->noPeak == ON) user_param->tposted[0] = get_cycles(); /* This is a very important point. Since this function do RX and TX in the same time, we need to give some priority to RX to avoid deadlock in UC/UD test scenarios (Recv WQEs depleted due to fast TX) */ if (user_param->machine == CLIENT) { before_first_rx = OFF; /* if (user_param->test_type == DURATION) { */ /* duration_param=user_param; */ /* user_param->iters=0; */ /* duration_param->state = START_STATE; */ /* signal(SIGALRM, catch_alarm); */ /* if (user_param->margin > 0 ) */ /* alarm(user_param->margin); */ /* else */ /* catch_alarm(0); [> move to next state <] */ /* } */ } if (user_param->test_type == ITERATIONS) { check_alive_data.is_events = user_param->use_event; signal(SIGALRM, check_alive); alarm(60); // TODO } if(user_param->duplex && (user_param->use_xrc || user_param->connection_type == DC)) num_of_qps /= 2; tot_iters = (uint64_t)user_param->iters*num_of_qps; iters=user_param->iters; check_alive_data.g_total_iters = tot_iters; while ((user_param->test_type == DURATION && user_param->state != END_STATE) || totccnt < 
tot_iters || totrcnt < tot_iters ) { for (index=0; index < num_of_qps; index++) { while (before_first_rx == OFF && (ctx->scnt[index] < iters || user_param->test_type == DURATION) && ((ctx->scnt[index] + scredit_for_qp[index] - ctx->ccnt[index]) < user_param->tx_depth)) { if (ctx->send_rcredit) { uint32_t swindow = ctx->scnt[index] + user_param->post_list - ctx->credit_buf[index]; if (swindow >= user_param->rx_depth) break; } if (user_param->post_list == 1 && (ctx->scnt[index] % user_param->cq_mod == 0 && user_param->cq_mod > 1) && !(ctx->scnt[index] == (user_param->iters - 1) && user_param->test_type == ITERATIONS)) { #ifdef HAVE_VERBS_EXP if (user_param->use_exp ==1) ctx->exp_wr[index].exp_send_flags &= ~IBV_EXP_SEND_SIGNALED; else #endif ctx->wr[index].send_flags &= ~IBV_SEND_SIGNALED; } if (user_param->noPeak == OFF) user_param->tposted[totscnt] = get_cycles(); if (user_param->test_type == DURATION && duration_param->state == END_STATE) break; #ifdef HAVE_VERBS_EXP if (user_param->use_exp == 1) err = (ctx->exp_post_send_func_pointer)(ctx->qp[index], &ctx->exp_wr[index*user_param->post_list],&bad_exp_wr); else err = (ctx->post_send_func_pointer)(ctx->qp[index], &ctx->wr[index*user_param->post_list],&bad_wr); #else err = ibv_post_send(ctx->qp[index],&ctx->wr[index*user_param->post_list],&bad_wr); #endif if (err) { fprintf(stderr,"Couldn't post send: qp %d scnt=%lu \n",index,ctx->scnt[index]); return_value = FAILURE; goto cleaning; } if (user_param->post_list == 1 && user_param->size <= (ctx->cycle_buffer / 2)) { #ifdef HAVE_VERBS_EXP if (user_param->use_exp == 1) increase_loc_addr(ctx->exp_wr[index].sg_list,user_param->size,ctx->scnt[index], ctx->my_addr[index],0,ctx->cache_line_size,ctx->cycle_buffer); else #endif increase_loc_addr(ctx->wr[index].sg_list,user_param->size,ctx->scnt[index], ctx->my_addr[index],0,ctx->cache_line_size,ctx->cycle_buffer); } ctx->scnt[index] += user_param->post_list; totscnt += user_param->post_list; if (user_param->post_list == 1 && 
(ctx->scnt[index]%user_param->cq_mod == user_param->cq_mod - 1 || (user_param->test_type == ITERATIONS && ctx->scnt[index] == iters-1))) { #ifdef HAVE_VERBS_EXP if (user_param->use_exp == 1) ctx->exp_wr[index].exp_send_flags |= IBV_EXP_SEND_SIGNALED; else #endif ctx->wr[index].send_flags |= IBV_SEND_SIGNALED; } } } if (user_param->use_event) { if (ctx_notify_events(ctx->channel)) { fprintf(stderr,"Failed to notify events to CQ"); return_value = FAILURE; goto cleaning; } } ne = ibv_poll_cq(ctx->recv_cq,user_param->rx_depth,wc); if (ne > 0) { if (user_param->machine == SERVER && before_first_rx == ON) { before_first_rx = OFF; /* if (user_param->test_type == DURATION) { */ /* duration_param=user_param; */ /* user_param->iters=0; */ /* duration_param->state = START_STATE; */ /* signal(SIGALRM, catch_alarm); */ /* if (user_param->margin > 0 ) */ /* alarm(user_param->margin); */ /* else */ /* catch_alarm(0); [> move to next state <] */ /* } */ } for (i = 0; i < ne; i++) { if (wc[i].status != IBV_WC_SUCCESS) { NOTIFY_COMP_ERROR_RECV(wc[i],totrcnt); return_value = FAILURE; goto cleaning; } rcnt_for_qp[wc[i].wr_id]++; totrcnt++; check_alive_data.current_totrcnt = totrcnt; if (user_param->test_type==DURATION && user_param->state == SAMPLE_STATE) { if (user_param->report_per_port) { user_param->iters_per_port[user_param->port_by_qp[(int)wc[i].wr_id]]++; } user_param->iters++; } if (user_param->test_type==DURATION || rcnt_for_qp[wc[i].wr_id] + size_per_qp <= user_param->iters) { if (user_param->use_srq) { if (ibv_post_srq_recv(ctx->srq, &ctx->rwr[wc[i].wr_id],&bad_wr_recv)) { fprintf(stderr, "Couldn't post recv SRQ. 
QP = %d: counter=%d\n",(int)wc[i].wr_id,(int)totrcnt); return_value = FAILURE; goto cleaning; } } else { if (ibv_post_recv(ctx->qp[wc[i].wr_id],&ctx->rwr[wc[i].wr_id],&bad_wr_recv)) { fprintf(stderr, "Couldn't post recv Qp=%d rcnt=%lu\n",(int)wc[i].wr_id,rcnt_for_qp[wc[i].wr_id]); return_value = 15; goto cleaning; } } if (SIZE(user_param->connection_type,user_param->size,!(int)user_param->machine) <= (ctx->cycle_buffer / 2)) { increase_loc_addr(ctx->rwr[wc[i].wr_id].sg_list, user_param->size, rcnt_for_qp[wc[i].wr_id] + size_per_qp -1, ctx->rx_buffer_addr[wc[i].wr_id],user_param->connection_type, ctx->cache_line_size,ctx->cycle_buffer); } } if (ctx->send_rcredit) { int credit_cnt = rcnt_for_qp[wc[i].wr_id]%user_param->rx_depth; if (credit_cnt%ctx->credit_cnt == 0) { int sne = 0; struct ibv_wc credit_wc; struct ibv_send_wr *bad_wr = NULL; ctx->ctrl_buf[wc[i].wr_id] = rcnt_for_qp[wc[i].wr_id]; while ((ctx->scnt[wc[i].wr_id] + scredit_for_qp[wc[i].wr_id] - ctx->ccnt[wc[i].wr_id]) >= user_param->tx_depth) { sne = ibv_poll_cq(ctx->send_cq, 1, &credit_wc); if (sne > 0) { if (credit_wc.status != IBV_WC_SUCCESS) { fprintf(stderr, "Poll send CQ error status=%u qp %d credit=%lu scredit=%d\n", credit_wc.status,(int)credit_wc.wr_id, rcnt_for_qp[credit_wc.wr_id],scredit_for_qp[credit_wc.wr_id]); return_value = FAILURE; goto cleaning; } if (credit_wc.opcode == IBV_WC_RDMA_WRITE) { scredit_for_qp[credit_wc.wr_id]--; tot_scredit--; } else { totccnt += user_param->cq_mod; ctx->ccnt[(int)credit_wc.wr_id] += user_param->cq_mod; if (user_param->noPeak == OFF) { if ((user_param->test_type == ITERATIONS && (totccnt >= tot_iters - 1))) user_param->tcompleted[tot_iters - 1] = get_cycles(); else user_param->tcompleted[totccnt-1] = get_cycles(); } if (user_param->test_type==DURATION && user_param->state == SAMPLE_STATE) user_param->iters += user_param->cq_mod; } } else if (sne < 0) { fprintf(stderr, "Poll send CQ ne=%d\n",sne); return_value = FAILURE; goto cleaning; } } if 
(ibv_post_send(ctx->qp[wc[i].wr_id],&ctx->ctrl_wr[wc[i].wr_id],&bad_wr)) { fprintf(stderr,"Couldn't post send: qp%lu credit=%lu\n",wc[i].wr_id,rcnt_for_qp[wc[i].wr_id]); return_value = FAILURE; goto cleaning; } scredit_for_qp[wc[i].wr_id]++; tot_scredit++; } } } } else if (ne < 0) { fprintf(stderr, "poll CQ failed %d\n", ne); return_value = FAILURE; goto cleaning; } else if (ne == 0) { if (check_alive_data.to_exit) { user_param->check_alive_exited = 1; return_value = FAILURE; goto cleaning; } } ne = ibv_poll_cq(ctx->send_cq,CTX_POLL_BATCH,wc_tx); if (ne > 0) { for (i = 0; i < ne; i++) { if (wc_tx[i].status != IBV_WC_SUCCESS) { NOTIFY_COMP_ERROR_SEND(wc_tx[i],totscnt,totccnt); return_value = FAILURE; goto cleaning; } if (wc_tx[i].opcode == IBV_WC_RDMA_WRITE) { if (!ctx->send_rcredit) { fprintf(stderr, "Polled RDMA_WRITE completion without recv credit request\n"); return_value = FAILURE; goto cleaning; } scredit_for_qp[wc_tx[i].wr_id]--; tot_scredit--; } else { totccnt += user_param->cq_mod; ctx->ccnt[(int)wc_tx[i].wr_id] += user_param->cq_mod; if (user_param->noPeak == OFF) { if ((user_param->test_type == ITERATIONS && (totccnt >= tot_iters - 1))) user_param->tcompleted[tot_iters - 1] = get_cycles(); else user_param->tcompleted[totccnt-1] = get_cycles(); } if (user_param->test_type==DURATION && user_param->state == SAMPLE_STATE) { if (user_param->report_per_port) { user_param->iters_per_port[user_param->port_by_qp[(int)wc[i].wr_id]] += user_param->cq_mod; } user_param->iters += user_param->cq_mod; } } } } else if (ne < 0) { fprintf(stderr, "poll CQ failed %d\n", ne); return_value = FAILURE; goto cleaning; } } if (user_param->noPeak == ON && user_param->test_type == ITERATIONS) { user_param->tcompleted[0] = get_cycles(); } if (ctx->send_rcredit) { if (clean_scq_credit(tot_scredit, ctx, user_param)) { return_value = FAILURE; goto cleaning; } } cleaning: check_alive_data.last_totrcnt=0; free(rcnt_for_qp); free(scredit_for_qp); free(wc); free(wc_tx); return 
return_value; } /****************************************************************************** * ******************************************************************************/ int run_iter_lat_write(struct pingpong_context *ctx,struct perftest_parameters *user_param) { uint64_t scnt = 0; uint64_t ccnt = 0; uint64_t rcnt = 0; int ne; int err = 0; int poll_buf_offset = 0; volatile char *poll_buf = NULL; volatile char *post_buf = NULL; #ifdef HAVE_VERBS_EXP struct ibv_exp_send_wr *bad_exp_wr = NULL; #endif struct ibv_send_wr *bad_wr = NULL; struct ibv_wc wc; int cpu_mhz = get_cpu_mhz(user_param->cpu_freq_f); int total_gap_cycles = user_param->latency_gap * cpu_mhz; cycles_t end_cycle, start_gap=0; #ifdef HAVE_VERBS_EXP if (user_param->use_exp == 1) { ctx->exp_wr[0].sg_list->length = user_param->size; ctx->exp_wr[0].exp_send_flags = IBV_EXP_SEND_SIGNALED; if (user_param->size <= user_param->inline_size) ctx->exp_wr[0].exp_send_flags |= IBV_EXP_SEND_INLINE; } else { #endif ctx->wr[0].sg_list->length = user_param->size; ctx->wr[0].send_flags = IBV_SEND_SIGNALED; if (user_param->size <= user_param->inline_size) ctx->wr[0].send_flags |= IBV_SEND_INLINE; #ifdef HAVE_VERBS_EXP } #endif if((user_param->use_xrc || user_param->connection_type == DC)) poll_buf_offset = 1; post_buf = (char*)ctx->buf[0] + user_param->size - 1; poll_buf = (char*)ctx->buf[0] + (user_param->num_of_qps + poll_buf_offset)*BUFF_SIZE(ctx->size, ctx->cycle_buffer) + user_param->size - 1; /* Duration support in latency tests. */ /* if (user_param->test_type == DURATION) { */ /* duration_param=user_param; */ /* duration_param->state = START_STATE; */ /* signal(SIGALRM, catch_alarm); */ /* user_param->iters = 0; */ /* if (user_param->margin > 0) */ /* alarm(user_param->margin); */ /* else */ /* catch_alarm(0); */ /* } */ /* Done with setup. Start the test. 
*/ while (scnt < user_param->iters || ccnt < user_param->iters || rcnt < user_param->iters || ((user_param->test_type == DURATION && user_param->state != END_STATE))) { if ((rcnt < user_param->iters || user_param->test_type == DURATION) && !(scnt < 1 && user_param->machine == SERVER)) { rcnt++; while (*poll_buf != (char)rcnt && user_param->state != END_STATE); } if (scnt < user_param->iters || user_param->test_type == DURATION) { if (user_param->latency_gap) { start_gap = get_cycles(); end_cycle = start_gap + total_gap_cycles; while (get_cycles() < end_cycle) { continue; } } if (user_param->test_type == ITERATIONS) user_param->tposted[scnt] = get_cycles(); *post_buf = (char)++scnt; #ifdef HAVE_VERBS_EXP if (user_param->use_exp == 1) err = (ctx->exp_post_send_func_pointer)(ctx->qp[0],&ctx->exp_wr[0],&bad_exp_wr); else err = (ctx->post_send_func_pointer)(ctx->qp[0],&ctx->wr[0],&bad_wr); #else err = ibv_post_send(ctx->qp[0],&ctx->wr[0],&bad_wr); #endif if (err) { fprintf(stderr,"Couldn't post send: scnt=%lu\n",scnt); return 1; } } if (user_param->test_type == DURATION && user_param->state == END_STATE) break; if (ccnt < user_param->iters || user_param->test_type == DURATION) { do { ne = ibv_poll_cq(ctx->send_cq, 1, &wc); } while (ne == 0); if(ne > 0) { if (wc.status != IBV_WC_SUCCESS) { NOTIFY_COMP_ERROR_SEND(wc,scnt,ccnt); return 1; } ccnt++; if (user_param->test_type==DURATION && user_param->state == SAMPLE_STATE) user_param->iters++; } else if (ne < 0) { fprintf(stderr, "poll CQ failed %d\n", ne); return FAILURE; } } } return 0; } /****************************************************************************** * ******************************************************************************/ int run_iter_lat(struct pingpong_context *ctx,struct perftest_parameters *user_param) { uint64_t scnt = 0; int ne; int err = 0; #ifdef HAVE_VERBS_EXP struct ibv_exp_send_wr *bad_exp_wr = NULL; #endif struct ibv_send_wr *bad_wr = NULL; struct ibv_wc wc; int cpu_mhz = 
get_cpu_mhz(user_param->cpu_freq_f); int total_gap_cycles = user_param->latency_gap * cpu_mhz; cycles_t end_cycle, start_gap=0; #ifdef HAVE_VERBS_EXP if (user_param->use_exp == 1) { ctx->exp_wr[0].sg_list->length = user_param->size; ctx->exp_wr[0].exp_send_flags = IBV_EXP_SEND_SIGNALED; } else { #endif ctx->wr[0].sg_list->length = user_param->size; ctx->wr[0].send_flags = IBV_SEND_SIGNALED; #ifdef HAVE_VERBS_EXP } #endif /* Duration support in latency tests. */ /* if (user_param->test_type == DURATION) { */ /* duration_param=user_param; */ /* duration_param->state = START_STATE; */ /* signal(SIGALRM, catch_alarm); */ /* user_param->iters = 0; */ /* if (user_param->margin > 0) */ /* alarm(user_param->margin); */ /* else */ /* catch_alarm(0); */ /* } */ while (scnt < user_param->iters || (user_param->test_type == DURATION && user_param->state != END_STATE)) { if (user_param->latency_gap) { start_gap = get_cycles(); end_cycle = start_gap + total_gap_cycles; while (get_cycles() < end_cycle) { continue; } } if (user_param->test_type == ITERATIONS) user_param->tposted[scnt++] = get_cycles(); #ifdef HAVE_VERBS_EXP if (user_param->use_exp == 1) err = (ctx->exp_post_send_func_pointer)(ctx->qp[0],&ctx->exp_wr[0],&bad_exp_wr); else err = (ctx->post_send_func_pointer)(ctx->qp[0],&ctx->wr[0],&bad_wr); #else err = ibv_post_send(ctx->qp[0],&ctx->wr[0],&bad_wr); #endif if (err) { fprintf(stderr,"Couldn't post send: scnt=%lu\n",scnt); return 1; } if (user_param->test_type == DURATION && user_param->state == END_STATE) break; if (user_param->use_event) { if (ctx_notify_events(ctx->channel)) { fprintf(stderr, "Couldn't request CQ notification\n"); return 1; } } do { ne = ibv_poll_cq(ctx->send_cq, 1, &wc); if(ne > 0) { if (wc.status != IBV_WC_SUCCESS) { NOTIFY_COMP_ERROR_SEND(wc,scnt,scnt); return 1; } if (user_param->test_type==DURATION && user_param->state == SAMPLE_STATE) user_param->iters++; } else if (ne < 0) { fprintf(stderr, "poll CQ failed %d\n", ne); return FAILURE; } } while 
(!user_param->use_event && ne == 0); } return 0; } /****************************************************************************** * ******************************************************************************/ int run_iter_lat_send(struct pingpong_context *ctx,struct perftest_parameters *user_param) { uint64_t scnt = 0; /* sent packets counter */ uint64_t rcnt = 0; /* received packets counter */ int poll = 0; int ne; int err = 0; struct ibv_wc wc; struct ibv_recv_wr *bad_wr_recv; #ifdef HAVE_VERBS_EXP struct ibv_exp_send_wr *bad_exp_wr; #endif struct ibv_send_wr *bad_wr; int firstRx = 1; int size_per_qp = (user_param->use_srq) ? user_param->rx_depth/user_param->num_of_qps : user_param->rx_depth; int cpu_mhz = get_cpu_mhz(user_param->cpu_freq_f); int total_gap_cycles = user_param->latency_gap * cpu_mhz; int send_flows_index = 0; int recv_flows_index = 0; cycles_t end_cycle, start_gap=0; uintptr_t primary_send_addr = ctx->sge_list[0].addr; uintptr_t primary_recv_addr = ctx->recv_sge_list[0].addr; if (user_param->connection_type != RawEth) { #ifdef HAVE_VERBS_EXP if (user_param->use_exp == 1) { ctx->exp_wr[0].sg_list->length = user_param->size; ctx->exp_wr[0].exp_send_flags = 0; } else { #endif ctx->wr[0].sg_list->length = user_param->size; ctx->wr[0].send_flags = 0; #ifdef HAVE_VERBS_EXP } #endif } if (user_param->size <= user_param->inline_size) { #ifdef HAVE_VERBS_EXP if (user_param->use_exp == 1) ctx->exp_wr[0].exp_send_flags |= IBV_EXP_SEND_INLINE; else #endif ctx->wr[0].send_flags |= IBV_SEND_INLINE; } while (scnt < user_param->iters || rcnt < user_param->iters || ( (user_param->test_type == DURATION && user_param->state != END_STATE))) { /* * Get the received packet. 
make sure that the client won't enter here until he sends * his first packet (scnt < 1) * server will enter here first and wait for a packet to arrive (from the client) */ if ((rcnt < user_param->iters || user_param->test_type == DURATION) && !(scnt < 1 && user_param->machine == CLIENT)) { if (user_param->use_event) { if (ctx_notify_events(ctx->channel)) { fprintf(stderr , " Failed to notify events to CQ"); return 1; } } do { ne = ibv_poll_cq(ctx->recv_cq,1,&wc); if (user_param->test_type == DURATION && user_param->state == END_STATE) break; if (ne > 0) { if (firstRx) { set_on_first_rx_packet(user_param); firstRx = 0; } if (wc.status != IBV_WC_SUCCESS) { NOTIFY_COMP_ERROR_RECV(wc,rcnt); return 1; } rcnt++; if (user_param->test_type == DURATION && user_param->state == SAMPLE_STATE) user_param->iters++; /*if we're in duration mode or there * is enough space in the rx_depth, * post that you received a packet. */ if (user_param->test_type == DURATION || (rcnt + size_per_qp <= user_param->iters)) { if (user_param->use_srq) { if (ibv_post_srq_recv(ctx->srq, &ctx->rwr[wc.wr_id], &bad_wr_recv)) { fprintf(stderr, "Couldn't post recv SRQ. 
QP = %d: counter=%lu\n",(int)wc.wr_id, rcnt); return 1; } } else { if (ibv_post_recv(ctx->qp[wc.wr_id], &ctx->rwr[wc.wr_id], &bad_wr_recv)) { fprintf(stderr, "Couldn't post recv: rcnt=%lu\n", rcnt); return 15; } } if (user_param->flows != DEF_FLOWS) { if (++recv_flows_index == user_param->flows) { recv_flows_index = 0; ctx->recv_sge_list[0].addr = primary_recv_addr; } else { ctx->recv_sge_list[0].addr += INC(user_param->size, ctx->cache_line_size); } } } } else if (ne < 0) { fprintf(stderr, "poll CQ failed %d\n", ne); return 1; } } while (!user_param->use_event && ne == 0); } if (scnt < user_param->iters || (user_param->test_type == DURATION && user_param->state != END_STATE)) { if (user_param->latency_gap) { start_gap = get_cycles(); end_cycle = start_gap + total_gap_cycles; while (get_cycles() < end_cycle) { continue; } } if (user_param->test_type == ITERATIONS) user_param->tposted[scnt] = get_cycles(); scnt++; if (scnt % user_param->cq_mod == 0 || (user_param->test_type == ITERATIONS && scnt == user_param->iters)) { poll = 1; #ifdef HAVE_VERBS_EXP if (user_param->use_exp == 1) ctx->exp_wr[0].exp_send_flags |= IBV_EXP_SEND_SIGNALED; else #endif ctx->wr[0].send_flags |= IBV_SEND_SIGNALED; } /* if we're in duration mode and the time is over, exit from this function */ if (user_param->test_type == DURATION && user_param->state == END_STATE) break; /* send the packet that's in index 0 on the buffer */ #ifdef HAVE_VERBS_EXP if (user_param->use_exp == 1) err = (ctx->exp_post_send_func_pointer)(ctx->qp[0],&ctx->exp_wr[0],&bad_exp_wr); else err = (ctx->post_send_func_pointer)(ctx->qp[0],&ctx->wr[0],&bad_wr); #else err = ibv_post_send(ctx->qp[0],&ctx->wr[0],&bad_wr); #endif if (err) { fprintf(stderr,"Couldn't post send: scnt=%lu \n",scnt); return 1; } if (user_param->flows != DEF_FLOWS) { if (++send_flows_index == user_param->flows) { send_flows_index = 0; ctx->sge_list[0].addr = primary_send_addr; } else { ctx->sge_list[0].addr = primary_send_addr + (ctx->flow_buff_size 
* send_flows_index); } } if (poll == 1) { struct ibv_wc s_wc; int s_ne; if (user_param->use_event) { if (ctx_notify_events(ctx->channel)) { fprintf(stderr , " Failed to notify events to CQ"); return FAILURE; } } /* wait until you get a cq for the last packet */ do { s_ne = ibv_poll_cq(ctx->send_cq, 1, &s_wc); } while (!user_param->use_event && s_ne == 0); if (s_ne < 0) { fprintf(stderr, "poll on Send CQ failed %d\n", s_ne); return FAILURE; } if (s_wc.status != IBV_WC_SUCCESS) { NOTIFY_COMP_ERROR_SEND(s_wc,scnt,scnt) return 1; } poll = 0; #ifdef HAVE_VERBS_EXP if (user_param->use_exp == 1) ctx->exp_wr[0].exp_send_flags &= ~IBV_EXP_SEND_SIGNALED; else #endif ctx->wr[0].send_flags &= ~IBV_SEND_SIGNALED; } } } return 0; } /****************************************************************************** *Server ******************************************************************************/ int run_iter_lat_burst_server(struct pingpong_context *ctx, struct perftest_parameters *user_param) { int i; int ne = 0; int err = 0; uint64_t scnt = 0; uint64_t rcnt = 0; uint64_t ccnt = 0; struct ibv_wc *wc = NULL; struct ibv_send_wr *bad_wr; struct ibv_recv_wr *bad_wr_recv = NULL; int wc_id; ALLOCATE(wc, struct ibv_wc, user_param->burst_size); /* main loop for polling */ while (rcnt < user_param->iters) { ne = ibv_poll_cq(ctx->recv_cq, user_param->burst_size, wc); if (ne > 0) { for (i = 0; i < ne; i++) { wc_id = (int)wc[i].wr_id; if (wc[i].status != IBV_WC_SUCCESS) { NOTIFY_COMP_ERROR_RECV(wc[i], rcnt); return FAILURE; } rcnt++; if (rcnt%user_param->reply_every == 0 && scnt - ccnt < user_param->tx_depth) { err = ibv_post_send(ctx->qp[0], &ctx->wr[0], &bad_wr); if (err) { fprintf(stderr, "Couldn't post send: scnt=%lu\n", scnt); return FAILURE; } scnt++; } if (ibv_post_recv(ctx->qp[wc_id], &ctx->rwr[wc_id], &bad_wr_recv)) { fprintf(stderr, "Couldn't post recv Qp=%d rcnt=%ld\n", wc_id, rcnt); return FAILURE; } } } else if (ne < 0) { fprintf(stderr, "poll CQ failed %d\n", ne); return 
FAILURE; } ne = ibv_poll_cq(ctx->send_cq, CTX_POLL_BATCH, wc); if (ne > 0) { for (i = 0; i < ne; i++) { if (wc[i].status != IBV_WC_SUCCESS) { NOTIFY_COMP_ERROR_SEND(wc[i], scnt, ccnt); return FAILURE; } ccnt++; } } else if (ne < 0) { fprintf(stderr, "poll CQ failed %d\n", ne); return FAILURE; } } free(wc); return SUCCESS; } /****************************************************************************** *Client ******************************************************************************/ int run_iter_lat_burst(struct pingpong_context *ctx, struct perftest_parameters *user_param) { uint64_t totscnt = 0; /* sent packets counter */ uint64_t totccnt = 0; /* complete sent packets counter */ uint64_t totrcnt = 0; /* received packets counter */ uint64_t tot_iters; uint64_t pong_cnt = 0; /* counts how many pongs arrived */ int ne, ns; int err = 0; int i = 0; int wc_id; struct ibv_wc *wc; #ifdef HAVE_VERBS_EXP struct ibv_exp_send_wr *bad_exp_wr; #endif struct ibv_send_wr *bad_wr; int cpu_mhz; int return_value = 0; /* Rate Limiter*/ int rate_limit_pps = 0; double gap_time = 0; /* in usec */ cycles_t gap_cycles = 0; /* in cycles */ cycles_t gap_deadline = 0; unsigned int number_of_bursts = 0; int burst_iter = 0; int is_sending_burst = 0; struct ibv_recv_wr *bad_wr_recv = NULL; ALLOCATE(wc, struct ibv_wc, user_param->burst_size); tot_iters = (uint64_t)user_param->iters; /* If using rate limiter, calculate gap time between bursts */ cpu_mhz = get_cpu_mhz(user_param->cpu_freq_f); if (cpu_mhz <= 0) { fprintf(stderr, "Failed: couldn't acquire cpu frequency for rate limiter.\n"); return_value = FAILURE; goto cleaning; } if (user_param->rate_limit > 0 ) { if (user_param->rate_limit_type == SW_RATE_LIMIT) { switch (user_param->rate_units) { case MEGA_BYTE_PS: rate_limit_pps = ((double)(user_param->rate_limit) / user_param->size) * 1048576; break; case GIGA_BIT_PS: rate_limit_pps = ((double)(user_param->rate_limit) / (user_param->size * 8)) * 1000000000; break; case PACKET_PS: 
rate_limit_pps = user_param->rate_limit; break; default: fprintf(stderr, " Failed: Unknown rate limit units\n"); return_value = FAILURE; goto cleaning; } number_of_bursts = rate_limit_pps / user_param->burst_size; gap_time = 1000000 * (1.0 / number_of_bursts); } } gap_cycles = cpu_mhz * gap_time; /* main loop for posting */ while (totrcnt < (totscnt / user_param->reply_every) || totccnt < tot_iters) { if (is_sending_burst == 0) { if (gap_deadline > get_cycles() && user_param->rate_limit_type == SW_RATE_LIMIT) { /* Go right to cq polling until gap time is over. */ goto polling; } gap_deadline = get_cycles() + gap_cycles; is_sending_burst = 1; burst_iter = 0; } while ((totscnt < user_param->iters) && (totscnt - totccnt) < (user_param->tx_depth) && !(is_sending_burst == 0 )) { #ifdef HAVE_VERBS_EXP if (user_param->use_exp == 1) err = (ctx->exp_post_send_func_pointer)(ctx->qp[0], &ctx->exp_wr[0], &bad_exp_wr); else err = (ctx->post_send_func_pointer)(ctx->qp[0],&ctx->wr[0],&bad_wr); #else err = ibv_post_send(ctx->qp[0],&ctx->wr[0],&bad_wr); #endif if (err) { fprintf(stderr, "Couldn't post send: scnt=%lu\n", totscnt); return 1; } if (user_param->post_list == 1 && user_param->size <= (ctx->cycle_buffer / 2)) { #ifdef HAVE_VERBS_EXP if (user_param->use_exp == 1) increase_loc_addr(ctx->exp_wr[0].sg_list, user_param->size, totscnt, ctx->my_addr[0], 0, ctx->cache_line_size, ctx->cycle_buffer); else #endif increase_loc_addr(ctx->wr[0].sg_list, user_param->size, totscnt, ctx->my_addr[0], 0, ctx->cache_line_size, ctx->cycle_buffer); } totscnt += user_param->post_list; if (totscnt % user_param->reply_every == 0 && totscnt != 0) { user_param->tposted[pong_cnt] = get_cycles(); pong_cnt++; } if (++burst_iter == user_param->burst_size) { is_sending_burst = 0; } } polling: do { ne = ibv_poll_cq(ctx->recv_cq, CTX_POLL_BATCH, wc); if (ne > 0) { for (i = 0; i < ne; i++) { wc_id = (user_param->verb_type == ACCL_INTF) ? 
0 : (int)wc[i].wr_id; user_param->tcompleted[totrcnt] = get_cycles(); totrcnt++; if (wc[i].status != IBV_WC_SUCCESS) { NOTIFY_COMP_ERROR_SEND(wc[i], totscnt, totccnt); return_value = FAILURE; goto cleaning; } if (ibv_post_recv(ctx->qp[wc_id], &ctx->rwr[wc_id], &bad_wr_recv)) { fprintf(stderr, "Couldn't post recv Qp=%d rcnt=%ld\n", wc_id, totrcnt); return FAILURE; } } } else if (ne < 0) { fprintf(stderr, "poll CQ failed %d\n", ne); return_value = 1; goto cleaning; } ns = ibv_poll_cq(ctx->send_cq, user_param->burst_size, wc); if (ns > 0) { for (i = 0; i < ns; i++) { wc_id = (user_param->verb_type == ACCL_INTF) ? 0 : (int)wc[i].wr_id; if (wc[i].status != IBV_WC_SUCCESS) { NOTIFY_COMP_ERROR_SEND(wc[i], totscnt, totccnt); return_value = FAILURE ; goto cleaning; } totccnt += user_param->cq_mod; } } else if (ns < 0) { fprintf(stderr, "poll CQ failed %d\n", ne); return_value = 1; goto cleaning; } } while (ne != 0); } return SUCCESS; cleaning: free(wc); return return_value; } /****************************************************************************** * ******************************************************************************/ uint16_t ctx_get_local_lid(struct ibv_context *context,int port) { struct ibv_port_attr attr; if (ibv_query_port(context,port,&attr)) return 0; return attr.lid; } /****************************************************************************** * ******************************************************************************/ void catch_alarm(int sig) { switch (duration_param->state) { case START_STATE: duration_param->state = SAMPLE_STATE; get_cpu_stats(duration_param,1); duration_param->tposted[0] = get_cycles(); alarm(duration_param->duration - 2*(duration_param->margin)); break; case SAMPLE_STATE: duration_param->state = STOP_SAMPLE_STATE; duration_param->tcompleted[0] = get_cycles(); get_cpu_stats(duration_param,2); if (duration_param->margin > 0) alarm(duration_param->margin); else catch_alarm(0); break; case STOP_SAMPLE_STATE: 
duration_param->state = END_STATE; break; default: fprintf(stderr,"unknown state\n"); } } void check_alive(int sig) { if (check_alive_data.current_totrcnt > check_alive_data.last_totrcnt) { check_alive_data.last_totrcnt = check_alive_data.current_totrcnt; alarm(60); } else if (check_alive_data.current_totrcnt == check_alive_data.last_totrcnt && check_alive_data.current_totrcnt < check_alive_data.g_total_iters) { fprintf(stderr," Did not get Message for 120 Seconds, exiting..\n Total Received=%d, Total Iters Required=%d\n",check_alive_data.current_totrcnt, check_alive_data.g_total_iters); if (check_alive_data.is_events) { /* Can't report BW, as we are stuck in event_loop */ fprintf(stderr," Due to this issue, Perftest cannot produce a report when in event mode.\n"); exit(FAILURE); } else { /* exit nice from run_iter function and report known bw/mr */ check_alive_data.to_exit = 1; } } } /****************************************************************************** * ******************************************************************************/ void catch_alarm_infintely() { print_report_bw(duration_param,NULL); duration_param->iters = 0; alarm(duration_param->duration); duration_param->tposted[0] = get_cycles(); } /****************************************************************************** * ******************************************************************************/ void *handle_signal_print_thread(void *sigmask) { sigset_t *set = (sigset_t*)sigmask; int rc; int sig_caught; while(1){ rc = sigwait(set, &sig_caught); if (rc != 0){ printf("Error when try to wait for SIGALRM\n"); exit(EXIT_FAILURE); } if(sig_caught == SIGALRM) catch_alarm_infintely(); else { printf("Unsupported signal caught %d, only signal %d is supported\n", sig_caught, SIGALRM); exit(EXIT_FAILURE); } } } /****************************************************************************** * ******************************************************************************/ #ifdef HAVE_MASKED_ATOMICS 
int check_masked_atomics_support(struct pingpong_context *ctx) { struct ibv_exp_device_attr attr; memset(&attr,0,sizeof (struct ibv_exp_device_attr)); attr.comp_mask = IBV_EXP_DEVICE_ATTR_EXT_ATOMIC_ARGS | IBV_EXP_DEVICE_ATTR_EXP_CAP_FLAGS; attr.exp_atomic_cap = IBV_EXP_ATOMIC_HCA_REPLY_BE; if (ibv_exp_query_device(ctx->context, &attr)) { fprintf(stderr, "ibv_exp_query_device failed\n"); return -1; } return MASK_IS_SET(IBV_EXP_ATOMIC_HCA_REPLY_BE, attr.exp_atomic_cap) && MASK_IS_SET(IBV_EXP_DEVICE_EXT_ATOMICS, attr.exp_device_cap_flags); } #endif /****************************************************************************** * ******************************************************************************/ #ifdef HAVE_PACKET_PACING_EXP int check_packet_pacing_support(struct pingpong_context *ctx) { struct ibv_exp_device_attr attr; memset(&attr, 0, sizeof (struct ibv_exp_device_attr)); attr.comp_mask = IBV_EXP_DEVICE_ATTR_PACKET_PACING_CAPS; if (ibv_exp_query_device(ctx->context, &attr)) { fprintf(stderr, "ibv_exp_query_device failed\n"); return FAILURE; } return MASK_IS_SET(IBV_EXP_DEVICE_ATTR_PACKET_PACING_CAPS, attr.comp_mask) ? SUCCESS : FAILURE; } #elif defined(HAVE_PACKET_PACING) int check_packet_pacing_support(struct pingpong_context *ctx) { struct ibv_device_attr_ex attr; memset(&attr, 0, sizeof (struct ibv_device_attr_ex)); if (ibv_query_device_ex(ctx->context, NULL, &attr)) { fprintf(stderr, "ibv_query_device_ex failed\n"); return FAILURE; } /* qp_rate_limit_max > 0 if PP is supported */ return attr.packet_pacing_caps.qp_rate_limit_max > 0 ? 
SUCCESS : FAILURE; } #endif int run_iter_fs(struct pingpong_context *ctx, struct perftest_parameters *user_param) { struct raw_ethernet_info *my_dest_info = NULL; struct raw_ethernet_info *rem_dest_info = NULL; #ifdef HAVE_RAW_ETH_EXP struct ibv_exp_flow **flow_create_result; struct ibv_exp_flow_attr **flow_rules; #else struct ibv_flow **flow_create_result; struct ibv_flow_attr **flow_rules; #endif int flow_index = 0; int qp_index = 0; int retval = SUCCESS; uint64_t tot_fs_cnt = 0; uint64_t allocated_flows = 0; uint64_t tot_iters = 0; /* Allocate user input dependable structs */ ALLOCATE(my_dest_info, struct raw_ethernet_info, user_param->num_of_qps); memset(my_dest_info, 0, sizeof(struct raw_ethernet_info) * user_param->num_of_qps); ALLOCATE(rem_dest_info, struct raw_ethernet_info, user_param->num_of_qps); memset(rem_dest_info, 0, sizeof(struct raw_ethernet_info) * user_param->num_of_qps); if (user_param->test_type == ITERATIONS) { user_param->flows = user_param->iters * user_param->num_of_qps; allocated_flows = user_param->iters; } else if (user_param->test_type == DURATION) { allocated_flows = (2 * MAX_FS_PORT) - (user_param->server_port + user_param->client_port); } #ifdef HAVE_RAW_ETH_EXP ALLOCATE(flow_create_result, struct ibv_exp_flow*, allocated_flows * user_param->num_of_qps); ALLOCATE(flow_rules, struct ibv_exp_flow_attr*, allocated_flows * user_param->num_of_qps); #else ALLOCATE(flow_create_result, struct ibv_flow*, allocated_flows * user_param->num_of_qps); ALLOCATE(flow_rules, struct ibv_flow_attr*, allocated_flows * user_param->num_of_qps); #endif /* if(user_param->test_type == DURATION) { */ /* duration_param = user_param; */ /* user_param->iters = 0; */ /* duration_param->state = START_STATE; */ /* signal(SIGALRM, catch_alarm); */ /* alarm(user_param->margin); */ /* if (user_param->margin > 0) */ /* alarm(user_param->margin); */ /* else */ /* catch_alarm(0); [> move to next state <] */ /* } */ if (set_up_fs_rules(flow_rules, ctx, user_param, 
allocated_flows)) { fprintf(stderr, "Unable to set up flow rules\n"); retval = FAILURE; goto cleaning; } do {/* This loop runs once in Iteration mode */ for (qp_index = 0; qp_index < user_param->num_of_qps; qp_index++) { for (flow_index = 0; flow_index < allocated_flows; flow_index++) { if (user_param->test_type == ITERATIONS) user_param->tposted[tot_fs_cnt] = get_cycles(); else if (user_param->test_type == DURATION && duration_param->state == END_STATE) break; #ifdef HAVE_RAW_ETH_EXP flow_create_result[flow_index] = ibv_exp_create_flow(ctx->qp[qp_index], flow_rules[(qp_index * allocated_flows) + flow_index]); #else flow_create_result[flow_index] = ibv_create_flow(ctx->qp[qp_index], flow_rules[(qp_index * allocated_flows) + flow_index]); #endif if (user_param->test_type == ITERATIONS) user_param->tcompleted[tot_fs_cnt] = get_cycles(); if (!flow_create_result[flow_index]) { perror("error"); fprintf(stderr, "Couldn't attach QP\n"); retval = FAILURE; goto cleaning; } if (user_param->test_type == ITERATIONS || (user_param->test_type == DURATION && duration_param->state == SAMPLE_STATE)) tot_fs_cnt++; tot_iters++; } } } while (user_param->test_type == DURATION && duration_param->state != END_STATE); if (user_param->test_type == DURATION && user_param->state == END_STATE) user_param->iters = tot_fs_cnt; cleaning: /* destroy open flows */ for (flow_index = 0; flow_index < tot_iters; flow_index++) { #ifdef HAVE_RAW_ETH_EXP if (ibv_exp_destroy_flow(flow_create_result[flow_index])) { #else if (ibv_destroy_flow(flow_create_result[flow_index])) { #endif perror("error"); fprintf(stderr, "Couldn't destroy flow\n"); } } free(flow_rules); free(flow_create_result); free(my_dest_info); free(rem_dest_info); return retval; } /****************************************************************************** * End ******************************************************************************/