#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <malloc.h>
#include <getopt.h>
#include <limits.h>
#include <errno.h>
#include <signal.h>
#include <ctype.h>
/* #include <sys/mman.h> */
#include <sys/ipc.h>
#include <sys/shm.h>
#include <pthread.h>

#include "perftest_resources.h"
#include "perftest_parameters.h"

/* #include "config.h" */

#ifdef HAVE_VERBS_EXP
static enum ibv_exp_wr_opcode exp_opcode_verbs_array[] = {IBV_EXP_WR_SEND,IBV_EXP_WR_RDMA_WRITE,IBV_EXP_WR_RDMA_READ};
static enum ibv_exp_wr_opcode exp_opcode_atomic_array[] = {IBV_EXP_WR_ATOMIC_CMP_AND_SWP,IBV_EXP_WR_ATOMIC_FETCH_AND_ADD};
#endif
static enum ibv_wr_opcode opcode_verbs_array[] = {IBV_WR_SEND,IBV_WR_RDMA_WRITE,IBV_WR_RDMA_READ};
static enum ibv_wr_opcode opcode_atomic_array[] = {IBV_WR_ATOMIC_CMP_AND_SWP,IBV_WR_ATOMIC_FETCH_AND_ADD};

#define CPU_UTILITY "/proc/stat"

struct perftest_parameters* duration_param;
struct check_alive_data check_alive_data;

/******************************************************************************
 * Beginning
 ******************************************************************************/
#ifdef HAVE_CUDA
#define ASSERT(x) \
	do { \
	if (!(x)) { \
		fprintf(stdout, "Assertion \"%s\" failed at %s:%d\n", #x, __FILE__, __LINE__); \
	} \
} while (0)

#define CUCHECK(stmt) \
	do { \
	CUresult result = (stmt); \
	ASSERT(CUDA_SUCCESS == result); \
} while (0)

/*----------------------------------------------------------------------------*/

static CUdevice cuDevice;
static CUcontext cuContext;

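/* Initialize the CUDA driver API, pick device 0, create and bind a context,
 * and allocate a device buffer (the requested size rounded up to a 64KB GPU
 * page); the resulting device pointer is stored in ctx->buf[0]. */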
static int pp_init_gpu(struct pingpong_context *ctx, size_t _size)
{
	const size_t gpu_page_size = 64*1024;
	size_t size = (_size + gpu_page_size - 1) & ~(gpu_page_size - 1);
	printf("initializing CUDA\n");
	CUresult error = cuInit(0);
	if (error != CUDA_SUCCESS) {
		printf("cuInit(0) returned %d\n", error);
		exit(1);
	}

	int deviceCount = 0;
	error = cuDeviceGetCount(&deviceCount);
	if (error != CUDA_SUCCESS) {
		printf("cuDeviceGetCount() returned %d\n", error);
		exit(1);
	}
	/* This function call returns 0 if there are no CUDA capable devices. */
	if (deviceCount == 0) {
		printf("There are no available device(s) that support CUDA\n");
		return 1;
	} else if (deviceCount == 1)
		printf("There is 1 device supporting CUDA\n");
	else
		printf("There are %d devices supporting CUDA, picking first...\n", deviceCount);

	int devID = 0;

	/* pick up device with zero ordinal (default, or devID) */
	CUCHECK(cuDeviceGet(&cuDevice, devID));

	char name[128];
	CUCHECK(cuDeviceGetName(name, sizeof(name), devID));
	printf("[pid = %d, dev = %d] device name = [%s]\n", getpid(), cuDevice, name);
	printf("creating CUDA Ctx\n");

	/* Create context */
	error = cuCtxCreate(&cuContext, CU_CTX_MAP_HOST, cuDevice);
	if (error != CUDA_SUCCESS) {
		printf("cuCtxCreate() error=%d\n", error);
		return 1;
	}

	printf("making it the current CUDA Ctx\n");
	error = cuCtxSetCurrent(cuContext);
	if (error != CUDA_SUCCESS) {
		printf("cuCtxSetCurrent() error=%d\n", error);
		return 1;
	}

	printf("cuMemAlloc() of a %zd bytes GPU buffer\n", size);
	CUdeviceptr d_A;
	error = cuMemAlloc(&d_A, size);
	if (error != CUDA_SUCCESS) {
		printf("cuMemAlloc error=%d\n", error);
		return 1;
	}
	printf("allocated GPU buffer address at %016llx pointer=%p\n", d_A,
	       (void *) d_A);
	ctx->buf[0] = (void*)d_A;

	return 0;
}

static int pp_free_gpu(struct pingpong_context *ctx)
{
	int ret = 0;
	CUdeviceptr d_A = (CUdeviceptr) ctx->buf[0];

	printf("deallocating RX GPU buffer\n");
	cuMemFree(d_A);
	d_A = 0;

	printf("destroying current CUDA Ctx\n");
	CUCHECK(cuCtxDestroy(cuContext));

	return ret;
}
#endif

/* static int pp_init_mmap(struct pingpong_context *ctx, size_t size, */
			/* const char *fname, unsigned long offset) */
/* { */
	/* int fd = open(fname, O_RDWR); */
	/* if (fd < 0) { */
		/* printf("Unable to open '%s': %s\n", fname, strerror(errno)); */
		/* return 1; */
	/* } */

	/* ctx->buf[0] = mmap(NULL, size, PROT_WRITE | PROT_READ, MAP_SHARED, fd, offset); */
	/* close(fd); */

	/* if (ctx->buf[0] == MAP_FAILED) { */
		/* printf("Unable to mmap '%s': %s\n", fname, strerror(errno)); */
		/* return 1; */
	/* } */

	/* printf("allocated mmap buffer of size %zd at %p\n", size, ctx->buf[0]); */

	/* return 0; */
/* } */

/* static int pp_free_mmap(struct pingpong_context *ctx) */
/* { */
	/* munmap(ctx->buf[0], ctx->buff_size); */
	/* return 0; */
/* } */

#ifdef HAVE_VERBS_EXP
static void get_verbs_pointers(struct pingpong_context *ctx)
{
	ctx->exp_post_send_func_pointer = ibv_exp_get_provider_func(ctx->context,IBV_EXP_POST_SEND_FUNC);
	if (!ctx->exp_post_send_func_pointer) {
		fprintf(stderr, "Couldn't get ibv_exp_post_send pointer\n");
		ctx->exp_post_send_func_pointer = &ibv_exp_post_send;
	}
	ctx->post_send_func_pointer = ibv_exp_get_provider_func(ctx->context,IBV_POST_SEND_FUNC);
	if (!ctx->post_send_func_pointer) {
		fprintf(stderr, "Couldn't get ibv_post_send pointer\n");
		ctx->post_send_func_pointer = &ibv_post_send;
	}
	ctx->poll_cq_func_pointer = ibv_exp_get_provider_func(ctx->context,IBV_POLL_CQ_FUNC);
	if (!ctx->poll_cq_func_pointer) {
		fprintf(stderr, "Couldn't get ibv_poll_cq pointer\n");
	}
}
#endif

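/* Small helpers for parsing the whitespace-separated fields of a /proc/stat
 * line: next_word_string() copies one word into 'output' and returns the index
 * just past it, get_n_word_string() repeats that 'iters' times (the last word
 * read is left in 'output'), and compress_spaces() collapses whitespace runs
 * (safe to call with str == dst, since dst never runs ahead of str). */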
static int next_word_string(char* input, char* output, int from_index)
{
	int i = from_index;
	int j = 0;

	/* copy one word; stop at a space or at the end of the string */
	while (input[i] != ' ' && input[i] != '\0') {
		output[j] = input[i];
		j++; i++;
	}

	output[j] = 0;
	return i+1;
}

static int get_n_word_string(char *input, char *output,int from_index, int iters)
{
	for (;iters > 0; iters--) {
		from_index = next_word_string(input,output,from_index);
	}

	return from_index;
}
static void compress_spaces(char *str, char *dst)
{
	for (; *str; ++str) {
		*dst++ = *str;

		if (isspace(*str)) {
			/* skip the rest of this whitespace run */
			do {
				++str;
			} while (isspace(*str));

			--str;
		}
	}

	*dst = 0;
}

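/* Sample CPU utilization: read the aggregate "cpu" line of /proc/stat
 * (typically "cpu  user nice system idle iowait irq softirq ...", in jiffies)
 * and store the user and idle counters in slot stat_index-1 of
 * duration_param->cpu_util_data. */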
static void get_cpu_stats(struct perftest_parameters *duration_param,int stat_index)
{
	char* file_name = CPU_UTILITY;
	FILE *fp;
	char line[100];
	char tmp[100];
	int index = 0;

	fp = fopen(file_name, "r");
	if (fp != NULL) {
		if (fgets(line,100,fp) != NULL) {
			compress_spaces(line,line);
			index = get_n_word_string(line,tmp,index,2); /* skip "cpu", read user time */
			duration_param->cpu_util_data.ustat[stat_index-1] = atoll(tmp);

			index = get_n_word_string(line,tmp,index,3); /* skip nice and system, read idle time */
			duration_param->cpu_util_data.idle[stat_index-1] = atoll(tmp);
		}
		fclose(fp);
	}
}

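/* Returns SUCCESS when the device reports IBV_EXP_DEVICE_MR_ALLOCATE, i.e. the
 * driver can allocate (contiguous) memory itself during MR registration, and
 * FAILURE otherwise. */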
#ifdef HAVE_VERBS_EXP
static int check_for_contig_pages_support(struct ibv_context *context)
{
	int answer;
	struct ibv_exp_device_attr attr;
	memset(&attr,0,sizeof attr);
	if (ibv_exp_query_device(context,&attr)) {
		fprintf(stderr, "Couldn't get device attributes\n");
		return FAILURE;
	}
	answer = (attr.exp_device_cap_flags & IBV_EXP_DEVICE_MR_ALLOCATE) ? SUCCESS : FAILURE;
	return answer;
}
#endif
#ifdef HAVE_XRCD
/******************************************************************************
 *
 ******************************************************************************/
static int ctx_xrcd_create(struct pingpong_context *ctx,struct perftest_parameters *user_param)
{
	char *tmp_file_name;
	struct ibv_xrcd_init_attr xrcd_init_attr;

	memset(&xrcd_init_attr , 0 , sizeof xrcd_init_attr);

	tmp_file_name = (user_param->machine == SERVER) ? SERVER_FD : CLIENT_FD;

	ctx->fd = open(tmp_file_name, O_RDONLY | O_CREAT, S_IRUSR | S_IRGRP);
	if (ctx->fd < 0) {
		fprintf(stderr,"Error opening file %s errno: %s", tmp_file_name,strerror(errno));
		return FAILURE;
	}

	xrcd_init_attr.comp_mask = IBV_XRCD_INIT_ATTR_FD | IBV_XRCD_INIT_ATTR_OFLAGS;
	xrcd_init_attr.fd = ctx->fd;
	xrcd_init_attr.oflags = O_CREAT ;

	ctx->xrc_domain = ibv_open_xrcd(ctx->context,&xrcd_init_attr);
	if (ctx->xrc_domain == NULL) {
		fprintf(stderr,"Error opening XRC domain\n");
		return FAILURE;
	}
	return 0;
}

/******************************************************************************
 *
 ******************************************************************************/
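/* Create the XRC SRQ: bound to the XRC domain and the PD, attached to the
 * receive CQ for SEND tests (send CQ otherwise), with rx_depth outstanding
 * receive WRs. */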
static int ctx_xrc_srq_create(struct pingpong_context *ctx,
			      struct perftest_parameters *user_param)
{
	struct ibv_srq_init_attr_ex srq_init_attr;

	memset(&srq_init_attr, 0, sizeof(srq_init_attr));

	srq_init_attr.attr.max_wr = user_param->rx_depth;
	srq_init_attr.attr.max_sge = 1;
	srq_init_attr.comp_mask = IBV_SRQ_INIT_ATTR_TYPE | IBV_SRQ_INIT_ATTR_XRCD | IBV_SRQ_INIT_ATTR_CQ | IBV_SRQ_INIT_ATTR_PD;
	srq_init_attr.srq_type = IBV_SRQT_XRC;
	srq_init_attr.xrcd = ctx->xrc_domain;

	if(user_param->verb == SEND)
		srq_init_attr.cq = ctx->recv_cq;
	else
		srq_init_attr.cq = ctx->send_cq;

	srq_init_attr.pd = ctx->pd;
	ctx->srq = ibv_create_srq_ex(ctx->context, &srq_init_attr);
	if (ctx->srq == NULL) {
		fprintf(stderr, "Couldn't open XRC SRQ\n");
		return FAILURE;
	}

	return 0;
}

/******************************************************************************
 *
 ******************************************************************************/
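/* Create one XRC QP. The receiving side (the server in a unidirectional test,
 * or the upper half of the QPs in duplex/latency tests) gets an
 * IBV_QPT_XRC_RECV QP bound to the XRC domain; every other QP is an
 * IBV_QPT_XRC_SEND QP bound to the PD and the send CQ. */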
static struct ibv_qp *ctx_xrc_qp_create(struct pingpong_context *ctx,
					struct perftest_parameters *user_param,
					int qp_index)
{
	struct ibv_qp* qp = NULL;
	int num_of_qps = user_param->num_of_qps / 2;

	#ifdef HAVE_VERBS_EXP
	struct ibv_exp_qp_init_attr qp_init_attr;
	#else
	struct ibv_qp_init_attr_ex qp_init_attr;
	#endif

	memset(&qp_init_attr, 0, sizeof(qp_init_attr));

	if ( (!(user_param->duplex || user_param->tst == LAT) && (user_param->machine == SERVER) )
			|| ((user_param->duplex || user_param->tst == LAT) && (qp_index >= num_of_qps))) {
		qp_init_attr.qp_type = IBV_QPT_XRC_RECV;
		qp_init_attr.comp_mask = IBV_QP_INIT_ATTR_XRCD;
		qp_init_attr.xrcd = ctx->xrc_domain;
		qp_init_attr.cap.max_recv_wr = user_param->rx_depth;
		qp_init_attr.cap.max_recv_sge = 1;
		qp_init_attr.cap.max_inline_data = user_param->inline_size;

	} else {
		qp_init_attr.qp_type = IBV_QPT_XRC_SEND;
		qp_init_attr.send_cq = ctx->send_cq;
		qp_init_attr.cap.max_send_wr = user_param->tx_depth;
		qp_init_attr.cap.max_send_sge = 1;
		qp_init_attr.comp_mask = IBV_QP_INIT_ATTR_PD;
		qp_init_attr.pd = ctx->pd;
		qp_init_attr.cap.max_inline_data = user_param->inline_size;
	}

	#ifdef HAVE_ACCL_VERBS
	if (user_param->use_res_domain) {
		qp_init_attr.comp_mask |= IBV_EXP_QP_INIT_ATTR_RES_DOMAIN;
		qp_init_attr.res_domain = ctx->res_domain;
	}
	#endif

	#ifdef HAVE_VERBS_EXP
	qp = ibv_exp_create_qp(ctx->context, &qp_init_attr);
	#else
	qp = ibv_create_qp_ex(ctx->context, &qp_init_attr);
	#endif

	return qp;
}
#endif

#ifdef HAVE_DC
/******************************************************************************
 *
 ******************************************************************************/
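/* Create a DC target (DCT): verify that the device reports DC transport
 * support and the DC resource limits, create the DCT on the port chosen for
 * this index with the user's dc_key, then query it back and check that it is
 * ACTIVE and still carries the same key. */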
static int ctx_dc_tgt_create(struct pingpong_context *ctx,struct perftest_parameters *user_param,int dct_index)
{
	struct ibv_exp_device_attr dattr;
	int err;
	int num_of_qps = user_param->num_of_qps;
	int num_of_qps_per_port = user_param->num_of_qps / 2;
	int port_num;

	memset(&dattr,0,sizeof(struct ibv_exp_device_attr));

	/* in dc with bidirectional,
	 * there are send qps and recv qps. the actual number of send/recv qps
	 * is num_of_qps / 2.
	 */
	if (user_param->duplex || user_param->tst == LAT) {
		num_of_qps /= 2;
		num_of_qps_per_port = num_of_qps / 2;
	}

	/* first half of qps are for ib_port and second half are for ib_port2
	 * in dc with bidirectional, the first half of qps are dc_ini qps and
	 * the second half are dc_tgts . the first half of the send/recv qps
	 * are for ib_port1 and the second half are for ib_port2
	 */
	if (user_param->dualport == ON && (dct_index % num_of_qps >= num_of_qps_per_port))
		port_num = user_param->ib_port2;
	else
		port_num = user_param->ib_port;

	dattr.comp_mask = IBV_EXP_DEVICE_ATTR_EXP_CAP_FLAGS |
		IBV_EXP_DEVICE_DC_RD_REQ |
		IBV_EXP_DEVICE_DC_RD_RES;

	err = ibv_exp_query_device(ctx->context, &dattr);
	if (err) {
		printf("couldn't query device extended attributes\n");
		return -1;
	} else {
		if (!(dattr.comp_mask & IBV_EXP_DEVICE_ATTR_EXP_CAP_FLAGS)) {
			printf("no extended capability flgas\n");
			return -1;
		}
		if (!(dattr.exp_device_cap_flags & IBV_EXP_DEVICE_DC_TRANSPORT)) {
			printf("DC transport not enabled\n");
			return -1;
		}

		if (!(dattr.comp_mask & IBV_EXP_DEVICE_DC_RD_REQ)) {
			printf("no report on max requestor rdma/atomic resources\n");
			return -1;
		}

		if (!(dattr.comp_mask & IBV_EXP_DEVICE_DC_RD_RES)) {
			printf("no report on max responder rdma/atomic resources\n");
			return -1;
		}
	}

	struct ibv_exp_dct_init_attr dctattr = {
		.pd = ctx->pd,
		.cq = (user_param->verb == SEND && (user_param->duplex || user_param->tst == LAT)) ? ctx->recv_cq : ctx->send_cq,
		.srq = ctx->srq,
		.dc_key = user_param->dct_key,
		.port = port_num,
		.access_flags = IBV_ACCESS_REMOTE_WRITE,
		.min_rnr_timer = 2,
		.tclass = 0,
		.flow_label = 0,
		.mtu = user_param->curr_mtu,
		.pkey_index = user_param->pkey_index,
		.gid_index = user_param->gid_index,
		.hop_limit = 1,
		.inline_size = user_param->inline_size,
	};

	ctx->dct[dct_index] = ibv_exp_create_dct(ctx->context, &dctattr);
	if (!ctx->dct[dct_index]) {
		printf("create dct failed\n");
		return FAILURE;
	}

	struct ibv_exp_dct_attr dcqattr;
	memset(&dcqattr,0,sizeof(struct ibv_exp_dct_attr));

	err = ibv_exp_query_dct(ctx->dct[dct_index], &dcqattr);
	if (err) {
		printf("query dct failed\n");
		return FAILURE;
	} else if (dcqattr.dc_key != user_param->dct_key) {
		printf("queried dckry (0x%llx) is different then provided at create (0x%llx)\n",
				(unsigned long long)dcqattr.dc_key,
				(unsigned long long)user_param->dct_key);
		return FAILURE;
	} else if (dcqattr.state != IBV_EXP_DCT_STATE_ACTIVE) {
		printf("state is not active %d\n", dcqattr.state);
		return FAILURE;
	}

	return 0;
}
#endif

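/* RSS over raw Ethernet: QP 0 is created as the QP-group parent and the
 * remaining num_of_qps-1 QPs are RX children attached to it. */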
#ifdef HAVE_RSS_EXP
static struct ibv_qp *ctx_rss_eth_qp_create(struct pingpong_context *ctx,struct perftest_parameters *user_param,int qp_index)
{

	struct ibv_exp_qp_init_attr attr;
	struct ibv_qp* qp = NULL;

	memset(&attr, 0, sizeof(struct ibv_exp_qp_init_attr));

	attr.send_cq = ctx->send_cq;
	attr.recv_cq = ctx->recv_cq;
	attr.cap.max_send_wr = user_param->tx_depth;
	attr.cap.max_send_sge = MAX_SEND_SGE;
	attr.cap.max_inline_data = user_param->inline_size;
	attr.cap.max_recv_wr = user_param->rx_depth;
	attr.cap.max_recv_sge = MAX_RECV_SGE;
	attr.qp_type = IBV_QPT_RAW_PACKET;
	attr.comp_mask = IBV_EXP_QP_INIT_ATTR_PD | IBV_EXP_QP_INIT_ATTR_QPG;
	attr.pd = ctx->pd;

	if (qp_index == 0) { /* rss parent */
		#ifdef HAVE_VERBS_EXP
		attr.qpg.qpg_type = IBV_EXP_QPG_PARENT;
		#else
		attr.qpg.qpg_type = IBV_QPG_PARENT;
		#endif
		attr.qpg.qpg_parent = NULL;
		attr.qpg.parent_attrib.tss_child_count = 0;
		attr.qpg.parent_attrib.rss_child_count = user_param->num_of_qps - 1;
	} else { /* rss children */
		#ifdef HAVE_VERBS_EXP
		attr.qpg.qpg_type = IBV_EXP_QPG_CHILD_RX;
		#else
		attr.qpg.qpg_type = IBV_QPG_CHILD_RX;
		#endif
		attr.qpg.qpg_parent = ctx->qp[0];
	}
	qp = ibv_exp_create_qp(ctx->context,&attr);


	return qp;
}
#endif

/******************************************************************************
 *
 ******************************************************************************/
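/* Build the "<port>" service string and resolve servername:port with
 * getaddrinfo(); the result list is returned through *res. */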
int check_add_port(char **service,int port,
		const char *servername,
		struct addrinfo *hints,
		struct addrinfo **res)
{
	int number;

	if (asprintf(service,"%d", port) < 0) {
		return FAILURE;
	}

	number = getaddrinfo(servername,*service,hints,res);

	if (number != 0) {
		fprintf(stderr, "%s for %s:%d\n", gai_strerror(number), servername, port);
		return FAILURE;
	}

	return SUCCESS;
}

/******************************************************************************
 *
 ******************************************************************************/
struct ibv_device* ctx_find_dev(const char *ib_devname)
{
	int num_of_device;
	struct ibv_device **dev_list;
	struct ibv_device *ib_dev = NULL;

	dev_list = ibv_get_device_list(&num_of_device);

	if (!dev_list || num_of_device <= 0) {
		fprintf(stderr," No IB devices detected\n");
		fprintf(stderr," If a device exists, check that the driver is up\n");
		return NULL;
	}

	if (!ib_devname) {
		ib_dev = dev_list[0];
		if (!ib_dev) {
			fprintf(stderr, "No IB devices found\n");
			exit (1);
		}
	} else {
		for (; (ib_dev = *dev_list); ++dev_list)
			if (!strcmp(ibv_get_device_name(ib_dev), ib_devname))
				break;
		if (!ib_dev)
			fprintf(stderr, "IB device %s not found\n", ib_devname);
	}
	return ib_dev;
}

/******************************************************************************
 *
 ******************************************************************************/
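/* Allocate the host-side bookkeeping for the test: per-QP arrays of QPs, MRs,
 * buffers and AHs, the timestamp arrays, the SGE/WR arrays, and compute
 * ctx->buff_size (max of message size and cycle buffer, cache-line aligned,
 * doubled for send+receive, scaled by the number of QPs and flows). */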
void alloc_ctx(struct pingpong_context *ctx,struct perftest_parameters *user_param)
{
	uint64_t tarr_size;
	int num_of_qps_factor;
	ctx->cycle_buffer = user_param->cycle_buffer;
	ctx->cache_line_size = user_param->cache_line_size;

	ALLOCATE(user_param->port_by_qp, uint64_t, user_param->num_of_qps);

	tarr_size = (user_param->noPeak) ? 1 : user_param->iters*user_param->num_of_qps;
	ALLOCATE(user_param->tposted, cycles_t, tarr_size);
	memset(user_param->tposted, 0, sizeof(cycles_t)*tarr_size);
	if ((user_param->tst == LAT || user_param->tst == FS_RATE) && user_param->test_type == DURATION)
		ALLOCATE(user_param->tcompleted, cycles_t, 1);

	ALLOCATE(ctx->qp, struct ibv_qp*, user_param->num_of_qps);
	ALLOCATE(ctx->mr, struct ibv_mr*, user_param->num_of_qps);
	ALLOCATE(ctx->buf, void* , user_param->num_of_qps);

	#ifdef HAVE_ACCL_VERBS
	ALLOCATE(ctx->qp_burst_family, struct ibv_exp_qp_burst_family*, user_param->num_of_qps);
	#endif

	#ifdef HAVE_DC
	if (user_param->connection_type == DC) {
		#ifdef HAVE_VERBS_EXP
		ALLOCATE(ctx->dct, struct ibv_exp_dct*, user_param->num_of_qps);
		#else
		ALLOCATE(ctx->dct, struct ibv_dct*, user_param->num_of_qps);
		#endif
	}
	#endif

	if ((user_param->tst == BW || user_param->tst == LAT_BY_BW) && (user_param->machine == CLIENT || user_param->duplex)) {

		ALLOCATE(user_param->tcompleted,cycles_t,tarr_size);
		memset(user_param->tcompleted, 0, sizeof(cycles_t)*tarr_size);
		ALLOCATE(ctx->my_addr,uint64_t,user_param->num_of_qps);
		ALLOCATE(ctx->rem_addr,uint64_t,user_param->num_of_qps);
		ALLOCATE(ctx->scnt,uint64_t,user_param->num_of_qps);
		ALLOCATE(ctx->ccnt,uint64_t,user_param->num_of_qps);
		memset(ctx->scnt, 0, user_param->num_of_qps * sizeof (uint64_t));
		memset(ctx->ccnt, 0, user_param->num_of_qps * sizeof (uint64_t));

	} else if ((user_param->tst == BW || user_param->tst == LAT_BY_BW)
		   && user_param->verb == SEND && user_param->machine == SERVER) {

		ALLOCATE(ctx->my_addr, uint64_t, user_param->num_of_qps);
		ALLOCATE(user_param->tcompleted, cycles_t, 1);
	} else if (user_param->tst == FS_RATE && user_param->test_type == ITERATIONS) {
		ALLOCATE(user_param->tcompleted, cycles_t, tarr_size);
		memset(user_param->tcompleted, 0, sizeof(cycles_t) * tarr_size);
	}

	if (user_param->machine == CLIENT || user_param->tst == LAT || user_param->duplex) {

		ALLOCATE(ctx->sge_list, struct ibv_sge,user_param->num_of_qps * user_param->post_list);
		#ifdef HAVE_VERBS_EXP
		ALLOCATE(ctx->exp_wr, struct ibv_exp_send_wr, user_param->num_of_qps * user_param->post_list);
		#endif
		ALLOCATE(ctx->wr, struct ibv_send_wr, user_param->num_of_qps * user_param->post_list);
		if ((user_param->verb == SEND && user_param->connection_type == UD ) || user_param->connection_type == DC) {
			ALLOCATE(ctx->ah, struct ibv_ah*, user_param->num_of_qps);
		}
	}

	if (user_param->verb == SEND && (user_param->tst == LAT || user_param->machine == SERVER || user_param->duplex)) {

		ALLOCATE(ctx->recv_sge_list,struct ibv_sge,user_param->num_of_qps);
		ALLOCATE(ctx->rwr,struct ibv_recv_wr,user_param->num_of_qps);
		ALLOCATE(ctx->rx_buffer_addr,uint64_t,user_param->num_of_qps);
	}
	if (user_param->mac_fwd == ON )
		ctx->cycle_buffer = user_param->size * user_param->rx_depth;

	ctx->size = user_param->size;

	num_of_qps_factor = (user_param->mr_per_qp) ? 1 : user_param->num_of_qps;

	/* buff_size holds the maximum of the message size and the cycle buffer,
	* aligned to a cache line,
	* multiplied by 2 for send and receive,
	* and scaled by the number of flows (and by the number of QPs when they share one MR) */
	ctx->buff_size = INC(BUFF_SIZE(ctx->size, ctx->cycle_buffer),
				 ctx->cache_line_size) * 2 * num_of_qps_factor * user_param->flows;
	ctx->send_qp_buff_size = ctx->buff_size / num_of_qps_factor / 2;
	ctx->flow_buff_size = ctx->send_qp_buff_size / user_param->flows;
	user_param->buff_size = ctx->buff_size;
	if (user_param->connection_type == UD)
		ctx->buff_size += ctx->cache_line_size;
}

/******************************************************************************
 *
 ******************************************************************************/
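/* Tear down everything created by ctx_init()/alloc_ctx(), roughly in reverse
 * order of creation: AHs/DCTs/QPs, SRQ, XRC domain, CQs, MRs, PD, completion
 * channel, the device context, and finally the data buffers and host arrays. */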
int destroy_ctx(struct pingpong_context *ctx,
		struct perftest_parameters *user_param)
{
	int i, first, dereg_counter;
	int test_result = 0;
	int num_of_qps = user_param->num_of_qps;

	if (user_param->wait_destroy) {
		printf(" Waiting %u seconds before releasing resources...\n",
		       user_param->wait_destroy);
		sleep(user_param->wait_destroy);
	}

	dereg_counter = (user_param->mr_per_qp) ? user_param->num_of_qps : 1;

	/* in dc with bidirectional,
	 * there are send qps and recv qps. the actual number of send/recv qps
	 * is num_of_qps / 2.
	 */
	if (user_param->duplex || user_param->tst == LAT) {
		num_of_qps /= 2;
	}

	/* RSS parent should be last */
	if (user_param->use_rss)
		first = 1;
	else
		first = 0;
	for (i = first; i < user_param->num_of_qps; i++) {

		if (( (user_param->connection_type == DC && !((!(user_param->duplex || user_param->tst == LAT) && (user_param->machine == SERVER) )
							|| ((user_param->duplex || user_param->tst == LAT) && (i >= num_of_qps)))) ||
					user_param->connection_type == UD) && (user_param->tst == LAT || user_param->machine == CLIENT || user_param->duplex)) {
			if (ibv_destroy_ah(ctx->ah[i])) {
				fprintf(stderr, "Failed to destroy AH\n");
				test_result = 1;
			}
		}
		#ifdef HAVE_DC
		if (user_param->connection_type == DC && ((!(user_param->duplex || user_param->tst == LAT)
						&& (user_param->machine == SERVER)) || ((user_param->duplex || user_param->tst == LAT) && (i >= num_of_qps)))) {
			if (ibv_exp_destroy_dct(ctx->dct[i])) {
				fprintf(stderr, "Failed to destroy dct\n");
				test_result = 1;
			}
			if ( i == user_param->num_of_qps -1 )
				return test_result;
		} else
		#endif
		if (ibv_destroy_qp(ctx->qp[i])) {
			fprintf(stderr, "Couldn't destroy QP - %s\n", strerror(errno));
			test_result = 1;
		}
	}

	if (user_param->use_rss) {
		if (user_param->connection_type == UD && (user_param->tst == LAT || user_param->machine == CLIENT || user_param->duplex)) {
			if (ibv_destroy_ah(ctx->ah[0])) {
				fprintf(stderr, "Failed to destroy AH\n");
				test_result = 1;
			}
		}

		if (ibv_destroy_qp(ctx->qp[0])) {
			fprintf(stderr, "Couldn't destroy QP - %s\n", strerror(errno));
			test_result = 1;
		}
	}
	if (user_param->srq_exists) {
		if (ibv_destroy_srq(ctx->srq)) {
			fprintf(stderr, "Couldn't destroy SRQ\n");
			test_result = 1;
		}
	}

	#ifdef HAVE_XRCD
	if (user_param->use_xrc) {

		if (ibv_close_xrcd(ctx->xrc_domain)) {
			fprintf(stderr, "Couldn't destroy XRC domain\n");
			test_result = 1;
		}

		if (ctx->fd >= 0 && close(ctx->fd)) {
			fprintf(stderr, "Couldn't close the file for the XRC Domain\n");
			test_result = 1;
		}

	}
	#endif

	if (ibv_destroy_cq(ctx->send_cq)) {
		fprintf(stderr, "Failed to destroy CQ - %s\n", strerror(errno));
		test_result = 1;
	}

	if (user_param->verb == SEND && (user_param->tst == LAT || user_param->machine == SERVER || user_param->duplex || (ctx->channel)) ) {
		if (!(user_param->connection_type == DC && user_param->machine == SERVER)) {
			if (ibv_destroy_cq(ctx->recv_cq)) {
				fprintf(stderr, "Failed to destroy CQ - %s\n", strerror(errno));
				test_result = 1;
			}
		}
	}

	for (i = 0; i < dereg_counter; i++) {
		if (ibv_dereg_mr(ctx->mr[i])) {
			fprintf(stderr, "Failed to deregister MR #%d\n", i+1);
			test_result = 1;
		}
	}

	if (ibv_dealloc_pd(ctx->pd)) {
		fprintf(stderr, "Failed to deallocate PD - %s\n", strerror(errno));
		test_result = 1;
	}

	if (ctx->channel) {
		if (ibv_destroy_comp_channel(ctx->channel)) {
			fprintf(stderr, "Failed to close event channel\n");
			test_result = 1;
		}
	}
	if (ibv_close_device(ctx->context)) {
		fprintf(stderr, "Failed to close device context\n");
		test_result = 1;
	}

	#ifdef HAVE_CUDA
	if (user_param->use_cuda) {
		pp_free_gpu(ctx);
	}
	else
	#endif
	/* if (user_param->mmap_file != NULL) { */
		/* pp_free_mmap(ctx); */
	/* } else if (ctx->is_contig_supported == FAILURE) { */
	if (ctx->is_contig_supported == FAILURE) {
		for (i = 0; i < dereg_counter; i++) {
			if (user_param->use_hugepages) {
				shmdt(ctx->buf[i]);
			} else {
				free(ctx->buf[i]);
			}
		}
	}
	free(ctx->qp);

	if ((user_param->tst == BW || user_param->tst == LAT_BY_BW ) && (user_param->machine == CLIENT || user_param->duplex)) {

		free(user_param->tposted);
		free(user_param->tcompleted);
		free(ctx->my_addr);
		free(ctx->rem_addr);
		free(ctx->scnt);
		free(ctx->ccnt);
	}
	else if ((user_param->tst == BW || user_param->tst == LAT_BY_BW ) && user_param->verb == SEND && user_param->machine == SERVER) {

		free(user_param->tposted);
		free(user_param->tcompleted);
		free(ctx->my_addr);
	}
	if (user_param->machine == CLIENT || user_param->tst == LAT || user_param->duplex) {

		free(ctx->sge_list);
		free(ctx->wr);
		#ifdef HAVE_VERBS_EXP
		free(ctx->exp_wr);
		#endif
	}

	if (user_param->verb == SEND && (user_param->tst == LAT || user_param->machine == SERVER || user_param->duplex)) {

		free(ctx->rx_buffer_addr);
		free(ctx->recv_sge_list);
		free(ctx->rwr);
	}
	return test_result;
}

/******************************************************************************
 *
 ******************************************************************************/
#ifdef HAVE_VERBS_EXP
static int check_inline_recv_support(struct pingpong_context *ctx,
					struct perftest_parameters *user_param)
{
	struct ibv_exp_device_attr dattr;
	int ret = 0;

	memset(&dattr, 0, sizeof(dattr));

	dattr.comp_mask |= IBV_EXP_DEVICE_ATTR_INLINE_RECV_SZ;
	ret = ibv_exp_query_device(ctx->context, &dattr);
	if (ret) {
		printf(" Couldn't query device for inline-receive capabilities.\n");
	} else if (!(dattr.comp_mask & IBV_EXP_DEVICE_ATTR_INLINE_RECV_SZ)) {
		printf(" Inline-receive not supported by driver.\n");
		ret = 1;
	} else if (dattr.inline_recv_sz < user_param->inline_recv_size) {
		printf(" Max inline-receive(%d) < Requested inline-receive(%d).\n",
			dattr.inline_recv_sz, user_param->inline_recv_size);
	}

	return ret;

}
#endif

/******************************************************************************
 *
 ******************************************************************************/
#if defined HAVE_EX_ODP || defined HAVE_EXP_ODP
static int check_odp_support(struct pingpong_context *ctx)
{
	#ifdef HAVE_EX_ODP
	struct ibv_device_attr_ex dattr;
	int odp_support_send = IBV_ODP_SUPPORT_SEND;
	int odp_support_recv = IBV_ODP_SUPPORT_RECV;
	int ret = ibv_query_device_ex(ctx->context, NULL, &dattr);
	#elif defined HAVE_EXP_ODP
	struct ibv_exp_device_attr dattr;
	int ret = ibv_exp_query_device(ctx->context, &dattr);
	int odp_support_send = IBV_EXP_ODP_SUPPORT_SEND;
	int odp_support_recv = IBV_EXP_ODP_SUPPORT_RECV;
	#endif
	if (ret) {
		fprintf(stderr, " Couldn't query device for on-demand paging capabilities.\n");
		return 0;
	} else if (!(dattr.odp_caps.per_transport_caps.rc_odp_caps & odp_support_send)) {
		fprintf(stderr, " Send is not supported for RC transport.\n");
		return 0;
	} else if (!(dattr.odp_caps.per_transport_caps.rc_odp_caps & odp_support_recv)) {
		fprintf(stderr, " Receive is not supported for RC transport.\n");
		return 0;
	}
	return 1;
}
#endif

/******************************************************************************
 *
 ******************************************************************************/
int create_reg_cqs(struct pingpong_context *ctx,
		   struct perftest_parameters *user_param,
		   int tx_buffer_depth, int need_recv_cq)
{
	ctx->send_cq = ibv_create_cq(ctx->context,tx_buffer_depth *
					user_param->num_of_qps, NULL, ctx->channel, user_param->eq_num);
	if (!ctx->send_cq) {
		fprintf(stderr, "Couldn't create CQ\n");
		return FAILURE;
	}

	if (need_recv_cq) {
		ctx->recv_cq = ibv_create_cq(ctx->context,user_param->rx_depth *
						user_param->num_of_qps, NULL, ctx->channel, user_param->eq_num);
		if (!ctx->recv_cq) {
			fprintf(stderr, "Couldn't create a receiver CQ\n");
			return FAILURE;
		}
	}

	return SUCCESS;
}

/******************************************************************************
 *
 ******************************************************************************/
#ifdef HAVE_VERBS_EXP
int create_exp_cqs(struct pingpong_context *ctx,
		   struct perftest_parameters *user_param,
		   int tx_buffer_depth, int need_recv_cq)
{
	struct ibv_exp_cq_init_attr attr;
	#ifdef HAVE_ACCL_VERBS
	enum ibv_exp_query_intf_status intf_status;
	struct ibv_exp_query_intf_params intf_params;
	#endif

	memset(&attr, 0, sizeof(attr));

	#ifdef HAVE_ACCL_VERBS
	if (user_param->use_res_domain) {
		attr.res_domain = ctx->res_domain;
	}

	if (user_param->verb_type == ACCL_INTF) {
		memset(&intf_params, 0, sizeof(intf_params));
		intf_params.intf_scope = IBV_EXP_INTF_GLOBAL;
		intf_params.intf = IBV_EXP_INTF_CQ;
	}
	#endif

	ctx->send_cq = ibv_exp_create_cq(ctx->context, tx_buffer_depth *
						user_param->num_of_qps, NULL,
						ctx->channel, 0, &attr);
	if (!ctx->send_cq) {
		fprintf(stderr, "Couldn't create exp CQ\n");
		return FAILURE;
	}

	if (need_recv_cq) {
		ctx->recv_cq = ibv_create_cq(ctx->context,user_param->rx_depth *
						user_param->num_of_qps,NULL,ctx->channel,0);
		if (!ctx->recv_cq) {
			fprintf(stderr, "Couldn't create a receiver CQ\n");
			return FAILURE;
		}
	}

	#ifdef HAVE_ACCL_VERBS
	if (user_param->verb_type == ACCL_INTF) {
		/* Check CQ family */
		intf_params.obj = ctx->send_cq;
		ctx->send_cq_family = ibv_exp_query_intf(ctx->context, &intf_params, &intf_status);
		if (need_recv_cq) {
			intf_params.obj = ctx->recv_cq;
			ctx->recv_cq_family = ibv_exp_query_intf(ctx->context, &intf_params, &intf_status);
		}

		if (!ctx->send_cq_family || (need_recv_cq && !ctx->recv_cq_family)) {
			fprintf(stderr, "Couldn't create CQ family.\n");
			return FAILURE;
		}
	}
	#endif

	return SUCCESS;
}
#endif

/******************************************************************************
 *
 ******************************************************************************/
int create_cqs(struct pingpong_context *ctx, struct perftest_parameters *user_param)
{
	int ret;
	int dct_only = 0, need_recv_cq = 0;
	int tx_buffer_depth = user_param->tx_depth;

	if (user_param->connection_type == DC) {
		dct_only = (user_param->machine == SERVER && !(user_param->duplex || user_param->tst == LAT));
	}

	if (dct_only)
		tx_buffer_depth = user_param->rx_depth;

	if ((user_param->connection_type == DC && !dct_only) || (user_param->verb == SEND))
		need_recv_cq = 1;

	#ifdef HAVE_VERBS_EXP
	if (user_param->is_exp_cq)
		ret = create_exp_cqs(ctx, user_param, tx_buffer_depth, need_recv_cq);
	else
	#endif
		ret = create_reg_cqs(ctx, user_param, tx_buffer_depth, need_recv_cq);

	return ret;
}

/******************************************************************************
 *
 ******************************************************************************/
#ifdef HAVE_ACCL_VERBS
struct ibv_exp_res_domain* create_res_domain(struct pingpong_context *ctx, struct perftest_parameters *user_param)
{
	struct ibv_exp_res_domain_init_attr res_domain_attr;
	struct ibv_exp_device_attr dattr;
	uint32_t req_comp_mask;

	memset(&res_domain_attr, 0, sizeof(res_domain_attr));
	memset(&dattr, 0, sizeof(dattr));

	/* Query device */
	req_comp_mask = IBV_EXP_DEVICE_ATTR_CALC_CAP |
			IBV_EXP_DEVICE_ATTR_EXP_CAP_FLAGS |
			IBV_EXP_DEVICE_ATTR_MAX_CTX_RES_DOMAIN;
	dattr.comp_mask = req_comp_mask;

	if (ibv_exp_query_device(ctx->context, &dattr)) {
		fprintf(stderr, "Couldn't query device capabilities.\n");
		return NULL;
	} else if (dattr.comp_mask != req_comp_mask) {
		fprintf(stderr, "This device does not support resource domain / accelerated verbs.\n");
		return NULL;
	}

	/* Allocate resource domain */
	res_domain_attr.comp_mask = IBV_EXP_RES_DOMAIN_THREAD_MODEL | IBV_EXP_RES_DOMAIN_MSG_MODEL;
	res_domain_attr.thread_model = IBV_EXP_THREAD_SINGLE;
	if (user_param->tst == BW)
		res_domain_attr.msg_model = IBV_EXP_MSG_HIGH_BW;
	else if (user_param->tst == LAT)
		res_domain_attr.msg_model = IBV_EXP_MSG_LOW_LATENCY;
	else
		res_domain_attr.msg_model = IBV_EXP_MSG_DEFAULT;

	return ibv_exp_create_res_domain(ctx->context, &res_domain_attr);
}
#endif

/******************************************************************************
 *
 ******************************************************************************/
int create_single_mr(struct pingpong_context *ctx, struct perftest_parameters *user_param, int qp_index)
{
	int i;
	int flags = IBV_ACCESS_LOCAL_WRITE;

	#ifdef HAVE_VERBS_EXP
	struct ibv_exp_reg_mr_in reg_mr_exp_in;
	uint64_t exp_flags = IBV_EXP_ACCESS_LOCAL_WRITE;
	#endif

	/* ODP */
	#if defined HAVE_EX_ODP || defined HAVE_EXP_ODP
	if (user_param->use_odp) {
		if ( !check_odp_support(ctx) )
			return FAILURE;

		/* ODP does not support contig pages */
		ctx->is_contig_supported = FAILURE;
		#ifdef HAVE_EX_ODP
		flags |= IBV_ACCESS_ON_DEMAND;
		#elif defined HAVE_EXP_ODP
		exp_flags |= IBV_EXP_ACCESS_ON_DEMAND;
		#endif
	}
	#endif

	#ifdef HAVE_CUDA
	if (user_param->use_cuda) {
		ctx->is_contig_supported = FAILURE;
		if(pp_init_gpu(ctx, ctx->buff_size)) {
			fprintf(stderr, "Couldn't allocate work buf.\n");
			return FAILURE;
		}
	}
	#endif

	/* if (user_param->mmap_file != NULL) { */
		/* ctx->buf = memalign(user_param->cycle_buffer, ctx->buff_size); */
		/* if (pp_init_mmap(ctx, ctx->buff_size, user_param->mmap_file, */
					/* user_param->mmap_offset)) */
		/* { */
			/* fprintf(stderr, "Couldn't allocate work buf.\n"); */
			/* return FAILURE; */
		/* } */
	/* } else { */

	/* Allocate the data buffer ourselves when the driver cannot allocate contiguous pages. */
	if (ctx->is_contig_supported == FAILURE) {
		if (user_param->use_hugepages) {
			if (alloc_hugepage_region(ctx) != SUCCESS){
				fprintf(stderr, "Failed to allocate hugepage region.\n");
				return FAILURE;
			}
		} else {
			ctx->buf[qp_index] = memalign(user_param->cycle_buffer, ctx->buff_size);
		}
		if (!ctx->buf[qp_index]) {
			fprintf(stderr, "Couldn't allocate work buf.\n");
			return FAILURE;
		}

		memset(ctx->buf[qp_index], 0, ctx->buff_size);
	} else {
		ctx->buf[qp_index] = NULL;
		#ifdef HAVE_VERBS_EXP
		exp_flags |= IBV_EXP_ACCESS_ALLOCATE_MR;
		#else
		flags |= (1 << 5); /* driver-specific flag asking the provider to allocate the MR memory (contiguous pages) */
		#endif
	}
	/* } */

	if (user_param->verb == WRITE) {
		flags |= IBV_ACCESS_REMOTE_WRITE;
		#ifdef HAVE_VERBS_EXP
		exp_flags |= IBV_EXP_ACCESS_REMOTE_WRITE;
		#endif
	} else if (user_param->verb == READ) {
		flags |= IBV_ACCESS_REMOTE_READ;
		#ifdef HAVE_VERBS_EXP
		exp_flags |= IBV_EXP_ACCESS_REMOTE_READ;
		#endif
		if (user_param->transport_type == IBV_TRANSPORT_IWARP) {
			flags |= IBV_ACCESS_REMOTE_WRITE;
			#ifdef HAVE_VERBS_EXP
			exp_flags |= IBV_EXP_ACCESS_REMOTE_WRITE;
			#endif
		}
	} else if (user_param->verb == ATOMIC) {
		flags |= IBV_ACCESS_REMOTE_ATOMIC;
		#ifdef HAVE_VERBS_EXP
		exp_flags |= IBV_EXP_ACCESS_REMOTE_ATOMIC;
		#endif
	}

	/* Allocating Memory region and assigning our buffer to it. */
	#ifdef HAVE_VERBS_EXP
	if (ctx->is_contig_supported == SUCCESS || user_param->use_odp) {
		reg_mr_exp_in.pd = ctx->pd;
		reg_mr_exp_in.addr = ctx->buf[qp_index];
		reg_mr_exp_in.length = ctx->buff_size;
		reg_mr_exp_in.exp_access = exp_flags;
		reg_mr_exp_in.comp_mask = 0;

		ctx->mr[qp_index] = ibv_exp_reg_mr(&reg_mr_exp_in);
	}
	else
		ctx->mr[qp_index] = ibv_reg_mr(ctx->pd, ctx->buf[qp_index], ctx->buff_size, flags);
	#else
	ctx->mr[qp_index] = ibv_reg_mr(ctx->pd, ctx->buf[qp_index], ctx->buff_size, flags);
	#endif

	if (!ctx->mr[qp_index]) {
		fprintf(stderr, "Couldn't allocate MR\n");
		return FAILURE;
	}

	if (ctx->is_contig_supported == SUCCESS)
		ctx->buf[qp_index] = ctx->mr[qp_index]->addr;


	/* Initialize the buffer with random data, except in the WRITE latency test where it is zeroed */
	if (!user_param->use_cuda) {
		srand(time(NULL));
		if (user_param->verb == WRITE && user_param->tst == LAT) {
			memset(ctx->buf[qp_index], 0, ctx->buff_size);
		} else {
			for (i = 0; i < ctx->buff_size; i++) {
				((char*)ctx->buf[qp_index])[i] = (char)rand();
			}
		}
	}

	return SUCCESS;
}

/******************************************************************************
 *
 ******************************************************************************/
int create_mr(struct pingpong_context *ctx, struct perftest_parameters *user_param)
{
	int i;

	/* create first MR */
	if (create_single_mr(ctx, user_param, 0)) {
		fprintf(stderr, "failed to create mr\n");
		return 1;
	}

	/* create the rest if needed, or copy the first one */
	for (i = 1; i < user_param->num_of_qps; i++) {
		if (user_param->mr_per_qp) {
			if (create_single_mr(ctx, user_param, i)) {
				fprintf(stderr, "failed to create mr\n");
				return 1;
			}
		} else {
			/* all QPs share the first MR; each QP gets its own slice of the common buffer */
			ctx->mr[i] = ctx->mr[0];
			ctx->buf[i] = ctx->buf[0] + (i*BUFF_SIZE(ctx->size, ctx->cycle_buffer));
		}
	}

	return 0;
}

/******************************************************************************
 *
 ******************************************************************************/
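/* Hugepage buffers are carved out of a SysV shared memory segment created with
 * SHM_HUGETLB; the segment is marked IPC_RMID right after shmat(), so the
 * kernel reclaims it once the test detaches or exits. */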
#define HUGEPAGE_ALIGN (2*1024*1024)
#define SHMAT_ADDR (void *)(0x0UL)
#define SHMAT_FLAGS (0)
#ifndef SHM_HUGETLB
#define SHM_HUGETLB (2048)
#endif

int alloc_hugepage_region (struct pingpong_context *ctx)
{
	int buf_size;
	int alignment = (((ctx->cycle_buffer + HUGEPAGE_ALIGN -1) / HUGEPAGE_ALIGN) * HUGEPAGE_ALIGN);
	buf_size = (((ctx->buff_size + alignment -1 ) / alignment ) * alignment);

	/* create hugepage shared region */
	ctx->huge_shmid = shmget(IPC_PRIVATE, buf_size,
				SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W);
	if (ctx->huge_shmid < 0) {
		fprintf(stderr, "Failed to allocate hugepages. Please configure hugepages\n");
		return FAILURE;
	}

	/* attach shared memory */
	ctx->buf = (void *) shmat(ctx->huge_shmid, SHMAT_ADDR, SHMAT_FLAGS);
	if (ctx->buf == (void *) -1) {
		fprintf(stderr, "Failed to attach shared memory region\n");
		return FAILURE;
	}

	/* Mark shmem for removal */
	if (shmctl(ctx->huge_shmid, IPC_RMID, 0) != 0) {
		fprintf(stderr, "Failed to mark shm for removal\n");
		return FAILURE;
	}

	return SUCCESS;
}

int verify_params_with_device_context(struct ibv_context *context,
				      struct perftest_parameters *user_param)
{
	if(user_param->use_event) {
		if(user_param->eq_num > context->num_comp_vectors) {
			fprintf(stderr, " Completion vector specified is invalid\n");
			fprintf(stderr, " Max completion vector = %d\n",
				context->num_comp_vectors - 1);
			return FAILURE;
		}
	}

	return SUCCESS;
}

#if defined HAVE_OOO_ATTR || defined HAVE_EXP_OOO_ATTR
static int verify_ooo_settings(struct pingpong_context *ctx,
			       struct perftest_parameters *user_param)
{
	#ifdef HAVE_OOO_ATTR
	struct ibv_device_attr_ex attr = { };
	if (ibv_query_device_ex(ctx->context, NULL, &attr))
	#elif defined HAVE_EXP_OOO_ATTR
	struct ibv_exp_device_attr attr = { };
	attr.comp_mask = IBV_EXP_DEVICE_ATTR_RESERVED - 1;
	if (ibv_exp_query_device(ctx->context, &attr))
	#endif
		return FAILURE;

	if (user_param->connection_type == RC) {
		if (attr.ooo_caps.rc_caps == 0) {
			fprintf(stderr, " OOO unsupported by HCA on RC QP\n");
			return FAILURE;
		} else {
			return SUCCESS;
		}
	} else if (user_param->connection_type == XRC) {
		if (attr.ooo_caps.xrc_caps == 0) {
			fprintf(stderr, " OOO unsupported by HCA on XRC QP\n");
			return FAILURE;
		} else {
			return SUCCESS;
		}
	} else if (user_param->connection_type == UD) {
		if (attr.ooo_caps.ud_caps == 0) {
			fprintf(stderr, " OOO unsupported by HCA on UD QP\n");
			return FAILURE;
		} else {
			return SUCCESS;
		}

	#if defined HAVE_OOO_ATTR
	} else if (user_param->connection_type == UC) {
		if (attr.ooo_caps.uc_caps == 0) {
			fprintf(stderr, " OOO unsupported by HCA on UC QP\n");
			return FAILURE;
		} else {
			return SUCCESS;
		}
	#elif defined HAVE_EXP_OOO_ATTR
	} else if (user_param->connection_type == DC) {
		if (attr.ooo_caps.dc_caps == 0) {
			fprintf(stderr, " OOO unsupported by HCA on DC QP\n");
			return FAILURE;
		} else {
			return SUCCESS;
		}
	#endif
	} else {
		return FAILURE;
	}
}
#endif
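/* Set up all verbs resources for the test: completion channel (when events are
 * used), PD, optional resource domain, MRs, CQs, XRC domain and SRQ, plain SRQ,
 * RSS capability checks, and finally one QP per index, each moved to INIT. */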
int ctx_init(struct pingpong_context *ctx, struct perftest_parameters *user_param)
{
	int i;
	int num_of_qps = user_param->num_of_qps / 2;

	ctx->is_contig_supported = FAILURE;

	#ifdef HAVE_ACCL_VERBS
	enum ibv_exp_query_intf_status intf_status;
	struct ibv_exp_query_intf_params intf_params;
	#endif

	#ifdef HAVE_VERBS_EXP
	struct ibv_exp_device_attr dattr;
	memset(&dattr, 0, sizeof(dattr));
	get_verbs_pointers(ctx);
	#endif

	#if defined HAVE_OOO_ATTR || defined HAVE_EXP_OOO_ATTR
	if (user_param->use_ooo) {
		if (verify_ooo_settings(ctx, user_param) != SUCCESS) {
			fprintf(stderr, "Incompatible OOO settings\n");
			return FAILURE;
		}
	}
	#endif

	#ifdef HAVE_VERBS_EXP
	ctx->is_contig_supported = check_for_contig_pages_support(ctx->context);
	#endif

	if (user_param->use_hugepages)
		ctx->is_contig_supported = FAILURE;

	/* Allocating an event channel if requested. */
	if (user_param->use_event) {
		ctx->channel = ibv_create_comp_channel(ctx->context);
		if (!ctx->channel) {
			fprintf(stderr, "Couldn't create completion channel\n");
			return FAILURE;
		}
	}

	/* Allocating the Protection domain. */
	ctx->pd = ibv_alloc_pd(ctx->context);
	if (!ctx->pd) {
		fprintf(stderr, "Couldn't allocate PD\n");
		return FAILURE;
	}

	#ifdef HAVE_ACCL_VERBS
	if (user_param->use_res_domain) {
		ctx->res_domain = create_res_domain(ctx, user_param);
		if (!ctx->res_domain) {
			fprintf(stderr, "Couldn't create resource domain\n");
			return FAILURE;
		}
	}
	#endif

	if (create_mr(ctx, user_param)) {
		fprintf(stderr, "Failed to create MR\n");
		return FAILURE;
	}

	if (create_cqs(ctx, user_param)) {
		fprintf(stderr, "Failed to create CQs\n");
		return FAILURE;

	}

	#ifdef HAVE_XRCD
	if (user_param->use_xrc) {

		if (ctx_xrcd_create(ctx,user_param)) {
			fprintf(stderr, "Couldn't create XRC resources\n");
			return FAILURE;
		}

		if (ctx_xrc_srq_create(ctx,user_param)) {
			fprintf(stderr, "Couldn't create SRQ XRC resources\n");
			return FAILURE;
		}
	}
	#endif

	if (user_param->use_srq && !user_param->use_xrc && (user_param->tst == LAT ||
				user_param->machine == SERVER || user_param->duplex == ON)) {

		struct ibv_srq_init_attr attr = {
			.attr = {
				/* when using an SRQ, rx_depth sets max_wr */
				.max_wr = user_param->rx_depth,
				.max_sge = 1
			}
		};

		ctx->srq = ibv_create_srq(ctx->pd, &attr);
		if (!ctx->srq) {
			fprintf(stderr, "Couldn't create SRQ\n");
			return FAILURE;
		}
	}

	#ifdef HAVE_RSS_EXP
	if (user_param->use_rss) {
		struct ibv_exp_device_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.comp_mask = IBV_EXP_DEVICE_ATTR_EXP_CAP_FLAGS |
			IBV_EXP_DEVICE_ATTR_RSS_TBL_SZ;
		if (ibv_exp_query_device(ctx->context, &attr)) {
			fprintf(stderr, "Experimental ibv_exp_query_device.\n");
			exit(1);
		}

		if (!((attr.exp_device_cap_flags & IBV_EXP_DEVICE_QPG) &&
					(attr.exp_device_cap_flags & IBV_EXP_DEVICE_UD_RSS) &&
					(attr.comp_mask & IBV_EXP_DEVICE_ATTR_RSS_TBL_SZ) &&
					(attr.max_rss_tbl_sz > 0))) {
			fprintf(stderr, "RSS not supported .\n");
			exit(1);
		}

		/* num of qps includes the parent */
		if (user_param->num_of_qps > attr.max_rss_tbl_sz + 1) {
			fprintf(stderr, "RSS limit is %d .\n",
					attr.max_rss_tbl_sz);
			exit(1);
		}
	}
	#endif

	for (i=0; i < user_param->num_of_qps; i++) {

		if (create_qp_main(ctx, user_param, i, num_of_qps)) {
			fprintf(stderr, "Failed to create QP.\n");
			return FAILURE;
		}
		modify_qp_to_init(ctx, user_param, i, num_of_qps);
		#ifdef HAVE_ACCL_VERBS
		if (user_param->verb_type == ACCL_INTF) {
			memset(&intf_params, 0, sizeof(intf_params));
			intf_params.intf_scope = IBV_EXP_INTF_GLOBAL;
			intf_params.intf = IBV_EXP_INTF_QP_BURST;
			intf_params.obj = ctx->qp[i];
			ctx->qp_burst_family[i] = ibv_exp_query_intf(ctx->context, &intf_params, &intf_status);
			if (!ctx->qp_burst_family[i]) {
				fprintf(stderr, "Couldn't get QP burst family.\n");
				return FAILURE;
			}
		}
		#endif
	}

	return SUCCESS;
}

int modify_qp_to_init(struct pingpong_context *ctx,
		struct perftest_parameters *user_param, int qp_index, int num_of_qps)
{
	uint64_t init_flag = 0;

	#ifdef HAVE_RSS_EXP
	if (qp_index == 0 && user_param->use_rss) {
		init_flag = IBV_EXP_QP_GROUP_RSS;
	}
	else
	#endif
		init_flag = 0;

	if(user_param->connection_type == DC) {
		if ( !((!(user_param->duplex || user_param->tst == LAT) && (user_param->machine == SERVER) )
					|| ((user_param->duplex || user_param->tst == LAT) && (qp_index >= num_of_qps)))) {
			#ifdef HAVE_DC
			if (ctx_modify_dc_qp_to_init(ctx->qp[qp_index],user_param)) {
				fprintf(stderr," Unable to create DC QP.\n");
				return FAILURE;
			}
			#endif
		}
	} else {
		if (ctx_modify_qp_to_init(ctx->qp[qp_index],user_param,init_flag)) {
			fprintf(stderr, "Failed to modify QP to INIT\n");
			return FAILURE;
		}
	}

	return SUCCESS;
}

/******************************************************************************
 *
 ******************************************************************************/
int create_reg_qp_main(struct pingpong_context *ctx,
		       struct perftest_parameters *user_param,
		       int i, int num_of_qps)
{
	if (user_param->use_xrc) {
		#ifdef HAVE_XRCD
		ctx->qp[i] = ctx_xrc_qp_create(ctx, user_param, i);
		#endif
	} else {
		ctx->qp[i] = ctx_qp_create(ctx, user_param);
	}

	if (ctx->qp[i] == NULL) {
		fprintf(stderr, "Unable to create QP.\n");
		return FAILURE;
	}

	return SUCCESS;
}


int create_exp_qp_main(struct pingpong_context *ctx,
		struct perftest_parameters *user_param, int i, int num_of_qps)
{
	int is_dc_tgt_query = 0;
	is_dc_tgt_query |= !(user_param->duplex || user_param->tst == LAT) && (user_param->machine == SERVER);
	is_dc_tgt_query |= (user_param->duplex || user_param->tst == LAT) && (i >= num_of_qps);
	is_dc_tgt_query &= user_param->connection_type == DC;

	if (is_dc_tgt_query) {
		#ifdef HAVE_DC
		if(ctx_dc_tgt_create(ctx,user_param,i)) {
			return FAILURE;
		}
		/* allocate a dummy ibv_qp so the rest of the test can keep indexing by QP; only qp_num is used */
		ALLOCATE(ctx->qp[i], struct ibv_qp, 1);
		ctx->qp[i]->qp_num = ctx->dct[i]->dct_num;
		#else
		fprintf(stderr, "DC is not supported.\n");
		return FAILURE;
		#endif
	} else if (user_param->use_rss && user_param->connection_type == RawEth) {
		#ifdef HAVE_RSS_EXP
		ctx->qp[i] = ctx_rss_eth_qp_create(ctx, user_param, i);
		#endif
		if (ctx->qp[i] == NULL) {
			fprintf(stderr," Unable to create RSS QP.\n");
			return FAILURE;
		}
	} else if (user_param->use_xrc) {
		#ifdef HAVE_XRCD
		ctx->qp[i] = ctx_xrc_qp_create(ctx, user_param, i);
		#endif
		if (ctx->qp[i] == NULL) {
			fprintf(stderr," Unable to create XRC QP.\n");
			return FAILURE;
		}
	} else {
		#ifdef HAVE_VERBS_EXP
		ctx->qp[i] = ctx_exp_qp_create(ctx, user_param, i);
		#endif
		if (ctx->qp[i] == NULL) {
			fprintf(stderr, " Unable to create exp QP.\n");
			return FAILURE;
		}
	}
	return SUCCESS;
}

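/* Decide whether this QP must go through the experimental-verbs path (DC, RSS
 * over raw Ethernet, XRC, inline receive, masked atomics, accelerated verbs,
 * resource domains or explicit exp usage) and dispatch accordingly. */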
int create_qp_main(struct pingpong_context *ctx,
		struct perftest_parameters *user_param, int i, int num_of_qps)
{
	int ret;
	#ifdef HAVE_VERBS_EXP
	int query;

	/* flag that indicates that we are going to use exp QP */
	query = (user_param->connection_type == DC);
	query |= (user_param->use_rss && user_param->connection_type == RawEth);
	query |= user_param->use_xrc;
	query |= user_param->inline_recv_size != 0;
	query |= user_param->masked_atomics;
	query |= user_param->verb_type != NORMAL_INTF;
	query |= user_param->use_res_domain;
	query |= user_param->use_exp;

	if (query == 1)
		user_param->is_exp_qp = 1;

	if (user_param->is_exp_qp)
		ret = create_exp_qp_main(ctx, user_param, i, num_of_qps);
	else
	#endif
		ret = create_reg_qp_main(ctx, user_param, i, num_of_qps);

	return ret;
}

#ifdef HAVE_VERBS_EXP
#ifdef HAVE_SCATTER_FCS
static int check_scatter_fcs_support(struct pingpong_context *ctx,
		struct perftest_parameters *user_param)
{
	struct ibv_exp_device_attr dev_attr;

	memset(&dev_attr, 0, sizeof(dev_attr));
	dev_attr.comp_mask = IBV_EXP_DEVICE_ATTR_EXP_CAP_FLAGS;
	dev_attr.exp_device_cap_flags = IBV_EXP_DEVICE_SCATTER_FCS;
	if (ibv_exp_query_device(ctx->context, &dev_attr)) {
		fprintf(stderr, "ibv_exp_query_device failed\n");
		return 1;
	}

	return MASK_IS_SET(IBV_EXP_DEVICE_SCATTER_FCS, dev_attr.exp_device_cap_flags);
}
#endif
#endif

#ifdef HAVE_VERBS_EXP
struct ibv_qp* ctx_exp_qp_create(struct pingpong_context *ctx,
		struct perftest_parameters *user_param, int qp_index)
{
	struct ibv_exp_qp_init_attr attr;
	struct ibv_qp* qp = NULL;
	struct ibv_exp_device_attr dev_attr;

	memset(&attr, 0, sizeof(attr));
	memset(&dev_attr, 0, sizeof(dev_attr));
	attr.comp_mask = IBV_EXP_QP_INIT_ATTR_PD | IBV_EXP_QP_INIT_ATTR_CREATE_FLAGS;

	if (user_param->masked_atomics) {
		#ifdef HAVE_MASKED_ATOMICS
		dev_attr.comp_mask = IBV_EXP_DEVICE_ATTR_EXT_ATOMIC_ARGS | IBV_EXP_DEVICE_ATTR_EXP_CAP_FLAGS;

		if (ibv_exp_query_device(ctx->context, &dev_attr)) {
			fprintf(stderr, "ibv_exp_query_device failed\n");
			return NULL;
		}

		attr.max_atomic_arg = pow(2,dev_attr.ext_atom.log_max_atomic_inline);
		attr.exp_create_flags |= IBV_EXP_QP_CREATE_ATOMIC_BE_REPLY;
		attr.comp_mask |= IBV_EXP_QP_INIT_ATTR_ATOMICS_ARG;
		#else
		fprintf(stderr, "Can't create masked atomic QP.\n");
		return NULL;
		#endif
	}

	if (user_param->inline_recv_size) {

		if (check_inline_recv_support(ctx, user_param)) {
			fprintf(stderr, "Failed to create QP with inline receive.\n");
			return NULL;
		}

		attr.comp_mask |= IBV_EXP_QP_INIT_ATTR_INL_RECV;
		attr.max_inl_recv = user_param->inline_recv_size;
		attr.sq_sig_all = (1 == user_param->cq_mod) ? 1 : 0; /* inline receive on the requestor requires the QP's sq_sig_all to be set */
	}

	#ifdef HAVE_ACCL_VERBS
	if (user_param->use_res_domain) {
		attr.comp_mask |= IBV_EXP_QP_INIT_ATTR_RES_DOMAIN;
		attr.res_domain = ctx->res_domain;
	}
	#endif

	attr.pd = ctx->pd;
	attr.send_cq = ctx->send_cq;
	attr.recv_cq = (user_param->verb == SEND) ? ctx->recv_cq : ctx->send_cq;
	attr.cap.max_send_wr = user_param->tx_depth;
	attr.cap.max_send_sge = MAX_SEND_SGE;
	attr.cap.max_inline_data = user_param->inline_size;

	if (user_param->use_srq && (user_param->tst == LAT || user_param->machine == SERVER || user_param->duplex == ON)) {
		attr.srq = ctx->srq;
		attr.cap.max_recv_wr = 0;
		attr.cap.max_recv_sge = 0;
	} else {
		attr.srq = NULL;
		attr.cap.max_recv_wr = user_param->rx_depth;
		attr.cap.max_recv_sge = MAX_RECV_SGE;
	}

	switch (user_param->connection_type) {

		case RC : attr.qp_type = IBV_QPT_RC; break;
		case UC : attr.qp_type = IBV_QPT_UC; break;
		case UD : attr.qp_type = IBV_QPT_UD; break;
		#ifdef HAVE_RAW_ETH
		case RawEth : attr.qp_type = IBV_QPT_RAW_PACKET; break;
		#endif
		case DC : attr.qp_type = IBV_EXP_QPT_DC_INI; break;
		default: fprintf(stderr, "Unknown connection type\n");
			  return NULL;
	}

	#ifdef HAVE_SCATTER_FCS
	if (!user_param->disable_fcs && (user_param->connection_type == RawEth)) {
		if(check_scatter_fcs_support(ctx, user_param)) {
			attr.exp_create_flags |= IBV_EXP_QP_CREATE_SCATTER_FCS;
		}
	}
	#endif

	qp = ibv_exp_create_qp(ctx->context, &attr);
	if (!qp)
		return NULL;

	if (user_param->inline_recv_size > attr.max_inl_recv)
		printf(" Actual inline-receive(%d) < requested inline-receive(%d)\n",
				attr.max_inl_recv, user_param->inline_recv_size);

	return qp;
}
#endif

struct ibv_qp* ctx_qp_create(struct pingpong_context *ctx,
		struct perftest_parameters *user_param)
{
	struct ibv_qp_init_attr attr;
	struct ibv_qp* qp = NULL;

	memset(&attr, 0, sizeof(struct ibv_qp_init_attr));
	attr.send_cq = ctx->send_cq;
	attr.recv_cq = (user_param->verb == SEND) ? ctx->recv_cq : ctx->send_cq;
	attr.cap.max_send_wr = user_param->tx_depth;
	attr.cap.max_send_sge = MAX_SEND_SGE;
	attr.cap.max_inline_data = user_param->inline_size;

	if (user_param->use_srq && (user_param->tst == LAT || user_param->machine == SERVER || user_param->duplex == ON)) {
		attr.srq = ctx->srq;
		attr.cap.max_recv_wr = 0;
		attr.cap.max_recv_sge = 0;
	} else {
		attr.srq = NULL;
		attr.cap.max_recv_wr = user_param->rx_depth;
		attr.cap.max_recv_sge = MAX_RECV_SGE;
	}

	switch (user_param->connection_type) {

		case RC : attr.qp_type = IBV_QPT_RC; break;
		case UC : attr.qp_type = IBV_QPT_UC; break;
		case UD : attr.qp_type = IBV_QPT_UD; break;
		#ifdef HAVE_RAW_ETH
		case RawEth : attr.qp_type = IBV_QPT_RAW_PACKET; break;
		#endif
		default: fprintf(stderr, "Unknown connection type\n");
			  return NULL;
	}

	qp = ibv_create_qp(ctx->pd,&attr);
	return qp;
}


#ifdef HAVE_MASKED_ATOMICS
/******************************************************************************
 *
 ******************************************************************************/
struct ibv_qp* ctx_atomic_qp_create(struct pingpong_context *ctx,
		struct perftest_parameters *user_param)
{
	struct ibv_exp_qp_init_attr attr;
	struct ibv_qp* qp = NULL;
	struct ibv_exp_device_attr dev_attr;

	memset(&dev_attr, 0, sizeof(dev_attr));

	dev_attr.comp_mask |= IBV_EXP_DEVICE_ATTR_EXT_ATOMIC_ARGS | IBV_EXP_DEVICE_ATTR_EXP_CAP_FLAGS;

	if (ibv_exp_query_device(ctx->context, &dev_attr)) {
		fprintf(stderr, "ibv_exp_query_device failed\n");
		exit(1);
	}

	memset(&attr, 0, sizeof(struct ibv_exp_qp_init_attr));
	attr.pd = ctx->pd;
	attr.send_cq = ctx->send_cq;
	attr.recv_cq = (user_param->verb == SEND) ? ctx->recv_cq : ctx->send_cq;
	attr.cap.max_send_wr = user_param->tx_depth;
	attr.cap.max_send_sge = MAX_SEND_SGE;
	attr.cap.max_inline_data = user_param->inline_size;
	attr.max_atomic_arg = pow(2,dev_attr.ext_atom.log_max_atomic_inline);
	attr.exp_create_flags = IBV_EXP_QP_CREATE_ATOMIC_BE_REPLY;
	attr.comp_mask = IBV_EXP_QP_INIT_ATTR_CREATE_FLAGS | IBV_EXP_QP_INIT_ATTR_PD;
	attr.comp_mask |= IBV_EXP_QP_INIT_ATTR_ATOMICS_ARG;

	if (user_param->use_srq && (user_param->tst == LAT || user_param->machine == SERVER || user_param->duplex == ON)) {
		attr.srq = ctx->srq;
		attr.cap.max_recv_wr = 0;
		attr.cap.max_recv_sge = 0;
	} else {
		attr.srq = NULL;
		attr.cap.max_recv_wr = user_param->rx_depth;
		attr.cap.max_recv_sge = MAX_RECV_SGE;
	}

	switch (user_param->connection_type) {

		case RC : attr.qp_type = IBV_QPT_RC; break;
		case UC : attr.qp_type = IBV_QPT_UC; break;
		case UD : attr.qp_type = IBV_QPT_UD; break;
		#ifdef HAVE_XRCD
		case XRC : attr.qp_type = IBV_QPT_XRC; break;
		#endif
		#ifdef HAVE_RAW_ETH
		case RawEth : attr.qp_type = IBV_QPT_RAW_PACKET; break;
		#endif
		default: fprintf(stderr, "Unknown connection type\n");
			  return NULL;
	}

	qp = ibv_exp_create_qp(ctx->context, &attr);

	return qp;
}
#endif

#ifdef HAVE_DC
/******************************************************************************
 *
 ******************************************************************************/
int ctx_modify_dc_qp_to_init(struct ibv_qp *qp,struct perftest_parameters *user_param)
{
	int num_of_qps = user_param->num_of_qps;
	int num_of_qps_per_port = user_param->num_of_qps / 2;
	int err;
	uint64_t flags;

	#ifdef HAVE_VERBS_EXP
	struct ibv_exp_qp_attr attr;
	memset(&attr, 0, sizeof(struct ibv_exp_qp_attr));
	flags = IBV_EXP_QP_STATE | IBV_EXP_QP_PKEY_INDEX | IBV_EXP_QP_PORT;
	#else
	struct ibv_qp_attr attr;
	memset(&attr, 0, sizeof(struct ibv_qp_attr));
	flags = IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT;
	#endif

	static int portindex=0; /* for dual-port support */

	attr.qp_state = IBV_QPS_INIT;
	attr.pkey_index = user_param->pkey_index;
	attr.qp_access_flags = 0;
	attr.dct_key = user_param->dct_key;

	if (user_param->duplex || user_param->tst == LAT) {
		num_of_qps /= 2;
		num_of_qps_per_port = num_of_qps / 2;
	}

	if (user_param->dualport==ON) {
		if (portindex % num_of_qps < num_of_qps_per_port) {
			attr.port_num = user_param->ib_port;
			user_param->port_by_qp[portindex] = 0;
		} else {
			attr.port_num = user_param->ib_port2;
			user_param->port_by_qp[portindex] = 1;
		}
		portindex++;

	} else {

		attr.port_num = user_param->ib_port;
	}

	#ifdef HAVE_VERBS_EXP
	flags |= IBV_EXP_QP_DC_KEY;
	err = ibv_exp_modify_qp(qp,&attr,flags);
	#else
	flags |= IBV_QP_DC_KEY;
	attr.comp_mask = IBV_QP_ATTR_DCT_KEY;
	err = ibv_modify_qp(qp,&attr,flags);
	#endif

	if (err) {
		fprintf(stderr, "Failed to modify QP to INIT\n");
		return 1;
	}
	return 0;
}
#endif

/******************************************************************************
 *
 ******************************************************************************/
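/* Move a regular QP to INIT: pick the port (splitting QPs between the two
 * ports in dual-port mode), set pkey/qkey and the access flags implied by the
 * test verb; RSS parent QPs and masked-atomic QPs go through
 * ibv_exp_modify_qp() instead. */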
int ctx_modify_qp_to_init(struct ibv_qp *qp,struct perftest_parameters *user_param, uint64_t init_flag)
{
	int num_of_qps = user_param->num_of_qps;
	int num_of_qps_per_port = user_param->num_of_qps / 2;

	struct ibv_qp_attr attr;
	int flags = IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT;

	#ifdef HAVE_VERBS_EXP
	struct ibv_exp_qp_attr exp_attr;
	uint64_t exp_flags = 0;
	#endif

	static int portindex=0; /* for dual-port support */
	int ret = 0;

	memset(&attr, 0, sizeof(struct ibv_qp_attr));
	attr.qp_state = IBV_QPS_INIT;
	attr.pkey_index = user_param->pkey_index;

	#ifdef HAVE_VERBS_EXP
	memset(&exp_attr, 0, sizeof(struct ibv_exp_qp_attr));
	exp_attr.qp_state = attr.qp_state;
	exp_attr.pkey_index = attr.pkey_index;
	#endif

	if ( user_param->use_xrc && (user_param->duplex || user_param->tst == LAT)) {
		num_of_qps /= 2;
		num_of_qps_per_port = num_of_qps / 2;
	}

	if (user_param->dualport==ON) {
		if (portindex % num_of_qps < num_of_qps_per_port) {
			attr.port_num = user_param->ib_port;
			user_param->port_by_qp[portindex] = 0;
		} else {
			attr.port_num = user_param->ib_port2;
			user_param->port_by_qp[portindex] = 1;
		}
		portindex++;

	} else {

		attr.port_num = user_param->ib_port;
	}

	#ifdef HAVE_VERBS_EXP
	exp_attr.port_num = attr.port_num;
	#endif

	if (user_param->connection_type == RawEth) {
		flags = IBV_QP_STATE | IBV_QP_PORT;
		#ifdef HAVE_VERBS_EXP
		exp_flags = init_flag | IBV_EXP_QP_STATE | IBV_EXP_QP_PORT;
		#endif

	} else if (user_param->connection_type == UD) {
		attr.qkey = DEFF_QKEY;
		flags |= IBV_QP_QKEY;

	} else {
		switch (user_param->verb) {
			case ATOMIC: attr.qp_access_flags = IBV_ACCESS_REMOTE_ATOMIC; break;
			case READ : attr.qp_access_flags = IBV_ACCESS_REMOTE_READ; break;
			case WRITE : attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE; break;
			case SEND : attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE;
		}
		flags |= IBV_QP_ACCESS_FLAGS;
	}

	#ifdef HAVE_MASKED_ATOMICS
	if (user_param->masked_atomics) {
		exp_attr.qp_access_flags = IBV_ACCESS_REMOTE_ATOMIC | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ;
		exp_flags = IBV_EXP_QP_STATE | IBV_EXP_QP_PKEY_INDEX | IBV_EXP_QP_PORT | IBV_EXP_QP_ACCESS_FLAGS;
	}
	#endif

	#ifdef HAVE_VERBS_EXP
	if ( (init_flag != 0 && user_param->use_rss) || user_param->masked_atomics ) {
		ret = ibv_exp_modify_qp(qp,&exp_attr,exp_flags);
	}
	else
	#endif
		ret = ibv_modify_qp(qp,&attr,flags);

	if (ret) {
		fprintf(stderr, "Failed to modify QP to INIT, ret=%d\n",ret);
		return 1;
	}
	return 0;
}

#ifdef HAVE_DC
/******************************************************************************
 * Transition a DC QP to the RTR state: address vector (LID/GID, SL, port) and path MTU.
 ******************************************************************************/
static int ctx_modify_dc_qp_to_rtr(struct ibv_qp *qp,
		struct ibv_exp_qp_attr *attr,
		struct perftest_parameters *user_param,
		struct pingpong_dest *dest,
		struct pingpong_dest *my_dest,
		int qpindex)
{
	int num_of_qps = user_param->num_of_qps;
	int num_of_qps_per_port = user_param->num_of_qps / 2;

	int flags = IBV_EXP_QP_STATE | IBV_EXP_QP_PATH_MTU | IBV_EXP_QP_AV;
	attr->qp_state = IBV_QPS_RTR;
	attr->ah_attr.src_path_bits = 0;

	/* In a bidirectional DC run the QPs are split into send QPs and
	 * receive QPs, so the effective number of send/recv QPs is
	 * num_of_qps / 2.
	 */
	if (user_param->duplex || user_param->tst == LAT) {
		num_of_qps /= 2;
		num_of_qps_per_port = num_of_qps / 2;
	}

	/* The first half of the QPs use ib_port and the second half use ib_port2.
	 * In a bidirectional DC run the first half of the QPs are DC_INI QPs and
	 * the second half are DC_TGT QPs; within each group, the first half of
	 * the send/recv QPs use ib_port and the second half use ib_port2 (see the
	 * example below).
	 */
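	/* Illustrative example (not taken from the original code): with 8 QPs in
	 * a bidirectional dual-port run, num_of_qps drops to 4 and
	 * num_of_qps_per_port to 2, so QPs 0-1 and 4-5 are mapped to ib_port
	 * while QPs 2-3 and 6-7 are mapped to ib_port2.
	 */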
	if (user_param->dualport == ON && (qpindex % num_of_qps >= num_of_qps_per_port))
		attr->ah_attr.port_num = user_param->ib_port2;
	else
		attr->ah_attr.port_num = user_param->ib_port;

	attr->ah_attr.dlid = (user_param->dlid) ? user_param->dlid : dest->lid;
	if (user_param->gid_index == DEF_GID_INDEX) {

		attr->ah_attr.is_global = 0;
		attr->ah_attr.sl = user_param->sl;

	} else {
		attr->ah_attr.is_global = 1;
		attr->ah_attr.grh.dgid = dest->gid;
		attr->ah_attr.grh.sgid_index = user_param->gid_index;
		attr->ah_attr.grh.hop_limit = 1;
		attr->ah_attr.grh.traffic_class = user_param->traffic_class;
		attr->ah_attr.sl = 0;
	}

	attr->max_dest_rd_atomic = 0;
	attr->min_rnr_timer = 0;
	attr->dct_key = user_param->dct_key;
	attr->path_mtu = user_param->curr_mtu;

	return ibv_exp_modify_qp(qp,attr,flags);
}
#endif

/******************************************************************************
 * Transition a QP to the RTR state: address vector, path MTU, destination
 * QPN/PSN, and RC/XRC responder resources.
 ******************************************************************************/
static int ctx_modify_qp_to_rtr(struct ibv_qp *qp,
		struct ibv_qp_attr *attr,
		struct perftest_parameters *user_param,
		struct pingpong_dest *dest,
		struct pingpong_dest *my_dest,
		int qpindex)
{
	int num_of_qps = user_param->num_of_qps;
	int num_of_qps_per_port = user_param->num_of_qps / 2;

	int flags = IBV_QP_STATE;
	int ooo_flags = 0;

	attr->qp_state = IBV_QPS_RTR;
	attr->ah_attr.src_path_bits = 0;

	/* In a bidirectional XRC run the QPs are split into send QPs and
	 * receive QPs, so the effective number of send/recv QPs is
	 * num_of_qps / 2.
	 */
	if ( user_param->use_xrc && (user_param->duplex || user_param->tst == LAT)) {
		num_of_qps /= 2;
		num_of_qps_per_port = num_of_qps / 2;
	}

	/* The first half of the QPs use ib_port and the second half use ib_port2.
	 * In a bidirectional XRC run the first half of the QPs are xrc_send QPs
	 * and the second half are xrc_recv QPs; within each group, the first half
	 * of the send/recv QPs use ib_port and the second half use ib_port2.
	 */
	if (user_param->dualport == ON && (qpindex % num_of_qps >= num_of_qps_per_port))
		attr->ah_attr.port_num = user_param->ib_port2;
	else
		attr->ah_attr.port_num = user_param->ib_port;

	if (user_param->connection_type != RawEth) {

		attr->ah_attr.dlid = (user_param->dlid) ? user_param->dlid : dest->lid;
		attr->ah_attr.sl = user_param->sl;

		if (((attr->ah_attr.port_num == user_param->ib_port) && (user_param->gid_index == DEF_GID_INDEX))
				|| ((attr->ah_attr.port_num == user_param->ib_port2) && (user_param->gid_index2 == DEF_GID_INDEX) && user_param->dualport)) {

			attr->ah_attr.is_global = 0;
		} else {

			attr->ah_attr.is_global = 1;
			attr->ah_attr.grh.dgid = dest->gid;
			attr->ah_attr.grh.sgid_index = (attr->ah_attr.port_num == user_param->ib_port) ? user_param->gid_index : user_param->gid_index2;
			attr->ah_attr.grh.hop_limit = 0xFF;
			attr->ah_attr.grh.traffic_class = user_param->traffic_class;
		}
		if (user_param->connection_type != UD) {

			attr->path_mtu = user_param->curr_mtu;
			attr->dest_qp_num = dest->qpn;
			attr->rq_psn = dest->psn;

			flags |= (IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN | IBV_QP_RQ_PSN);

			if (user_param->connection_type == RC || user_param->connection_type == XRC) {

				attr->max_dest_rd_atomic = my_dest->out_reads;
				attr->min_rnr_timer = 12;
				flags |= (IBV_QP_MIN_RNR_TIMER | IBV_QP_MAX_DEST_RD_ATOMIC);
			}
		}
	}
	else if (user_param->raw_qos) {
		attr->ah_attr.sl = user_param->sl;
		flags |= IBV_QP_AV;
	}

	#ifdef HAVE_OOO_ATTR
		ooo_flags |= IBV_QP_OOO_RW_DATA_PLACEMENT;
	#elif HAVE_EXP_OOO_ATTR
		ooo_flags |= IBV_EXP_QP_OOO_RW_DATA_PLACEMENT;
	#endif

	if (user_param->use_ooo)
		flags |= ooo_flags;
	return ibv_modify_qp(qp, attr, flags);
}

#ifdef HAVE_DC
/******************************************************************************
 * Transition a DC QP to the RTS state: timeout, retry counters and initiator depth.
 ******************************************************************************/
static int ctx_modify_dc_qp_to_rts(struct ibv_qp *qp,
		#ifdef HAVE_VERBS_EXP
		struct ibv_exp_qp_attr *attr,
		#else
		struct ibv_qp_attr_ex *attr,
		#endif
		struct perftest_parameters *user_param,
		struct pingpong_dest *dest,
		struct pingpong_dest *my_dest)
{

	#ifdef HAVE_VERBS_EXP
	int flags = IBV_EXP_QP_STATE | IBV_EXP_QP_TIMEOUT | IBV_EXP_QP_RETRY_CNT |
					 IBV_EXP_QP_RNR_RETRY | IBV_EXP_QP_MAX_QP_RD_ATOMIC;
	#else
	int flags = IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY | IBV_QP_MAX_QP_RD_ATOMIC;
	#endif

	attr->qp_state = IBV_QPS_RTS;

	attr->timeout = user_param->qp_timeout;
	attr->retry_cnt = 7;
	attr->rnr_retry = 7;
	attr->max_rd_atomic = dest->out_reads;

	#ifdef HAVE_VERBS_EXP
	return ibv_exp_modify_qp(qp,attr,flags);
	#else
	return ibv_modify_qp_ex(qp,attr,flags);
	#endif
}
#endif

/******************************************************************************
 * Transition a QP to the RTS state: SQ PSN, timeouts/retries for connected
 * transports, and an optional packet-pacing rate limit.
 ******************************************************************************/
static int ctx_modify_qp_to_rts(struct ibv_qp *qp,
		void *_attr,
		struct perftest_parameters *user_param,
		struct pingpong_dest *dest,
		struct pingpong_dest *my_dest)
{
	#ifdef HAVE_PACKET_PACING_EXP
	uint64_t flags = IBV_QP_STATE;
	#else
	int flags = IBV_QP_STATE;
	#endif
	struct ibv_qp_attr *attr = (struct ibv_qp_attr*)_attr;

	attr->qp_state = IBV_QPS_RTS;

	if (user_param->connection_type != RawEth) {

		flags |= IBV_QP_SQ_PSN;
		attr->sq_psn = my_dest->psn;

		if (user_param->connection_type == RC || user_param->connection_type == XRC) {

			attr->timeout = user_param->qp_timeout;
			attr->retry_cnt = 7;
			attr->rnr_retry = 7;
			attr->max_rd_atomic = dest->out_reads;
			flags |= (IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY | IBV_QP_MAX_QP_RD_ATOMIC);
		}
	}

	#ifdef HAVE_PACKET_PACING_EXP
	if (user_param->rate_limit_type == PP_RATE_LIMIT) {
		((struct ibv_exp_qp_attr*)_attr)->rate_limit = user_param->rate_limit;
		flags |= IBV_EXP_QP_RATE_LIMIT;
		return ibv_exp_modify_qp(qp, (struct ibv_exp_qp_attr*)_attr, flags);
	}
	#elif defined(HAVE_PACKET_PACING)
	if (user_param->rate_limit_type == PP_RATE_LIMIT) {
		attr->rate_limit = user_param->rate_limit;
		flags |= IBV_QP_RATE_LIMIT;
	}
	#endif

	return ibv_modify_qp(qp, attr, flags);
}

/******************************************************************************
 * Bring every QP to RTR (and RTS on the sending side), create address handles
 * for UD/DC, and validate/apply the HW rate limit when requested.
 ******************************************************************************/
int ctx_connect(struct pingpong_context *ctx,
		struct pingpong_dest *dest,
		struct perftest_parameters *user_param,
		struct pingpong_dest *my_dest)
{
	int i;
	#ifdef HAVE_DC
	#ifdef HAVE_VERBS_EXP
	struct ibv_exp_qp_attr attr_ex;
	#else
	struct ibv_qp_attr_ex attr_ex;
	#endif
	#elif HAVE_PACKET_PACING_EXP
	struct ibv_exp_qp_attr attr_ex;
	#endif
	struct ibv_qp_attr attr;
	int xrc_offset = 0;

	if((user_param->use_xrc || user_param->connection_type == DC) && (user_param->duplex || user_param->tst == LAT)) {
		xrc_offset = user_param->num_of_qps / 2;
	}
	for (i=0; i < user_param->num_of_qps; i++) {

		if (user_param->connection_type == DC) {
			if ( ((!(user_param->duplex || user_param->tst == LAT) && (user_param->machine == SERVER) )
						|| ((user_param->duplex || user_param->tst == LAT) && (i >= user_param->num_of_qps/2)))) {
				continue;
			}
		}
		#if defined (HAVE_DC) || defined (HAVE_PACKET_PACING_EXP)
		memset(&attr_ex, 0, sizeof attr_ex);
		#endif
		memset(&attr, 0, sizeof attr);

		if (user_param->rate_limit_type == HW_RATE_LIMIT)
			attr.ah_attr.static_rate = user_param->valid_hw_rate_limit;

		#if defined (HAVE_PACKET_PACING_EXP) || defined (HAVE_PACKET_PACING)
		if (user_param->rate_limit_type == PP_RATE_LIMIT && (check_packet_pacing_support(ctx) == FAILURE)) {
			fprintf(stderr, "Packet Pacing isn't supported.\n");
			return FAILURE;
		}
		#endif

		if ((i >= xrc_offset) && (user_param->use_xrc || user_param->connection_type == DC) && (user_param->duplex || user_param->tst == LAT))
			xrc_offset = -1*xrc_offset;
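		/* Note on the sign flip above: in bidirectional XRC/DC runs the local
		 * QPs in the first half (senders) pair with the remote QPs in the
		 * second half and vice versa, i.e. the remote index becomes
		 * i +/- num_of_qps/2; the offset is restored to +num_of_qps/2 at the
		 * bottom of the loop.
		 */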

		if(user_param->connection_type == DC) {
			#ifdef HAVE_DC
			if(ctx_modify_dc_qp_to_rtr(ctx->qp[i],&attr_ex,user_param,&dest[xrc_offset + i],&my_dest[i],i)) {
				fprintf(stderr, "Failed to modify QP %d to RTR\n",ctx->qp[i]->qp_num);
				return FAILURE;
			}
			#endif
		} else {
			if(ctx_modify_qp_to_rtr(ctx->qp[i], &attr, user_param, &dest[xrc_offset + i], &my_dest[i], i)) {
				fprintf(stderr, "Failed to modify QP %d to RTR\n",ctx->qp[i]->qp_num);
				return FAILURE;
			}
		}
		if (user_param->tst == LAT || user_param->machine == CLIENT || user_param->duplex) {
			if(user_param->connection_type == DC) {
				#ifdef HAVE_DC
				if(ctx_modify_dc_qp_to_rts(ctx->qp[i], &attr_ex, user_param, &dest[xrc_offset + i], &my_dest[i])) {
					fprintf(stderr, "Failed to modify QP to RTS\n");
					return FAILURE;
				}
				#endif
			} else {
				#ifdef HAVE_PACKET_PACING_EXP
				if (user_param->rate_limit_type == PP_RATE_LIMIT) {
					if(ctx_modify_qp_to_rts(ctx->qp[i], &attr_ex, user_param, &dest[xrc_offset + i], &my_dest[i])) {
						fprintf(stderr, "Failed to modify QP %x to RTS\n", ctx->qp[i]->qp_num);
						return FAILURE;
					}
				} else {
				#endif
					if(ctx_modify_qp_to_rts(ctx->qp[i], &attr, user_param, &dest[xrc_offset + i], &my_dest[i])) {
						fprintf(stderr, "Failed to modify QP to RTS\n");
						return FAILURE;
					}
				#ifdef HAVE_PACKET_PACING_EXP
				}
				#endif
			}
		}

		if ((user_param->connection_type == UD || user_param->connection_type == DC) &&
				(user_param->tst == LAT || user_param->machine == CLIENT || user_param->duplex)) {

			#ifdef HAVE_DC
			if(user_param->connection_type == DC)
				ctx->ah[i] = ibv_create_ah(ctx->pd,&(attr_ex.ah_attr));
			else
			#endif
				ctx->ah[i] = ibv_create_ah(ctx->pd,&(attr.ah_attr));


			if (!ctx->ah[i]) {
				fprintf(stderr, "Failed to create AH for UD\n");
				return FAILURE;
			}
		}

		if (user_param->rate_limit_type == HW_RATE_LIMIT) {
			struct ibv_qp_attr qp_attr;
			struct ibv_qp_init_attr init_attr;
			int err, qp_static_rate=0;

			memset(&qp_attr,0,sizeof(struct ibv_qp_attr));
			memset(&init_attr,0,sizeof(struct ibv_qp_init_attr));

			err = ibv_query_qp(ctx->qp[i], &qp_attr, IBV_QP_AV, &init_attr);
			if (err)
				fprintf(stderr, "ibv_query_qp failed to get ah_attr\n");
			else
				qp_static_rate = (int)(qp_attr.ah_attr.static_rate);

			/* Fall back to the SW rate limit only if the user did not explicitly request a rate-limit type */
			if(err || (qp_static_rate != user_param->valid_hw_rate_limit)) {
				if(!user_param->is_rate_limit_type) {
					user_param->rate_limit_type = SW_RATE_LIMIT;
					fprintf(stderr, "\x1b[31mThe QP failed to accept HW rate limit, providing SW rate limit \x1b[0m\n");
				} else {
					fprintf(stderr, "\x1b[31mThe QP failed to accept HW rate limit \x1b[0m\n");
					return FAILURE;
				}

			}
		}

		if((user_param->use_xrc || user_param->connection_type == DC) && (user_param->duplex || user_param->tst == LAT))
			xrc_offset = user_param->num_of_qps / 2;

	}
	return SUCCESS;
}

/******************************************************************************
 * Build all send work requests, dispatching to the experimental or regular
 * verbs variant.
 ******************************************************************************/
void ctx_set_send_wqes(struct pingpong_context *ctx,
		struct perftest_parameters *user_param,
		struct pingpong_dest *rem_dest)
{

	#ifdef HAVE_VERBS_EXP
	if (user_param->use_exp == 1) {
		ctx_set_send_exp_wqes(ctx,user_param,rem_dest);
	}
	else {
	#endif
		ctx_set_send_reg_wqes(ctx,user_param,rem_dest);
	#ifdef HAVE_VERBS_EXP
	}
	#endif
}

#ifdef HAVE_VERBS_EXP
/******************************************************************************
 * Build the experimental-verbs send WR chains: post_list WRs per QP, pointing
 * at the proper local buffer and remote address/rkey.
 ******************************************************************************/
void ctx_set_send_exp_wqes(struct pingpong_context *ctx,
		struct perftest_parameters *user_param,
		struct pingpong_dest *rem_dest)
{
	int i,j;
	int num_of_qps = user_param->num_of_qps;
	int xrc_offset = 0;

	if((user_param->use_xrc || user_param->connection_type == DC) && (user_param->duplex || user_param->tst == LAT)) {
		num_of_qps /= 2;
		xrc_offset = num_of_qps;
	}

	for (i = 0; i < num_of_qps ; i++) {
		memset(&ctx->exp_wr[i*user_param->post_list],0,sizeof(struct ibv_exp_send_wr));
		ctx->sge_list[i*user_param->post_list].addr = (uintptr_t)ctx->buf[i];

		if (user_param->mac_fwd) {
			if (user_param->mr_per_qp) {
				ctx->sge_list[i*user_param->post_list].addr = 
					(uintptr_t)ctx->buf[0] + (num_of_qps + i)*BUFF_SIZE(ctx->size,ctx->cycle_buffer);
			} else {
				ctx->sge_list[i*user_param->post_list].addr = (uintptr_t)ctx->buf[i];
			}
		}

		if (user_param->verb == WRITE || user_param->verb == READ)
			ctx->exp_wr[i*user_param->post_list].wr.rdma.remote_addr = rem_dest[xrc_offset + i].vaddr;

		else if (user_param->verb == ATOMIC)
			ctx->exp_wr[i*user_param->post_list].wr.atomic.remote_addr = rem_dest[xrc_offset + i].vaddr;

		if (user_param->tst == BW || user_param->tst == LAT_BY_BW ) {

			ctx->scnt[i] = 0;
			ctx->ccnt[i] = 0;
			ctx->my_addr[i] = (uintptr_t)ctx->buf[i];
			if (user_param->verb != SEND)
				ctx->rem_addr[i] = rem_dest[xrc_offset + i].vaddr;
		}
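		/* Layout note: exp_wr[] and sge_list[] are arranged as num_of_qps
		 * consecutive groups of post_list entries.  Within a group the WRs
		 * are chained through ->next and posted as a single list; only the
		 * last WR in the chain is marked SIGNALED, so one completion covers
		 * the whole group, and wr_id identifies the owning QP.
		 */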

		for (j = 0; j < user_param->post_list; j++) {

			ctx->sge_list[i*user_param->post_list + j].length =
				(user_param->connection_type == RawEth) ? (user_param->size - HW_CRC_ADDITION) : user_param->size;

			ctx->sge_list[i*user_param->post_list + j].lkey = ctx->mr[i]->lkey;

			if (j > 0) {

				ctx->sge_list[i*user_param->post_list +j].addr = ctx->sge_list[i*user_param->post_list + (j-1)].addr;

				if ((user_param->tst == BW || user_param->tst == LAT_BY_BW) && user_param->size <= (ctx->cycle_buffer / 2))
					increase_loc_addr(&ctx->sge_list[i*user_param->post_list +j],user_param->size,
							j-1,ctx->my_addr[i],0,ctx->cache_line_size,ctx->cycle_buffer);
			}

			ctx->exp_wr[i*user_param->post_list + j].sg_list = &ctx->sge_list[i*user_param->post_list + j];
			ctx->exp_wr[i*user_param->post_list + j].num_sge = MAX_SEND_SGE;
			ctx->exp_wr[i*user_param->post_list + j].wr_id = i;

			if (j == (user_param->post_list - 1)) {
				#ifdef HAVE_ACCL_VERBS
				if (user_param->verb_type == ACCL_INTF)
					ctx->exp_wr[i*user_param->post_list + j].exp_send_flags = IBV_EXP_QP_BURST_SIGNALED;
				else
				#endif
					ctx->exp_wr[i*user_param->post_list + j].exp_send_flags = IBV_EXP_SEND_SIGNALED;

				ctx->exp_wr[i*user_param->post_list + j].next = NULL;
			}

			else {
				ctx->exp_wr[i*user_param->post_list + j].next = &ctx->exp_wr[i*user_param->post_list+j+1];
				ctx->exp_wr[i*user_param->post_list + j].exp_send_flags = 0;
			}

			if (user_param->verb == ATOMIC) {
				ctx->exp_wr[i*user_param->post_list + j].exp_opcode = exp_opcode_atomic_array[user_param->atomicType];
			}
			else {
				ctx->exp_wr[i*user_param->post_list + j].exp_opcode = exp_opcode_verbs_array[user_param->verb];
			}
			if (user_param->verb == WRITE || user_param->verb == READ) {

				ctx->exp_wr[i*user_param->post_list + j].wr.rdma.rkey = rem_dest[xrc_offset + i].rkey;

				if (j > 0) {

					ctx->exp_wr[i*user_param->post_list + j].wr.rdma.remote_addr = ctx->exp_wr[i*user_param->post_list + (j-1)].wr.rdma.remote_addr;

					if ((user_param->tst == BW || user_param->tst == LAT_BY_BW) && user_param->size <= (ctx->cycle_buffer / 2))
						increase_exp_rem_addr(&ctx->exp_wr[i*user_param->post_list + j],user_param->size,
								j-1,ctx->rem_addr[i],WRITE,ctx->cache_line_size,ctx->cycle_buffer);
				}

			} else if (user_param->verb == ATOMIC) {

				ctx->exp_wr[i*user_param->post_list + j].wr.atomic.rkey = rem_dest[xrc_offset + i].rkey;

				if (j > 0) {

					ctx->exp_wr[i*user_param->post_list + j].wr.atomic.remote_addr = ctx->exp_wr[i*user_param->post_list + j-1].wr.atomic.remote_addr;
					if (user_param->tst == BW || user_param->tst == LAT_BY_BW)
						increase_exp_rem_addr(&ctx->exp_wr[i*user_param->post_list + j],user_param->size,
								j-1,ctx->rem_addr[i],ATOMIC,ctx->cache_line_size,ctx->cycle_buffer);
				}

				if (user_param->atomicType == FETCH_AND_ADD)
					ctx->exp_wr[i*user_param->post_list + j].wr.atomic.compare_add = ATOMIC_ADD_VALUE;

				else
					ctx->exp_wr[i*user_param->post_list + j].wr.atomic.swap = ATOMIC_SWAP_VALUE;


			} else if (user_param->verb == SEND) {

				if (user_param->connection_type == UD) {

					ctx->exp_wr[i*user_param->post_list + j].wr.ud.ah = ctx->ah[i];
					ctx->exp_wr[i*user_param->post_list + j].wr.ud.remote_qkey = DEF_QKEY;
					ctx->exp_wr[i*user_param->post_list + j].wr.ud.remote_qpn = rem_dest[xrc_offset + i].qpn;

				#ifdef HAVE_DC
				} else if (user_param->connection_type == DC) {
					ctx->exp_wr[i*user_param->post_list + j].dc.ah = ctx->ah[i];
					ctx->exp_wr[i*user_param->post_list + j].dc.dct_access_key = user_param->dct_key;
					ctx->exp_wr[i*user_param->post_list + j].dc.dct_number = rem_dest[xrc_offset + i].qpn;
				#endif
				}
			}

			#ifdef HAVE_DC
			if (user_param->connection_type == DC) {
				ctx->exp_wr[i*user_param->post_list + j].dc.ah = ctx->ah[i];
				ctx->exp_wr[i*user_param->post_list + j].dc.dct_access_key = user_param->dct_key;
				ctx->exp_wr[i*user_param->post_list + j].dc.dct_number = rem_dest[xrc_offset + i].qpn;
			}
			#endif


			if ((user_param->verb == SEND || user_param->verb == WRITE) && user_param->size <= user_param->inline_size)
				ctx->exp_wr[i*user_param->post_list + j].exp_send_flags |= IBV_EXP_SEND_INLINE;

			#ifdef HAVE_XRCD
			if (user_param->use_xrc)
				ctx->exp_wr[i*user_param->post_list + j].qp_type.xrc.remote_srqn = rem_dest[xrc_offset + i].srqn;
			#endif
		}
	}
}
#endif

/******************************************************************************
 * Build the regular-verbs send WR chains: post_list WRs per QP, pointing at
 * the proper local buffer and remote address/rkey.
 ******************************************************************************/
void ctx_set_send_reg_wqes(struct pingpong_context *ctx,
		struct perftest_parameters *user_param,
		struct pingpong_dest *rem_dest)
{
	int i,j;
	int num_of_qps = user_param->num_of_qps;
	int xrc_offset = 0;

	if((user_param->use_xrc || user_param->connection_type == DC) && (user_param->duplex || user_param->tst == LAT)) {
		num_of_qps /= 2;
		xrc_offset = num_of_qps;
	}

	for (i = 0; i < num_of_qps ; i++) {
		memset(&ctx->wr[i*user_param->post_list],0,sizeof(struct ibv_send_wr));
		ctx->sge_list[i*user_param->post_list].addr = (uintptr_t)ctx->buf[i];

		if (user_param->mac_fwd) {
			if (user_param->mr_per_qp) {
				ctx->sge_list[i*user_param->post_list].addr =
					(uintptr_t)ctx->buf[0] + (num_of_qps + i)*BUFF_SIZE(ctx->size,ctx->cycle_buffer);
			} else {
				ctx->sge_list[i*user_param->post_list].addr = (uintptr_t)ctx->buf[i];
			}
		}

		if (user_param->verb == WRITE || user_param->verb == READ)
			ctx->wr[i*user_param->post_list].wr.rdma.remote_addr = rem_dest[xrc_offset + i].vaddr;

		else if (user_param->verb == ATOMIC)
			ctx->wr[i*user_param->post_list].wr.atomic.remote_addr = rem_dest[xrc_offset + i].vaddr;

		if (user_param->tst == BW || user_param->tst == LAT_BY_BW) {

			ctx->scnt[i] = 0;
			ctx->ccnt[i] = 0;
			ctx->my_addr[i] = (uintptr_t)ctx->buf[i];
			if (user_param->verb != SEND)
				ctx->rem_addr[i] = rem_dest[xrc_offset + i].vaddr;
		}

		for (j = 0; j < user_param->post_list; j++) {

			ctx->sge_list[i*user_param->post_list + j].length =
				(user_param->connection_type == RawEth) ? (user_param->size - HW_CRC_ADDITION) : user_param->size;

			ctx->sge_list[i*user_param->post_list + j].lkey = ctx->mr[i]->lkey;

			if (j > 0) {

				ctx->sge_list[i*user_param->post_list +j].addr = ctx->sge_list[i*user_param->post_list + (j-1)].addr;

				if ((user_param->tst == BW || user_param->tst == LAT_BY_BW) && user_param->size <= (ctx->cycle_buffer / 2))
					increase_loc_addr(&ctx->sge_list[i*user_param->post_list +j],user_param->size,
							j-1,ctx->my_addr[i],0,ctx->cache_line_size,ctx->cycle_buffer);
			}

			ctx->wr[i*user_param->post_list + j].sg_list = &ctx->sge_list[i*user_param->post_list + j];
			ctx->wr[i*user_param->post_list + j].num_sge = MAX_SEND_SGE;
			ctx->wr[i*user_param->post_list + j].wr_id = i;

			if (j == (user_param->post_list - 1)) {
				ctx->wr[i*user_param->post_list + j].send_flags = IBV_SEND_SIGNALED;
				ctx->wr[i*user_param->post_list + j].next = NULL;
			}

			else {
				ctx->wr[i*user_param->post_list + j].next = &ctx->wr[i*user_param->post_list+j+1];
				ctx->wr[i*user_param->post_list + j].send_flags = 0;
			}

			if (user_param->verb == ATOMIC) {
				ctx->wr[i*user_param->post_list + j].opcode = opcode_atomic_array[user_param->atomicType];
			}
			else {
				ctx->wr[i*user_param->post_list + j].opcode = opcode_verbs_array[user_param->verb];
			}
			if (user_param->verb == WRITE || user_param->verb == READ) {

				ctx->wr[i*user_param->post_list + j].wr.rdma.rkey = rem_dest[xrc_offset + i].rkey;

				if (j > 0) {

					ctx->wr[i*user_param->post_list + j].wr.rdma.remote_addr =
						ctx->wr[i*user_param->post_list + (j-1)].wr.rdma.remote_addr;

					if ((user_param->tst == BW || user_param->tst == LAT_BY_BW ) && user_param->size <= (ctx->cycle_buffer / 2))
						increase_rem_addr(&ctx->wr[i*user_param->post_list + j],user_param->size,
								j-1,ctx->rem_addr[i],WRITE,ctx->cache_line_size,ctx->cycle_buffer);
				}

			} else if (user_param->verb == ATOMIC) {

				ctx->wr[i*user_param->post_list + j].wr.atomic.rkey = rem_dest[xrc_offset + i].rkey;

				if (j > 0) {

					ctx->wr[i*user_param->post_list + j].wr.atomic.remote_addr =
						ctx->wr[i*user_param->post_list + j-1].wr.atomic.remote_addr;
					if (user_param->tst == BW || user_param->tst == LAT_BY_BW)
						increase_rem_addr(&ctx->wr[i*user_param->post_list + j],user_param->size,
								j-1,ctx->rem_addr[i],ATOMIC,ctx->cache_line_size,ctx->cycle_buffer);
				}

				if (user_param->atomicType == FETCH_AND_ADD)
					ctx->wr[i*user_param->post_list + j].wr.atomic.compare_add = ATOMIC_ADD_VALUE;

				else
					ctx->wr[i*user_param->post_list + j].wr.atomic.swap = ATOMIC_SWAP_VALUE;


			} else if (user_param->verb == SEND) {

				if (user_param->connection_type == UD) {

					ctx->wr[i*user_param->post_list + j].wr.ud.ah = ctx->ah[i];
					ctx->wr[i*user_param->post_list + j].wr.ud.remote_qkey = DEF_QKEY;
					ctx->wr[i*user_param->post_list + j].wr.ud.remote_qpn = rem_dest[xrc_offset + i].qpn;
				}
			}

			if ((user_param->verb == SEND || user_param->verb == WRITE) && user_param->size <= user_param->inline_size)
				ctx->wr[i*user_param->post_list + j].send_flags |= IBV_SEND_INLINE;

			#ifdef HAVE_XRCD
			if (user_param->use_xrc)
				ctx->wr[i*user_param->post_list + j].qp_type.xrc.remote_srqn = rem_dest[xrc_offset + i].srqn;
			#endif
		}
	}
}

/******************************************************************************
 * Build the receive SGEs/WRs and pre-post rx_depth receives per QP (or the
 * per-QP share of the SRQ).
 ******************************************************************************/
int ctx_set_recv_wqes(struct pingpong_context *ctx,struct perftest_parameters *user_param)
{
	int i = 0,j,k;
	int num_of_qps = user_param->num_of_qps;
	struct ibv_recv_wr *bad_wr_recv;
	int size_per_qp = user_param->rx_depth;

	if((user_param->use_xrc || user_param->connection_type == DC) &&
				(user_param->duplex || user_param->tst == LAT)) {

		i = user_param->num_of_qps / 2;
		num_of_qps /= 2;
	}

	if (user_param->use_srq)
		size_per_qp /= user_param->num_of_qps;

	if (user_param->use_rss) {
		i = 1;
		num_of_qps = 1;
	}
	for (k = 0; i < user_param->num_of_qps; i++,k++) {
		if (!user_param->mr_per_qp) {
			ctx->recv_sge_list[i].addr = (uintptr_t)ctx->buf[0] +
				(num_of_qps + k) * ctx->send_qp_buff_size;
		} else {
			ctx->recv_sge_list[i].addr = (uintptr_t)ctx->buf[i];
		}

		if (user_param->connection_type == UD)
			ctx->recv_sge_list[i].addr += (ctx->cache_line_size - UD_ADDITION);
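		/* Note on the offset above (assuming UD_ADDITION is the 40-byte GRH
		 * that precedes every received UD payload): shifting the posted
		 * address by (cache_line_size - UD_ADDITION) lets the payload itself
		 * land on a cache-line boundary, e.g. 24 + 40 = 64 for 64-byte lines.
		 */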

		ctx->recv_sge_list[i].length = SIZE(user_param->connection_type,user_param->size,1);
		ctx->recv_sge_list[i].lkey = ctx->mr[i]->lkey;

		ctx->rwr[i].sg_list = &ctx->recv_sge_list[i];
		ctx->rwr[i].wr_id = i;
		ctx->rwr[i].next = NULL;
		ctx->rwr[i].num_sge = MAX_RECV_SGE;

		ctx->rx_buffer_addr[i] = ctx->recv_sge_list[i].addr;

		for (j = 0; j < size_per_qp ; ++j) {

			if (user_param->use_srq) {

				if (ibv_post_srq_recv(ctx->srq,&ctx->rwr[i], &bad_wr_recv)) {
					fprintf(stderr, "Couldn't post recv SRQ = %d: counter=%d\n",i,j);
					return 1;
				}

			} else {

				if (ibv_post_recv(ctx->qp[i],&ctx->rwr[i],&bad_wr_recv)) {
					fprintf(stderr, "Couldn't post recv Qp = %d: counter=%d\n",i,j);
					return 1;
				}
			}

			if ((user_param->tst == BW || user_param->tst == LAT_BY_BW) && user_param->size <= (ctx->cycle_buffer / 2)) {

				increase_loc_addr(&ctx->recv_sge_list[i],
						user_param->size,
						j,
						ctx->rx_buffer_addr[i],
						user_param->connection_type,ctx->cache_line_size,ctx->cycle_buffer);
			}
		}
		ctx->recv_sge_list[i].addr = ctx->rx_buffer_addr[i];
	}
	return 0;
}
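
/******************************************************************************
 * Receive-credit flow control (used when ctx->send_rcredit is set).
 *
 * A sketch of the mechanism as implemented below: each side registers a
 * 2*num_of_qps array of uint32 counters with IBV_ACCESS_REMOTE_WRITE.  The
 * upper half (credit_buf) is advertised to the peer through my_dest[i].vaddr
 * and rkey, so the receiver can RDMA-write its per-QP receive count straight
 * into the sender's credit slot; ctx_set_credit_wqes builds one signaled
 * 4-byte RDMA WRITE per QP for this purpose.  The server posts such a write
 * roughly every ctx->credit_cnt (rx_depth/3) receives, and the sender in
 * run_iter_bw stops posting once scnt + post_list - credit_buf[index] would
 * reach the receiver's rx_depth.
 ******************************************************************************/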

int ctx_alloc_credit(struct pingpong_context *ctx,
		struct perftest_parameters *user_param,
		struct pingpong_dest *my_dest)
{
	int buf_size = 2*user_param->num_of_qps*sizeof(uint32_t);
	int flags = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE;
	int i;

	ALLOCATE(ctx->ctrl_buf,uint32_t,2*user_param->num_of_qps);	/* local slots plus peer-written credit slots */
	memset(&ctx->ctrl_buf[0],0,buf_size);

	ctx->credit_buf = (uint32_t *)ctx->ctrl_buf + user_param->num_of_qps;
	ctx->credit_cnt = user_param->rx_depth/3;

	ctx->credit_mr = ibv_reg_mr(ctx->pd,ctx->ctrl_buf,buf_size,flags);
	if (!ctx->credit_mr) {
		fprintf(stderr, "Couldn't allocate MR\n");
		return FAILURE;
	}
	for (i = 0; i < user_param->num_of_qps; i++) {
		my_dest[i].rkey = ctx->credit_mr->rkey;
		my_dest[i].vaddr = (uintptr_t)ctx->credit_buf + i*sizeof(uint32_t);
	}
	return 0;
}

/* Should be called after the remote keys have been exchanged */
int ctx_set_credit_wqes(struct pingpong_context *ctx,
		struct perftest_parameters *user_param,
		struct pingpong_dest *rem_dest)
{
	int i;
	ALLOCATE(ctx->ctrl_wr,struct ibv_send_wr,user_param->num_of_qps);
	ALLOCATE(ctx->ctrl_sge_list,struct ibv_sge,user_param->num_of_qps);

	for (i = 0; i < user_param->num_of_qps; i++) {
		memset(&ctx->ctrl_wr[i],0,sizeof(struct ibv_send_wr));

		ctx->ctrl_sge_list[i].addr = (uintptr_t)ctx->ctrl_buf + (i*sizeof(uint32_t));
		ctx->ctrl_sge_list[i].length = sizeof(uint32_t);
		ctx->ctrl_sge_list[i].lkey = ctx->credit_mr->lkey;

		ctx->ctrl_wr[i].opcode = IBV_WR_RDMA_WRITE;
		ctx->ctrl_wr[i].sg_list = &ctx->ctrl_sge_list[i];
		ctx->ctrl_wr[i].num_sge = 1;
		ctx->ctrl_wr[i].wr_id = i;
		ctx->ctrl_wr[i].send_flags = IBV_SEND_SIGNALED;
		ctx->ctrl_wr[i].next = NULL;

		ctx->ctrl_wr[i].wr.rdma.remote_addr = rem_dest[i].vaddr;
		ctx->ctrl_wr[i].wr.rdma.rkey = rem_dest[i].rkey;
	}
	return 0;
}

static int clean_scq_credit(int send_cnt,struct pingpong_context *ctx,struct perftest_parameters *user_param)
{
	int i= 0, sne = 0;
	struct ibv_wc *swc = NULL;
	int return_value = 0;
	if (!send_cnt)
		return 0;

	ALLOCATE(swc,struct ibv_wc,user_param->tx_depth);
	do {
		sne = ibv_poll_cq(ctx->send_cq,user_param->tx_depth,swc);
		if (sne > 0) {
			for (i = 0; i < sne; i++) {
				if (swc[i].status != IBV_WC_SUCCESS) {
					fprintf(stderr, "Poll send CQ error status=%u qp %d\n",
							swc[i].status,(int)swc[i].wr_id);
					return_value = FAILURE;
					goto cleaning;
				}
				send_cnt--;
			}

		} else if (sne < 0) {
			fprintf(stderr, "Poll send CQ to clean credit failed ne=%d\n",sne);
			return_value = FAILURE;
			goto cleaning;
		}
	} while(send_cnt > 0);

cleaning:
	free(swc);
	return return_value;
}

/******************************************************************************
 * Warm up the data path: post a send queue's worth of WRs on every QP and
 * wait for all of their completions before measuring.
 ******************************************************************************/
int perform_warm_up(struct pingpong_context *ctx,struct perftest_parameters *user_param)
{
	int ne,index,warmindex,warmupsession;
	int err = 0;
	#ifdef HAVE_VERBS_EXP
	struct ibv_exp_send_wr *bad_exp_wr = NULL;
	#endif
	struct ibv_send_wr *bad_wr = NULL;
	struct ibv_wc wc;
	struct ibv_wc *wc_for_cleaning = NULL;
	int num_of_qps = user_param->num_of_qps;
	int return_value = 0;

	if(user_param->duplex && (user_param->use_xrc || user_param->connection_type == DC))
		num_of_qps /= 2;

	warmupsession = (user_param->post_list == 1) ? user_param->tx_depth : user_param->post_list;
	ALLOCATE(wc_for_cleaning,struct ibv_wc,user_param->tx_depth);

	/* Drain any stale completions left on the send CQ */
	ne = ibv_poll_cq(ctx->send_cq,user_param->tx_depth,wc_for_cleaning);

	for (index=0 ; index < num_of_qps ; index++) {
		for (warmindex = 0 ;warmindex < warmupsession ;warmindex += user_param->post_list) {
			#ifdef HAVE_VERBS_EXP
			if (user_param->use_exp == 1)
				err = (ctx->exp_post_send_func_pointer)(ctx->qp[index],
					&ctx->exp_wr[index*user_param->post_list], &bad_exp_wr);
			else
				err = (ctx->post_send_func_pointer)(ctx->qp[index],&ctx->wr[index*user_param->post_list],&bad_wr);
			#else
			err = ibv_post_send(ctx->qp[index],&ctx->wr[index*user_param->post_list],&bad_wr);
			#endif

			if (err) {
				fprintf(stderr,"Couldn't post send during warm up: qp %d scnt=%d \n",index,warmindex);
				return_value = FAILURE;
				goto cleaning;
			}
		}

		do {
			ne = ibv_poll_cq(ctx->send_cq,1,&wc);
			if (ne > 0) {
				if (wc.status != IBV_WC_SUCCESS) {
					return_value = FAILURE;
					goto cleaning;
				}
				warmindex -= user_param->post_list;
			} else if (ne < 0) {
				return_value = FAILURE;
				goto cleaning;
			}
		} while (warmindex);
	}

cleaning:
	free(wc_for_cleaning);
	return return_value;
}

/******************************************************************************
 * Client/bidirectional bandwidth loop: keep every send queue as full as
 * allowed (tx_depth, credits, rate limit) and collect completions until all
 * iterations are done.
 ******************************************************************************/
int run_iter_bw(struct pingpong_context *ctx,struct perftest_parameters *user_param)
{
	uint64_t totscnt = 0;
	uint64_t totccnt = 0;
	int i = 0;
	int index,ne;
	uint64_t tot_iters;
	int err = 0;
	#ifdef HAVE_VERBS_EXP
	struct ibv_exp_send_wr *bad_exp_wr = NULL;
	#ifdef HAVE_ACCL_VERBS
	int pl_index;
	struct ibv_sge *sg_l;
	#endif
	#endif
	struct ibv_send_wr *bad_wr = NULL;
	struct ibv_wc *wc = NULL;
	int num_of_qps = user_param->num_of_qps;
	/* Rate Limiter*/
	int rate_limit_pps = 0;
	double gap_time = 0; /* in usec */
	cycles_t gap_cycles = 0; /* in cycles */
	cycles_t gap_deadline = 0;
	unsigned int number_of_bursts = 0;
	int burst_iter = 0;
	int is_sending_burst = 0;
	int cpu_mhz = 0;
	int return_value = 0;
	int wc_id;
	int send_flows_index = 0;
	uintptr_t primary_send_addr = ctx->sge_list[0].addr;
	int address_offset = 0;
	int flows_burst_iter = 0;

	ALLOCATE(wc ,struct ibv_wc ,CTX_POLL_BATCH);

	/* if (user_param->test_type == DURATION) { */
		/* duration_param=user_param; */
		/* duration_param->state = START_STATE; */
		/* signal(SIGALRM, catch_alarm); */
		/* if (user_param->margin > 0 ) */
			/* alarm(user_param->margin); */
		/* else */
			/* catch_alarm(0); [> move to next state <] */

		/* user_param->iters = 0; */
	/* } */

	if (user_param->duplex && (user_param->use_xrc || user_param->connection_type == DC))
		num_of_qps /= 2;

	/* tot_iters will be 0 in a DURATION run, since user_param->iters starts at 0 there (see force_dependencies and the disabled block above). */
	tot_iters = (uint64_t)user_param->iters*num_of_qps;

	if (user_param->test_type == DURATION && user_param->state != START_STATE && user_param->margin > 0) {
		fprintf(stderr, "Failed: margin is not long enough (taking samples before warmup ends)\n");
		fprintf(stderr, "Please increase margin or decrease tx_depth\n");
		return_value = FAILURE;
		goto cleaning;
	}

	if (user_param->test_type == ITERATIONS && user_param->noPeak == ON)
		user_param->tposted[0] = get_cycles();

	/* If using rate limiter, calculate gap time between bursts */
	if (user_param->rate_limit_type == SW_RATE_LIMIT ) {
		/* Calculate rate limit in pps */
		switch (user_param->rate_units) {
			case MEGA_BYTE_PS:
				rate_limit_pps = ((double)(user_param->rate_limit) / user_param->size) * 1048576;
				break;
			case GIGA_BIT_PS:
				rate_limit_pps = ((double)(user_param->rate_limit) / (user_param->size * 8)) * 1000000000;
				break;
			case PACKET_PS:
				rate_limit_pps = user_param->rate_limit;
				break;
			default:
				fprintf(stderr, " Failed: Unknown rate limit units\n");
				return_value = FAILURE;
				goto cleaning;
		}
		cpu_mhz = get_cpu_mhz(user_param->cpu_freq_f);
		if (cpu_mhz <= 0) {
			fprintf(stderr, "Failed: couldn't acquire cpu frequency for rate limiter.\n");
		}
		number_of_bursts = rate_limit_pps / user_param->burst_size;
		gap_time = 1000000 * (1.0 / number_of_bursts);
		gap_cycles = cpu_mhz * gap_time;
	}
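	/* Illustrative example (numbers are not from the code): at rate_limit =
	 * 100 MB/s with 4096-byte messages, rate_limit_pps = (100/4096)*1048576 =
	 * 25600 pps; with burst_size = 100 that is 256 bursts/sec, so gap_time =
	 * 1000000/256 ~= 3906 usec and gap_cycles = cpu_mhz * gap_time.
	 */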

	/* main loop for posting */
	while (totscnt < tot_iters || totccnt < tot_iters ||
		(user_param->test_type == DURATION && user_param->state != END_STATE) ) {

		/* Run over all the QPs and post a batch of messages on each, keeping at most tx_depth outstanding */
		for (index =0 ; index < num_of_qps ; index++) {
			if (user_param->rate_limit_type == SW_RATE_LIMIT && is_sending_burst == 0) {
				if (gap_deadline > get_cycles()) {
					/* Go right to cq polling until gap time is over. */
					continue;
				}
				gap_deadline = get_cycles() + gap_cycles;
				is_sending_burst = 1;
				burst_iter = 0;
			}

			while ((ctx->scnt[index] < user_param->iters || user_param->test_type == DURATION) && (ctx->scnt[index] - ctx->ccnt[index]) < (user_param->tx_depth) &&
					!((user_param->rate_limit_type == SW_RATE_LIMIT ) && is_sending_burst == 0)) {

				if (ctx->send_rcredit) {
					uint32_t swindow = ctx->scnt[index] + user_param->post_list - ctx->credit_buf[index];
					if (swindow >= user_param->rx_depth)
						break;
				}
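				/* credit_buf[index] is RDMA-written by the receiver with its
				 * current receive count (see ctx_alloc_credit), so the check
				 * above pauses posting once the sends that would be in flight
				 * reach the receiver's rx_depth.
				 */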
				if (user_param->post_list == 1 && (ctx->scnt[index] % user_param->cq_mod == 0 && user_param->cq_mod > 1)
					&& !(ctx->scnt[index] == (user_param->iters - 1) && user_param->test_type == ITERATIONS)) {

					#ifdef HAVE_VERBS_EXP
					#ifdef HAVE_ACCL_VERBS
					if (user_param->verb_type == ACCL_INTF)
						ctx->exp_wr[index].exp_send_flags &= ~IBV_EXP_QP_BURST_SIGNALED;
					else {
					#endif
						if (user_param->use_exp == 1)
							ctx->exp_wr[index].exp_send_flags &= ~IBV_EXP_SEND_SIGNALED;
						else
					#endif
							ctx->wr[index].send_flags &= ~IBV_SEND_SIGNALED;
					#ifdef HAVE_ACCL_VERBS
					}
					#endif
				}
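				/* Selective signaling: with post_list == 1 and cq_mod > 1 only
				 * every cq_mod-th send carries IBV_SEND_SIGNALED (cleared here,
				 * set again further down), so a single CQE accounts for cq_mod
				 * sends and ccnt/totccnt advance by cq_mod per completion.
				 */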

				if (user_param->noPeak == OFF)
					user_param->tposted[totscnt] = get_cycles();

				if (user_param->test_type == DURATION && user_param->state == END_STATE)
					break;

				#ifdef HAVE_VERBS_EXP
				#ifdef HAVE_ACCL_VERBS
				if (user_param->verb_type == ACCL_INTF) {
					for (pl_index = 0; pl_index < user_param->post_list; pl_index++) {
						sg_l = ctx->exp_wr[index*user_param->post_list + pl_index].sg_list;
						ctx->qp_burst_family[index]->send_pending(ctx->qp[index], sg_l->addr, sg_l->length, sg_l->lkey,
											ctx->exp_wr[index*user_param->post_list + pl_index].exp_send_flags);
					}
					ctx->qp_burst_family[index]->send_flush(ctx->qp[index]);
				} else {
				#endif
					if (user_param->use_exp == 1) {
						err = (ctx->exp_post_send_func_pointer)(ctx->qp[index],
						&ctx->exp_wr[index*user_param->post_list],&bad_exp_wr);
					}
					else {
						err = (ctx->post_send_func_pointer)(ctx->qp[index],
							&ctx->wr[index*user_param->post_list],&bad_wr);
					}
				#ifdef HAVE_ACCL_VERBS
				}
				#endif
				#else
				err = ibv_post_send(ctx->qp[index],&ctx->wr[index*user_param->post_list],&bad_wr);
				#endif
				if (err) {
					fprintf(stderr,"Couldn't post send: qp %d scnt=%lu \n",index,ctx->scnt[index]);
					return_value = FAILURE;
					goto cleaning;
				}
				/* If more than a single flow is used and this post completes a flow burst, switch to the next flow's buffer */
				if (user_param->flows != DEF_FLOWS) {
					if (++flows_burst_iter == user_param->flows_burst) {
						flows_burst_iter = 0;
						/* inc the send_flows_index and update the address */
						if (++send_flows_index == user_param->flows)
							send_flows_index = 0;
						address_offset = send_flows_index * ctx->flow_buff_size;
						ctx->sge_list[0].addr = primary_send_addr + address_offset;
					}
				}

				/* Cycle through the buffer: advance the local (and, for RDMA/atomic verbs, the remote) address to the next chunk of the cycle buffer */
				if (user_param->post_list == 1 && user_param->size <= (ctx->cycle_buffer / 2)) {
					#ifdef HAVE_VERBS_EXP
					if (user_param->use_exp == 1)
						increase_loc_addr(ctx->exp_wr[index].sg_list,user_param->size,
								ctx->scnt[index], ctx->my_addr[index] + address_offset, 0,
									ctx->cache_line_size, ctx->cycle_buffer);
					else
					#endif
						increase_loc_addr(ctx->wr[index].sg_list,user_param->size, ctx->scnt[index],
								ctx->my_addr[index] + address_offset , 0, ctx->cache_line_size,
								ctx->cycle_buffer);

					if (user_param->verb != SEND) {
						#ifdef HAVE_VERBS_EXP
						if (user_param->use_exp == 1)
							increase_exp_rem_addr(&ctx->exp_wr[index], user_param->size,
									ctx->scnt[index], ctx->rem_addr[index], user_param->verb,
									ctx->cache_line_size, ctx->cycle_buffer);
						else
						#endif
							increase_rem_addr(&ctx->wr[index], user_param->size,
									ctx->scnt[index], ctx->rem_addr[index], user_param->verb,
									ctx->cache_line_size, ctx->cycle_buffer);
					}
				}

				ctx->scnt[index] += user_param->post_list;
				totscnt += user_param->post_list;
				/* ask for completion on this wr */
				if (user_param->post_list == 1 &&
						(ctx->scnt[index]%user_param->cq_mod == user_param->cq_mod - 1 ||
							(user_param->test_type == ITERATIONS && ctx->scnt[index] == user_param->iters - 1))) {
					#ifdef HAVE_VERBS_EXP
					#ifdef HAVE_ACCL_VERBS
					if (user_param->verb_type == ACCL_INTF)
						ctx->exp_wr[index].exp_send_flags |= IBV_EXP_QP_BURST_SIGNALED;
					else {
					#endif
						if (user_param->use_exp == 1)
							ctx->exp_wr[index].exp_send_flags |= IBV_EXP_SEND_SIGNALED;
						else
					#endif
							ctx->wr[index].send_flags |= IBV_SEND_SIGNALED;
					#ifdef HAVE_ACCL_VERBS
					}
					#endif
				}

				/* Check if a full burst was sent. */
				if (user_param->rate_limit_type == SW_RATE_LIMIT) {
					burst_iter += user_param->post_list;
					if (burst_iter >= user_param->burst_size) {
						is_sending_burst = 0;
					}
				}
			}
		}
		if (totccnt < tot_iters || (user_param->test_type == DURATION && totccnt < totscnt)) {
				if (user_param->use_event) {
					if (ctx_notify_events(ctx->channel)) {
						fprintf(stderr, "Couldn't request CQ notification\n");
						return_value = FAILURE;
						goto cleaning;
					}
				}

				#ifdef HAVE_ACCL_VERBS
				if (user_param->verb_type == ACCL_INTF)
					ne = ctx->send_cq_family->poll_cnt(ctx->send_cq, CTX_POLL_BATCH);
				else
				#endif
					ne = ibv_poll_cq(ctx->send_cq,CTX_POLL_BATCH,wc);

				if (ne > 0) {
					for (i = 0; i < ne; i++) {
						wc_id = (user_param->verb_type == ACCL_INTF) ?
							0 : (int)wc[i].wr_id;

						if (user_param->verb_type != ACCL_INTF) {
							if (wc[i].status != IBV_WC_SUCCESS) {
								NOTIFY_COMP_ERROR_SEND(wc[i],totscnt,totccnt);
								return_value = FAILURE;
								goto cleaning;
							}
						}

						ctx->ccnt[wc_id] += user_param->cq_mod;
						totccnt += user_param->cq_mod;
						if (user_param->noPeak == OFF) {

							if (totccnt >= tot_iters - 1)
								user_param->tcompleted[user_param->iters*num_of_qps - 1] = get_cycles();
							else
								user_param->tcompleted[totccnt-1] = get_cycles();
						}

						if (user_param->test_type==DURATION && user_param->state == SAMPLE_STATE) {
							if (user_param->report_per_port) {
								user_param->iters_per_port[user_param->port_by_qp[wc_id]] += user_param->cq_mod;
							}
							user_param->iters += user_param->cq_mod;
						}
					}

				} else if (ne < 0) {
					fprintf(stderr, "poll CQ failed %d\n",ne);
					return_value = FAILURE;
					goto cleaning;
					}
		}
	}
	if (user_param->noPeak == ON && user_param->test_type == ITERATIONS)
		user_param->tcompleted[0] = get_cycles();

cleaning:

	free(wc);
	return return_value;
}

/******************************************************************************
 * Record the test start time stamp when the first packet arrives (BW tests).
 ******************************************************************************/
static inline void set_on_first_rx_packet(struct perftest_parameters *user_param)
{
	/* if (user_param->test_type == DURATION) { */

		/* duration_param=user_param; */
		/* user_param->iters=0; */
		/* duration_param->state = START_STATE; */
		/* signal(SIGALRM, catch_alarm); */
		/* if (user_param->margin > 0) */
			/* alarm(user_param->margin); */
		/* else */
			/* catch_alarm(0); */

	/* } else if (user_param->tst == BW) { */

	if (user_param->tst == BW) {
		user_param->tposted[0] = get_cycles();
	}
}

/******************************************************************************
 * Server-side bandwidth loop: poll the receive CQ, re-post receives and
 * return receive credits when flow control is enabled.
 ******************************************************************************/
int run_iter_bw_server(struct pingpong_context *ctx, struct perftest_parameters *user_param)
{
	uint64_t rcnt = 0;
	int ne = 0;
	int i;
	uint64_t tot_iters;
	uint64_t *rcnt_for_qp = NULL;
	struct ibv_wc *wc = NULL;
	struct ibv_recv_wr *bad_wr_recv = NULL;
	struct ibv_wc *swc = NULL;
	long *scredit_for_qp = NULL;
	int tot_scredit = 0;
	int firstRx = 1;
	int size_per_qp = (user_param->use_srq) ?
					user_param->rx_depth/user_param->num_of_qps : user_param->rx_depth;
	int return_value = 0;
	int wc_id;
	int recv_flows_index = 0;
	uintptr_t primary_recv_addr = ctx->recv_sge_list[0].addr;
	int recv_flows_burst = 0;
	int address_flows_offset =0;

	ALLOCATE(wc ,struct ibv_wc ,CTX_POLL_BATCH);
	ALLOCATE(swc ,struct ibv_wc ,user_param->tx_depth);

	ALLOCATE(rcnt_for_qp,uint64_t,user_param->num_of_qps);
	memset(rcnt_for_qp,0,sizeof(uint64_t)*user_param->num_of_qps);

	ALLOCATE(scredit_for_qp,long,user_param->num_of_qps);
	memset(scredit_for_qp,0,sizeof(long)*user_param->num_of_qps);

	if (user_param->use_rss)
		tot_iters = (uint64_t)user_param->iters*(user_param->num_of_qps-1);
	else
		tot_iters = (uint64_t)user_param->iters*user_param->num_of_qps;
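	/* With RSS, QP 0 posts no receives (ctx_set_recv_wqes starts from QP 1;
	 * presumably QP 0 acts as the RSS parent), so it is excluded from the
	 * expected iteration count.
	 */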

	if (user_param->test_type == ITERATIONS) {
		check_alive_data.is_events = user_param->use_event;
		signal(SIGALRM, check_alive);
		alarm(60); // TODO
	}

	check_alive_data.g_total_iters = tot_iters;

	while (rcnt < tot_iters || (user_param->test_type == DURATION && user_param->state != END_STATE)) {

		if (user_param->use_event) {
			if (ctx_notify_events(ctx->channel)) {
				fprintf(stderr ," Failed to notify events to CQ");
				return_value = FAILURE;
				goto cleaning;
			}
		}

		do {
			if (user_param->test_type == DURATION && user_param->state == END_STATE)
				break;

			#ifdef HAVE_ACCL_VERBS
			if (user_param->verb_type == ACCL_INTF)
				ne = ctx->recv_cq_family->poll_cnt(ctx->recv_cq, CTX_POLL_BATCH);
			else {
			#endif
				if (user_param->connection_type == DC)
					ne = ibv_poll_cq(ctx->send_cq,CTX_POLL_BATCH,wc);
				else
					ne = ibv_poll_cq(ctx->recv_cq,CTX_POLL_BATCH,wc);
			#ifdef HAVE_ACCL_VERBS
			}
			#endif

			if (ne > 0) {
				if (firstRx) {
					set_on_first_rx_packet(user_param);
					firstRx = 0;
				}

				for (i = 0; i < ne; i++) {
					wc_id = (user_param->verb_type == ACCL_INTF) ?
						0 : (int)wc[i].wr_id;

					if (user_param->verb_type != ACCL_INTF) {
						if (wc[i].status != IBV_WC_SUCCESS) {

							NOTIFY_COMP_ERROR_RECV(wc[i],rcnt_for_qp[wc_id]);
							return_value = FAILURE;
							goto cleaning;
						}
					}
					rcnt_for_qp[wc_id]++;
					rcnt++;
					check_alive_data.current_totrcnt = rcnt;

					if (user_param->test_type==DURATION && user_param->state == SAMPLE_STATE) {
						if (user_param->report_per_port) {
							user_param->iters_per_port[user_param->port_by_qp[wc_id]]++;
						}
						user_param->iters++;
					}
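					/* Re-post a receive only while more messages are still
					 * expected on this QP: the size_per_qp WQEs posted up
					 * front already cover the tail of an ITERATIONS run, while
					 * in DURATION mode we always re-post.
					 */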
					if (user_param->test_type==DURATION || rcnt_for_qp[wc_id] + size_per_qp <= user_param->iters) {
						#ifdef HAVE_ACCL_VERBS
						if (user_param->verb_type == ACCL_INTF) {
							if (ctx->qp_burst_family[wc_id]->recv_burst(ctx->qp[wc_id], ctx->rwr[wc_id].sg_list, 1)) {
								fprintf(stderr, "Couldn't post recv burst (accelerated verbs).\n");
								return_value = FAILURE;
								goto cleaning;
							}
						} else {
						#endif
							if (user_param->use_srq) {
								if (ibv_post_srq_recv(ctx->srq, &ctx->rwr[wc_id],&bad_wr_recv)) {
									fprintf(stderr, "Couldn't post recv SRQ. QP = %d: counter=%lu\n", wc_id,rcnt);
									return_value = FAILURE;
									goto cleaning;
								}

							} else {
								if (ibv_post_recv(ctx->qp[wc_id],&ctx->rwr[wc_id],&bad_wr_recv)) {
									fprintf(stderr, "Couldn't post recv Qp=%d rcnt=%ld\n",wc_id,rcnt_for_qp[wc_id]);
									return_value = 15;
									goto cleaning;
								}

							}
							if (user_param->flows != DEF_FLOWS) {
								if (++recv_flows_burst == user_param->flows_burst) {
									recv_flows_burst = 0;
									if (++recv_flows_index == user_param->flows)
										recv_flows_index = 0;
									address_flows_offset = recv_flows_index * ctx->cycle_buffer;
									ctx->recv_sge_list[0].addr = primary_recv_addr + address_flows_offset;
								}
							}
						#ifdef HAVE_ACCL_VERBS
						}
						#endif
						if (SIZE(user_param->connection_type,user_param->size,!(int)user_param->machine) <= (ctx->cycle_buffer / 2)) {
							increase_loc_addr(ctx->rwr[wc_id].sg_list,
									user_param->size,
									rcnt_for_qp[wc_id] + size_per_qp,
									ctx->rx_buffer_addr[wc_id] + address_flows_offset,
									user_param->connection_type,ctx->cache_line_size,ctx->cycle_buffer);
						}
					}

					if (ctx->send_rcredit) {
						int credit_cnt = rcnt_for_qp[wc_id]%user_param->rx_depth;

						if (credit_cnt%ctx->credit_cnt == 0) {
							struct ibv_send_wr *bad_wr = NULL;
							int sne = 0, j = 0;
							ctx->ctrl_buf[wc_id] = rcnt_for_qp[wc_id];

							while (scredit_for_qp[wc_id] == user_param->tx_depth) {
								sne = ibv_poll_cq(ctx->send_cq,user_param->tx_depth,swc);
								if (sne > 0) {
									for (j = 0; j < sne; j++) {
										if (swc[j].status != IBV_WC_SUCCESS) {
											fprintf(stderr, "Poll send CQ error status=%u qp %d credit=%lu scredit=%lu\n",
													swc[j].status,(int)swc[j].wr_id,
													rcnt_for_qp[swc[j].wr_id],scredit_for_qp[swc[j].wr_id]);
											return_value = FAILURE;
											goto cleaning;
										}
										scredit_for_qp[swc[j].wr_id]--;
										tot_scredit--;
									}
								} else if (sne < 0) {
									fprintf(stderr, "Poll send CQ failed ne=%d\n",sne);
									return_value = FAILURE;
									goto cleaning;
								}
							}
							if (ibv_post_send(ctx->qp[wc_id],&ctx->ctrl_wr[wc_id],&bad_wr)) {
								fprintf(stderr,"Couldn't post send qp %d credit = %lu\n",
										wc_id,rcnt_for_qp[wc_id]);
								return_value = FAILURE;
								goto cleaning;
							}
							scredit_for_qp[wc_id]++;
							tot_scredit++;
						}
					}
				}
			}

		} while (ne > 0);

		if (ne < 0) {
			fprintf(stderr, "Poll Receive CQ failed %d\n", ne);
			return_value = FAILURE;
			goto cleaning;
		}
		else if (ne == 0) {
			if (check_alive_data.to_exit) {
				user_param->check_alive_exited = 1;
				return_value = FAILURE;
				goto cleaning;
			}
		}

	}
	if (user_param->test_type == ITERATIONS)
		user_param->tcompleted[0] = get_cycles();

cleaning:
	if (ctx->send_rcredit) {
		if (clean_scq_credit(tot_scredit, ctx, user_param))
			return_value = FAILURE;
	}

	check_alive_data.last_totrcnt=0;
	free(wc);
	free(rcnt_for_qp);
	free(swc);
	free(scredit_for_qp);

	return return_value;
}
/******************************************************************************
 * Client loop of the "run infinitely" BW test (the implementation below is
 * currently commented out).
 ******************************************************************************/
/* int run_iter_bw_infinitely(struct pingpong_context *ctx,struct perftest_parameters *user_param) */
/* { */
	/* uint64_t totscnt = 0; */
	/* uint64_t totccnt = 0; */
	/* int i = 0; */
	/* int index = 0,ne; */
	/* int err = 0; */
	/* int wc_id; */
	/* #ifdef HAVE_VERBS_EXP */
	/* struct ibv_exp_send_wr *bad_exp_wr = NULL; */
	/* #endif */
	/* uint64_t *scnt_for_qp = NULL; */
	/* struct ibv_send_wr *bad_wr = NULL; */
	/* struct ibv_wc *wc = NULL; */
	/* int num_of_qps = user_param->num_of_qps; */
	/* int return_value = 0; */
	/* int single_thread_handler; */

	/* ALLOCATE(wc ,struct ibv_wc ,CTX_POLL_BATCH); */
	/* ALLOCATE(scnt_for_qp,uint64_t,user_param->num_of_qps); */
	/* memset(scnt_for_qp,0,sizeof(uint64_t)*user_param->num_of_qps); */

	/* duration_param=user_param; */
	/* sigset_t set; */
	/* sigemptyset(&set); */
	/* sigaddset(&set, SIGALRM); */
	/* single_thread_handler = pthread_sigmask(SIG_BLOCK, &set, NULL); */
	/* if (single_thread_handler != 0){ */
		/* printf("error when try to mask alram for signal to thread\n"); */
		/* return FAILURE; */
	/* } */

	/* pthread_t print_thread; */
	/* if (pthread_create(&print_thread, NULL, &handle_signal_print_thread,(void *)&set) != 0){ */
		/* printf("Fail to create thread \n"); */
		/* return FAILURE; */
	/* } */

	/* alarm(user_param->duration); */
	/* user_param->iters = 0; */

	/* [> Will be 0, in case of Duration (look at force_dependencies or in the exp above) <] */
	/* if (user_param->duplex && (user_param->use_xrc || user_param->connection_type == DC)) */
		/* num_of_qps /= 2; */

	/* user_param->tposted[0] = get_cycles(); */

	/* [> main loop for posting <] */
	/* while (1) { */
	/* [> main loop to run over all the qps and post each time n messages <] */
		/* for (index =0 ; index < num_of_qps ; index++) { */

			/* while ((ctx->scnt[index] - ctx->ccnt[index]) < user_param->tx_depth) { */
				/* if (ctx->send_rcredit) { */
					/* uint32_t swindow = scnt_for_qp[index] + user_param->post_list - ctx->credit_buf[index]; */
					/* if (swindow >= user_param->rx_depth) */
						/* break; */
				/* } */

				/* if (user_param->post_list == 1 && (ctx->scnt[index] % user_param->cq_mod == 0 && user_param->cq_mod > 1)) { */

					/* #ifdef HAVE_VERBS_EXP */
					/* #ifdef HAVE_ACCL_VERBS */
					/* if (user_param->verb_type == ACCL_INTF) */
						/* ctx->exp_wr[index].exp_send_flags &= ~IBV_EXP_QP_BURST_SIGNALED; */
					/* else { */
					/* #endif */
						/* if (user_param->use_exp == 1) */
							/* ctx->exp_wr[index].exp_send_flags &= ~IBV_EXP_SEND_SIGNALED; */
						/* else */
					/* #endif */
							/* ctx->wr[index].send_flags &= ~IBV_SEND_SIGNALED; */
					/* #ifdef HAVE_ACCL_VERBS */
					/* } */
					/* #endif */
				/* } */

				/* #ifdef HAVE_VERBS_EXP */
				/* if (user_param->use_exp == 1) */
					/* err = (ctx->exp_post_send_func_pointer)(ctx->qp[index],&ctx->exp_wr[index*user_param->post_list],&bad_exp_wr); */
				/* else */
					/* err = (ctx->post_send_func_pointer)(ctx->qp[index],&ctx->wr[index*user_param->post_list],&bad_wr); */
				/* #else */
				/* err = ibv_post_send(ctx->qp[index],&ctx->wr[index*user_param->post_list],&bad_wr); */
				/* #endif */
				/* if (err) { */
					/* fprintf(stderr,"Couldn't post send: %d scnt=%lu \n",index,ctx->scnt[index]); */
					/* return_value = FAILURE; */
					/* goto cleaning; */
				/* } */
				/* ctx->scnt[index] += user_param->post_list; */
				/* scnt_for_qp[index] += user_param->post_list; */
				/* totscnt += user_param->post_list; */

				/* [> ask for completion on this wr <] */
				/* if (user_param->post_list == 1 && */
						/* (ctx->scnt[index]%user_param->cq_mod == user_param->cq_mod - 1 || */
							/* (user_param->test_type == ITERATIONS && ctx->scnt[index] == user_param->iters - 1))) { */
					/* #ifdef HAVE_VERBS_EXP */
					/* #ifdef HAVE_ACCL_VERBS */
					/* if (user_param->verb_type == ACCL_INTF) */
						/* ctx->exp_wr[index].exp_send_flags |= IBV_EXP_QP_BURST_SIGNALED; */
					/* else { */
					/* #endif */
						/* if (user_param->use_exp == 1) */
							/* ctx->exp_wr[index].exp_send_flags |= IBV_EXP_SEND_SIGNALED; */
						/* else */
					/* #endif */
							/* ctx->wr[index].send_flags |= IBV_SEND_SIGNALED; */
					/* #ifdef HAVE_ACCL_VERBS */
					/* } */
					/* #endif */
				/* } */
			/* } */
		/* } */
		/* if (totccnt < totscnt) { */
			/* ne = ibv_poll_cq(ctx->send_cq,CTX_POLL_BATCH,wc); */

			/* if (ne > 0) { */

				/* for (i = 0; i < ne; i++) { */
					/* if (wc[i].status != IBV_WC_SUCCESS) { */
						/* NOTIFY_COMP_ERROR_SEND(wc[i],ctx->scnt[(int)wc[i].wr_id],ctx->scnt[(int)wc[i].wr_id]); */
						/* return_value = FAILURE; */
						/* goto cleaning; */
					/* } */
					/* wc_id = (user_param->verb_type == ACCL_INTF) ? */
							/* 0 : (int)wc[i].wr_id; */
					/* user_param->iters += user_param->cq_mod; */
					/* totccnt += user_param->cq_mod; */
					/* ctx->ccnt[wc_id] += user_param->cq_mod; */
				/* } */

			/* } else if (ne < 0) { */
				/* fprintf(stderr, "poll CQ failed %d\n",ne); */
				/* return_value = FAILURE; */
				/* goto cleaning; */
			/* } */
		/* } */
	/* } */
/* cleaning: */
	/* free(scnt_for_qp); */
	/* free(wc); */
	/* return return_value; */
/* } */

/******************************************************************************
 * Server loop of the "run infinitely" BW test: poll, re-post receives and
 * return credits indefinitely.
 ******************************************************************************/
int run_iter_bw_infinitely_server(struct pingpong_context *ctx, struct perftest_parameters *user_param)
{
	int i,ne;
	struct ibv_wc *wc = NULL;
	struct ibv_wc *swc = NULL;
	struct ibv_recv_wr *bad_wr_recv = NULL;
	uint64_t *rcnt_for_qp = NULL;
	uint64_t *ccnt_for_qp = NULL;
	int *scredit_for_qp = NULL;
	int return_value = 0;

	ALLOCATE(wc ,struct ibv_wc ,CTX_POLL_BATCH);
	ALLOCATE(swc ,struct ibv_wc ,user_param->tx_depth);

	ALLOCATE(rcnt_for_qp,uint64_t,user_param->num_of_qps);
	memset(rcnt_for_qp,0,sizeof(uint64_t)*user_param->num_of_qps);

	ALLOCATE(ccnt_for_qp,uint64_t,user_param->num_of_qps);
	memset(ccnt_for_qp,0,sizeof(uint64_t)*user_param->num_of_qps);

	ALLOCATE(scredit_for_qp,int,user_param->num_of_qps);
	memset(scredit_for_qp,0,sizeof(int)*user_param->num_of_qps);

	while (1) {

		ne = ibv_poll_cq(ctx->recv_cq,CTX_POLL_BATCH,wc);

		if (ne > 0) {

			for (i = 0; i < ne; i++) {

				if (wc[i].status != IBV_WC_SUCCESS) {
					fprintf(stderr,"A completion with error in run_iter_bw_infinitely_server function\n");
					return_value = FAILURE;
					goto cleaning;
				}

				if (user_param->use_srq) {

					if (ibv_post_srq_recv(ctx->srq, &ctx->rwr[wc[i].wr_id],&bad_wr_recv)) {
						fprintf(stderr, "Couldn't post recv SRQ. QP = %d:\n",(int)wc[i].wr_id);
						return_value = FAILURE;
						goto cleaning;
					}

				} else {

					if (ibv_post_recv(ctx->qp[wc[i].wr_id],&ctx->rwr[wc[i].wr_id],&bad_wr_recv)) {
						fprintf(stderr, "Couldn't post recv Qp=%d\n",(int)wc[i].wr_id);
						return_value = FAILURE;
						goto cleaning;
					}
					if (ctx->send_rcredit) {
						rcnt_for_qp[wc[i].wr_id]++;
						scredit_for_qp[wc[i].wr_id]++;

						if (scredit_for_qp[wc[i].wr_id] == ctx->credit_cnt) {
							struct ibv_send_wr *bad_wr = NULL;
							ctx->ctrl_buf[wc[i].wr_id] = rcnt_for_qp[wc[i].wr_id];

							while (ccnt_for_qp[wc[i].wr_id] == user_param->tx_depth) {
								int sne, j = 0;

								sne = ibv_poll_cq(ctx->send_cq,user_param->tx_depth,swc);
								if (sne > 0) {
									for (j = 0; j < sne; j++) {
										if (swc[j].status != IBV_WC_SUCCESS) {
											fprintf(stderr, "Poll send CQ error status=%u qp %d credit=%lu scredit=%lu\n",
													swc[j].status,(int)swc[j].wr_id,
													rcnt_for_qp[swc[j].wr_id],ccnt_for_qp[swc[j].wr_id]);
											return_value = FAILURE;
											goto cleaning;
										}
										ccnt_for_qp[swc[j].wr_id]--;
									}

								} else if (sne < 0) {
									fprintf(stderr, "Poll send CQ failed ne=%d\n",sne);
									return_value = FAILURE;
									goto cleaning;
								}
							}
							if (ibv_post_send(ctx->qp[wc[i].wr_id],&ctx->ctrl_wr[wc[i].wr_id],&bad_wr)) {
								fprintf(stderr,"Couldn't post send qp %d credit=%lu\n",
										(int)wc[i].wr_id,rcnt_for_qp[wc[i].wr_id]);
								return_value = FAILURE;
								goto cleaning;
							}
							ccnt_for_qp[wc[i].wr_id]++;
							scredit_for_qp[wc[i].wr_id] = 0;
						}
					}
				}
			}

		} else if (ne < 0) {
			fprintf(stderr, "Poll Receive CQ failed %d\n", ne);
			return_value = FAILURE;
			goto cleaning;
		}
	}

cleaning:
	free(wc);
	free(swc);
	free(rcnt_for_qp);
	free(ccnt_for_qp);
	free(scredit_for_qp);
	return return_value;
}

/******************************************************************************
 *
 ******************************************************************************/
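/* Bidirectional BW loop: each side both transmits (bounded by tx_depth and,
 * optionally, by the peer's receive-credit window) and services its receive
 * CQ, re-posting receive WRs as they complete.  totscnt/totccnt/totrcnt end
 * the run in ITERATIONS mode, while the alarm state machine ends DURATION
 * mode; the server stays silent (before_first_rx) until its first receive. */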
int run_iter_bi(struct pingpong_context *ctx,
		struct perftest_parameters *user_param) {

	uint64_t totscnt = 0;
	uint64_t totccnt = 0;
	uint64_t totrcnt = 0;
	int i,index = 0;
	int ne = 0;
	int err = 0;
	uint64_t *rcnt_for_qp = NULL;
	uint64_t tot_iters = 0;
	uint64_t iters = 0;
	int tot_scredit = 0;
	int *scredit_for_qp = NULL;
	struct ibv_wc *wc = NULL;
	struct ibv_wc *wc_tx = NULL;
	struct ibv_recv_wr *bad_wr_recv = NULL;
	#ifdef HAVE_VERBS_EXP
	struct ibv_exp_send_wr *bad_exp_wr = NULL;
	#endif
	struct ibv_send_wr *bad_wr = NULL;
	int num_of_qps = user_param->num_of_qps;
	/* This is to ensure the SERVER will not start to send packets before the CLIENT starts the test. */
	int before_first_rx = ON;
	int size_per_qp = (user_param->use_srq) ? user_param->rx_depth/user_param->num_of_qps : user_param->rx_depth;
	int return_value = 0;

	ALLOCATE(wc_tx,struct ibv_wc,CTX_POLL_BATCH);
	ALLOCATE(rcnt_for_qp,uint64_t,user_param->num_of_qps);
	ALLOCATE(scredit_for_qp,int,user_param->num_of_qps);
	ALLOCATE(wc,struct ibv_wc,user_param->rx_depth);

	memset(rcnt_for_qp,0,sizeof(uint64_t)*user_param->num_of_qps);
	memset(scredit_for_qp,0,sizeof(int)*user_param->num_of_qps);

	if (user_param->noPeak == ON)
		user_param->tposted[0] = get_cycles();

	/* This is a very important point. Since this function does RX and TX
	   at the same time, we need to give some priority to RX to avoid
	   deadlock in UC/UD test scenarios (Recv WQEs depleted due to fast TX) */
	if (user_param->machine == CLIENT) {

		before_first_rx = OFF;
		/* if (user_param->test_type == DURATION) { */
			/* duration_param=user_param; */
			/* user_param->iters=0; */
			/* duration_param->state = START_STATE; */
			/* signal(SIGALRM, catch_alarm); */

			/* if (user_param->margin > 0 ) */
				/* alarm(user_param->margin); */
			/* else */
				/* catch_alarm(0); [> move to next state <] */
		/* } */
	}

	if (user_param->test_type == ITERATIONS) {
		check_alive_data.is_events = user_param->use_event;
		signal(SIGALRM, check_alive);
		alarm(60); /* TODO */
	}


	if(user_param->duplex && (user_param->use_xrc || user_param->connection_type == DC))
		num_of_qps /= 2;

	tot_iters = (uint64_t)user_param->iters*num_of_qps;
	iters=user_param->iters;
	check_alive_data.g_total_iters = tot_iters;

	while ((user_param->test_type == DURATION && user_param->state != END_STATE) ||
							totccnt < tot_iters || totrcnt < tot_iters ) {

		for (index=0; index < num_of_qps; index++) {
			while (before_first_rx == OFF && (ctx->scnt[index] < iters || user_param->test_type == DURATION) &&
					((ctx->scnt[index] + scredit_for_qp[index] - ctx->ccnt[index]) < user_param->tx_depth)) {
				if (ctx->send_rcredit) {
					uint32_t swindow = ctx->scnt[index] + user_param->post_list - ctx->credit_buf[index];
					if (swindow >= user_param->rx_depth)
						break;
				}
				if (user_param->post_list == 1 && (ctx->scnt[index] % user_param->cq_mod == 0 && user_param->cq_mod > 1)
					&& !(ctx->scnt[index] == (user_param->iters - 1) && user_param->test_type == ITERATIONS)) {
					#ifdef HAVE_VERBS_EXP
					if (user_param->use_exp ==1)
						ctx->exp_wr[index].exp_send_flags &= ~IBV_EXP_SEND_SIGNALED;
					else
					#endif
						ctx->wr[index].send_flags &= ~IBV_SEND_SIGNALED;
				}
				if (user_param->noPeak == OFF)
					user_param->tposted[totscnt] = get_cycles();

				if (user_param->test_type == DURATION && duration_param->state == END_STATE)
					break;

				#ifdef HAVE_VERBS_EXP
				if (user_param->use_exp == 1)
					err = (ctx->exp_post_send_func_pointer)(ctx->qp[index],
						&ctx->exp_wr[index*user_param->post_list],&bad_exp_wr);
				else
					err = (ctx->post_send_func_pointer)(ctx->qp[index],
						&ctx->wr[index*user_param->post_list],&bad_wr);
				#else
				err = ibv_post_send(ctx->qp[index],&ctx->wr[index*user_param->post_list],&bad_wr);
				#endif
				if (err) {
					fprintf(stderr,"Couldn't post send: qp %d scnt=%lu \n",index,ctx->scnt[index]);
					return_value = FAILURE;
					goto cleaning;
				}

				if (user_param->post_list == 1 && user_param->size <= (ctx->cycle_buffer / 2)) {
					#ifdef HAVE_VERBS_EXP
					if (user_param->use_exp == 1)
						increase_loc_addr(ctx->exp_wr[index].sg_list,user_param->size,ctx->scnt[index],
								ctx->my_addr[index],0,ctx->cache_line_size,ctx->cycle_buffer);
					else
					#endif
						increase_loc_addr(ctx->wr[index].sg_list,user_param->size,ctx->scnt[index],
								ctx->my_addr[index],0,ctx->cache_line_size,ctx->cycle_buffer);
				}

				ctx->scnt[index] += user_param->post_list;
				totscnt += user_param->post_list;

				if (user_param->post_list == 1 &&
					(ctx->scnt[index]%user_param->cq_mod == user_param->cq_mod - 1 ||
						(user_param->test_type == ITERATIONS && ctx->scnt[index] == iters-1))) {

					#ifdef HAVE_VERBS_EXP
					if (user_param->use_exp == 1)
						ctx->exp_wr[index].exp_send_flags |= IBV_EXP_SEND_SIGNALED;
					else
					#endif
						ctx->wr[index].send_flags |= IBV_SEND_SIGNALED;
				}
			}
		}
		if (user_param->use_event) {

			if (ctx_notify_events(ctx->channel)) {
				fprintf(stderr,"Failed to notify events to CQ");
				return_value = FAILURE;
				goto cleaning;
			}
		}

		ne = ibv_poll_cq(ctx->recv_cq,user_param->rx_depth,wc);
		if (ne > 0) {

			if (user_param->machine == SERVER && before_first_rx == ON) {
				before_first_rx = OFF;
				/* if (user_param->test_type == DURATION) { */
					/* duration_param=user_param; */
					/* user_param->iters=0; */
					/* duration_param->state = START_STATE; */
					/* signal(SIGALRM, catch_alarm); */
					/* if (user_param->margin > 0 ) */
						/* alarm(user_param->margin); */
					/* else */
						/* catch_alarm(0); [> move to next state <] */
				/* } */
			}

			for (i = 0; i < ne; i++) {
				if (wc[i].status != IBV_WC_SUCCESS) {
					NOTIFY_COMP_ERROR_RECV(wc[i],totrcnt);
					return_value = FAILURE;
					goto cleaning;
				}

				rcnt_for_qp[wc[i].wr_id]++;
				totrcnt++;
				check_alive_data.current_totrcnt = totrcnt;

				if (user_param->test_type==DURATION && user_param->state == SAMPLE_STATE) {
					if (user_param->report_per_port) {
						user_param->iters_per_port[user_param->port_by_qp[(int)wc[i].wr_id]]++;
					}
					user_param->iters++;
				}

				if (user_param->test_type==DURATION || rcnt_for_qp[wc[i].wr_id] + size_per_qp <= user_param->iters) {
					if (user_param->use_srq) {
						if (ibv_post_srq_recv(ctx->srq, &ctx->rwr[wc[i].wr_id],&bad_wr_recv)) {
							fprintf(stderr, "Couldn't post recv SRQ. QP = %d: counter=%d\n",(int)wc[i].wr_id,(int)totrcnt);
							return_value = FAILURE;
							goto cleaning;
						}

					} else {

						if (ibv_post_recv(ctx->qp[wc[i].wr_id],&ctx->rwr[wc[i].wr_id],&bad_wr_recv)) {
							fprintf(stderr, "Couldn't post recv Qp=%d rcnt=%lu\n",(int)wc[i].wr_id,rcnt_for_qp[wc[i].wr_id]);
							return_value = FAILURE;
							goto cleaning;
						}
					}

					if (SIZE(user_param->connection_type,user_param->size,!(int)user_param->machine) <= (ctx->cycle_buffer / 2)) {
						increase_loc_addr(ctx->rwr[wc[i].wr_id].sg_list,
								user_param->size,
								rcnt_for_qp[wc[i].wr_id] + size_per_qp -1,
								ctx->rx_buffer_addr[wc[i].wr_id],user_param->connection_type,
								ctx->cache_line_size,ctx->cycle_buffer);
					}
				}
				if (ctx->send_rcredit) {
					int credit_cnt = rcnt_for_qp[wc[i].wr_id]%user_param->rx_depth;

					if (credit_cnt%ctx->credit_cnt == 0) {
						int sne = 0;
						struct ibv_wc credit_wc;
						struct ibv_send_wr *bad_wr = NULL;
						ctx->ctrl_buf[wc[i].wr_id] = rcnt_for_qp[wc[i].wr_id];

						while ((ctx->scnt[wc[i].wr_id] + scredit_for_qp[wc[i].wr_id] - ctx->ccnt[wc[i].wr_id]) >= user_param->tx_depth) {
							sne = ibv_poll_cq(ctx->send_cq, 1, &credit_wc);
							if (sne > 0) {
								if (credit_wc.status != IBV_WC_SUCCESS) {
									fprintf(stderr, "Poll send CQ error status=%u qp %d credit=%lu scredit=%d\n",
											credit_wc.status,(int)credit_wc.wr_id,
											rcnt_for_qp[credit_wc.wr_id],scredit_for_qp[credit_wc.wr_id]);
									return_value = FAILURE;
									goto cleaning;
								}

								if (credit_wc.opcode == IBV_WC_RDMA_WRITE) {
									scredit_for_qp[credit_wc.wr_id]--;
									tot_scredit--;
								} else {
									totccnt += user_param->cq_mod;
									ctx->ccnt[(int)credit_wc.wr_id] += user_param->cq_mod;

									if (user_param->noPeak == OFF) {
										if ((user_param->test_type == ITERATIONS && (totccnt >= tot_iters - 1)))
											user_param->tcompleted[tot_iters - 1] = get_cycles();
										else
											user_param->tcompleted[totccnt-1] = get_cycles();
									}
									if (user_param->test_type==DURATION && user_param->state == SAMPLE_STATE)
										user_param->iters += user_param->cq_mod;
								}
							} else if (sne < 0) {
								fprintf(stderr, "Poll send CQ ne=%d\n",sne);
								return_value = FAILURE;
								goto cleaning;
							}
						}
						if (ibv_post_send(ctx->qp[wc[i].wr_id],&ctx->ctrl_wr[wc[i].wr_id],&bad_wr)) {
							fprintf(stderr,"Couldn't post send: qp%lu credit=%lu\n",wc[i].wr_id,rcnt_for_qp[wc[i].wr_id]);
							return_value = FAILURE;
							goto cleaning;
						}
						scredit_for_qp[wc[i].wr_id]++;
						tot_scredit++;
					}
				}
			}

		} else if (ne < 0) {
			fprintf(stderr, "poll CQ failed %d\n", ne);
			return_value = FAILURE;
			goto cleaning;
		}
		else if (ne == 0) {
			if (check_alive_data.to_exit) {
				user_param->check_alive_exited = 1;
				return_value = FAILURE;
				goto cleaning;
			}
		}

		ne = ibv_poll_cq(ctx->send_cq,CTX_POLL_BATCH,wc_tx);

		if (ne > 0) {
			for (i = 0; i < ne; i++) {
				if (wc_tx[i].status != IBV_WC_SUCCESS) {
					NOTIFY_COMP_ERROR_SEND(wc_tx[i],totscnt,totccnt);
					return_value = FAILURE;
					goto cleaning;
				}

				if (wc_tx[i].opcode == IBV_WC_RDMA_WRITE) {
					if (!ctx->send_rcredit) {
						fprintf(stderr, "Polled RDMA_WRITE completion without recv credit request\n");
						return_value = FAILURE;
						goto cleaning;
					}
					scredit_for_qp[wc_tx[i].wr_id]--;
					tot_scredit--;
				} else {
					totccnt += user_param->cq_mod;
					ctx->ccnt[(int)wc_tx[i].wr_id] += user_param->cq_mod;

					if (user_param->noPeak == OFF) {

						if ((user_param->test_type == ITERATIONS && (totccnt >= tot_iters - 1)))
							user_param->tcompleted[tot_iters - 1] = get_cycles();
						else
							user_param->tcompleted[totccnt-1] = get_cycles();
					}

					if (user_param->test_type==DURATION && user_param->state == SAMPLE_STATE) {
						if (user_param->report_per_port) {
							user_param->iters_per_port[user_param->port_by_qp[(int)wc_tx[i].wr_id]] += user_param->cq_mod;
						}
						user_param->iters += user_param->cq_mod;
					}
				}
			}

		} else if (ne < 0) {
			fprintf(stderr, "poll CQ failed %d\n", ne);
			return_value = FAILURE;
			goto cleaning;
		}
	}

	if (user_param->noPeak == ON && user_param->test_type == ITERATIONS) {
		user_param->tcompleted[0] = get_cycles();
	}

	if (ctx->send_rcredit) {
		if (clean_scq_credit(tot_scredit, ctx, user_param)) {
			return_value = FAILURE;
			goto cleaning;
		}
	}

cleaning:
	check_alive_data.last_totrcnt=0;
	free(rcnt_for_qp);
	free(scredit_for_qp);
	free(wc);
	free(wc_tx);
	return return_value;
}

/******************************************************************************
 *
 ******************************************************************************/
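/* RDMA_WRITE latency ping-pong: each round spins on the last byte the peer
 * writes into (poll_buf) until the expected sequence number shows up (the
 * side that writes first skips the initial wait), then stamps the next
 * sequence number into the last byte of its own send region (post_buf),
 * posts the write and reaps its completion from the send CQ. */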
int run_iter_lat_write(struct pingpong_context *ctx,struct perftest_parameters *user_param)
{
	uint64_t scnt = 0;
	uint64_t ccnt = 0;
	uint64_t rcnt = 0;
	int ne;
	int err = 0;
	int poll_buf_offset = 0;
	volatile char *poll_buf = NULL;
	volatile char *post_buf = NULL;
	#ifdef HAVE_VERBS_EXP
	struct ibv_exp_send_wr *bad_exp_wr = NULL;
	#endif
	struct ibv_send_wr *bad_wr = NULL;
	struct ibv_wc wc;

	int cpu_mhz = get_cpu_mhz(user_param->cpu_freq_f);
	int total_gap_cycles = user_param->latency_gap * cpu_mhz;
	cycles_t end_cycle, start_gap=0;

	#ifdef HAVE_VERBS_EXP
	if (user_param->use_exp == 1) {
		ctx->exp_wr[0].sg_list->length = user_param->size;
		ctx->exp_wr[0].exp_send_flags = IBV_EXP_SEND_SIGNALED;
		if (user_param->size <= user_param->inline_size)
			ctx->exp_wr[0].exp_send_flags |= IBV_EXP_SEND_INLINE;
	} else {
	#endif
		ctx->wr[0].sg_list->length = user_param->size;
		ctx->wr[0].send_flags = IBV_SEND_SIGNALED;
		if (user_param->size <= user_param->inline_size)
			ctx->wr[0].send_flags |= IBV_SEND_INLINE;
	#ifdef HAVE_VERBS_EXP
	}
	#endif

	if((user_param->use_xrc || user_param->connection_type == DC))
		poll_buf_offset = 1;

	post_buf = (char*)ctx->buf[0] + user_param->size - 1;
	poll_buf = (char*)ctx->buf[0] + (user_param->num_of_qps + poll_buf_offset)*BUFF_SIZE(ctx->size, ctx->cycle_buffer) + user_param->size - 1;
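	/* post_buf is the last byte of this side's outgoing buffer slot;
	 * poll_buf is the last byte of the slot the remote side writes into,
	 * num_of_qps slots further in (plus one extra slot for XRC/DC). */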

	/* Duration support in latency tests. */
	/* if (user_param->test_type == DURATION) { */
		/* duration_param=user_param; */
		/* duration_param->state = START_STATE; */
		/* signal(SIGALRM, catch_alarm); */
		/* user_param->iters = 0; */
		/* if (user_param->margin > 0) */
			/* alarm(user_param->margin); */
		/* else */
			/* catch_alarm(0); */
	/* } */

	/* Done with setup. Start the test. */
	while (scnt < user_param->iters || ccnt < user_param->iters || rcnt < user_param->iters
			|| ((user_param->test_type == DURATION && user_param->state != END_STATE))) {

		if ((rcnt < user_param->iters || user_param->test_type == DURATION) && !(scnt < 1 && user_param->machine == SERVER)) {
			rcnt++;
			while (*poll_buf != (char)rcnt && user_param->state != END_STATE);
		}

		if (scnt < user_param->iters || user_param->test_type == DURATION) {

			if (user_param->latency_gap) {
				start_gap = get_cycles();
				end_cycle = start_gap + total_gap_cycles;
				while (get_cycles() < end_cycle) {
					continue;
				}
			}

			if (user_param->test_type == ITERATIONS)
				user_param->tposted[scnt] = get_cycles();

			*post_buf = (char)++scnt;
			#ifdef HAVE_VERBS_EXP
			if (user_param->use_exp == 1)
				err = (ctx->exp_post_send_func_pointer)(ctx->qp[0],&ctx->exp_wr[0],&bad_exp_wr);
			else
				err = (ctx->post_send_func_pointer)(ctx->qp[0],&ctx->wr[0],&bad_wr);
			#else
			err = ibv_post_send(ctx->qp[0],&ctx->wr[0],&bad_wr);
			#endif
			if (err) {
				fprintf(stderr,"Couldn't post send: scnt=%lu\n",scnt);
				return 1;
			}
		}

		if (user_param->test_type == DURATION && user_param->state == END_STATE)
			break;

		if (ccnt < user_param->iters || user_param->test_type == DURATION) {

			do { ne = ibv_poll_cq(ctx->send_cq, 1, &wc); } while (ne == 0);

			if(ne > 0) {

				if (wc.status != IBV_WC_SUCCESS) {
					NOTIFY_COMP_ERROR_SEND(wc,scnt,ccnt);
					return 1;
				}

				ccnt++;
				if (user_param->test_type==DURATION && user_param->state == SAMPLE_STATE)
					user_param->iters++;

			} else if (ne < 0) {
				fprintf(stderr, "poll CQ failed %d\n", ne);
				return FAILURE;
			}
		}
	}
	return 0;
}

/******************************************************************************
 *
 ******************************************************************************/
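/* One-sided latency loop (e.g. RDMA read / atomics): post a single signaled
 * WR per iteration and poll (or block on a CQ event for) its send completion
 * before posting the next one.  latency_gap, when set, busy-waits between
 * iterations. */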
int run_iter_lat(struct pingpong_context *ctx,struct perftest_parameters *user_param)
{
	uint64_t scnt = 0;
	int ne;
	int err = 0;
	#ifdef HAVE_VERBS_EXP
	struct ibv_exp_send_wr *bad_exp_wr = NULL;
	#endif
	struct ibv_send_wr *bad_wr = NULL;
	struct ibv_wc wc;

	int cpu_mhz = get_cpu_mhz(user_param->cpu_freq_f);
	int total_gap_cycles = user_param->latency_gap * cpu_mhz;
	cycles_t end_cycle, start_gap=0;

	#ifdef HAVE_VERBS_EXP
	if (user_param->use_exp == 1) {
		ctx->exp_wr[0].sg_list->length = user_param->size;
		ctx->exp_wr[0].exp_send_flags = IBV_EXP_SEND_SIGNALED;
	} else {
	#endif
		ctx->wr[0].sg_list->length = user_param->size;
		ctx->wr[0].send_flags = IBV_SEND_SIGNALED;
	#ifdef HAVE_VERBS_EXP
	}
	#endif

	/* Duration support in latency tests. */
	/* if (user_param->test_type == DURATION) { */
		/* duration_param=user_param; */
		/* duration_param->state = START_STATE; */
		/* signal(SIGALRM, catch_alarm); */
		/* user_param->iters = 0; */
		/* if (user_param->margin > 0) */
			/* alarm(user_param->margin); */
		/* else */
			/* catch_alarm(0); */
	/* } */

	while (scnt < user_param->iters || (user_param->test_type == DURATION && user_param->state != END_STATE)) {
		if (user_param->latency_gap) {
			start_gap = get_cycles();
			end_cycle = start_gap + total_gap_cycles;
			while (get_cycles() < end_cycle) {
				continue;
			}
		}
		if (user_param->test_type == ITERATIONS)
			user_param->tposted[scnt++] = get_cycles();

		#ifdef HAVE_VERBS_EXP
		if (user_param->use_exp == 1)
			err = (ctx->exp_post_send_func_pointer)(ctx->qp[0],&ctx->exp_wr[0],&bad_exp_wr);
		else
			err = (ctx->post_send_func_pointer)(ctx->qp[0],&ctx->wr[0],&bad_wr);
		#else
		err = ibv_post_send(ctx->qp[0],&ctx->wr[0],&bad_wr);
		#endif
		if (err) {
			fprintf(stderr,"Couldn't post send: scnt=%lu\n",scnt);
			return 1;
		}

		if (user_param->test_type == DURATION && user_param->state == END_STATE)
			break;

		if (user_param->use_event) {
			if (ctx_notify_events(ctx->channel)) {
				fprintf(stderr, "Couldn't request CQ notification\n");
				return 1;
			}
		}

		do {
			ne = ibv_poll_cq(ctx->send_cq, 1, &wc);
			if(ne > 0) {
				if (wc.status != IBV_WC_SUCCESS) {
					NOTIFY_COMP_ERROR_SEND(wc,scnt,scnt);
					return 1;
				}
				if (user_param->test_type==DURATION && user_param->state == SAMPLE_STATE)
					user_param->iters++;

			} else if (ne < 0) {
				fprintf(stderr, "poll CQ failed %d\n", ne);
				return FAILURE;
			}

		} while (!user_param->use_event && ne == 0);
	}

	return 0;
}

/******************************************************************************
 *
 ******************************************************************************/
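/* SEND/RECV ping-pong latency: wait for the peer's message on the receive CQ,
 * re-post the receive (rotating through flow buffers when multiple flows are
 * configured), then post the next send; the send CQ is polled only on
 * signaled sends (every cq_mod-th post, and the last iteration). */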
int run_iter_lat_send(struct pingpong_context *ctx,struct perftest_parameters *user_param)
{
	uint64_t scnt = 0; /* sent packets counter */
	uint64_t rcnt = 0; /* received packets counter */
	int poll = 0;
	int ne;
	int err = 0;
	struct ibv_wc wc;
	struct ibv_recv_wr *bad_wr_recv;
	#ifdef HAVE_VERBS_EXP
	struct ibv_exp_send_wr *bad_exp_wr;
	#endif
	struct ibv_send_wr *bad_wr;
	int firstRx = 1;
	int size_per_qp = (user_param->use_srq) ?
					user_param->rx_depth/user_param->num_of_qps : user_param->rx_depth;
	int cpu_mhz = get_cpu_mhz(user_param->cpu_freq_f);
	int total_gap_cycles = user_param->latency_gap * cpu_mhz;
	int send_flows_index = 0;
	int recv_flows_index = 0;
	cycles_t end_cycle, start_gap=0;
	uintptr_t primary_send_addr = ctx->sge_list[0].addr;
	uintptr_t primary_recv_addr = ctx->recv_sge_list[0].addr;
	if (user_param->connection_type != RawEth) {
		#ifdef HAVE_VERBS_EXP
		if (user_param->use_exp == 1) {
			ctx->exp_wr[0].sg_list->length = user_param->size;
			ctx->exp_wr[0].exp_send_flags = 0;
		} else {
		#endif
			ctx->wr[0].sg_list->length = user_param->size;
			ctx->wr[0].send_flags = 0;
		#ifdef HAVE_VERBS_EXP
		}
		#endif

	}
	if (user_param->size <= user_param->inline_size) {
		#ifdef HAVE_VERBS_EXP
		if (user_param->use_exp == 1)
			ctx->exp_wr[0].exp_send_flags |= IBV_EXP_SEND_INLINE;
		else
		#endif
			ctx->wr[0].send_flags |= IBV_SEND_INLINE;
	}
	while (scnt < user_param->iters || rcnt < user_param->iters ||
			( (user_param->test_type == DURATION && user_param->state != END_STATE))) {

		/*
		 * Get the received packet. Make sure that the client won't enter here until it sends
		 * its first packet (scnt < 1).
		 * The server will enter here first and wait for a packet to arrive (from the client).
		 */
		if ((rcnt < user_param->iters || user_param->test_type == DURATION) && !(scnt < 1 && user_param->machine == CLIENT)) {
			if (user_param->use_event) {
				if (ctx_notify_events(ctx->channel)) {
					fprintf(stderr , " Failed to notify events to CQ");
					return 1;
				}
			}
			do {
				ne = ibv_poll_cq(ctx->recv_cq,1,&wc);
				if (user_param->test_type == DURATION && user_param->state == END_STATE)
					break;

				if (ne > 0) {
					if (firstRx) {
						set_on_first_rx_packet(user_param);
						firstRx = 0;
					}

					if (wc.status != IBV_WC_SUCCESS) {
						NOTIFY_COMP_ERROR_RECV(wc,rcnt);
						return 1;
					}

					rcnt++;

					if (user_param->test_type == DURATION && user_param->state == SAMPLE_STATE)
						user_param->iters++;

					/* If we're in duration mode or there
					 * is enough space in the rx_depth,
					 * re-post the receive we just consumed.
					 */
					if (user_param->test_type == DURATION || (rcnt + size_per_qp <= user_param->iters)) {
						if (user_param->use_srq) {

							if (ibv_post_srq_recv(ctx->srq, &ctx->rwr[wc.wr_id], &bad_wr_recv)) {
								fprintf(stderr, "Couldn't post recv SRQ. QP = %d: counter=%lu\n",(int)wc.wr_id, rcnt);
								return 1;
							}

						} else {
							if (ibv_post_recv(ctx->qp[wc.wr_id], &ctx->rwr[wc.wr_id], &bad_wr_recv)) {
								fprintf(stderr, "Couldn't post recv: rcnt=%lu\n", rcnt);
								return 15;
							}
						}
						if (user_param->flows != DEF_FLOWS) {
							if (++recv_flows_index == user_param->flows) {
								recv_flows_index = 0;
								ctx->recv_sge_list[0].addr = primary_recv_addr;
							} else {
								ctx->recv_sge_list[0].addr += INC(user_param->size, ctx->cache_line_size);
							}
						}
					}
				} else if (ne < 0) {
					fprintf(stderr, "poll CQ failed %d\n", ne);
					return 1;
				}
			} while (!user_param->use_event && ne == 0);
		}

		if (scnt < user_param->iters || (user_param->test_type == DURATION && user_param->state != END_STATE)) {

			if (user_param->latency_gap) {
				start_gap = get_cycles();
				end_cycle = start_gap + total_gap_cycles;
				while (get_cycles() < end_cycle) {
					continue;
				}
			}

			if (user_param->test_type == ITERATIONS)
				user_param->tposted[scnt] = get_cycles();

			scnt++;

			if (scnt % user_param->cq_mod == 0 || (user_param->test_type == ITERATIONS && scnt == user_param->iters)) {
				poll = 1;
				#ifdef HAVE_VERBS_EXP
				if (user_param->use_exp == 1)
					ctx->exp_wr[0].exp_send_flags |= IBV_EXP_SEND_SIGNALED;
				else
				#endif
					ctx->wr[0].send_flags |= IBV_SEND_SIGNALED;
			}

			/* if we're in duration mode and the time is over, exit from this function */
			if (user_param->test_type == DURATION && user_param->state == END_STATE)
				break;

			/* send the packet that's in index 0 on the buffer */
			#ifdef HAVE_VERBS_EXP
			if (user_param->use_exp == 1)
				err = (ctx->exp_post_send_func_pointer)(ctx->qp[0],&ctx->exp_wr[0],&bad_exp_wr);
			else
				err = (ctx->post_send_func_pointer)(ctx->qp[0],&ctx->wr[0],&bad_wr);
			#else
			err = ibv_post_send(ctx->qp[0],&ctx->wr[0],&bad_wr);
			#endif
			if (err) {
				fprintf(stderr,"Couldn't post send: scnt=%lu \n",scnt);
				return 1;
			}
			if (user_param->flows != DEF_FLOWS) {
				if (++send_flows_index == user_param->flows) {
					send_flows_index = 0;
					ctx->sge_list[0].addr = primary_send_addr;
				} else {
					ctx->sge_list[0].addr = primary_send_addr + (ctx->flow_buff_size * send_flows_index);
				}
			}
			if (poll == 1) {

				struct ibv_wc s_wc;
				int s_ne;

				if (user_param->use_event) {
					if (ctx_notify_events(ctx->channel)) {
						fprintf(stderr , " Failed to notify events to CQ");
						return FAILURE;
					}
				}

				/* wait until you get a cq for the last packet */
				do {
					s_ne = ibv_poll_cq(ctx->send_cq, 1, &s_wc);
				} while (!user_param->use_event && s_ne == 0);

				if (s_ne < 0) {
					fprintf(stderr, "poll on Send CQ failed %d\n", s_ne);
					return FAILURE;
				}

				if (s_wc.status != IBV_WC_SUCCESS) {
					NOTIFY_COMP_ERROR_SEND(s_wc,scnt,scnt);
					return 1;
				}
				poll = 0;

				#ifdef HAVE_VERBS_EXP
				if (user_param->use_exp == 1)
					ctx->exp_wr[0].exp_send_flags &= ~IBV_EXP_SEND_SIGNALED;
				else
				#endif
					ctx->wr[0].send_flags &= ~IBV_SEND_SIGNALED;
			}
		}
	}

	return 0;
}
/******************************************************************************
 * Server
 ******************************************************************************/
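/* Burst-latency responder: for every reply_every received messages, send one
 * pong back (bounded by tx_depth), re-post each consumed receive WR and reap
 * send completions as they arrive. */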
int run_iter_lat_burst_server(struct pingpong_context *ctx, struct perftest_parameters *user_param)
{
	int i;
	int ne = 0;
	int err = 0;
	uint64_t scnt = 0;
	uint64_t rcnt = 0;
	uint64_t ccnt = 0;
	struct ibv_wc *wc = NULL;
	struct ibv_send_wr *bad_wr;
	struct ibv_recv_wr *bad_wr_recv = NULL;
	int wc_id;

	ALLOCATE(wc, struct ibv_wc, user_param->burst_size);

	/* main loop for polling */
	while (rcnt < user_param->iters) {

		ne = ibv_poll_cq(ctx->recv_cq, user_param->burst_size, wc);
		if (ne > 0) {
			for (i = 0; i < ne; i++) {
				wc_id = (int)wc[i].wr_id;
				if (wc[i].status != IBV_WC_SUCCESS) {
					NOTIFY_COMP_ERROR_RECV(wc[i], rcnt);
					return FAILURE;
				}
				rcnt++;
				if (rcnt%user_param->reply_every == 0 && scnt - ccnt < user_param->tx_depth) {
					err = ibv_post_send(ctx->qp[0], &ctx->wr[0], &bad_wr);
					if (err) {
						fprintf(stderr, "Couldn't post send: scnt=%lu\n", scnt);
						return FAILURE;
					}
					scnt++;
				}

				if (ibv_post_recv(ctx->qp[wc_id], &ctx->rwr[wc_id], &bad_wr_recv)) {
					fprintf(stderr, "Couldn't post recv Qp=%d rcnt=%ld\n", wc_id, rcnt);
					return FAILURE;
				}
			}
		} else if (ne < 0) {
			fprintf(stderr, "poll CQ failed %d\n", ne);
			return FAILURE;
		}
		ne = ibv_poll_cq(ctx->send_cq, CTX_POLL_BATCH, wc);
		if (ne > 0) {
			for (i = 0; i < ne; i++) {
				if (wc[i].status != IBV_WC_SUCCESS) {
					NOTIFY_COMP_ERROR_SEND(wc[i], scnt, ccnt);
					return FAILURE;
				}
				ccnt++;
			}

		} else if (ne < 0) {
			fprintf(stderr, "poll CQ failed %d\n", ne);
			return FAILURE;
		}
	}
	free(wc);
	return SUCCESS;
}
/******************************************************************************
 * Client
 ******************************************************************************/
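/* Burst-latency initiator: post bursts of burst_size sends (optionally paced
 * by the software rate limiter computed below), timestamp every
 * reply_every-th post in tposted[] and timestamp each pong polled from the
 * receive CQ in tcompleted[]; the latency report is built from these pairs. */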
int run_iter_lat_burst(struct pingpong_context *ctx, struct perftest_parameters *user_param)
{
	uint64_t totscnt = 0; /* sent packets counter */
	uint64_t totccnt = 0; /* complete sent packets counter */
	uint64_t totrcnt = 0; /* received packets counter */
	uint64_t tot_iters;
	uint64_t pong_cnt = 0; /* counts how many pongs arrived */
	int ne, ns;
	int err = 0;
	int i = 0;
	int wc_id;
	struct ibv_wc *wc;
	#ifdef HAVE_VERBS_EXP
	struct ibv_exp_send_wr *bad_exp_wr;
	#endif
	struct ibv_send_wr *bad_wr;
	int cpu_mhz;
	int return_value = 0;
	/* Rate Limiter*/
	int rate_limit_pps = 0;
	double gap_time = 0; /* in usec */
	cycles_t gap_cycles = 0; /* in cycles */
	cycles_t gap_deadline = 0;
	unsigned int number_of_bursts = 0;
	int burst_iter = 0;
	int is_sending_burst = 0;
	struct ibv_recv_wr *bad_wr_recv = NULL;
	ALLOCATE(wc, struct ibv_wc, user_param->burst_size);

	tot_iters = (uint64_t)user_param->iters;

	/* If using rate limiter, calculate gap time between bursts */
	cpu_mhz = get_cpu_mhz(user_param->cpu_freq_f);
	if (cpu_mhz <= 0) {
		fprintf(stderr, "Failed: couldn't acquire cpu frequency for rate limiter.\n");
		return_value = FAILURE;
		goto cleaning;
	}
	if (user_param->rate_limit > 0 ) {
		if (user_param->rate_limit_type == SW_RATE_LIMIT) {
			switch (user_param->rate_units) {
				case MEGA_BYTE_PS:
					rate_limit_pps = ((double)(user_param->rate_limit) / user_param->size) * 1048576;
					break;
				case GIGA_BIT_PS:
					rate_limit_pps = ((double)(user_param->rate_limit) / (user_param->size * 8)) * 1000000000;
					break;
				case PACKET_PS:
					rate_limit_pps = user_param->rate_limit;
					break;
				default:
					fprintf(stderr, " Failed: Unknown rate limit units\n");
					return_value = FAILURE;
					goto cleaning;
			}
			number_of_bursts = rate_limit_pps / user_param->burst_size;
			gap_time = 1000000 * (1.0 / number_of_bursts);
		}
	}

	gap_cycles = cpu_mhz * gap_time;
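	/* For illustration only (hypothetical numbers): with rate_units ==
	 * MEGA_BYTE_PS, rate_limit == 100 and size == 4096, rate_limit_pps is
	 * (100 / 4096) * 1048576 = 25600 packets/sec; with burst_size == 32 that
	 * gives number_of_bursts == 800, gap_time == 1250 usec and therefore
	 * gap_cycles == 1250 * cpu_mhz. */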

	/* main loop for posting */
	while (totrcnt < (totscnt / user_param->reply_every) || totccnt < tot_iters) {

		if (is_sending_burst == 0) {
			if (gap_deadline > get_cycles() && user_param->rate_limit_type == SW_RATE_LIMIT) {
				/* Go right to cq polling until gap time is over. */
				goto polling;
			}
			gap_deadline = get_cycles() + gap_cycles;
			is_sending_burst = 1;
			burst_iter = 0;
		}
		while ((totscnt < user_param->iters)
			&& (totscnt - totccnt) < (user_param->tx_depth) && !(is_sending_burst == 0 )) {
			#ifdef HAVE_VERBS_EXP
			if (user_param->use_exp == 1)
				err = (ctx->exp_post_send_func_pointer)(ctx->qp[0],
					&ctx->exp_wr[0], &bad_exp_wr);
			else
				err = (ctx->post_send_func_pointer)(ctx->qp[0],&ctx->wr[0],&bad_wr);
			#else
			err = ibv_post_send(ctx->qp[0],&ctx->wr[0],&bad_wr);
			#endif
			if (err) {
				fprintf(stderr, "Couldn't post send: scnt=%lu\n", totscnt);
				return_value = FAILURE;
				goto cleaning;
			}
			if (user_param->post_list == 1 && user_param->size <= (ctx->cycle_buffer / 2)) {
				#ifdef HAVE_VERBS_EXP
				if (user_param->use_exp == 1)
					increase_loc_addr(ctx->exp_wr[0].sg_list, user_param->size,
								totscnt, ctx->my_addr[0], 0, ctx->cache_line_size, ctx->cycle_buffer);
				else
				#endif
					increase_loc_addr(ctx->wr[0].sg_list, user_param->size, totscnt,
							ctx->my_addr[0], 0, ctx->cache_line_size, ctx->cycle_buffer);
			}
			totscnt += user_param->post_list;
			if (totscnt % user_param->reply_every == 0 && totscnt != 0) {
				user_param->tposted[pong_cnt] = get_cycles();
				pong_cnt++;
			}
			if (++burst_iter == user_param->burst_size) {
				is_sending_burst = 0;
			}
		}
polling:
		do {
			ne = ibv_poll_cq(ctx->recv_cq, CTX_POLL_BATCH, wc);
			if (ne > 0) {
				for (i = 0; i < ne; i++) {
					wc_id = (user_param->verb_type == ACCL_INTF) ?
							0 : (int)wc[i].wr_id;
					user_param->tcompleted[totrcnt] = get_cycles();
					totrcnt++;
					if (wc[i].status != IBV_WC_SUCCESS) {
						NOTIFY_COMP_ERROR_SEND(wc[i], totscnt, totccnt);
						return_value = FAILURE;
						goto cleaning;
					}
					if (ibv_post_recv(ctx->qp[wc_id], &ctx->rwr[wc_id], &bad_wr_recv)) {
						fprintf(stderr, "Couldn't post recv Qp=%d rcnt=%ld\n", wc_id, totrcnt);
						return_value = FAILURE;
						goto cleaning;
					}
				}
			} else if (ne < 0) {
				fprintf(stderr, "poll CQ failed %d\n", ne);
				return_value = 1;
				goto cleaning;
			}
			ns = ibv_poll_cq(ctx->send_cq, user_param->burst_size, wc);
			if (ns > 0) {
				for (i = 0; i < ns; i++) {
					wc_id = (user_param->verb_type == ACCL_INTF) ?
						0 : (int)wc[i].wr_id;
					if (wc[i].status != IBV_WC_SUCCESS) {
						NOTIFY_COMP_ERROR_SEND(wc[i], totscnt, totccnt);
						return_value = FAILURE ;
						goto cleaning;
					}
					totccnt += user_param->cq_mod;
				}
			} else if (ns < 0) {
				fprintf(stderr, "poll CQ failed %d\n", ns);
				return_value = 1;
				goto cleaning;
			}
		} while (ne != 0);
	}

	free(wc);
	return SUCCESS;
cleaning:
	free(wc);
	return return_value;
}

/******************************************************************************
 *
 ******************************************************************************/
uint16_t ctx_get_local_lid(struct ibv_context *context,int port)
{
	struct ibv_port_attr attr;

	if (ibv_query_port(context,port,&attr))
		return 0;

	return attr.lid;
}

/******************************************************************************
 *
 ******************************************************************************/
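/* Alarm handler driving the DURATION-mode state machine:
 * START -> SAMPLE after the warm-up margin, SAMPLE -> STOP_SAMPLE after
 * (duration - 2*margin) seconds of measurement, STOP_SAMPLE -> END after the
 * cool-down margin.  For example (illustrative values), --duration 10
 * --margin 2 gives a 2 s warm-up, a 6 s measured sample and a 2 s cool-down. */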
void catch_alarm(int sig)
{
	switch (duration_param->state) {
		case START_STATE:
			duration_param->state = SAMPLE_STATE;
			get_cpu_stats(duration_param,1);
			duration_param->tposted[0] = get_cycles();
			alarm(duration_param->duration - 2*(duration_param->margin));
			break;
		case SAMPLE_STATE:
			duration_param->state = STOP_SAMPLE_STATE;
			duration_param->tcompleted[0] = get_cycles();
			get_cpu_stats(duration_param,2);
			if (duration_param->margin > 0)
				alarm(duration_param->margin);
			else
				catch_alarm(0);

			break;
		case STOP_SAMPLE_STATE:
			duration_param->state = END_STATE;
			break;
		default:
			fprintf(stderr,"unknown state\n");
	}
}
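
/* SIGALRM watchdog for ITERATIONS-mode BW tests: armed for 60 seconds at a
 * time; if no new receive completions were counted since the previous check
 * and the test is not yet complete, it either exits (event mode) or asks the
 * run loop to stop via check_alive_data.to_exit. */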

void check_alive(int sig)
{
	if (check_alive_data.current_totrcnt > check_alive_data.last_totrcnt) {
		check_alive_data.last_totrcnt = check_alive_data.current_totrcnt;
		alarm(60);
	} else if (check_alive_data.current_totrcnt == check_alive_data.last_totrcnt && check_alive_data.current_totrcnt < check_alive_data.g_total_iters) {
		fprintf(stderr," Did not get a message for 120 seconds, exiting...\n Total Received=%d, Total Iters Required=%d\n",check_alive_data.current_totrcnt, check_alive_data.g_total_iters);

		if (check_alive_data.is_events) {
			/* Can't report BW, as we are stuck in event_loop */
			fprintf(stderr," Due to this issue, Perftest cannot produce a report when in event mode.\n");
			exit(FAILURE);
		}
		else {
			/* exit nice from run_iter function and report known bw/mr */
			check_alive_data.to_exit = 1;
		}
	}
}

/******************************************************************************
 *
 ******************************************************************************/
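/* Periodic-report alarm handler for the "run infinitely" mode: print the
 * bandwidth accumulated since the previous tick, reset the iteration counter
 * and re-arm the alarm for the next reporting interval. */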
void catch_alarm_infintely()
{
	print_report_bw(duration_param,NULL);
	duration_param->iters = 0;
	alarm(duration_param->duration);
	duration_param->tposted[0] = get_cycles();
}

/******************************************************************************
 *
 ******************************************************************************/
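/* Dedicated signal-handling thread for the "run infinitely" client: blocks in
 * sigwait() on the given signal mask and calls catch_alarm_infintely() on
 * every SIGALRM to print a periodic bandwidth report. */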
void *handle_signal_print_thread(void *sigmask)
{
	sigset_t *set = (sigset_t*)sigmask;
	int rc;
	int sig_caught;
	while(1){
		rc = sigwait(set, &sig_caught);
		if (rc != 0){
			printf("Error while waiting for SIGALRM\n");
			exit(EXIT_FAILURE);
		}
		if(sig_caught == SIGALRM)
				catch_alarm_infintely();
		else {
			printf("Unsupported signal caught %d, only signal %d is supported\n", sig_caught, SIGALRM);
			exit(EXIT_FAILURE);
		}
	}

}

/******************************************************************************
 *
 ******************************************************************************/
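/* Query the (experimental) extended device attributes and report whether the
 * HCA exposes both big-endian atomic replies and extended/masked atomics;
 * returns -1 if the query itself fails. */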
#ifdef HAVE_MASKED_ATOMICS
int check_masked_atomics_support(struct pingpong_context *ctx)
{
	struct ibv_exp_device_attr attr;
	memset(&attr,0,sizeof (struct ibv_exp_device_attr));

	attr.comp_mask = IBV_EXP_DEVICE_ATTR_EXT_ATOMIC_ARGS | IBV_EXP_DEVICE_ATTR_EXP_CAP_FLAGS;
	attr.exp_atomic_cap = IBV_EXP_ATOMIC_HCA_REPLY_BE;

	if (ibv_exp_query_device(ctx->context, &attr)) {
		fprintf(stderr, "ibv_exp_query_device failed\n");
		return -1;
	}

	return MASK_IS_SET(IBV_EXP_ATOMIC_HCA_REPLY_BE, attr.exp_atomic_cap) &&
		MASK_IS_SET(IBV_EXP_DEVICE_EXT_ATOMICS, attr.exp_device_cap_flags);
}
#endif

/******************************************************************************
 *
 ******************************************************************************/
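/* Report whether the device supports packet pacing (QP rate limiting), either
 * via the experimental comp_mask bit or, with the upstream API, a non-zero
 * qp_rate_limit_max. */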
#ifdef HAVE_PACKET_PACING_EXP
int check_packet_pacing_support(struct pingpong_context *ctx)
{
	struct ibv_exp_device_attr attr;
	memset(&attr, 0, sizeof (struct ibv_exp_device_attr));

	attr.comp_mask = IBV_EXP_DEVICE_ATTR_PACKET_PACING_CAPS;

	if (ibv_exp_query_device(ctx->context, &attr)) {
		fprintf(stderr, "ibv_exp_query_device failed\n");
		return FAILURE;
	}

	return MASK_IS_SET(IBV_EXP_DEVICE_ATTR_PACKET_PACING_CAPS, attr.comp_mask) ?
		SUCCESS : FAILURE;
}
#elif defined(HAVE_PACKET_PACING)
int check_packet_pacing_support(struct pingpong_context *ctx)
{
	struct ibv_device_attr_ex attr;
	memset(&attr, 0, sizeof (struct ibv_device_attr_ex));

	if (ibv_query_device_ex(ctx->context, NULL, &attr)) {
		fprintf(stderr, "ibv_query_device_ex failed\n");
		return FAILURE;
	}

	/* qp_rate_limit_max > 0 if PP is supported */
	return attr.packet_pacing_caps.qp_rate_limit_max > 0 ? SUCCESS : FAILURE;
}
#endif
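
/* Flow-steering rate test: create flow rules on every QP, timestamping each
 * ibv_create_flow()/ibv_exp_create_flow() call in ITERATIONS mode (DURATION
 * mode keeps creating rules until the alarm ends the run), and tear all
 * created flows down again during cleanup. */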

int run_iter_fs(struct pingpong_context *ctx, struct perftest_parameters *user_param) {

	struct raw_ethernet_info *my_dest_info = NULL;
	struct raw_ethernet_info *rem_dest_info = NULL;

	#ifdef HAVE_RAW_ETH_EXP
	struct ibv_exp_flow **flow_create_result;
	struct ibv_exp_flow_attr **flow_rules;
	#else
	struct ibv_flow **flow_create_result;
	struct ibv_flow_attr **flow_rules;
	#endif
	int flow_index = 0;
	int qp_index = 0;
	int retval = SUCCESS;
	uint64_t tot_fs_cnt = 0;
	uint64_t allocated_flows = 0;
	uint64_t tot_iters = 0;

	/* Allocate user input dependable structs */
	ALLOCATE(my_dest_info, struct raw_ethernet_info, user_param->num_of_qps);
	memset(my_dest_info, 0, sizeof(struct raw_ethernet_info) * user_param->num_of_qps);
	ALLOCATE(rem_dest_info, struct raw_ethernet_info, user_param->num_of_qps);
	memset(rem_dest_info, 0, sizeof(struct raw_ethernet_info) * user_param->num_of_qps);

	if (user_param->test_type == ITERATIONS) {
		user_param->flows = user_param->iters * user_param->num_of_qps;
		allocated_flows = user_param->iters;
	} else if (user_param->test_type == DURATION) {
		allocated_flows = (2 * MAX_FS_PORT) - (user_param->server_port + user_param->client_port);
	}


	#ifdef HAVE_RAW_ETH_EXP
	ALLOCATE(flow_create_result, struct ibv_exp_flow*, allocated_flows * user_param->num_of_qps);
	ALLOCATE(flow_rules, struct ibv_exp_flow_attr*, allocated_flows * user_param->num_of_qps);
	#else
	ALLOCATE(flow_create_result, struct ibv_flow*, allocated_flows * user_param->num_of_qps);
	ALLOCATE(flow_rules, struct ibv_flow_attr*, allocated_flows * user_param->num_of_qps);
	#endif

	/* if(user_param->test_type == DURATION) { */
		/* duration_param = user_param; */
		/* user_param->iters = 0; */
		/* duration_param->state = START_STATE; */
		/* signal(SIGALRM, catch_alarm); */
		/* alarm(user_param->margin); */
		/* if (user_param->margin > 0) */
			/* alarm(user_param->margin); */
		/* else */
			/* catch_alarm(0); [> move to next state <] */
	/* } */
	if (set_up_fs_rules(flow_rules, ctx, user_param, allocated_flows)) {
		fprintf(stderr, "Unable to set up flow rules\n");
		retval = FAILURE;
		goto cleaning;
	}

	do {/* This loop runs once in Iteration mode */
		for (qp_index = 0; qp_index < user_param->num_of_qps; qp_index++) {

			for (flow_index = 0; flow_index < allocated_flows; flow_index++) {

				if (user_param->test_type == ITERATIONS)
					user_param->tposted[tot_fs_cnt] = get_cycles();
				else if (user_param->test_type == DURATION && duration_param->state == END_STATE)
					break;
				#ifdef HAVE_RAW_ETH_EXP
				flow_create_result[(qp_index * allocated_flows) + flow_index] =
					ibv_exp_create_flow(ctx->qp[qp_index], flow_rules[(qp_index * allocated_flows) + flow_index]);
				#else
				flow_create_result[(qp_index * allocated_flows) + flow_index] =
					ibv_create_flow(ctx->qp[qp_index], flow_rules[(qp_index * allocated_flows) + flow_index]);
				#endif
				if (user_param->test_type == ITERATIONS)
					user_param->tcompleted[tot_fs_cnt] = get_cycles();
				if (!flow_create_result[(qp_index * allocated_flows) + flow_index]) {
					perror("error");
					fprintf(stderr, "Couldn't attach QP\n");
					retval = FAILURE;
					goto cleaning;
				}
				if (user_param->test_type == ITERATIONS ||
				   (user_param->test_type == DURATION && duration_param->state == SAMPLE_STATE))
					tot_fs_cnt++;
				tot_iters++;
			}
		}
	} while (user_param->test_type == DURATION && duration_param->state != END_STATE);

	if (user_param->test_type == DURATION && user_param->state == END_STATE)
		user_param->iters = tot_fs_cnt;

cleaning:
	/* destroy open flows */
	for (flow_index = 0; flow_index < tot_iters; flow_index++) {
		#ifdef HAVE_RAW_ETH_EXP
		if (ibv_exp_destroy_flow(flow_create_result[flow_index])) {
		#else
		if (ibv_destroy_flow(flow_create_result[flow_index])) {
		#endif
			perror("error");
			fprintf(stderr, "Couldn't destroy flow\n");
		}
	}
	free(flow_rules);
	free(flow_create_result);
	free(my_dest_info);
	free(rem_dest_info);

	return retval;
}

/******************************************************************************
 * End
 ******************************************************************************/