diff --git a/usr/benchmarks/CMakeLists.txt b/usr/benchmarks/CMakeLists.txt index 9aae06bcb..9719d91b7 100644 --- a/usr/benchmarks/CMakeLists.txt +++ b/usr/benchmarks/CMakeLists.txt @@ -8,6 +8,10 @@ target_link_libraries(basic pthread) add_executable(hg hg.c hist.c rdtsc.c run.c init.c opt.c report.c setup.c) +# add_executable(ib-pingpong-ud ib/pingpong.c ib/pingpong-ud.c) +add_executable(ib_write_bw ib/write_bw.c ib/get_clock.c ib/perftest_parameters.c + ib/perftest_resources.c ib/perftest_communication.c) + add_executable(netio netio.c) add_executable(RCCE_pingpong RCCE_pingpong.c) diff --git a/usr/benchmarks/ib/atomic_bw.c b/usr/benchmarks/ib/atomic_bw.c new file mode 100755 index 000000000..1b082358e --- /dev/null +++ b/usr/benchmarks/ib/atomic_bw.c @@ -0,0 +1,341 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2006 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2009 HNR Consulting. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+#include
+#include
+#include
+
+#include "perftest_parameters.h"
+#include "perftest_resources.h"
+#include "perftest_communication.h"
+
+/******************************************************************************
+ * main - atomic-verb bandwidth benchmark (verb = ATOMIC, tst = BW).
+ *
+ * Parses the command line, opens the selected device, connects to the peer
+ * (rdma_cm or plain socket handshake), runs the bandwidth iterations and
+ * prints/exchanges the reports.
+ *
+ * Returns the value of destroy_ctx() on the regular exit paths, FAILURE on
+ * any setup, handshake or limit-check error, and 7 when no device is found.
+ ******************************************************************************/
+int main(int argc, char *argv[])
+{
+	int ret_parser, i;
+	struct ibv_device *ib_dev = NULL;
+	struct pingpong_context ctx;
+	struct pingpong_dest *my_dest = NULL;
+	struct pingpong_dest *rem_dest = NULL;
+	struct perftest_parameters user_param;
+	struct perftest_comm user_comm;
+	struct bw_report_data my_bw_rep, rem_bw_rep;
+
+	/* init default values to user's parameters */
+	memset(&ctx, 0, sizeof(struct pingpong_context));
+	memset(&user_param, 0, sizeof(struct perftest_parameters));
+	memset(&user_comm, 0, sizeof(struct perftest_comm));
+
+	user_param.verb = ATOMIC;
+	user_param.tst = BW;
+	/* user_param was zeroed above, so copying at most size-1 bytes keeps version NUL-terminated. */
+	strncpy(user_param.version, VERSION, sizeof(user_param.version) - 1);
+
+	ret_parser = parser(&user_param, argv, argc);
+	if (ret_parser) {
+		/* version/help requests also leave through this path, just without the error text */
+		if (ret_parser != VERSION_EXIT && ret_parser != HELP_EXIT)
+			fprintf(stderr, " Parser function exited with Error\n");
+		return FAILURE;
+	}
+
+	if (user_param.use_xrc && user_param.duplex) {
+		user_param.num_of_qps *= 2;
+	}
+
+	ib_dev = ctx_find_dev(user_param.ib_devname);
+	if (!ib_dev)
+		return 7; /* literal exit code kept as-is; every other error path returns FAILURE */
+
+	/* Getting the relevant context from the device */
+	ctx.context = ibv_open_device(ib_dev);
+	if (!ctx.context) {
+		fprintf(stderr, " Couldn't get context for the device\n");
+		return FAILURE;
+	}
+
+	#ifdef HAVE_MASKED_ATOMICS
+	if (check_masked_atomics_support(&ctx)) {
+		user_param.masked_atomics = 1;
+		user_param.use_exp = 1;
+	}
+
+	if (user_param.masked_atomics && (user_param.work_rdma_cm || user_param.use_rdma_cm)) {
+		fprintf(stderr, "atomic test is not supported with -R/-z flag (rdma_cm) with this device.\n");
+		return FAILURE;
+	}
+	#endif
+
+	/* See if MTU and link type are valid and supported. */
+	if (check_link(ctx.context, &user_param)) {
+		fprintf(stderr, " MTU or link type is not valid or supported\n");
+		return FAILURE;
+	}
+
+	/* copy the relevant user parameters to the comm struct + creating rdma_cm resources. */
+	if (create_comm_struct(&user_comm, &user_param)) {
+		fprintf(stderr, " Unable to create RDMA_CM resources\n");
+		return FAILURE;
+	}
+
+	if (user_param.output == FULL_VERBOSITY && user_param.machine == SERVER) {
+		printf("\n************************************\n");
+		printf("* Waiting for client to connect... *\n");
+		printf("************************************\n");
+	}
+
+	/* Initialize the connection and print the local data. */
+	if (establish_connection(&user_comm)) {
+		fprintf(stderr, " Unable to init the socket connection\n");
+		return FAILURE;
+	}
+
+	exchange_versions(&user_comm, &user_param);
+
+	check_sys_data(&user_comm, &user_param);
+
+	/* See if MTU and link type are valid and supported. */
+	if (check_mtu(ctx.context, &user_param, &user_comm)) {
+		fprintf(stderr, " MTU or link type is not valid or supported\n");
+		return FAILURE;
+	}
+
+	ALLOCATE(my_dest, struct pingpong_dest, user_param.num_of_qps);
+	memset(my_dest, 0, sizeof(struct pingpong_dest)*user_param.num_of_qps);
+	ALLOCATE(rem_dest, struct pingpong_dest, user_param.num_of_qps);
+	memset(rem_dest, 0, sizeof(struct pingpong_dest)*user_param.num_of_qps);
+
+	/* Allocating arrays needed for the test. */
+	alloc_ctx(&ctx, &user_param);
+
+	/* Create (if necessary) the rdma_cm ids and channel. */
+	if (user_param.work_rdma_cm == ON) {
+
+		if (user_param.machine == CLIENT) {
+			if (retry_rdma_connect(&ctx, &user_param)) {
+				fprintf(stderr, "Unable to perform rdma_client function\n");
+				return FAILURE;
+			}
+
+		} else {
+			if (create_rdma_resources(&ctx, &user_param)) {
+				fprintf(stderr, " Unable to create the rdma_resources\n");
+				return FAILURE;
+			}
+			if (rdma_server_connect(&ctx, &user_param)) {
+				fprintf(stderr, "Unable to perform rdma_server function\n");
+				return FAILURE;
+			}
+		}
+
+	} else {
+
+		/* create all the basic IB resources. */
+		if (ctx_init(&ctx, &user_param)) {
+			fprintf(stderr, " Couldn't create IB resources\n");
+			return FAILURE;
+		}
+	}
+
+	/* Set up the Connection. */
+	if (set_up_connection(&ctx, &user_param, my_dest)) {
+		fprintf(stderr, " Unable to set up socket connection\n");
+		return FAILURE;
+	}
+
+	/* Print basic test information. */
+	ctx_print_test_info(&user_param);
+
+	/* Print this machine QP information */
+	for (i = 0; i < user_param.num_of_qps; i++)
+		ctx_print_pingpong_data(&my_dest[i], &user_comm);
+
+	user_comm.rdma_params->side = REMOTE;
+	for (i = 0; i < user_param.num_of_qps; i++) {
+
+		/* shaking hands and gather the other side info. */
+		if (ctx_hand_shake(&user_comm, &my_dest[i], &rem_dest[i])) {
+			fprintf(stderr, "Failed to exchange data between server and clients\n");
+			return FAILURE;
+		}
+		ctx_print_pingpong_data(&rem_dest[i], &user_comm);
+	}
+
+	if (user_param.work_rdma_cm == OFF) {
+		if (ctx_check_gid_compatibility(&my_dest[0], &rem_dest[0])) {
+			fprintf(stderr, "\n Found Incompatibility issue with GID types.\n");
+			fprintf(stderr, " Please Try to use a different IP version.\n\n");
+			return FAILURE;
+		}
+	}
+
+	if (user_param.work_rdma_cm == OFF) {
+		if (ctx_connect(&ctx, rem_dest, &user_param, my_dest)) {
+			fprintf(stderr, " Unable to Connect the HCA's through the link\n");
+			return FAILURE;
+		}
+	}
+
+	/* An additional handshake is required after moving qp to RTR. */
+	if (ctx_hand_shake(&user_comm, &my_dest[0], &rem_dest[0])) {
+		fprintf(stderr, "Failed to exchange data between server and clients\n");
+		return FAILURE;
+	}
+
+	/* For half duplex tests, server just waits for client to exit */
+	if (user_param.machine == SERVER && !user_param.duplex) {
+		if (user_param.output == FULL_VERBOSITY) {
+			printf(RESULT_LINE);
+			printf((user_param.report_fmt == MBS ? RESULT_FMT : RESULT_FMT_G));
+			printf((user_param.cpu_util_data.enable ? RESULT_EXT_CPU_UTIL : RESULT_EXT));
+		}
+
+		if (ctx_hand_shake(&user_comm, &my_dest[0], &rem_dest[0])) {
+			fprintf(stderr, " Failed to exchange data between server and clients\n");
+			return FAILURE;
+		}
+
+		/* the server prints the report it received from the client */
+		xchg_bw_reports(&user_comm, &my_bw_rep, &rem_bw_rep, atof(user_param.rem_version));
+		print_full_bw_report(&user_param, &rem_bw_rep, NULL);
+
+		if (user_param.output == FULL_VERBOSITY) {
+			printf(RESULT_LINE);
+		}
+
+		if (ctx_close_connection(&user_comm, &my_dest[0], &rem_dest[0])) {
+			fprintf(stderr, "Failed to close connection between server and client\n");
+			return FAILURE;
+		}
+
+		return destroy_ctx(&ctx, &user_param);
+	}
+
+	if (user_param.use_event) {
+		if (ibv_req_notify_cq(ctx.send_cq, 0)) {
+			fprintf(stderr, "Couldn't request CQ notification\n");
+			return FAILURE;
+		}
+	}
+	if (user_param.output == FULL_VERBOSITY) {
+		printf(RESULT_LINE);
+		printf((user_param.report_fmt == MBS ? RESULT_FMT : RESULT_FMT_G));
+		printf((user_param.cpu_util_data.enable ? RESULT_EXT_CPU_UTIL : RESULT_EXT));
+	}
+
+	ctx_set_send_wqes(&ctx, &user_param, rem_dest);
+
+	if (user_param.test_method == RUN_REGULAR || user_param.test_method == RUN_ALL) {
+
+		if (user_param.perform_warm_up) {
+			if (perform_warm_up(&ctx, &user_param)) {
+				fprintf(stderr, "Problems with warm up\n");
+				return FAILURE;
+			}
+		}
+
+		if (user_param.duplex) {
+			if (ctx_hand_shake(&user_comm, &my_dest[0], &rem_dest[0])) {
+				fprintf(stderr, "Failed to sync between server and client between different msg sizes\n");
+				return FAILURE;
+			}
+		}
+
+		if (run_iter_bw(&ctx, &user_param)) {
+			fprintf(stderr, " Error occurred in run_iter function\n");
+			return FAILURE;
+		}
+
+		print_report_bw(&user_param, &my_bw_rep);
+
+		if (user_param.duplex) {
+			xchg_bw_reports(&user_comm, &my_bw_rep, &rem_bw_rep, atof(user_param.rem_version));
+			print_full_bw_report(&user_param, &my_bw_rep, &rem_bw_rep);
+		}
+
+		if (user_param.report_both && user_param.duplex) {
+			printf(RESULT_LINE);
+			printf("\n Local results:\n");
+			printf(RESULT_LINE);
+			printf((user_param.report_fmt == MBS ? RESULT_FMT : RESULT_FMT_G));
+			printf((user_param.cpu_util_data.enable ? RESULT_EXT_CPU_UTIL : RESULT_EXT));
+			print_full_bw_report(&user_param, &my_bw_rep, NULL);
+			printf(RESULT_LINE);
+
+			printf("\n Remote results:\n");
+			printf(RESULT_LINE);
+			printf((user_param.report_fmt == MBS ? RESULT_FMT : RESULT_FMT_G));
+			printf((user_param.cpu_util_data.enable ? RESULT_EXT_CPU_UTIL : RESULT_EXT));
+			print_full_bw_report(&user_param, &rem_bw_rep, NULL);
+		}
+	} else if (user_param.test_method == RUN_INFINITELY) {
+
+		if (run_iter_bw_infinitely(&ctx, &user_param)) {
+			fprintf(stderr, " Error occurred while running infinitely! aborting ...\n");
+			return FAILURE;
+		}
+	}
+
+	if (user_param.output == FULL_VERBOSITY) {
+		printf(RESULT_LINE);
+	}
+	/* Half duplex client: mirror the handshake and report exchange the server is waiting on */
+	if (user_param.machine == CLIENT && !user_param.duplex) {
+
+		if (ctx_hand_shake(&user_comm, &my_dest[0], &rem_dest[0])) {
+			fprintf(stderr, " Failed to exchange data between server and clients\n");
+			return FAILURE;
+		}
+
+		xchg_bw_reports(&user_comm, &my_bw_rep, &rem_bw_rep, atof(user_param.rem_version));
+	}
+
+	if (ctx_close_connection(&user_comm, &my_dest[0], &rem_dest[0])) {
+		fprintf(stderr, "Failed to close connection between server and client\n");
+		return FAILURE;
+	}
+
+	if (!user_param.is_bw_limit_passed && (user_param.is_limit_bw == ON)) {
+		fprintf(stderr, "Error: BW result is below bw limit\n");
+		return FAILURE;
+	}
+
+	/* NOTE(review): gated by is_limit_bw like the BW check above; confirm no separate msg-rate switch was intended. */
+	if (!user_param.is_msgrate_limit_passed && (user_param.is_limit_bw == ON)) {
+		fprintf(stderr, "Error: Msg rate is below msg_rate limit\n");
+		return FAILURE;
+	}
+
+	return destroy_ctx(&ctx, &user_param);
+}
diff --git a/usr/benchmarks/ib/atomic_lat.c b/usr/benchmarks/ib/atomic_lat.c
new file mode 100755
index 000000000..3d8fe7a6f
--- /dev/null
+++ b/usr/benchmarks/ib/atomic_lat.c
@@ -0,0 +1,282 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2005 Hewlett Packard, Inc (Grant Grundler) + * Copyright (c) 2009 HNR Consulting. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ *
+ * $Id$
+ */
+
+
+#include
+#include
+#include
+#include
+#if !defined(__FreeBSD__)
+#include
+#endif
+
+#include "get_clock.h"
+#include "perftest_parameters.h"
+#include "perftest_resources.h"
+#include "perftest_communication.h"
+
+/******************************************************************************
+ * main - atomic-verb latency benchmark (verb = ATOMIC, tst = LAT).
+ *
+ * Parses the command line, opens the selected device, connects to the peer
+ * and, on the client only, runs the latency iterations and prints the report.
+ *
+ * Returns 0 on success, FAILURE on any setup or handshake error, and 17 when
+ * run_iter_lat() fails.
+ ******************************************************************************/
+int main(int argc, char *argv[])
+{
+	int ret_parser, i;
+	struct report_options report;
+	struct pingpong_context ctx;
+	struct pingpong_dest *my_dest = NULL;
+	struct pingpong_dest *rem_dest = NULL;
+	struct ibv_device *ib_dev;
+	struct perftest_parameters user_param;
+	struct perftest_comm user_comm;
+
+	/* init default values to user's parameters */
+	memset(&ctx,0,sizeof(struct pingpong_context));
+	memset(&user_param, 0, sizeof(struct perftest_parameters));
+	memset(&user_comm,0,sizeof(struct perftest_comm));
+
+	user_param.verb = ATOMIC;
+	user_param.tst = LAT;
+	user_param.r_flag = &report;
+	/* user_param was zeroed above, so copying at most size-1 bytes keeps version NUL-terminated. */
+	strncpy(user_param.version, VERSION, sizeof(user_param.version) - 1);
+
+	ret_parser = parser(&user_param,argv,argc);
+	if (ret_parser) {
+		/* version/help requests also leave through this path, just without the error text */
+		if (ret_parser != VERSION_EXIT && ret_parser != HELP_EXIT)
+			fprintf(stderr," Parser function exited with Error\n");
+		return FAILURE;
+	}
+
+	if(user_param.use_xrc)
+		user_param.num_of_qps *= 2;
+
+	/* Finding the IB device selected (or default if none selected). */
+	ib_dev = ctx_find_dev(user_param.ib_devname);
+	if (!ib_dev) {
+		fprintf(stderr," Unable to find the Infiniband/RoCE device\n");
+		return FAILURE;
+	}
+
+	/* Getting the relevant context from the device */
+	ctx.context = ibv_open_device(ib_dev);
+	if (!ctx.context) {
+		fprintf(stderr, " Couldn't get context for the device\n");
+		return FAILURE;
+	}
+
+	/* Verify user parameters that require the device context,
+	 * the function will print the relevant error info. */
+	if (verify_params_with_device_context(ctx.context, &user_param)) {
+		return FAILURE;
+	}
+
+	#ifdef HAVE_MASKED_ATOMICS
+	if (check_masked_atomics_support(&ctx)) {
+		user_param.masked_atomics = 1;
+		user_param.use_exp = 1;
+	}
+
+	if (user_param.masked_atomics && (user_param.work_rdma_cm || user_param.use_rdma_cm)) {
+		fprintf(stderr, "atomic test is not supported with -R/-z flag (rdma_cm) with this device.\n");
+		return FAILURE;
+	}
+
+	#endif
+
+	/* See if MTU and link type are valid and supported. */
+	if (check_link(ctx.context,&user_param)) {
+		fprintf(stderr, " MTU or link type is not valid or supported\n");
+		return FAILURE;
+	}
+
+	/* copy the relevant user parameters to the comm struct + creating rdma_cm resources. */
+	if (create_comm_struct(&user_comm,&user_param)) {
+		fprintf(stderr," Unable to create RDMA_CM resources\n");
+		return FAILURE;
+	}
+
+	if (user_param.output == FULL_VERBOSITY && user_param.machine == SERVER) {
+		printf("\n************************************\n");
+		printf("* Waiting for client to connect... *\n");
+		printf("************************************\n");
+	}
+
+	/* Initialize the connection and print the local data. */
+	if (establish_connection(&user_comm)) {
+		fprintf(stderr," Unable to init the socket connection\n");
+		return FAILURE;
+	}
+
+	exchange_versions(&user_comm, &user_param);
+
+	check_sys_data(&user_comm, &user_param);
+
+	/* See if MTU and link type are valid and supported. */
+	if (check_mtu(ctx.context,&user_param, &user_comm)) {
+		fprintf(stderr, " MTU or link type is not valid or supported\n");
+		return FAILURE;
+	}
+
+	ALLOCATE(my_dest , struct pingpong_dest , user_param.num_of_qps);
+	memset(my_dest, 0, sizeof(struct pingpong_dest)*user_param.num_of_qps);
+	ALLOCATE(rem_dest , struct pingpong_dest , user_param.num_of_qps);
+	memset(rem_dest, 0, sizeof(struct pingpong_dest)*user_param.num_of_qps);
+
+	/* Allocating arrays needed for the test. */
+	alloc_ctx(&ctx,&user_param);
+
+	/* Create (if necessary) the rdma_cm ids and channel. */
+	if (user_param.work_rdma_cm == ON) {
+
+		if (user_param.machine == CLIENT) {
+			if (retry_rdma_connect(&ctx,&user_param)) {
+				fprintf(stderr,"Unable to perform rdma_client function\n");
+				return FAILURE;
+			}
+
+		} else {
+			if (create_rdma_resources(&ctx,&user_param)) {
+				fprintf(stderr," Unable to create the rdma_resources\n");
+				return FAILURE;
+			}
+			if (rdma_server_connect(&ctx,&user_param)) {
+				fprintf(stderr,"Unable to perform rdma_server function\n");
+				return FAILURE;
+			}
+		}
+
+	} else {
+
+		/* create all the basic IB resources (data buffer, PD, MR, CQ and events channel) */
+		if (ctx_init(&ctx,&user_param)) {
+			fprintf(stderr, " Couldn't create IB resources\n");
+			return FAILURE;
+		}
+	}
+
+	/* Set up the Connection. */
+	if (set_up_connection(&ctx,&user_param,my_dest)) {
+		fprintf(stderr," Unable to set up socket connection\n");
+		return FAILURE;
+	}
+
+	/* Print basic test information. */
+	ctx_print_test_info(&user_param);
+
+	for (i=0; i < user_param.num_of_qps; i++)
+		ctx_print_pingpong_data(&my_dest[i],&user_comm);
+
+	/* shaking hands and gather the other side info. */
+	if (ctx_hand_shake(&user_comm,my_dest,rem_dest)) {
+		fprintf(stderr,"Failed to exchange data between server and clients\n");
+		return FAILURE;
+	}
+
+	user_comm.rdma_params->side = REMOTE;
+	for (i=0; i < user_param.num_of_qps; i++) {
+
+		/* shaking hands and gather the other side info. */
+		if (ctx_hand_shake(&user_comm,&my_dest[i],&rem_dest[i])) {
+			fprintf(stderr,"Failed to exchange data between server and clients\n");
+			return FAILURE;
+		}
+
+		ctx_print_pingpong_data(&rem_dest[i],&user_comm);
+	}
+
+	if (user_param.work_rdma_cm == OFF) {
+		if (ctx_check_gid_compatibility(&my_dest[0], &rem_dest[0])) {
+			fprintf(stderr,"\n Found Incompatibility issue with GID types.\n");
+			fprintf(stderr," Please Try to use a different IP version.\n\n");
+			return FAILURE;
+		}
+	}
+
+	if (user_param.work_rdma_cm == OFF) {
+		if (ctx_connect(&ctx,rem_dest,&user_param,my_dest)) {
+			fprintf(stderr," Unable to Connect the HCA's through the link\n");
+			return FAILURE;
+		}
+	}
+
+	/* An additional handshake is required after moving qp to RTR. */
+	if (ctx_hand_shake(&user_comm,my_dest,rem_dest)) {
+		fprintf(stderr,"Failed to exchange data between server and clients\n");
+		return FAILURE;
+	}
+
+	/* Only the client posts requests; the server closes the connection and exits here. */
+	if (user_param.machine == SERVER) {
+		if (ctx_close_connection(&user_comm,my_dest,rem_dest)) {
+			fprintf(stderr,"Failed to close connection between server and client\n");
+			return FAILURE;
+		}
+		if (user_param.output == FULL_VERBOSITY) {
+			printf(RESULT_LINE);
+		}
+		return 0;
+	}
+
+	if (user_param.use_event) {
+		if (ibv_req_notify_cq(ctx.send_cq, 0)) {
+			fprintf(stderr, "Couldn't request CQ notification\n");
+			return FAILURE;
+		}
+	}
+
+	ctx_set_send_wqes(&ctx,&user_param,rem_dest);
+
+	if (user_param.output == FULL_VERBOSITY) {
+		printf(RESULT_LINE);
+		printf("%s",(user_param.test_type == ITERATIONS) ? RESULT_FMT_LAT : RESULT_FMT_LAT_DUR);
+		printf((user_param.cpu_util_data.enable ? RESULT_EXT_CPU_UTIL : RESULT_EXT));
+	}
+	if(run_iter_lat(&ctx,&user_param))
+		return 17; /* literal exit code kept as-is; other error paths return FAILURE */
+
+	/* pick the report that matches how the run was bounded */
+	if (user_param.test_type == ITERATIONS)
+		print_report_lat(&user_param);
+	else
+		print_report_lat_duration(&user_param);
+
+	if (ctx_close_connection(&user_comm,my_dest,rem_dest)) {
+		fprintf(stderr,"Failed to close connection between server and client\n");
+		return FAILURE;
+	}
+
+	if (user_param.output == FULL_VERBOSITY) {
+		printf(RESULT_LINE);
+	}
+
+	return 0;
+}
diff --git a/usr/benchmarks/ib/clock_test.c b/usr/benchmarks/ib/clock_test.c
new file mode 100644
index 000000000..aa473155a
--- /dev/null
+++ b/usr/benchmarks/ib/clock_test.c
@@ -0,0 +1,24 @@
+#include
+#include
+#include "get_clock.h"
+
+/*
+ * Sanity check for the cycle counter: prints how many microseconds, according
+ * to get_cycles() and get_cpu_mhz(), elapse during each sleep(1).
+ * Returns 2 if the CPU frequency cannot be determined; otherwise loops until
+ * the process is interrupted.
+ */
+int main(void)
+{
+	int no_cpu_freq_fail = 0;
+	double mhz;
+	cycles_t c1, c2;
+
+	mhz = get_cpu_mhz(no_cpu_freq_fail);
+	if (!mhz) {
+		printf("Unable to calibrate cycles. Exiting.\n");
+		return 2;
+	}
+
+	printf("Type CTRL-C to cancel.\n");
+	for (;;) {
+		c1 = get_cycles();
+		sleep(1);
+		c2 = get_cycles();
+		/* cycles divided by cycles-per-microsecond gives microseconds */
+		printf("1 sec = %g usec\n", (c2 - c1) / mhz);
+	}
+}
diff --git a/usr/benchmarks/ib/get_clock.c b/usr/benchmarks/ib/get_clock.c
new file mode 100755
index 000000000..91aaf7234
--- /dev/null
+++ b/usr/benchmarks/ib/get_clock.c
@@ -0,0 +1,234 @@
+/*
+ * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + * + * Author: Michael S. Tsirkin + */ + +/* #define DEBUG 1 */ +/* #define DEBUG_DATA 1 */ +/* #define GET_CPU_MHZ_FROM_PROC 1 */ + +/* For gettimeofday */ +#define _BSD_SOURCE +#include + +#include +#include +#include +#include "get_clock.h" + +#ifndef DEBUG +#define DEBUG 0 +#endif +#ifndef DEBUG_DATA +#define DEBUG_DATA 0 +#endif + +#define MEASUREMENTS 200 +#define USECSTEP 10 +#define USECSTART 100 + +/* + Use linear regression to calculate cycles per microsecond. 
+http://en.wikipedia.org/wiki/Linear_regression#Parameter_estimation
+
+ Busy-waits on gettimeofday() for MEASUREMENTS intervals of increasing length
+ (USECSTART + i * USECSTEP microseconds) and fits elapsed cycles against
+ elapsed microseconds.
+
+ Returns the fitted slope (cycles per microsecond), or 0 if gettimeofday()
+ fails or the fit is poor (r^2 < 0.9).
+*/
+static double sample_get_cpu_mhz(void)
+{
+	struct timeval tv1, tv2;
+	cycles_t start;
+	double sx = 0, sy = 0, sxx = 0, syy = 0, sxy = 0;
+	double tx, ty;
+	int i;
+
+	/* Regression: y = a + b x */
+	long x[MEASUREMENTS];
+	cycles_t y[MEASUREMENTS];
+	double a; /* system call overhead in cycles */
+	double b; /* cycles per microsecond */
+	double r_2;
+
+	for (i = 0; i < MEASUREMENTS; ++i) {
+		start = get_cycles();
+
+		if (gettimeofday(&tv1, NULL)) {
+			fprintf(stderr, "gettimeofday failed.\n");
+			return 0;
+		}
+
+		/* spin until at least USECSTART + i * USECSTEP microseconds have passed */
+		do {
+			if (gettimeofday(&tv2, NULL)) {
+				fprintf(stderr, "gettimeofday failed.\n");
+				return 0;
+			}
+		} while ((tv2.tv_sec - tv1.tv_sec) * 1000000 +
+			(tv2.tv_usec - tv1.tv_usec) < USECSTART + i * USECSTEP);
+
+		x[i] = (tv2.tv_sec - tv1.tv_sec) * 1000000 +
+			tv2.tv_usec - tv1.tv_usec;
+		y[i] = get_cycles() - start;
+		/* %lld is the standard conversion for the long long cast below */
+		if (DEBUG_DATA)
+			fprintf(stderr, "x=%ld y=%lld\n", x[i], (long long)y[i]);
+	}
+
+	for (i = 0; i < MEASUREMENTS; ++i) {
+		tx = x[i];
+		ty = y[i];
+		sx += tx;
+		sy += ty;
+		sxx += tx * tx;
+		syy += ty * ty;
+		sxy += tx * ty;
+	}
+
+	b = (MEASUREMENTS * sxy - sx * sy) / (MEASUREMENTS * sxx - sx * sx);
+	a = (sy - b * sx) / MEASUREMENTS;
+
+	if (DEBUG)
+		fprintf(stderr, "a = %g\n", a);
+	if (DEBUG)
+		fprintf(stderr, "b = %g\n", b);
+	if (DEBUG)
+		fprintf(stderr, "a / b = %g\n", a / b);
+	r_2 = (MEASUREMENTS * sxy - sx * sy) * (MEASUREMENTS * sxy - sx * sy) /
+		(MEASUREMENTS * sxx - sx * sx) /
+		(MEASUREMENTS * syy - sy * sy);
+
+	if (DEBUG)
+		fprintf(stderr, "r^2 = %g\n", r_2);
+	if (r_2 < 0.9) {
+		fprintf(stderr,"Correlation coefficient r^2: %g < 0.9\n", r_2);
+		return 0;
+	}
+
+	return b;
+}
+
+#if !defined(__s390x__) && !defined(__s390__)
+/*
+ * Read the CPU frequency from the OS: /proc/cpuinfo, or the output of
+ * "sysctl hw.clockrate" on FreeBSD.
+ *
+ * Returns the first frequency value found, or 0.0 if the source cannot be
+ * opened or holds no matching line. When a later value differs from the first
+ * by more than 2%, a warning is printed once unless no_cpu_freq_warn is set.
+ */
+static double proc_get_cpu_mhz(int no_cpu_freq_warn)
+{
+	FILE* f;
+	char buf[256];
+	double mhz = 0.0;
+	int print_flag = 0;
+	double delta;
+
+	#if defined(__FreeBSD__)
+	f = popen("/sbin/sysctl hw.clockrate","r");
+	#else
+	f = fopen("/proc/cpuinfo","r");
+	#endif
+
+	if (!f)
+		return 0.0;
+	while(fgets(buf, sizeof(buf), f)) {
+		double m;
+		int rc;
+
+		#if defined (__ia64__)
+		/* Use the ITC frequency on IA64 */
+		rc = sscanf(buf, "itc MHz : %lf", &m);
+		#elif defined (__PPC__) || defined (__PPC64__)
+		/* PPC has a different format as well */
+		rc = sscanf(buf, "clock : %lf", &m);
+		#elif defined (__sparc__) && defined (__arch64__)
+		/*
+		 * on sparc the /proc/cpuinfo lines that hold
+		 * the cpu freq in HZ are as follow:
+		 * Cpu{cpu-num}ClkTck : 00000000a9beeee4
+		 */
+		char *s;
+
+		s = strstr(buf, "ClkTck\t: ");
+		if (!s)
+			continue;
+		/* overwrite the two bytes before the digits with "0x" so sscanf reads hex */
+		s += (strlen("ClkTck\t: ") - strlen("0x"));
+		memcpy(s, "0x", strlen("0x"));
+		rc = sscanf(s, "%lf", &m);
+		m /= 1000000;
+		#else
+		#if defined (__FreeBSD__)
+		rc = sscanf(buf, "hw.clockrate: %lf", &m);
+		#else
+		rc = sscanf(buf, "cpu MHz : %lf", &m);
+		#endif
+		#endif
+
+		if (rc != 1)
+			continue;
+
+		if (mhz == 0.0) {
+			mhz = m;
+			continue;
+		}
+		delta = mhz > m ? mhz - m : m - mhz;
+		if ((delta / mhz > 0.02) && (print_flag ==0)) {
+			print_flag = 1;
+			if (!no_cpu_freq_warn) {
+				fprintf(stderr, "Conflicting CPU frequency values"
+						" detected: %lf != %lf. CPU Frequency is not max.\n", mhz, m);
+			}
+			continue;
+		}
+	}
+
+#if defined(__FreeBSD__)
+	pclose(f);
+#else
+	fclose(f);
+#endif
+	return mhz;
+}
+#endif
+
+/*
+ * Return the cycle-counter frequency in cycles per microsecond, or 0 when it
+ * cannot be determined.
+ *
+ * On s390 only the sampled value is used. Elsewhere the OS-reported value is
+ * returned unless it differs from the sampled value by more than 2%, in which
+ * case the sampled value wins. no_cpu_freq_warn is passed through to
+ * proc_get_cpu_mhz().
+ */
+double get_cpu_mhz(int no_cpu_freq_warn)
+{
+	#if defined(__s390x__) || defined(__s390__)
+	return sample_get_cpu_mhz();
+	#else
+	double sample, proc, delta;
+	sample = sample_get_cpu_mhz();
+	proc = proc_get_cpu_mhz(no_cpu_freq_warn);
+	#ifdef __aarch64__
+	if (proc < 1)
+		proc = sample;
+	#endif
+	if (!proc || !sample)
+		return 0;
+
+	delta = proc > sample ? proc - sample : sample - proc;
+	if (delta / proc > 0.02) {
+		return sample;
+	}
+	return proc;
+#endif
+}
diff --git a/usr/benchmarks/ib/get_clock.h b/usr/benchmarks/ib/get_clock.h
new file mode 100755
index 000000000..dacbcd00d
--- /dev/null
+++ b/usr/benchmarks/ib/get_clock.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ *
+ * Author: Michael S. Tsirkin
+ */
+
+#ifndef GET_CLOCK_H
+#define GET_CLOCK_H
+
+/*
+ * Each branch below defines cycles_t and get_cycles(), which returns the
+ * current value of the architecture's cycle/time counter.
+ */
+#if defined (__x86_64__) || defined(__i386__)
+/* Note: only x86 CPUs which have rdtsc instruction are supported. */
+typedef unsigned long long cycles_t;
+static inline cycles_t get_cycles(void)
+{
+	unsigned low, high;
+	unsigned long long val;
+	asm volatile ("rdtsc" : "=a" (low), "=d" (high));
+	/* combine the two 32-bit halves into one 64-bit count */
+	val = high;
+	val = (val << 32) | low;
+	return val;
+}
+#elif defined(__PPC__) || defined(__PPC64__)
+/* Note: only PPC CPUs which have mftb instruction are supported. */
+/* PPC64 has mftb */
+typedef unsigned long cycles_t;
+static inline cycles_t get_cycles(void)
+{
+	cycles_t ret;
+
+	__asm__ __volatile__ ("\n\t isync" "\n\t mftb %0" : "=r"(ret));
+	return ret;
+}
+#elif defined(__ia64__)
+/* Itanium2 and up has ar.itc (Itanium1 has errata) */
+typedef unsigned long cycles_t;
+static inline cycles_t get_cycles(void)
+{
+	cycles_t ret;
+
+	asm volatile ("mov %0=ar.itc" : "=r" (ret));
+	return ret;
+}
+#elif defined(__ARM_ARCH_7A__)
+typedef unsigned long long cycles_t;
+static inline cycles_t get_cycles(void)
+{
+	cycles_t clk;
+	asm volatile("mrrc p15, 0, %Q0, %R0, c14" : "=r" (clk));
+	return clk;
+}
+#elif defined(__s390x__) || defined(__s390__)
+typedef unsigned long long cycles_t;
+static inline cycles_t get_cycles(void)
+{
+	cycles_t clk;
+	asm volatile("stck %0" : "=Q" (clk) : : "cc");
+	return clk >> 2;
+}
+#elif defined(__sparc__) && defined(__arch64__)
+typedef unsigned long long cycles_t;
+static inline cycles_t get_cycles(void)
+{
+	cycles_t v;
+	asm volatile ("rd %%tick, %0" : "=r" (v) : );
+	return v;
+}
+#elif defined(__aarch64__)
+
+typedef unsigned long cycles_t;
+static inline cycles_t get_cycles(void)
+{
+	cycles_t cval;
+	asm volatile("isb" : : : "memory");
+	asm volatile("mrs %0, cntvct_el0" : "=r" (cval));
+	return cval;
+}
+
+#else
+#warning get_cycles not implemented for this architecture: attempt asm/timex.h
+#include
+#endif
+
+extern double get_cpu_mhz(int);
+
+#endif
diff --git a/usr/benchmarks/ib/multicast_resources.c b/usr/benchmarks/ib/multicast_resources.c
new file mode 100755
index 000000000..3a8edbaba
--- /dev/null
+++ b/usr/benchmarks/ib/multicast_resources.c
@@ -0,0 +1,266 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if !defined(__FreeBSD__) +#include +#endif +#include +#include +#if defined(__FreeBSD__) +#include +#else +#include +#endif +#include +#include +#include "multicast_resources.h" + +/* This is when we get sig handler from the user before we remove the join request. */ +struct mcast_parameters *sighandler_params; + +/****************************************************************************** + * signalCatcher - cacth user signal in order to reregiser the mcast group + ******************************************************************************/ +static void signalCatcher (int sig) +{ + if (sig == SIGINT) { + + if (join_multicast_group(SUBN_ADM_METHOD_DELETE,sighandler_params)) + fprintf(stderr,"Couldn't Unregister the Mcast group on the SM\n"); + + if (sighandler_params->is_2nd_mgid_used) { + memcpy(sighandler_params->mgid.raw,sighandler_params->base_mgid.raw,16); + if (join_multicast_group(SUBN_ADM_METHOD_DELETE,sighandler_params)) + fprintf(stderr,"Couldn't Unregister the Base Mcast group on the SM\n"); + } + } + exit(1); +} + +/****************************************************************************** + * prepare_mcast_mad + ******************************************************************************/ +static void prepare_mcast_mad(uint8_t method, + struct mcast_parameters *params, + struct sa_mad_packet_t *samad_packet) +{ + uint8_t *ptr; + uint64_t comp_mask; + + memset(samad_packet,0,sizeof(*samad_packet)); + + /* prepare the MAD header. 
according to Table 145 in IB spec 1.2.1 */ + ptr = samad_packet->mad_header_buf; + ptr[0] = 0x01; /* BaseVersion */ + ptr[1] = MANAGMENT_CLASS_SUBN_ADM; /* MgmtClass */ + ptr[2] = 0x02; /* ClassVersion */ + ptr[3] = INSERTF(ptr[3], 0, method, 0, 7); /* Method */ + (*(uint64_t *)(ptr + 8)) = htonll((uint64_t)DEF_TRANS_ID); /* TransactionID */ + (*(uint16_t *)(ptr + 16)) = htons(SUBN_ADM_ATTR_MC_MEMBER_RECORD); /* AttributeID */ + + ptr = samad_packet->SubnetAdminData; + + memcpy(&ptr[0],params->mgid.raw, 16); + memcpy(&ptr[16],params->port_gid.raw, 16); + + (*(uint32_t *)(ptr + 32)) = htonl(DEF_QKEY); + (*(uint16_t *)(ptr + 40)) = htons(params->pkey); + ptr[39] = DEF_TCLASS; + ptr[44] = INSERTF(ptr[44], 4, DEF_SLL, 0, 4); + ptr[44] = INSERTF(ptr[44], 0, DEF_FLOW_LABLE, 16, 4); + ptr[45] = INSERTF(ptr[45], 0, DEF_FLOW_LABLE, 8, 8); + ptr[46] = INSERTF(ptr[46], 0, DEF_FLOW_LABLE, 0, 8); + ptr[48] = INSERTF(ptr[48], 0, MCMEMBER_JOINSTATE_FULL_MEMBER, 0, 4); + + comp_mask = SUBN_ADM_COMPMASK_MGID | SUBN_ADM_COMPMASK_PORT_GID | SUBN_ADM_COMPMASK_Q_KEY | + SUBN_ADM_COMPMASK_P_KEY | SUBN_ADM_COMPMASK_TCLASS | SUBN_ADM_COMPMASK_SL | + SUBN_ADM_COMPMASK_FLOW_LABEL | SUBN_ADM_COMPMASK_JOIN_STATE; + + samad_packet->ComponentMask = htonll(comp_mask); +} + +/****************************************************************************** + * check_mad_status + ******************************************************************************/ +static int check_mad_status(struct sa_mad_packet_t *samad_packet) +{ + uint8_t *ptr; + uint32_t user_trans_id; + uint16_t mad_header_status; + + ptr = samad_packet->mad_header_buf; + + /* the upper 32 bits of TransactionID were set by the kernel */ + user_trans_id = ntohl(*(uint32_t *)(ptr + 12)); + + /* check the TransactionID to make sure this is the response */ + /* for the join/leave multicast group request we posted */ + if (user_trans_id != DEF_TRANS_ID) { + fprintf(stderr, "received a mad with TransactionID 0x%x, when expecting 0x%x\n", + 
(unsigned int)user_trans_id, (unsigned int)DEF_TRANS_ID);; + return 1; + } + + mad_header_status = 0x0; + mad_header_status = INSERTF(mad_header_status, 8, ptr[4], 0, 7); + mad_header_status = INSERTF(mad_header_status, 0, ptr[5], 0, 8); + + if (mad_header_status) { + fprintf(stderr,"received UMAD with an error: 0x%x\n", mad_header_status); + return 1; + } + + return 0; +} + + +/****************************************************************************** + * get_mlid_from_mad + ******************************************************************************/ +static void get_mlid_from_mad(struct sa_mad_packet_t *samad_packet,uint16_t *mlid) +{ + uint8_t *ptr; + ptr = samad_packet->SubnetAdminData; + *mlid = ntohs(*(uint16_t *)(ptr + 36)); +} + +/****************************************************************************** + * set_multicast_gid + ******************************************************************************/ +void set_multicast_gid(struct mcast_parameters *params,uint32_t qp_num,int is_client) +{ + uint8_t mcg_gid[16] = MCG_GID; + const char *pstr = params->user_mgid; + char *term = NULL; + char tmp[20]; + int i; + + if (params->user_mgid) { + term = strpbrk(pstr, ":"); + memcpy(tmp, pstr, term - pstr+1); + tmp[term - pstr] = 0; + + mcg_gid[0] = (unsigned char)strtoll(tmp, NULL, 0); + + for (i = 1; i < 15; ++i) { + pstr += term - pstr + 1; + term = strpbrk(pstr, ":"); + memcpy(tmp, pstr, term - pstr+1); + tmp[term - pstr] = 0; + + mcg_gid[i] = (unsigned char)strtoll(tmp, NULL, 0); + } + pstr += term - pstr + 1; + + strcpy(tmp, pstr); + mcg_gid[15] = (unsigned char)strtoll(tmp, NULL, 0); + } + + memcpy(params->mgid.raw,mcg_gid,16); + if (is_client && params->user_mgid==NULL) + params->mgid.raw[15]++; +} + +/****************************************************************************** + * join_multicast_group + ******************************************************************************/ +int join_multicast_group(subn_adm_method method,struct 
mcast_parameters *params) +{ + int portid = -1; + int agentid = -1; + void *umad_buff = NULL; + void *mad = NULL; + int length = MAD_SIZE; + int test_result = 0; + + /* mlid will be assigned to the new LID after the join */ + if (umad_init() < 0) { + fprintf(stderr, "failed to init the UMAD library\n"); + goto cleanup; + } + /* use casting to loose the "const char0 *" */ + portid = umad_open_port((char*)params->ib_devname,params->ib_port); + if (portid < 0) { + fprintf(stderr,"failed to open UMAD port %d\n",params->ib_port); + goto cleanup; + } + + agentid = umad_register(portid,MANAGMENT_CLASS_SUBN_ADM, 2, 0, 0); + if (agentid < 0) { + fprintf(stderr,"failed to register UMAD agent for MADs\n"); + goto cleanup; + } + + umad_buff = umad_alloc(1, umad_size() + MAD_SIZE); + if (!umad_buff) { + fprintf(stderr, "failed to allocate MAD buffer\n"); + goto cleanup; + } + + mad = umad_get_mad(umad_buff); + prepare_mcast_mad(method,params,(struct sa_mad_packet_t *)mad); + + if (umad_set_addr(umad_buff,params->sm_lid,1,params->sm_sl,QP1_WELL_KNOWN_Q_KEY) < 0) { + fprintf(stderr, "failed to set the destination address of the SMP\n"); + goto cleanup; + } + + if (umad_send(portid,agentid,umad_buff,MAD_SIZE,100,5) < 0) { + fprintf(stderr, "failed to send MAD\n"); + goto cleanup; + } + + if (umad_recv(portid,umad_buff,&length,5000) < 0) { + fprintf(stderr, "failed to receive MAD response\n"); + goto cleanup; + } + + if (check_mad_status((struct sa_mad_packet_t*)mad)) { + fprintf(stderr, "failed to get mlid from MAD\n"); + goto cleanup; + } + + /* "Join multicast group" message was sent */ + if (method == SUBN_ADM_METHOD_SET) { + get_mlid_from_mad((struct sa_mad_packet_t*)mad,¶ms->mlid); + params->mcast_state |= MCAST_IS_JOINED; + if (params->is_2nd_mgid_used == 0) { + sighandler_params = params; + signal(SIGINT,signalCatcher); + } + } else { + params->mcast_state &= ~MCAST_IS_JOINED; + } + +cleanup: + if (umad_buff) + umad_free(umad_buff); + + if (portid >= 0) { + if (agentid >= 
0) { + if (umad_unregister(portid, agentid)) { + fprintf(stderr, "failed to deregister UMAD agent for MADs\n"); + test_result = 1; + } + } + + if (umad_close_port(portid)) { + fprintf(stderr, "failed to close UMAD portid\n"); + test_result = 1; + } + } + + return test_result; +} + +/****************************************************************************** + * End + ******************************************************************************/ diff --git a/usr/benchmarks/ib/multicast_resources.h b/usr/benchmarks/ib/multicast_resources.h new file mode 100644 index 000000000..1e879b97d --- /dev/null +++ b/usr/benchmarks/ib/multicast_resources.h @@ -0,0 +1,207 @@ +/* + * Copyright (c) 2009 Mellanox Technologies Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Author: Ido Shamay + */ + +#ifndef MULTICAST_RESOURCES_H +#define MULTICAST_RESOURCES_H + +/* Multicast Module for perftest. + * + * Description : + * + * This file contains the structures and methods for implementing multiple + * multicast groups in a user space environment. + * The module is in use in "send_bw" and "send_lat", but can be used in other + * applications and can generate more methods and serve more benchmarks. + * The module uses only the structures defined here, enabling generic use of it. + * + * Defined Types : + * + * mcast_parameters - Contains all the parameters needed for this module. + * mcast_group - The multicast group entity itself. + * mcg_qp - Is a QP structure that is attached to the group. + * + */ + + +/************************************************************************ + * Macros , Defines and Files included for work.
#define QPNUM_MCAST				0xffffff
#define DEF_QKEY				0x11111111
#define DEF_PKEY_IDX				0
#define DEF_SLL					0
#define MAX_POLL_ITERATION_TIMEOUT		1000000
/* Default MGID, used when the user does not supply one. */
#define MCG_GID {255,1,0,0,0,2,201,133,0,0,0,0,0,0,0,0}

/* Definitions section for MADs */
#define SUBN_ADM_ATTR_MC_MEMBER_RECORD		0x38
#define MANAGMENT_CLASS_SUBN_ADM		0x03	/* Subnet Administration class */
#define MCMEMBER_JOINSTATE_FULL_MEMBER		0x1
#define MAD_SIZE				256	/* The size of a MAD is 256 bytes */
#define QP1_WELL_KNOWN_Q_KEY			0x80010000 /* Q_Key value of QP1 */
#define DEF_TRANS_ID				0x12345678 /* TransactionID */
#define DEF_TCLASS				0
#define DEF_FLOW_LABLE				0

/*
 * Convert a 64 bit value between network and host byte order.
 *
 * On a host whose byte order already is network order (ntohl(1) == 1) the
 * value is returned unchanged; the previous definition exchanged the two
 * 32 bit halves there. Otherwise each half is swapped with ntohl() and the
 * halves trade places. The argument is evaluated more than once, so it must
 * not have side effects.
 */
#ifndef ntohll
#define ntohll(x) \
	((ntohl(1) == 1) ? (uint64_t)(x) : \
	 ((((uint64_t)ntohl((uint32_t)((uint64_t)(x) & 0xffffffffULL))) << 32) | \
	  (uint64_t)ntohl((uint32_t)((uint64_t)(x) >> 32))))
#endif
#ifndef htonll
#define htonll(x) ntohll(x)
#endif

/* generate a bit mask S bits wide; valid for S in 0..32 (no shift by 32) */
#define MASK32(S)		( (uint32_t)((((uint64_t)1) << (S)) - 1) )

/* generate a bit mask with bits O+S..O set (assumes 32 bit integer). */
#define BITS32(O,S)		( MASK32(S) << (O) )

/* extract S bits from (u_int32_t)W with offset O and shifts them O places to the right */
#define EXTRACT32(W,O,S)	( ((W)>>(O)) & MASK32(S) )

/* insert S bits with offset O from field F into word W (u_int32_t) */
#define INSERT32(W,F,O,S)	(/*(W)=*/ ( ((W) & (~BITS32(O,S)) ) | (((F) & MASK32(S))<<(O)) ))

/* take S bits at offset O2 of F and insert them into W at offset O1 */
#ifndef INSERTF
#define INSERTF(W,O1,F,O2,S)	(INSERT32(W, EXTRACT32(F, O2, S), O1, S) )
#endif


/* according to Table 187 in the IB spec 1.2.1 */
typedef enum {
	SUBN_ADM_METHOD_SET    = 0x2,
	SUBN_ADM_METHOD_DELETE = 0x15
} subn_adm_method;
/* Utilities for Umad Usage. */
/*
 * Component-mask bits of the record fields that prepare_mcast_mad() fills
 * in. The selected bits are OR-ed together and stored, in network byte
 * order, in sa_mad_packet_t.ComponentMask.
 */
typedef enum {
	SUBN_ADM_COMPMASK_MGID         = (1ULL << 0),
	SUBN_ADM_COMPMASK_PORT_GID     = (1ULL << 1),
	SUBN_ADM_COMPMASK_Q_KEY        = (1ULL << 2),
	SUBN_ADM_COMPMASK_P_KEY        = (1ULL << 7),
	SUBN_ADM_COMPMASK_TCLASS       = (1ULL << 6),
	SUBN_ADM_COMPMASK_SL           = (1ULL << 12),
	SUBN_ADM_COMPMASK_FLOW_LABEL   = (1ULL << 13),
	SUBN_ADM_COMPMASK_JOIN_STATE   = (1ULL << 16),
} subn_adm_component_mask;

/*
 * Bit flags kept in mcast_parameters.mcast_state.
 * MCAST_IS_JOINED is set by join_multicast_group() after a successful join
 * and cleared after a successful leave.
 * NOTE(review): MCAST_IS_ATTACHED is not referenced by the code visible
 * here; presumably it tracks QP attachment -- confirm against the callers.
 */
typedef enum {
	MCAST_IS_JOINED   = 1,
	MCAST_IS_ATTACHED = (1 << 1)
} mcast_state;


/************************************************************************
 *   Multicast data structures.                                         *
 ************************************************************************/

/* Needed parameters for creating a multiple multicast group entity. */
struct mcast_parameters {
	int             num_qps_on_group;
	int             is_user_mgid;
	int             mcast_state;	/* mcast_state flags, see above */
	int             ib_port;	/* port handed to umad_open_port() */
	uint16_t        mlid;		/* filled from the SA response after a join */
	uint16_t        base_mlid;
	const char      *user_mgid;	/* optional "b0:b1:...:b15" string parsed by set_multicast_gid() */
	char            *ib_devname;	/* device name handed to umad_open_port() */
	uint16_t        pkey;		/* P_Key placed in the join/leave MAD */
	uint16_t        sm_lid;		/* destination LID for umad_set_addr() */
	uint8_t         sm_sl;		/* service level for umad_set_addr() */
	union ibv_gid   port_gid;	/* port GID placed in the join/leave MAD */
	union ibv_gid   mgid;		/* MGID that is joined or left */
	/* In case it's a latency test. */
	union ibv_gid   base_mgid;	/* copied into mgid by the SIGINT handler when is_2nd_mgid_used is set */
	int             is_2nd_mgid_used;
};

/* according to Table 195 in the IB spec 1.2.1 */

/*
 * On-the-wire layout of an SA MAD. The struct is packed and its members add
 * up to 24 + 12 + 8 + 2 + 2 + 8 + 200 = 256 bytes, i.e. MAD_SIZE.
 */
struct sa_mad_packet_t {
	u_int8_t        mad_header_buf[24];
	u_int8_t        rmpp_header_buf[12];
	u_int64_t       SM_Key;
	u_int16_t       AttributeOffset;
	u_int16_t       Reserved1;
	u_int64_t       ComponentMask;	/* subn_adm_component_mask bits, network byte order */
	u_int8_t        SubnetAdminData[200];
}__attribute__((packed));
+ * + * Parameters : + * + * params - The parameters of the machine + * my_dest ,rem_dest - The 2 sides that ends the connection. + * + * Return Value : 0 upon success. -1 if it fails. + */ +void set_multicast_gid(struct mcast_parameters *params,uint32_t qp_num,int is_client); + + +/* ctx_close_connection . + * + * Description : + * + * Close the connection between the 2 machines. + * It performs an handshake to ensure the 2 sides are there. + * + * Parameters : + * + * params - The parameters of the machine + * my_dest ,rem_dest - The 2 sides that ends the connection. + * + * Return Value : 0 upon success. -1 if it fails. + */ +int join_multicast_group(subn_adm_method method,struct mcast_parameters *params); + + +#endif /* MULTICAST_RESOURCES_H */ diff --git a/usr/benchmarks/ib/perftest_communication.c b/usr/benchmarks/ib/perftest_communication.c new file mode 100755 index 000000000..b4fb4de74 --- /dev/null +++ b/usr/benchmarks/ib/perftest_communication.c @@ -0,0 +1,1683 @@ +#include +#include +#include +#include +#include +#include +#include +/* #include */ +#include +/* #include */ +#include +#include +#include +#include + +#include "perftest_communication.h" + + +static const char *sideArray[] = {"local", "remote"}; +static const char *gidArray[] = {"GID" , "MGID"}; + +static inline int valid_mtu_size(int mtu_size) +{ + return !(mtu_size < IBV_MTU_256 || mtu_size > IBV_MTU_4096); +} + +static inline int ipv6_addr_v4mapped(const struct in6_addr *a) +{ + return ((a->s6_addr32[0] | a->s6_addr32[1]) | + (a->s6_addr32[2] ^ htonl(0x0000ffff))) == 0UL || + /* IPv4 encoded multicast addresses */ + (a->s6_addr32[0] == htonl(0xff0e0000) && + ((a->s6_addr32[1] | + (a->s6_addr32[2] ^ htonl(0x0000ffff))) == 0UL)); +} + + +/****************************************************************************** + * + ******************************************************************************/ + +double bswap_double(double x) +{ + union { + double ddata; + uint64_t u64data; + } d1, 
d2; + + d1.ddata = x; + d2.u64data = bswap_64(d1.u64data); + return d2.ddata; +} + +/****************************************************************************** + * + ******************************************************************************/ + + +static int post_one_recv_wqe(struct pingpong_context *ctx) +{ + struct ibv_recv_wr wr; + struct ibv_recv_wr *bad_wr; + struct ibv_sge list; + + list.addr = (uintptr_t)ctx->buf[0]; + list.length = sizeof(struct pingpong_dest); + list.lkey = ctx->mr[0]->lkey; + + wr.next = NULL; + wr.wr_id = SYNC_SPEC_ID; + wr.sg_list = &list; + wr.num_sge = 1; + + if (ibv_post_recv(ctx->qp[0],&wr,&bad_wr)) { + fprintf(stderr, "Function ibv_post_recv failed for RDMA_CM QP\n"); + return FAILURE; + } + + return SUCCESS; +} + +/****************************************************************************** + * + ******************************************************************************/ +static int post_recv_to_get_ah(struct pingpong_context *ctx) +{ + struct ibv_recv_wr wr; + struct ibv_recv_wr *bad_wr; + struct ibv_sge list; + + list.addr = (uintptr_t)ctx->buf[0]; + list.length = UD_ADDITION + sizeof(uint32_t); + list.lkey = ctx->mr[0]->lkey; + + wr.next = NULL; + wr.wr_id = 0; + wr.sg_list = &list; + wr.num_sge = 1; + + if (ibv_post_recv(ctx->qp[0],&wr,&bad_wr)) { + fprintf(stderr, "Function ibv_post_recv failed for RDMA_CM QP\n"); + return FAILURE; + } + + return SUCCESS; + +} + +/****************************************************************************** + * + ******************************************************************************/ +static int send_qp_num_for_ah(struct pingpong_context *ctx, + struct perftest_parameters *user_param) +{ + struct ibv_send_wr wr; + struct ibv_send_wr *bad_wr; + struct ibv_sge list; + struct ibv_wc wc; + int ne; + + memcpy(ctx->buf[0], &ctx->qp[0]->qp_num, sizeof(uint32_t)); + + list.addr = (uintptr_t)ctx->buf[0]; + list.length = sizeof(uint32_t); + list.lkey = ctx->mr[0]->lkey; + + 
wr.wr_id = 0; + wr.sg_list = &list; + wr.num_sge = 1; + wr.opcode = IBV_WR_SEND_WITH_IMM; + wr.send_flags = IBV_SEND_SIGNALED; + wr.next = NULL; + wr.imm_data = htonl(ctx->qp[0]->qp_num); + + wr.wr.ud.ah = ctx->ah[0]; + wr.wr.ud.remote_qpn = user_param->rem_ud_qpn; + wr.wr.ud.remote_qkey = user_param->rem_ud_qkey; + + + if (ibv_post_send(ctx->qp[0],&wr,&bad_wr)) { + fprintf(stderr, "Function ibv_post_send failed\n"); + return 1; + } + + do { + ne = ibv_poll_cq(ctx->send_cq, 1,&wc); + } while (ne == 0); + + if (wc.status || wc.opcode != IBV_WC_SEND || wc.wr_id != 0) { + fprintf(stderr, " Couldn't post send my QP number %d\n",(int)wc.status); + return 1; + } + + return 0; + +} + +/****************************************************************************** + * + ******************************************************************************/ +static int create_ah_from_wc_recv(struct pingpong_context *ctx, + struct perftest_parameters *user_param) +{ + struct ibv_qp_attr attr; + struct ibv_qp_init_attr init_attr; + struct ibv_wc wc; + int ne; + + do { + ne = ibv_poll_cq(ctx->recv_cq,1,&wc); + } while (ne == 0); + + if (wc.status || !(wc.opcode & IBV_WC_RECV) || wc.wr_id != 0) { + fprintf(stderr, "Bad wc status when trying to create AH -- %d -- %d \n",(int)wc.status,(int)wc.wr_id); + return 1; + } + + ctx->ah[0] = ibv_create_ah_from_wc(ctx->pd, &wc, (struct ibv_grh*)ctx->buf[0], ctx->cm_id->port_num); + user_param->rem_ud_qpn = ntohl(wc.imm_data); + ibv_query_qp(ctx->qp[0],&attr, IBV_QP_QKEY,&init_attr); + user_param->rem_ud_qkey = attr.qkey; + + return 0; +} + + +/****************************************************************************** + * + ******************************************************************************/ +static int ethernet_write_keys(struct pingpong_dest *my_dest, + struct perftest_comm *comm) +{ + if (my_dest->gid_index == -1) { + + char msg[KEY_MSG_SIZE]; + + sprintf(msg,KEY_PRINT_FMT,my_dest->lid,my_dest->out_reads, + 
my_dest->qpn,my_dest->psn, my_dest->rkey, my_dest->vaddr, my_dest->srqn); + + if (write(comm->rdma_params->sockfd,msg,sizeof msg) != sizeof msg) { + perror("client write"); + fprintf(stderr, "Couldn't send local address\n"); + return 1; + } + + } else { + char msg[KEY_MSG_SIZE_GID]; + sprintf(msg,KEY_PRINT_FMT_GID, my_dest->lid,my_dest->out_reads, + my_dest->qpn,my_dest->psn, my_dest->rkey, my_dest->vaddr, + my_dest->gid.raw[0],my_dest->gid.raw[1], + my_dest->gid.raw[2],my_dest->gid.raw[3], + my_dest->gid.raw[4],my_dest->gid.raw[5], + my_dest->gid.raw[6],my_dest->gid.raw[7], + my_dest->gid.raw[8],my_dest->gid.raw[9], + my_dest->gid.raw[10],my_dest->gid.raw[11], + my_dest->gid.raw[12],my_dest->gid.raw[13], + my_dest->gid.raw[14],my_dest->gid.raw[15], + my_dest->srqn); + + if (write(comm->rdma_params->sockfd, msg, sizeof msg) != sizeof msg) { + perror("client write"); + fprintf(stderr, "Couldn't send local address\n"); + return 1; + } + + } + + return 0; +} + +/****************************************************************************** + * + ******************************************************************************/ +static int ethernet_read_keys(struct pingpong_dest *rem_dest, + struct perftest_comm *comm) +{ + if (rem_dest->gid_index == -1) { + + int parsed; + char msg[KEY_MSG_SIZE]; + + if (read(comm->rdma_params->sockfd, msg, sizeof msg) != sizeof msg) { + fprintf(stderr, "ethernet_read_keys: Couldn't read remote address\n"); + return 1; + } + + parsed = sscanf(msg,KEY_PRINT_FMT,(unsigned int*)&rem_dest->lid, + &rem_dest->out_reads,&rem_dest->qpn, + &rem_dest->psn, &rem_dest->rkey,&rem_dest->vaddr,&rem_dest->srqn); + + if (parsed != 7) { + fprintf(stderr, "Couldn't parse line <%.*s>\n",(int)sizeof msg, msg); + return 1; + } + + } else { + + char msg[KEY_MSG_SIZE_GID]; + char *pstr = msg, *term; + char tmp[120]; + int i; + + if (read(comm->rdma_params->sockfd, msg, sizeof msg) != sizeof msg) { + fprintf(stderr, "ethernet_read_keys: Couldn't read remote 
address\n"); + return 1; + } + + term = strpbrk(pstr, ":"); + memcpy(tmp, pstr, term - pstr); + tmp[term - pstr] = 0; + rem_dest->lid = (int)strtol(tmp, NULL, 16); /*LID*/ + + pstr += term - pstr + 1; + term = strpbrk(pstr, ":"); + memcpy(tmp, pstr, term - pstr); + tmp[term - pstr] = 0; + rem_dest->out_reads = (int)strtol(tmp, NULL, 16); /*OUT_READS*/ + + pstr += term - pstr + 1; + term = strpbrk(pstr, ":"); + memcpy(tmp, pstr, term - pstr); + tmp[term - pstr] = 0; + rem_dest->qpn = (int)strtol(tmp, NULL, 16); /*QPN*/ + + pstr += term - pstr + 1; + term = strpbrk(pstr, ":"); + memcpy(tmp, pstr, term - pstr); + tmp[term - pstr] = 0; + rem_dest->psn = (int)strtol(tmp, NULL, 16); /*PSN*/ + + pstr += term - pstr + 1; + term = strpbrk(pstr, ":"); + memcpy(tmp, pstr, term - pstr); + tmp[term - pstr] = 0; + rem_dest->rkey = (unsigned)strtoul(tmp, NULL, 16); /*RKEY*/ + + pstr += term - pstr + 1; + term = strpbrk(pstr, ":"); + memcpy(tmp, pstr, term - pstr); + tmp[term - pstr] = 0; + + rem_dest->vaddr = strtoull(tmp, NULL, 16); /*VA*/ + + for (i = 0; i < 15; ++i) { + pstr += term - pstr + 1; + term = strpbrk(pstr, ":"); + memcpy(tmp, pstr, term - pstr); + tmp[term - pstr] = 0; + + rem_dest->gid.raw[i] = (unsigned char)strtoll(tmp, NULL, 16); + } + + pstr += term - pstr + 1; + + strcpy(tmp, pstr); + rem_dest->gid.raw[15] = (unsigned char)strtoll(tmp, NULL, 16); + + + pstr += term - pstr + 4; + + term = strpbrk(pstr, ":"); + memcpy(tmp, pstr, term - pstr); + tmp[term - pstr] = 0; + rem_dest->srqn = (unsigned)strtoul(tmp, NULL, 16); /*SRQN*/ + + } + return 0; +} + +/****************************************************************************** + * + ******************************************************************************/ +static int rdma_write_keys(struct pingpong_dest *my_dest, + struct perftest_comm *comm) +{ + struct ibv_send_wr wr; + struct ibv_send_wr *bad_wr; + struct ibv_sge list; + struct ibv_wc wc; + int ne; + + #ifdef HAVE_ENDIAN + int i; + struct 
pingpong_dest m_my_dest; + + m_my_dest.lid = htobe32(my_dest->lid); + m_my_dest.out_reads = htobe32(my_dest->out_reads); + m_my_dest.qpn = htobe32(my_dest->qpn); + m_my_dest.psn = htobe32(my_dest->psn); + m_my_dest.rkey = htobe32(my_dest->rkey); + m_my_dest.srqn = htobe32(my_dest->srqn); + m_my_dest.gid_index = htobe32(my_dest->gid_index); + m_my_dest.vaddr = htobe64(my_dest->vaddr); + + for(i=0; i<16; i++) { + m_my_dest.gid.raw[i] = my_dest->gid.raw[i]; + } + + memcpy(comm->rdma_ctx->buf[0], &m_my_dest, sizeof(struct pingpong_dest)); + #else + memcpy(comm->rdma_ctx->buf[0], &my_dest, sizeof(struct pingpong_dest)); + #endif + list.addr = (uintptr_t)comm->rdma_ctx->buf[0]; + list.length = sizeof(struct pingpong_dest); + list.lkey = comm->rdma_ctx->mr[0]->lkey; + + + wr.wr_id = SYNC_SPEC_ID; + wr.sg_list = &list; + wr.num_sge = 1; + wr.opcode = IBV_WR_SEND; + wr.send_flags = IBV_SEND_SIGNALED; + wr.next = NULL; + + if (ibv_post_send(comm->rdma_ctx->qp[0],&wr,&bad_wr)) { + fprintf(stderr, "Function ibv_post_send failed\n"); + return 1; + } + + do { + ne = ibv_poll_cq(comm->rdma_ctx->send_cq, 1,&wc); + } while (ne == 0); + + if (wc.status || wc.opcode != IBV_WC_SEND || wc.wr_id != SYNC_SPEC_ID) { + fprintf(stderr, " Bad wc status %d\n",(int)wc.status); + return 1; + } + + return 0; +} + +/****************************************************************************** + * + ******************************************************************************/ +static int rdma_read_keys(struct pingpong_dest *rem_dest, + struct perftest_comm *comm) +{ + #ifdef HAVE_ENDIAN + struct pingpong_dest a_rem_dest; + #endif + struct ibv_wc wc; + int ne; + + do { + ne = ibv_poll_cq(comm->rdma_ctx->recv_cq,1,&wc); + } while (ne == 0); + + if (wc.status || !(wc.opcode & IBV_WC_RECV) || wc.wr_id != SYNC_SPEC_ID) { + fprintf(stderr, "Bad wc status -- %d -- %d \n",(int)wc.status,(int)wc.wr_id); + return 1; + } + + #ifdef HAVE_ENDIAN + memcpy(&a_rem_dest,comm->rdma_ctx->buf[0],sizeof(struct 
pingpong_dest)); + rem_dest->lid = ntohl(a_rem_dest.lid); + rem_dest->out_reads = ntohl(a_rem_dest.out_reads); + rem_dest->qpn = ntohl(a_rem_dest.qpn); + rem_dest->psn = ntohl(a_rem_dest.psn); + rem_dest->rkey = ntohl(a_rem_dest.rkey); + + rem_dest->vaddr = be64toh(a_rem_dest.vaddr); + memcpy(rem_dest->gid.raw, &(a_rem_dest.gid), 16*sizeof(uint8_t)); + #else + memcpy(&rem_dest,comm->rdma_ctx->buf[0],sizeof(struct pingpong_dest)); + #endif + + if (post_one_recv_wqe(comm->rdma_ctx)) { + fprintf(stderr, "Couldn't post send \n"); + return 1; + } + + return 0; +} + + +#ifdef HAVE_GID_ATTR +enum who_is_better {LEFT_IS_BETTER, EQUAL, RIGHT_IS_BETTER}; + +struct roce_version_sorted_enum { + enum ibv_exp_roce_gid_type type; + int rate; +}; + +/* This struct defines which RoCE version is more important for default usage */ +struct roce_version_sorted_enum roce_versions_sorted [] = { + {IBV_EXP_IB_ROCE_V1_GID_TYPE, 1}, + {IBV_EXP_ROCE_V2_GID_TYPE, 2}, + {IBV_EXP_ROCE_V1_5_GID_TYPE, 3} +}; + +int find_roce_version_rate (int roce_ver) +{ + int i; + int arr_len = GET_ARRAY_SIZE(roce_versions_sorted); + + for (i = 0; i < arr_len; i++) { + if (roce_versions_sorted[i].type == roce_ver) + return roce_versions_sorted[i].rate; + } + + return -1; +} + +/* RoCE V1.5 > V2 > V1 + * other RoCE versions will be ignored until added to roce_versions_sorted array */ +static int check_better_roce_version (int roce_ver, int roce_ver_rival) +{ + int roce_ver_rate = find_roce_version_rate(roce_ver); + int roce_ver_rate_rival = find_roce_version_rate(roce_ver_rival); + + if (roce_ver_rate < roce_ver_rate_rival) + return RIGHT_IS_BETTER; + else if (roce_ver_rate > roce_ver_rate_rival) + return LEFT_IS_BETTER; + else + return EQUAL; +} +#endif + +static int get_best_gid_index (struct pingpong_context *ctx, + struct perftest_parameters *user_param, + struct ibv_port_attr *attr, int port) +{ + int gid_index = 0, i; + union ibv_gid temp_gid, temp_gid_rival; + int is_ipv4, is_ipv4_rival; + + for (i = 1; 
i < attr->gid_tbl_len; i++) { + if (ibv_query_gid(ctx->context, port, gid_index, &temp_gid)) { + return -1; + } + + if (ibv_query_gid(ctx->context, port, i, &temp_gid_rival)) { + return -1; + } + + is_ipv4 = ipv6_addr_v4mapped((struct in6_addr *)temp_gid.raw); + is_ipv4_rival = ipv6_addr_v4mapped((struct in6_addr *)temp_gid_rival.raw); + + if (is_ipv4_rival && !is_ipv4 && !user_param->ipv6) + gid_index = i; + else if (!is_ipv4_rival && is_ipv4 && user_param->ipv6) + gid_index = i; +#ifdef HAVE_GID_ATTR + else { + int roce_version, roce_version_rival; + struct ibv_exp_gid_attr gid_attr; + + gid_attr.comp_mask = IBV_EXP_QUERY_GID_ATTR_TYPE; + if (ibv_exp_query_gid_attr(ctx->context, port, gid_index, &gid_attr)) + return -1; + roce_version = gid_attr.type; + + if (ibv_exp_query_gid_attr(ctx->context, port, i, &gid_attr)) + return -1; + roce_version_rival = gid_attr.type; + + if (check_better_roce_version(roce_version, roce_version_rival) == RIGHT_IS_BETTER) + gid_index = i; + } +#endif + } + + return gid_index; +} + +/****************************************************************************** + * + ******************************************************************************/ +static int ethernet_client_connect(struct perftest_comm *comm) +{ + struct addrinfo *res, *t; + struct addrinfo hints; + char *service; + + int sockfd = -1; + memset(&hints, 0, sizeof hints); + hints.ai_family = AF_INET; + hints.ai_socktype = SOCK_STREAM; + + if (check_add_port(&service,comm->rdma_params->port,comm->rdma_params->servername,&hints,&res)) { + fprintf(stderr, "Problem in resolving basic address and port\n"); + return 1; + } + + for (t = res; t; t = t->ai_next) { + sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol); + + if (sockfd >= 0) { + if (!connect(sockfd, t->ai_addr, t->ai_addrlen)) + break; + close(sockfd); + sockfd = -1; + } + } + + freeaddrinfo(res); + + if (sockfd < 0) { + fprintf(stderr, "Couldn't connect to 
%s:%d\n",comm->rdma_params->servername,comm->rdma_params->port); + return 1; + } + + comm->rdma_params->sockfd = sockfd; + return 0; +} + +/****************************************************************************** + * + ******************************************************************************/ +static int ethernet_server_connect(struct perftest_comm *comm) +{ + struct addrinfo *res, *t; + struct addrinfo hints; + char *service; + int n; + + int sockfd = -1, connfd; + memset(&hints, 0, sizeof hints); + hints.ai_flags = AI_PASSIVE; + hints.ai_family = AF_INET; + hints.ai_socktype = SOCK_STREAM; + + if (check_add_port(&service,comm->rdma_params->port,NULL,&hints,&res)) { + fprintf(stderr, "Problem in resolving basic address and port\n"); + return 1; + } + + for (t = res; t; t = t->ai_next) { + sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol); + + if (sockfd >= 0) { + n = 1; + setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n); + if (!bind(sockfd, t->ai_addr, t->ai_addrlen)) + break; + close(sockfd); + sockfd = -1; + } + } + freeaddrinfo(res); + + if (sockfd < 0) { + fprintf(stderr, "Couldn't listen to port %d\n", comm->rdma_params->port); + return 1; + } + + listen(sockfd, 1); + connfd = accept(sockfd, NULL, 0); + + if (connfd < 0) { + perror("server accept"); + fprintf(stderr, "accept() failed\n"); + close(sockfd); + return 1; + } + close(sockfd); + comm->rdma_params->sockfd = connfd; + return 0; +} + +/****************************************************************************** + * + ******************************************************************************/ +int set_up_connection(struct pingpong_context *ctx, + struct perftest_parameters *user_param, + struct pingpong_dest *my_dest) +{ + int num_of_qps = user_param->num_of_qps; + int num_of_qps_per_port = user_param->num_of_qps / 2; + int i; + union ibv_gid temp_gid; + union ibv_gid temp_gid2; + struct ibv_port_attr attr; + + srand48(getpid() * time(NULL)); + + /*in xrc with 
bidirectional, + there are send qps and recv qps. the actual number of send/recv qps + is num_of_qps / 2. + */ + if ( (user_param->connection_type == DC || user_param->use_xrc) && (user_param->duplex || user_param->tst == LAT)) { + num_of_qps /= 2; + num_of_qps_per_port = num_of_qps / 2; + } + + if (user_param->gid_index != -1) { + if (ibv_query_port(ctx->context, user_param->ib_port, &attr)) + return 0; + + if (user_param->use_gid_user) { + if (ibv_query_gid(ctx->context, user_param->ib_port, user_param->gid_index, &temp_gid)) + return -1; + } else { + user_param->gid_index = get_best_gid_index(ctx, user_param, &attr, user_param->ib_port); + if (user_param->gid_index < 0) + return -1; + if (ibv_query_gid(ctx->context, user_param->ib_port, user_param->gid_index, &temp_gid)) + return -1; + } + } + + if (user_param->dualport == ON) { + if (user_param->gid_index2 != -1) { + if (ibv_query_port(ctx->context, user_param->ib_port2, &attr)) + return 0; + + if (user_param->use_gid_user) { + if (ibv_query_gid(ctx->context, user_param->ib_port2, user_param->gid_index, &temp_gid2)) + return -1; + } else { + user_param->gid_index2 = get_best_gid_index(ctx, user_param, &attr, user_param->ib_port2); + if (user_param->gid_index2 < 0) + return -1; + if (ibv_query_gid(ctx->context, user_param->ib_port2, user_param->gid_index2, &temp_gid2)) + return -1; + } + } + } + + for (i = 0; i < user_param->num_of_qps; i++) { + + if (user_param->dualport == ON) { + /*first half of qps are for ib_port and second half are for ib_port2 + in xrc with bidirectional, the first half of qps are xrc_send qps and + the second half are xrc_recv qps. 
the first half of the send/recv qps + are for ib_port1 and the second half are for ib_port2 + */ + if (i % num_of_qps < num_of_qps_per_port) { + my_dest[i].lid = ctx_get_local_lid(ctx->context,user_param->ib_port); + my_dest[i].gid_index = user_param->gid_index; + } else { + my_dest[i].lid = ctx_get_local_lid(ctx->context,user_param->ib_port2); + my_dest[i].gid_index = user_param->gid_index2; + } + /*single-port case*/ + } else { + my_dest[i].lid = ctx_get_local_lid(ctx->context,user_param->ib_port); + my_dest[i].gid_index = user_param->gid_index; + } + + my_dest[i].qpn = ctx->qp[i]->qp_num; + my_dest[i].psn = lrand48() & 0xffffff; + my_dest[i].rkey = ctx->mr[i]->rkey; + + /* Each qp gives his receive buffer address.*/ + my_dest[i].out_reads = user_param->out_reads; + if (user_param->mr_per_qp) + my_dest[i].vaddr = (uintptr_t)ctx->buf[i] + BUFF_SIZE(ctx->size,ctx->cycle_buffer); + else + my_dest[i].vaddr = (uintptr_t)ctx->buf[0] + (user_param->num_of_qps + i)*BUFF_SIZE(ctx->size,ctx->cycle_buffer); + + if (user_param->dualport==ON) { + + if (i % num_of_qps < num_of_qps_per_port) + memcpy(my_dest[i].gid.raw,temp_gid.raw ,16); + + else + memcpy(my_dest[i].gid.raw,temp_gid2.raw ,16); + } else { + memcpy(my_dest[i].gid.raw,temp_gid.raw ,16); + } + + /* + We do not fail test upon lid above RoCE. + if ( (user_param->gid_index < 0) || ((user_param->gid_index2 < 0) && (user_param->dualport == ON)) ){ + if (!my_dest[i].lid) { + fprintf(stderr," Local lid 0x0 detected. Is an SM running? 
\n"); + return -1; + } + } + */ + } + + #ifdef HAVE_XRCD + if (user_param->use_xrc) { + for (i=0; i < user_param->num_of_qps; i++) { + if (ibv_get_srq_num(ctx->srq,&(my_dest[i].srqn))) { + fprintf(stderr, "Couldn't get SRQ number\n"); + return 1; + } + } + } + #endif + + #ifdef HAVE_DC + if(user_param->machine == SERVER || user_param->duplex || user_param->tst == LAT) { + if (user_param->connection_type == DC) { + for (i=0; i < user_param->num_of_qps; i++) { + if (ibv_get_srq_num(ctx->srq, &(my_dest[i].srqn))) { + fprintf(stderr, "Couldn't get SRQ number\n"); + return 1; + } + } + } + } + #endif + return 0; +} + +/****************************************************************************** + * + ******************************************************************************/ +int rdma_client_connect(struct pingpong_context *ctx,struct perftest_parameters *user_param) +{ + char *service; + int temp,num_of_retry= NUM_OF_RETRIES; + struct sockaddr_in sin; + struct addrinfo *res; + struct rdma_cm_event *event; + struct rdma_conn_param conn_param; + struct addrinfo hints; + + memset(&hints, 0, sizeof hints); + hints.ai_family = AF_INET; + hints.ai_socktype = SOCK_STREAM; + + if (check_add_port(&service,user_param->port,user_param->servername,&hints,&res)) { + fprintf(stderr, "Problem in resolving basic address and port\n"); + return FAILURE; + } + + if (res->ai_family != PF_INET) { + return FAILURE; + } + + memcpy(&sin, res->ai_addr, sizeof(sin)); + sin.sin_port = htons((unsigned short)user_param->port); + + while (1) { + + if (num_of_retry == 0) { + fprintf(stderr, "Received %d times ADDR_ERROR\n",NUM_OF_RETRIES); + return FAILURE; + } + + if (rdma_resolve_addr(ctx->cm_id, NULL,(struct sockaddr *)&sin,2000)) { + fprintf(stderr, "rdma_resolve_addr failed\n"); + return FAILURE; + } + + if (rdma_get_cm_event(ctx->cm_channel,&event)) { + fprintf(stderr, "rdma_get_cm_events failed\n"); + return FAILURE; + } + + if (event->event == RDMA_CM_EVENT_ADDR_ERROR) { + 
num_of_retry--; + rdma_ack_cm_event(event); + continue; + } + + if (event->event != RDMA_CM_EVENT_ADDR_RESOLVED) { + fprintf(stderr, "unexpected CM event %d\n",event->event); + rdma_ack_cm_event(event); + return FAILURE; + } + + rdma_ack_cm_event(event); + break; + } + + if (user_param->tos != DEF_TOS) { + + if (rdma_set_option(ctx->cm_id,RDMA_OPTION_ID,RDMA_OPTION_ID_TOS,&user_param->tos,sizeof(uint8_t))) { + fprintf(stderr, " Set TOS option failed: %d\n",event->event); + return FAILURE; + } + } + + while (1) { + + if (num_of_retry <= 0) { + fprintf(stderr, "Received %d times ADDR_ERROR - aborting\n",NUM_OF_RETRIES); + return FAILURE; + } + + if (rdma_resolve_route(ctx->cm_id,2000)) { + fprintf(stderr, "rdma_resolve_route failed\n"); + return FAILURE; + } + + if (rdma_get_cm_event(ctx->cm_channel,&event)) { + fprintf(stderr, "rdma_get_cm_events failed\n"); + return FAILURE; + } + + if (event->event == RDMA_CM_EVENT_ROUTE_ERROR) { + num_of_retry--; + rdma_ack_cm_event(event); + continue; + } + + if (event->event != RDMA_CM_EVENT_ROUTE_RESOLVED) { + fprintf(stderr, "unexpected CM event %d\n",event->event); + rdma_ack_cm_event(event); + return FAILURE; + } + + rdma_ack_cm_event(event); + break; + } + + ctx->context = ctx->cm_id->verbs; + temp = user_param->work_rdma_cm; + user_param->work_rdma_cm = ON; + + if (ctx_init(ctx, user_param)) { + fprintf(stderr," Unable to create the resources needed by comm struct\n"); + return FAILURE; + } + + memset(&conn_param, 0, sizeof conn_param); + if (user_param->verb == READ || user_param->verb == ATOMIC) { + conn_param.responder_resources = user_param->out_reads; + conn_param.initiator_depth = user_param->out_reads; + } + user_param->work_rdma_cm = temp; + conn_param.retry_count = user_param->retry_count; + conn_param.rnr_retry_count = 7; + + if (user_param->work_rdma_cm == OFF) { + + if (post_one_recv_wqe(ctx)) { + fprintf(stderr, "Couldn't post send \n"); + return 1; + } + } + + if (rdma_connect(ctx->cm_id,&conn_param)) { + 
fprintf(stderr, "Function rdma_connect failed\n"); + return FAILURE; + } + + if (rdma_get_cm_event(ctx->cm_channel,&event)) { + fprintf(stderr, "rdma_get_cm_events failed\n"); + return FAILURE; + } + + if (event->event != RDMA_CM_EVENT_ESTABLISHED) { + fprintf(stderr, "Unexpected CM event bl blka %d\n", event->event); + rdma_ack_cm_event(event); + return FAILURE; + } + + if (user_param->connection_type == UD) { + + user_param->rem_ud_qpn = event->param.ud.qp_num; + user_param->rem_ud_qkey = event->param.ud.qkey; + + ctx->ah[0] = ibv_create_ah(ctx->pd,&event->param.ud.ah_attr); + if (!ctx->ah[0]) { + printf(" Unable to create address handler for UD QP\n"); + return FAILURE; + } + + if (user_param->tst == LAT || (user_param->tst == BW && user_param->duplex)) { + + if (send_qp_num_for_ah(ctx,user_param)) { + printf(" Unable to send my QP number\n"); + return FAILURE; + } + } + } + + rdma_ack_cm_event(event); + return SUCCESS; +} + +/****************************************************************************** + * + ******************************************************************************/ +int retry_rdma_connect(struct pingpong_context *ctx, + struct perftest_parameters *user_param) +{ + int i, max_retries = 10; + int delay = 100000; /* 100 millisec */ + + for (i = 0; i < max_retries; i++) { + if (create_rdma_resources(ctx,user_param)) { + fprintf(stderr," Unable to create rdma resources\n"); + return FAILURE; + } + if (rdma_client_connect(ctx,user_param) == SUCCESS) + return SUCCESS; + if (destroy_rdma_resources(ctx,user_param)) { + fprintf(stderr,"Unable to destroy rdma resources\n"); + return FAILURE; + } + usleep(delay); + } + fprintf(stderr,"Unable to connect (retries = %d)\n", max_retries); + return FAILURE; +} + +/****************************************************************************** + + * + + ******************************************************************************/ +int rdma_server_connect(struct pingpong_context *ctx, + struct 
perftest_parameters *user_param) +{ + int temp; + struct addrinfo *res; + struct rdma_cm_event *event; + struct rdma_conn_param conn_param; + struct addrinfo hints; + char *service; + struct sockaddr_in sin; + + memset(&hints, 0, sizeof hints); + hints.ai_flags = AI_PASSIVE; + hints.ai_family = AF_INET; + hints.ai_socktype = SOCK_STREAM; + + if (check_add_port(&service,user_param->port,user_param->servername,&hints,&res)) { + fprintf(stderr, "Problem in resolving basic address and port\n"); + return FAILURE; + } + + if (res->ai_family != PF_INET) { + return FAILURE; + } + memcpy(&sin, res->ai_addr, sizeof(sin)); + sin.sin_port = htons((unsigned short)user_param->port); + + if (rdma_bind_addr(ctx->cm_id_control,(struct sockaddr *)&sin)) { + fprintf(stderr," rdma_bind_addr failed\n"); + return 1; + } + + if (rdma_listen(ctx->cm_id_control,0)) { + fprintf(stderr, "rdma_listen failed\n"); + return 1; + } + + if (rdma_get_cm_event(ctx->cm_channel,&event)) { + fprintf(stderr, "rdma_get_cm_events failed\n"); + return 1; + } + + if (event->event != RDMA_CM_EVENT_CONNECT_REQUEST) { + fprintf(stderr, "bad event waiting for connect request %d\n",event->event); + return 1; + } + + ctx->cm_id = (struct rdma_cm_id*)event->id; + ctx->context = ctx->cm_id->verbs; + + if (user_param->work_rdma_cm == ON) + alloc_ctx(ctx,user_param); + + temp = user_param->work_rdma_cm; + user_param->work_rdma_cm = ON; + + if (ctx_init(ctx,user_param)) { + fprintf(stderr," Unable to create the resources needed by comm struct\n"); + return FAILURE; + } + + memset(&conn_param, 0, sizeof conn_param); + if (user_param->verb == READ || user_param->verb == ATOMIC) { + conn_param.responder_resources = user_param->out_reads; + conn_param.initiator_depth = user_param->out_reads; + } + if (user_param->connection_type == UD) + conn_param.qp_num = ctx->qp[0]->qp_num; + + conn_param.retry_count = user_param->retry_count; + conn_param.rnr_retry_count = 7; + user_param->work_rdma_cm = temp; + + if 
(user_param->work_rdma_cm == OFF) { + + if (post_one_recv_wqe(ctx)) { + fprintf(stderr, "Couldn't post send \n"); + return 1; + } + + } else if (user_param->connection_type == UD) { + + if (user_param->tst == LAT || (user_param->tst == BW && user_param->duplex)) { + + if (post_recv_to_get_ah(ctx)) { + fprintf(stderr, "Couldn't post send \n"); + return 1; + } + } + } + + if (rdma_accept(ctx->cm_id, &conn_param)) { + fprintf(stderr, "Function rdma_accept failed\n"); + return 1; + } + + if (user_param->work_rdma_cm && user_param->connection_type == UD) { + + if (user_param->tst == LAT || (user_param->tst == BW && user_param->duplex)) { + if (create_ah_from_wc_recv(ctx,user_param)) { + fprintf(stderr, "Unable to create AH from WC\n"); + return 1; + } + } + } + + rdma_ack_cm_event(event); + rdma_destroy_id(ctx->cm_id_control); + freeaddrinfo(res); + return 0; +} + +/****************************************************************************** + * + ******************************************************************************/ +int create_comm_struct(struct perftest_comm *comm, + struct perftest_parameters *user_param) +{ + ALLOCATE(comm->rdma_params, struct perftest_parameters, 1); + memset(comm->rdma_params, 0, sizeof(struct perftest_parameters)); + + comm->rdma_params->port = user_param->port; + comm->rdma_params->sockfd = -1; + comm->rdma_params->gid_index = user_param->gid_index; + comm->rdma_params->gid_index2 = user_param->gid_index2; + comm->rdma_params->use_rdma_cm = user_param->use_rdma_cm; + comm->rdma_params->servername = user_param->servername; + comm->rdma_params->machine = user_param->machine; + comm->rdma_params->side = LOCAL; + comm->rdma_params->verb = user_param->verb; + comm->rdma_params->use_mcg = user_param->use_mcg; + comm->rdma_params->duplex = user_param->duplex; + comm->rdma_params->tos = DEF_TOS; + comm->rdma_params->use_xrc = user_param->use_xrc; + comm->rdma_params->connection_type = user_param->connection_type; + comm->rdma_params->output 
= user_param->output; + comm->rdma_params->report_per_port = user_param->report_per_port; + comm->rdma_params->retry_count = user_param->retry_count; + comm->rdma_params->mr_per_qp = user_param->mr_per_qp; + comm->rdma_params->dlid = user_param->dlid; + + if (user_param->use_rdma_cm) { + + ALLOCATE(comm->rdma_ctx, struct pingpong_context, 1); + memset(comm->rdma_ctx, 0, sizeof(struct pingpong_context)); + + comm->rdma_params->tx_depth = 1; + comm->rdma_params->rx_depth = 1; + comm->rdma_params->connection_type = RC; + comm->rdma_params->num_of_qps = 1; + comm->rdma_params->verb = SEND; + comm->rdma_params->size = sizeof(struct pingpong_dest); + comm->rdma_ctx->context = NULL; + + ALLOCATE(comm->rdma_ctx->mr, struct ibv_mr*, user_param->num_of_qps); + ALLOCATE(comm->rdma_ctx->buf, void* , user_param->num_of_qps); + ALLOCATE(comm->rdma_ctx->qp,struct ibv_qp*,comm->rdma_params->num_of_qps); + comm->rdma_ctx->buff_size = user_param->cycle_buffer; + + if (create_rdma_resources(comm->rdma_ctx,comm->rdma_params)) { + fprintf(stderr," Unable to create the resources needed by comm struct\n"); + return FAILURE; + } + } + + return SUCCESS; +} + +/****************************************************************************** + * + ******************************************************************************/ +int establish_connection(struct perftest_comm *comm) +{ + int (*ptr)(struct perftest_comm*); + + if (comm->rdma_params->use_rdma_cm) { + if (comm->rdma_params->machine == CLIENT) { + if (rdma_client_connect(comm->rdma_ctx,comm->rdma_params)) { + fprintf(stderr," Unable to perform rdma_client function\n"); + return 1; + } + } else { + if (rdma_server_connect(comm->rdma_ctx,comm->rdma_params)) { + fprintf(stderr," Unable to perform rdma_server function\n"); + return 1; + } + } + } else { + ptr = comm->rdma_params->servername ? 
ðernet_client_connect : ðernet_server_connect; + + if ((*ptr)(comm)) { + fprintf(stderr,"Unable to open file descriptor for socket connection"); + return 1; + } + } + return 0; +} + +/****************************************************************************** + * + ******************************************************************************/ +int ctx_hand_shake(struct perftest_comm *comm, + struct pingpong_dest *my_dest, + struct pingpong_dest *rem_dest) +{ + int (*read_func_ptr) (struct pingpong_dest*,struct perftest_comm*); + int (*write_func_ptr)(struct pingpong_dest*,struct perftest_comm*); + + if (comm->rdma_params->use_rdma_cm || comm->rdma_params->work_rdma_cm) { + read_func_ptr = &rdma_read_keys; + write_func_ptr = &rdma_write_keys; + + } else { + read_func_ptr = ðernet_read_keys; + write_func_ptr = ðernet_write_keys; + + } + + rem_dest->gid_index = my_dest->gid_index; + if (comm->rdma_params->servername) { + if ((*write_func_ptr)(my_dest,comm)) { + fprintf(stderr," Unable to write to socket/rdam_cm\n"); + return 1; + } + if ((*read_func_ptr)(rem_dest,comm)) { + fprintf(stderr," Unable to read from socket/rdam_cm\n"); + return 1; + } + + /*Server side will wait for the client side to reach the write function.*/ + } else { + + if ((*read_func_ptr)(rem_dest,comm)) { + fprintf(stderr," Unable to read to socket/rdam_cm\n"); + return 1; + } + if ((*write_func_ptr)(my_dest,comm)) { + fprintf(stderr," Unable to write from socket/rdam_cm\n"); + return 1; + } + } + + return 0; +} + + + + + +/****************************************************************************** + * + ******************************************************************************/ +int ctx_xchg_data_ethernet( struct perftest_comm *comm, + void *my_data, + void *rem_data,int size) +{ + if (comm->rdma_params->servername) { + if (ethernet_write_data(comm, (char *) my_data, size)) { + fprintf(stderr," Unable to write to socket/rdam_cm\n"); + return 1; + } + + if (ethernet_read_data(comm, 
(char *) rem_data, size)) { + fprintf(stderr," Unable to read from socket/rdam_cm\n"); + return 1; + } + + /*Server side will wait for the client side to reach the write function.*/ + } else { + + if (ethernet_read_data(comm, (char *) rem_data, size)) { + fprintf(stderr," Unable to read to socket/rdam_cm\n"); + return 1; + } + + if (ethernet_write_data(comm, (char *) my_data, size)) { + fprintf(stderr," Unable to write from socket/rdam_cm\n"); + return 1; + } + } + return 0; +} + +/****************************************************************************** + * + ******************************************************************************/ +int ctx_xchg_data_rdma( struct perftest_comm *comm, + void *my_data, + void *rem_data,int size) +{ + if (comm->rdma_params->servername) { + if (rdma_write_data(my_data,comm,size)) { + fprintf(stderr," Unable to write to socket/rdam_cm\n"); + return 1; + } + + if (rdma_read_data(rem_data,comm,size)) { + fprintf(stderr," Unable to read from socket/rdam_cm\n"); + return 1; + } + + /*Server side will wait for the client side to reach the write function.*/ + } else { + + if (rdma_read_data(rem_data,comm,size)) { + fprintf(stderr," Unable to read to socket/rdam_cm\n"); + return 1; + } + + if (rdma_write_data(my_data,comm,size)) { + fprintf(stderr," Unable to write from socket/rdam_cm\n"); + return 1; + } + } + return 0; +} + + +/****************************************************************************** + * + ******************************************************************************/ +int rdma_read_data(void *data, + struct perftest_comm *comm, int size) +{ + struct ibv_wc wc; + int ne; + + do { + ne = ibv_poll_cq(comm->rdma_ctx->recv_cq,1,&wc); + } while (ne == 0); + + if (wc.status || !(wc.opcode & IBV_WC_RECV) || wc.wr_id != SYNC_SPEC_ID) { + fprintf(stderr, "Bad wc status -- %d -- %d \n",(int)wc.status,(int)wc.wr_id); + return 1; + } + + memcpy(data,comm->rdma_ctx->buf[0], size); + + if 
(post_one_recv_wqe(comm->rdma_ctx)) { + fprintf(stderr, "Couldn't post send \n"); + return 1; + } + + return 0; +} + +/****************************************************************************** + * + ******************************************************************************/ +int rdma_write_data(void *data, + struct perftest_comm *comm, int size) +{ + struct ibv_send_wr wr; + struct ibv_send_wr *bad_wr; + struct ibv_sge list; + struct ibv_wc wc; + int ne; + memcpy(comm->rdma_ctx->buf[0],data,size); + + list.addr = (uintptr_t)comm->rdma_ctx->buf[0]; + list.length = size; + list.lkey = comm->rdma_ctx->mr[0]->lkey; + + wr.wr_id = SYNC_SPEC_ID; + wr.sg_list = &list; + wr.num_sge = 1; + wr.opcode = IBV_WR_SEND; + wr.send_flags = IBV_SEND_SIGNALED; + wr.next = NULL; + + if (ibv_post_send(comm->rdma_ctx->qp[0],&wr,&bad_wr)) { + fprintf(stderr, "Function ibv_post_send failed\n"); + return 1; + } + + do { + ne = ibv_poll_cq(comm->rdma_ctx->send_cq, 1,&wc); + } while (ne == 0); + + if (wc.status || wc.opcode != IBV_WC_SEND || wc.wr_id != SYNC_SPEC_ID) { + fprintf(stderr, " Bad wc status %d\n",(int)wc.status); + return 1; + } + + return 0; +} + +/****************************************************************************** + * + ******************************************************************************/ +int ethernet_write_data(struct perftest_comm *comm, char *msg, size_t size) +{ + if (write(comm->rdma_params->sockfd, msg, size) != size) { + perror("client write"); + fprintf(stderr, "Couldn't send reports\n"); + return 1; + } + + return 0; + +} +/****************************************************************************** + * + ******************************************************************************/ +int ethernet_read_data(struct perftest_comm *comm, char *recv_msg, size_t size) +{ + if (read(comm->rdma_params->sockfd, recv_msg, size) != size) { + fprintf(stderr, "ethernet_read_data: Couldn't read reports\n"); + return 1; + } + + return 0; +} + + 
+/****************************************************************************** + * + ******************************************************************************/ +int ctx_xchg_data( struct perftest_comm *comm, + void *my_data, + void *rem_data,int size) +{ + if (comm->rdma_params->use_rdma_cm || comm->rdma_params->work_rdma_cm) + ctx_xchg_data_rdma(comm,my_data,rem_data,size); + else + ctx_xchg_data_ethernet(comm,my_data,rem_data,size); + + return 0; +} + +/****************************************************************************** + * + ******************************************************************************/ +void xchg_bw_reports (struct perftest_comm *comm, struct bw_report_data *my_bw_rep, + struct bw_report_data *rem_bw_rep, float remote_version) +{ + struct bw_report_data temp; + int size; + + temp.size = hton_long(my_bw_rep->size); + + if ( remote_version >= 5.33 ) + temp.iters = hton_long(my_bw_rep->iters); + else + temp.iters = hton_int(my_bw_rep->iters); + + temp.bw_peak = hton_double(my_bw_rep->bw_peak); + temp.bw_avg = hton_double(my_bw_rep->bw_avg); + temp.bw_avg_p1 = hton_double(my_bw_rep->bw_avg_p1); + temp.bw_avg_p2 = hton_double(my_bw_rep->bw_avg_p2); + temp.msgRate_avg = hton_double(my_bw_rep->msgRate_avg); + temp.msgRate_avg_p1 = hton_double(my_bw_rep->msgRate_avg_p1); + temp.msgRate_avg_p2 = hton_double(my_bw_rep->msgRate_avg_p2); + + /*******************Exchange Reports*******************/ + if (ctx_xchg_data(comm, (void*) (&temp.size), (void*) (&rem_bw_rep->size), sizeof(unsigned long))) { + fprintf(stderr," Failed to exchange data between server and clients\n"); + exit(1); + } + + size = (remote_version >= 5.33) ? 
sizeof(uint64_t) : sizeof(int); + + if (ctx_xchg_data(comm, (void*) (&temp.iters), (void*) (&rem_bw_rep->iters), size)) { + fprintf(stderr," Failed to exchange data between server and clients\n"); + exit(1); + } + if (ctx_xchg_data(comm, (void*) (&temp.bw_peak), (void*) (&rem_bw_rep->bw_peak), sizeof(double))) { + fprintf(stderr," Failed to exchange data between server and clients\n"); + exit(1); + } + if (ctx_xchg_data(comm, (void*) (&temp.bw_avg), (void*) (&rem_bw_rep->bw_avg), sizeof(double))) { + fprintf(stderr," Failed to exchange data between server and clients\n"); + exit(1); + } + if (ctx_xchg_data(comm, (void*) (&temp.msgRate_avg), (void*) (&rem_bw_rep->msgRate_avg), sizeof(double))) { + fprintf(stderr," Failed to exchange data between server and clients\n"); + exit(1); + } + + /*exchange data for report per port feature. should keep compatibility*/ + if (comm->rdma_params->report_per_port) { + if (ctx_xchg_data(comm, (void*) (&temp.bw_avg_p1), (void*) (&rem_bw_rep->bw_avg_p1), sizeof(double))) { + fprintf(stderr," Failed to exchange data between server and clients\n"); + exit(1); + } + if (ctx_xchg_data(comm, (void*) (&temp.msgRate_avg_p1), (void*) (&rem_bw_rep->msgRate_avg_p1), sizeof(double))) { + fprintf(stderr," Failed to exchange data between server and clients\n"); + exit(1); + } + if (ctx_xchg_data(comm, (void*) (&temp.bw_avg_p2), (void*) (&rem_bw_rep->bw_avg_p2), sizeof(double))) { + fprintf(stderr," Failed to exchange data between server and clients\n"); + exit(1); + } + if (ctx_xchg_data(comm, (void*) (&temp.msgRate_avg_p2), (void*) (&rem_bw_rep->msgRate_avg_p2), sizeof(double))) { + fprintf(stderr," Failed to exchange data between server and clients\n"); + exit(1); + } + } + + rem_bw_rep->size = hton_long(rem_bw_rep->size); + + if ( remote_version >= 5.33 ) + rem_bw_rep->iters = hton_long(rem_bw_rep->iters); + else + rem_bw_rep->iters = hton_int(rem_bw_rep->iters); + + rem_bw_rep->bw_peak = hton_double(rem_bw_rep->bw_peak); + rem_bw_rep->bw_avg 
= hton_double(rem_bw_rep->bw_avg); + rem_bw_rep->bw_avg_p1 = hton_double(rem_bw_rep->bw_avg_p1); + rem_bw_rep->bw_avg_p2 = hton_double(rem_bw_rep->bw_avg_p2); + rem_bw_rep->msgRate_avg = hton_double(rem_bw_rep->msgRate_avg); + rem_bw_rep->msgRate_avg_p1 = hton_double(rem_bw_rep->msgRate_avg_p1); + rem_bw_rep->msgRate_avg_p2 = hton_double(rem_bw_rep->msgRate_avg_p2); + +} + +/****************************************************************************** + * + ******************************************************************************/ +void ctx_print_pingpong_data(struct pingpong_dest *element, + struct perftest_comm *comm) +{ + int is_there_mgid,local_mgid,remote_mgid; + + /* use dlid value from user (if user specified and only on the remote side) */ + uint16_t dlid = (comm->rdma_params->dlid && comm->rdma_params->side) ? + comm->rdma_params->dlid : element->lid; + + if (comm->rdma_params->output != FULL_VERBOSITY) + return; + /*First of all we print the basic format.*/ + printf(BASIC_ADDR_FMT, sideArray[comm->rdma_params->side], dlid, element->qpn, element->psn); + + switch (comm->rdma_params->verb) { + case 2 : printf(READ_FMT,element->out_reads); + case 1 : printf(RDMA_FMT,element->rkey,element->vaddr); + default : ; + } + + if (comm->rdma_params->use_xrc) { + printf(XRC_FMT,element->srqn); + } else if (comm->rdma_params->connection_type == DC){ + printf(DC_FMT,element->srqn); + } + + putchar('\n'); + + local_mgid = (comm->rdma_params->side == 0) && (comm->rdma_params->machine == 0); + remote_mgid = (comm->rdma_params->side == 1) && (comm->rdma_params->machine == 1); + is_there_mgid = comm->rdma_params->duplex || remote_mgid || local_mgid; + + if ((comm->rdma_params->gid_index > -1 || (comm->rdma_params->use_mcg && is_there_mgid)) && comm->rdma_params->connection_type != RawEth) { + + printf(PERF_GID_FMT,gidArray[comm->rdma_params->use_mcg && is_there_mgid], + element->gid.raw[0], element->gid.raw[1], + element->gid.raw[2], element->gid.raw[3], + 
element->gid.raw[4], element->gid.raw[5], + element->gid.raw[6], element->gid.raw[7], + element->gid.raw[8], element->gid.raw[9], + element->gid.raw[10],element->gid.raw[11], + element->gid.raw[12],element->gid.raw[13], + element->gid.raw[14],element->gid.raw[15]); + } +} + +/****************************************************************************** + * + ******************************************************************************/ +int ctx_close_connection(struct perftest_comm *comm, + struct pingpong_dest *my_dest, + struct pingpong_dest *rem_dest) +{ + /*Signal client is finished.*/ + if (ctx_hand_shake(comm,my_dest,rem_dest)) { + return 1; + } + + if (!comm->rdma_params->use_rdma_cm && !comm->rdma_params->work_rdma_cm) { + + if (write(comm->rdma_params->sockfd,"done",sizeof "done") != sizeof "done") { + perror(" Client write"); + fprintf(stderr,"Couldn't write to socket\n"); + return -1; + } + + close(comm->rdma_params->sockfd); + return 0; + } + + return 0; +} + +/****************************************************************************** + * + ******************************************************************************/ +void exchange_versions(struct perftest_comm *user_comm, struct perftest_parameters *user_param) +{ + if (!user_param->dont_xchg_versions) { + if (ctx_xchg_data(user_comm,(void*)(&user_param->version),(void*)(&user_param->rem_version),sizeof(user_param->rem_version))) { + fprintf(stderr," Failed to exchange data between server and clients\n"); + exit(1); + } + } +} + +/****************************************************************************** + * + ******************************************************************************/ +void check_sys_data(struct perftest_comm *user_comm, struct perftest_parameters *user_param) +{ + int rem_cycle_buffer = 0; + int rem_cache_line_size = 0; + + int m_cycle_buffer = hton_int(user_param->cycle_buffer); + int m_cache_line_size = hton_int(user_param->cache_line_size); + + /*keep compatibility 
between older versions, without this feature.*/ + if ( !(atof(user_param->rem_version) >= 5.32) ) { + return; + } + + if (!user_param->dont_xchg_versions) { + if (ctx_xchg_data(user_comm,(void*)(&m_cycle_buffer),(void*)(&rem_cycle_buffer), sizeof(user_param->cycle_buffer))) { + fprintf(stderr," Failed to exchange Page Size data between server and client\n"); + exit(1); + } + if (ctx_xchg_data(user_comm,(void*)(&m_cache_line_size),(void*)(&rem_cache_line_size), sizeof(user_param->cache_line_size))) { + fprintf(stderr," Failed to exchange Cache Line Size data between server and client\n"); + exit(1); + } + } + + rem_cycle_buffer = ntoh_int(rem_cycle_buffer); + rem_cache_line_size = ntoh_int(rem_cache_line_size); + + /*take the max and update user_param*/ + user_param->cycle_buffer = (rem_cycle_buffer > user_param->cycle_buffer) ? rem_cycle_buffer : user_param->cycle_buffer; + user_param->cache_line_size = (rem_cache_line_size > user_param->cache_line_size) ? rem_cache_line_size : user_param->cache_line_size; + + /*update user_comm as well*/ + if (user_param->use_rdma_cm) { + user_comm->rdma_ctx->buff_size = user_param->cycle_buffer; + } + +} + +/****************************************************************************** + * + ******************************************************************************/ +int check_mtu(struct ibv_context *context,struct perftest_parameters *user_param, struct perftest_comm *user_comm) { + int curr_mtu=0, rem_mtu=0; + char cur[2]; + char rem[2]; + int size_of_cur; + float rem_vers = atof(user_param->rem_version); + + if (user_param->connection_type == RawEth) { + if (set_eth_mtu(user_param) != 0 ) { + fprintf(stderr, " Couldn't set Eth MTU\n"); + return FAILURE; + } + } else { + curr_mtu = (int) (set_mtu(context,user_param->ib_port,user_param->mtu)); + if (!user_param->dont_xchg_versions) { + /*add mtu set in remote node from version 5.1 and above*/ + if (rem_vers >= 5.1 ) { + sprintf(cur,"%d",curr_mtu); + + /*fix a buffer overflow 
issue in ppc.*/ + size_of_cur = (rem_vers >= 5.31) ? sizeof(char[2]) : sizeof(int); + + if (ctx_xchg_data(user_comm,(void*)(cur),(void*)(rem),size_of_cur)) { + fprintf(stderr," Failed to exchange data between server and clients\n"); + exit(1); + } + rem_mtu = (int) strtol(rem, (char **)NULL, 10); + user_param->curr_mtu = (enum ibv_mtu)((valid_mtu_size(rem_mtu) && (curr_mtu > rem_mtu)) ? rem_mtu : curr_mtu); + } else { + user_param->curr_mtu = (enum ibv_mtu)(curr_mtu); + } + } else { + user_param->curr_mtu = (enum ibv_mtu)(curr_mtu); + } + } + + if (user_param->connection_type == UD && user_param->size > MTU_SIZE(user_param->curr_mtu)) { + + if (user_param->test_method == RUN_ALL) { + fprintf(stderr," Max msg size in UD is MTU %lu\n",MTU_SIZE(user_param->curr_mtu)); + fprintf(stderr," Changing to this MTU\n"); + } + user_param->size = MTU_SIZE(user_param->curr_mtu); + } + /*checking msg size in raw ethernet*/ + if (user_param->connection_type == RawEth){ + if (user_param->size > user_param->curr_mtu) { + fprintf(stderr," Max msg size in RawEth is MTU %d\n",user_param->curr_mtu); + fprintf(stderr," Changing msg size to this MTU\n"); + user_param->size = user_param->curr_mtu; + } else if (user_param->size < RAWETH_MIN_MSG_SIZE) { + printf(" Min msg size for RawEth is 64B - changing msg size to 64 \n"); + user_param->size = RAWETH_MIN_MSG_SIZE; + } + } + + return SUCCESS; +} + +int ctx_check_gid_compatibility(struct pingpong_dest *my_dest, + struct pingpong_dest *rem_dest) +{ + int gid_type1, gid_type2; + + /*ipv4 - 1 , ipv6 - 0 */ + gid_type1 = ipv6_addr_v4mapped((struct in6_addr *)my_dest->gid.raw); + gid_type2 = ipv6_addr_v4mapped((struct in6_addr *)rem_dest->gid.raw); + + if (gid_type1 != gid_type2) + return 1; + + return 0; +} +/****************************************************************************** + * End + ******************************************************************************/ diff --git a/usr/benchmarks/ib/perftest_communication.h 
b/usr/benchmarks/ib/perftest_communication.h new file mode 100755 index 000000000..923603f66 --- /dev/null +++ b/usr/benchmarks/ib/perftest_communication.h @@ -0,0 +1,444 @@ +/* + * Copyright (c) 2011 Mellanox Technologies Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Author: Ido Shamay + * + * Description : ... + * + * Methods : ... 
+ */ + +#ifndef PERFTEST_COMMUNICATION_H +#define PERFTEST_COMMUNICATION_H + +#include + +// #include +#include +#include + +// #include +#include "perftest_resources.h" + +/* Macros converting 64-bit integers and doubles between host and network byte order. */ +#if __BYTE_ORDER == __BIG_ENDIAN || __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER == __BIG_ENDIAN +#define ntoh_64(x) (x) +#define hton_64(x) (x) +#define ntoh_double(x) (x) +#define hton_double(x) (x) +#else +#define ntoh_64(x) bswap_64(x) +#define hton_64(x) bswap_64(x) +#define ntoh_double(x) bswap_double(x) +#define hton_double(x) bswap_double(x) +#endif +#else +#error "Only BIG_ENDIAN and LITTLE_ENDIAN are supported." +#endif + +/* long is 64-bit in LP64 mode, 32-bit in LLP64 mode. */ +#if defined(_LP64) || defined(__LP64__) +#define ntoh_long(x) ntoh_64(x) +#define hton_long(x) hton_64(x) +#else +#define ntoh_long(x) ntohl(x) +#define hton_long(x) htonl(x) +#endif + +/* int is 32-bit in both LP64 and LLP64 modes. */ +#define ntoh_int(x) (int) ntohl((uint32_t) (x)) +#define hton_int(x) (int) htonl((uint32_t) (x)) + +#define KEY_MSG_SIZE (59) /* Message size without gid. */ +#define KEY_MSG_SIZE_GID (108) /* Message size with gid (MGID as well). */ +#define SYNC_SPEC_ID (5) + +/* The format of the message we pass through sockets, without passing Gid. */ +#define KEY_PRINT_FMT "%04x:%04x:%06x:%06x:%08x:%016Lx:%08x" + +/* The format of the message we pass through sockets (with Gid). */ +#define KEY_PRINT_FMT_GID "%04x:%04x:%06x:%06x:%08x:%016Lx:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%08x:" + +/* The basic print format for all verbs. */ +#define BASIC_ADDR_FMT " %s address: LID %#04x QPN %#06x PSN %#06x" + +/* Additional format string for READ - the outstanding reads. */ +#define READ_FMT " OUT %#04x" + +/* The print format of the pingpong_dest element for RDMA verbs.
*/ +#define RDMA_FMT " RKey %#08x VAddr %#016Lx" + +/* The print format of the SRQ number in case of XRC or DC. */ +#define XRC_FMT " SRQn %#08x" +#define DC_FMT " SRQn %#08x" + +/* The print format of a global address or a multicast address (bytes printed in decimal). */ +#define PERF_GID_FMT " %s: %02d:%02d:%02d:%02d:%02d:%02d:%02d:%02d:%02d:%02d:%02d:%02d:%02d:%02d:%02d:%02d\n" + +/* The print format of a global address or a multicast address (bytes printed in hex). */ +#define PERF_RAW_MGID_FMT " %s: %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x\n" + +struct perftest_comm { + struct pingpong_context *rdma_ctx; + struct perftest_parameters *rdma_params; +}; + +/* bswap_double + * + * Description : swap byte order for double. + * + * Parameters : + * x - input double variable + * + * Return Value : the double after its byte order has been swapped. + */ +double bswap_double(double x); + + +/* create_comm_struct + * + * Description : Creates the communication struct for the Ethernet or rdma_cm options. + * + * Parameters : + * comm - An empty communication struct. + * user_param - Perftest parameters. + * + * Return Value : SUCCESS,FAILURE. + */ +int create_comm_struct (struct perftest_comm *comm, + struct perftest_parameters *user_param); + + +/* set_up_connection . + * + * Description : Fills my_dest with all of the machine properties. + * + * + * Parameters : + * ctx - Pingpong context after the ctx_init function. + * user_param - Perftest parameters. + * my_dest - An empty pingpong_dest struct. + * + * Return Value : SUCCESS,FAILURE. + */ +int set_up_connection(struct pingpong_context *ctx, + struct perftest_parameters *user_param, + struct pingpong_dest *my_dest); + +/* establish_connection . + * + * Description : + * + * Connects the client to a well-known server on a requested port. + * It assumes the server is waiting for a request on the port. + * It uses Ethernet sockets or rdma_cm as mentioned in use_rdma_cm. + * + * Parameters : + * comm - The communication struct with all the data.
+ * + * Return Value : SUCCESS,FAILURE. + */ +int establish_connection(struct perftest_comm *comm); + +/* rdma_client_connect . + * + * Description : Connects the client to a QP on the other machine with rdma_cm. + * + * Parameters : + * ctx - An empty resources struct to fill the resources created for this QP. + * user_param - Perftest parameters. + * + * Return Value : SUCCESS,FAILURE. + */ +int rdma_client_connect(struct pingpong_context *ctx, + struct perftest_parameters *user_param); + +/* retry_rdma_connect . + * + * Description : Retries rdma_client_connect() because the listener may not be ready + * when the rdma client attempts to connect. + * + * Parameters : + * ctx - An empty resources struct to fill the resources created for this QP. + * user_param - Perftest parameters. + * + * Return Value : SUCCESS,FAILURE. + */ +int retry_rdma_connect(struct pingpong_context *ctx, + struct perftest_parameters *user_param); + +/* rdma_server_connect . + * + * Description : Assigns a server to listen on an rdma_cm port and connects to it. + * + * Parameters : + * ctx - An empty resources struct to fill the resources created for this QP. + * user_param - Perftest parameters. + * + * Return Value : SUCCESS,FAILURE. + */ +int rdma_server_connect(struct pingpong_context *ctx, + struct perftest_parameters *user_param); +/* ctx_hand_shake . + * + * Description : + * + * Exchanges the data, represented in struct pingpong_dest, between + * a server and client that performed the ctx_server/client_connect. + * The method fills in rem_dest the remote machine data, and passes the data + * in my_dest to the other machine. + * + * Parameters : + * + * comm - The communication struct used to exchange the data. + * my_dest - Contains the data you want to pass to the other side. + * rem_dest - The other side data. + * + * Return Value : 0 upon success. -1 if it fails.
+ */ +int ctx_hand_shake(struct perftest_comm *comm, + struct pingpong_dest *my_dest, + struct pingpong_dest *rem_dest); + + + +/* ctx_print_pingpong_data. + * + * Description : + * + * Prints the data stored in the struct pingpong_dest. + * + * Parameters : + * + * element - The element to print. + * comm - The communication struct of the machine. + */ +void ctx_print_pingpong_data(struct pingpong_dest *element, + struct perftest_comm *comm); + +/* ctx_close_connection . + * + * Description : + * + * Closes the connection between the 2 machines. + * It performs a handshake to ensure the 2 sides are there. + * + * Parameters : + * + * comm - The communication struct of the machine. + * my_dest ,rem_dest - The 2 sides that end the connection. + * + * Return Value : 0 upon success. -1 if it fails. + */ +int ctx_close_connection(struct perftest_comm *comm, + struct pingpong_dest *my_dest, + struct pingpong_dest *rem_dest); + +/* ctx_xchg_data . + * + * Description : + * + * Exchanges data between + * a server and client after performing ctx_server/client_connect. + * The method fills in rem_data the remote machine data, and passes the data + * in my_data to the other machine. + * + * Parameters : + * + * comm - contains connections info + * my_data - Contains the data you want to pass to the other side. + * rem_data - The other side data. + * size - size of my_data (after casting is made) + * + * Return Value : 0 upon success. -1 if it fails. + */ +int ctx_xchg_data( struct perftest_comm *comm, + void *my_data, + void *rem_data,int size); + +/* ethernet_write_data . + * + * Description : + * + * Sends data that is written in msg using ethernet. + * This function can send any basic type (int,float,double,char*,string, etc..).
+ * If you need to send a struct, a decoder and an encoder must be implemented to convert + * the struct to a string. + * + * Parameters : + * + * comm - contains connections info + * msg - message that will be sent + * size - size of the message + * Return Value : 0 upon success. -1 if it fails. + */ +int ethernet_write_data(struct perftest_comm *comm, char *msg, size_t size); + +/* ethernet_read_data . + * + * Description : + * + * Reads data from the remote machine using ethernet. + * + * Parameters : + * + * comm - contains connections info + * recv_msg - function will return, in this argument, the message from remote machine + * size - size of the message + * Return Value : 0 upon success. -1 if it fails. + */ +int ethernet_read_data(struct perftest_comm *comm, char *recv_msg, size_t size); + +/* rdma_write_data . + * + * Description : + * + * Sends data to the remote machine using RDMA. + * This function can send any variable type. + * + * Parameters : + * + * data - data that will be sent + * comm - contains connections info + * size - size of data + * Return Value : 0 upon success. -1 if it fails. + */ +int rdma_write_data(void *data, struct perftest_comm *comm, int size); + +/* rdma_read_data . + * + * Description : + * + * Reads data from the remote machine using RDMA. + * This function can read any variable type. + * + * Parameters : + * + * data - buffer for the data that is read (NOTE(review): the original text said "data that will be sent"; confirm against the implementation) + * comm - contains connections info + * size - size of data + * Return Value : 0 upon success. -1 if it fails. + */ +int rdma_read_data(void *data, struct perftest_comm *comm, int size); + +/* ctx_xchg_data_ethernet . + * + * Description : + * + * Implements ctx_xchg_data for ethernet + * + * Parameters : + * + * comm - contains connections info + * my_data - Contains the data you want to pass to the other side. + * rem_data - The other side data. + * size - size of my_data (after casting is made) + * + * Return Value : 0 upon success. -1 if it fails.
+ */ +int ctx_xchg_data_ethernet( struct perftest_comm *comm, void *my_data, void *rem_data,int size); + +/* ctx_xchg_data_rdma . + * + * Description : + * + * Implements ctx_xchg_data for RDMA + * + * Parameters : + * + * comm - contains connections info + * my_data - Contains the data you want to pass to the other side. + * rem_data - The other side data. + * size - size of my_data (after casting is made) + * + * Return Value : 0 upon success. -1 if it fails. + */ +int ctx_xchg_data_rdma( struct perftest_comm *comm, void *my_data, void *rem_data,int size); + +/* xchg_bw_reports . + * + * Description : + * + * Exchanges bw reports between + * a server and client after performing ctx_server/client_connect. + * The method fills in rem_bw_rep the remote machine data, and passes the data + * in my_bw_rep to the other machine. + * + * Parameters : + * + * comm - contains connections info + * my_bw_rep - Contains the data you want to pass to the other side. + * rem_bw_rep - The other side data. + * remote_version - version of the remote side (NOTE(review): its exact use is not visible here; confirm against the implementation) + * + * Return Value : none (the function returns void). + */ +void xchg_bw_reports (struct perftest_comm *comm, struct bw_report_data *my_bw_rep, + struct bw_report_data *rem_bw_rep, float remote_version); + +/* exchange_versions. + * + * Description : + * Exchange versions between sides. + * + */ +void exchange_versions (struct perftest_comm *user_comm, struct perftest_parameters *user_param); + +/* check_sys_data. + * + * Description : + * Exchange system data between sides. + * + */ +void check_sys_data(struct perftest_comm *user_comm, struct perftest_parameters *user_param); + +/* check_mtu + * + * Description : Configures test MTU. + * + * Parameters : + * + * context - Context of the device. + * user_param - Perftest parameters. + * user_comm - user communication struct. + * Return Value : SUCCESS, FAILURE.
+ */ +int check_mtu(struct ibv_context *context,struct perftest_parameters *user_param, struct perftest_comm *user_comm); +/* ctx_check_gid_compatibility + * + * Description : Compares what ipv6_addr_v4mapped() reports for the local and the remote GID. + * + * Parameters : + * my_dest - local side data (its gid is inspected). + * rem_dest - remote side data (its gid is inspected). + * + * Return Value : 0 when both GIDs give the same result, 1 when they differ. + */ +int ctx_check_gid_compatibility(struct pingpong_dest *my_dest, + struct pingpong_dest *rem_dest); + +#endif /* PERFTEST_COMMUNICATION_H */ + + + diff --git a/usr/benchmarks/ib/perftest_parameters.c b/usr/benchmarks/ib/perftest_parameters.c new file mode 100755 index 000000000..f316b3281 --- /dev/null +++ b/usr/benchmarks/ib/perftest_parameters.c @@ -0,0 +1,3182 @@ +#include +#include +#include +#include +#include +/* #include */ +#include +#if defined(__FreeBSD__) +#include +#include +#endif +#include "perftest_parameters.h" +#include "raw_ethernet_resources.h" +#include +#define MAC_LEN (17) +#define ETHERTYPE_LEN (6) +#define MAC_ARR_LEN (6) +#define HEX_BASE (16) +static const char *connStr[] = {"RC","UC","UD","RawEth","XRC","DC"}; +static const char *testsStr[] = {"Send","RDMA_Write","RDMA_Read","Atomic"}; +static const char *portStates[] = {"Nop","Down","Init","Armed","","Active Defer"}; +static const char *qp_state[] = {"OFF","ON"}; +static const char *exchange_state[] = {"Ethernet","rdma_cm"}; +static const char *atomicTypesStr[] = {"CMP_AND_SWAP","FETCH_AND_ADD"}; + +/****************************************************************************** + * parse_mac_from_str. + * + * Description : parses a string in the format "XX:XX:XX:XX:XX:XX" into a uint8_t array of size 6 holding the MAC address + * + * Parameters : + * mac - char*. + * *addr - pointer to output array + * + * Return Value : SUCCESS, FAILURE.
+ ******************************************************************************/ +#if defined(__FreeBSD__) +#define strdupa(_s) \ +({ \ + char *_d; \ + int _len; \ + \ + _len = strlen(_s) + 1; \ + _d = alloca(_len); \ + if (_d) \ + memcpy(_d, _s, _len); \ + _d; \ +}) +#endif + +static int parse_mac_from_str(char *mac, u_int8_t *addr) +{ + char tmpMac[MAC_LEN+1]; + char *tmpField; + int fieldNum = 0; + + if (strlen(mac) != MAC_LEN) { + fprintf(stderr, "invalid MAC length\n"); + return FAILURE; + } + if (addr == NULL) { + fprintf(stderr, "invalid output addr array\n"); + return FAILURE; + } + + strcpy(tmpMac, mac); + tmpField = strtok(tmpMac, ":"); + while (tmpField != NULL && fieldNum < MAC_ARR_LEN) { + char *chk; + int tmpVal; + tmpVal = strtoul(tmpField, &chk, HEX_BASE); + if (tmpVal > 0xff) { + fprintf(stderr, "field %d value %X out of range\n", fieldNum, tmpVal); + return FAILURE; + } + if (*chk != 0) { + fprintf(stderr, "Non-digit character %c (%0x) detected in field %d\n", *chk, *chk, fieldNum); + return FAILURE; + } + addr[fieldNum++] = (u_int8_t) tmpVal; + tmpField = strtok(NULL, ":"); + } + if (tmpField != NULL || fieldNum != MAC_ARR_LEN) { + fprintf(stderr, "MAC address longer than six fields\n"); + return FAILURE; + } + return SUCCESS; +} +static int parse_ethertype_from_str(char *ether_str, uint16_t *ethertype_val) +{ + if (strlen(ether_str) != ETHERTYPE_LEN) { + fprintf(stderr, "invalid ethertype length\n"); + return FAILURE; + } + *ethertype_val = strtoul(ether_str, NULL, HEX_BASE); + if (!*ethertype_val) + return FAILURE; + return SUCCESS; +} + +/****************************************************************************** + parse_ip_from_str. + * + * Description : Convert from presentation format of an Internet number in nuffer + starting at CP to the binary network format and store result for + interface type AF in buffer starting at BUF. + * + * Parameters : + * *ip - char* ip string. 
+ * *addr - pointer to output array + * + * Return Value : SUCCESS, FAILURE. + * + ******************************************************************************/ +int parse_ip_from_str(char *ip, u_int32_t *addr) +{ + return inet_pton(AF_INET, ip, addr); +} + +/******************************************************************************/ +int parse_ip6_from_str(char *ip6, struct in6_addr *addr) +{ + return inet_pton(AF_INET6, ip6, addr); +} + +/****************************************************************************** + check_valid_udp_port. + ******************************************************************************/ +int check_if_valid_udp_port(int udp_port) +{ + return ON; +} +/****************************************************************************** + get cache line size from system + ******************************************************************************/ +static int get_cache_line_size() +{ + int size = 0; + #if !defined(__FreeBSD__) + size = sysconf(_SC_LEVEL1_DCACHE_LINESIZE); + if (size == 0) { + #if defined(__sparc__) && defined(__arch64__) + char* file_name = + "/sys/devices/system/cpu/cpu0/l2_cache_line_size"; + #else + char* file_name = + "/sys/devices/system/cpu/cpu0/cache/index0/coherency_line_size"; + #endif + + FILE *fp; + char line[10]; + fp = fopen(file_name, "r"); + if (fp == NULL) { + return DEF_CACHE_LINE_SIZE; + } + if(fgets(line,10,fp) != NULL) { + size = atoi(line); + fclose(fp); + } + } +#endif + if (size <= 0) + size = DEF_CACHE_LINE_SIZE; + + return size; +} +/****************************************************************************** + * + ******************************************************************************/ +static void usage(const char *argv0, VerbType verb, TestType tst, int connection_type) +{ + printf("Usage:\n"); + + if (tst != FS_RATE) { + printf(" %s start a server and wait for connection\n", argv0); + printf(" %s connect to server at \n", argv0); + } else + printf(" %s run a server to measure 
FS rate \n", argv0); + + printf("\n"); + printf("Options:\n"); + + if (verb != ATOMIC && connection_type != RawEth) { + printf(" -a, --all "); + printf(" Run sizes from 2 till 2^23\n"); + } + + if (verb == ATOMIC) { + printf(" -A, --atomic_type= "); + printf(" type of atomic operation from {CMP_AND_SWAP,FETCH_AND_ADD} (default FETCH_AND_ADD)\n"); + } + + if (tst == BW) { + printf(" -b, --bidirectional "); + printf(" Measure bidirectional bandwidth (default unidirectional)\n"); + } + + if (connection_type != RawEth) { + if (verb == SEND) { + printf(" -c, --connection= "); + printf(" Connection type RC/XRC/UC/UD/DC (default RC)\n"); + } else if (verb == WRITE) { + printf(" -c, --connection= "); + printf(" Connection type RC/XRC/UC/DC (default RC)\n"); + } else if (verb == READ || verb == ATOMIC) { + printf(" -c, --connection= "); + printf(" Connection type RC/XRC/DC (default RC)\n"); + } + } + + if (tst == LAT) { + printf(" -C, --report-cycles "); + printf(" report times in cpu cycle units (default microseconds)\n"); + } + + printf(" -d, --ib-dev= "); + printf(" Use IB device (default first device found)\n"); + + printf(" -D, --duration "); + printf(" Run test for a customized period of seconds.\n"); + + if (verb != WRITE && connection_type != RawEth) { + printf(" -e, --events "); + printf(" Sleep on CQ events (default poll)\n"); + + printf(" -X, --vector= "); + printf(" Set used for events\n"); + } + + printf(" -f, --margin "); + printf(" measure results within margins. 
(default=2sec)\n"); + + printf(" -F, --CPU-freq "); + printf(" Do not show a warning even if cpufreq_ondemand module is loaded, and cpu-freq is not on max.\n"); + + if (verb == SEND && tst != FS_RATE) { + printf(" -g, --mcg "); + printf(" Send messages to multicast group with 1 QP attached to it.\n"); + } + + printf(" -h, --help "); + printf(" Show this help screen.\n"); + + if (tst == LAT || tst == LAT_BY_BW || tst == FS_RATE) { + printf(" -H, --report-histogram "); + printf(" Print out all results (default print summary only)\n"); + } + + printf(" -i, --ib-port= "); + printf(" Use port of IB device (default %d)\n",DEF_IB_PORT); + + if (verb != READ && verb != ATOMIC) { + printf(" -I, --inline_size= "); + printf(" Max size of message to be sent in inline\n"); + } + + if (tst == BW || tst == LAT_BY_BW) { + printf(" -l, --post_list="); + printf(" Post list of WQEs of size (instead of single post)\n"); + } + + if (tst != FS_RATE) { + if (connection_type == RawEth) { + printf(" -m, --mtu= "); + printf(" MTU size : 64 - 9600 (default port mtu)\n"); + } else { + printf(" -m, --mtu= "); + printf(" MTU size : 256 - 4096 (default port mtu)\n"); + } + + if (verb == SEND) { + printf(" -M, --MGID= "); + printf(" In multicast, uses as the group MGID.\n"); + } + } + + printf(" -n, --iters= "); + printf(" Number of exchanges (at least %d, default %d)\n", MIN_ITER, ((verb == WRITE) && (tst == BW)) ? 
DEF_ITERS_WB : DEF_ITERS); + + if (tst == BW) { + printf(" -N, --noPeak"); + printf(" Cancel peak-bw calculation (default with peak up to iters=20000)\n"); + } + + if (verb == READ || verb == ATOMIC) { + printf(" -o, --outs= "); + printf(" num of outstanding read/atom(default max of device)\n"); + } + + if (tst == BW && connection_type != RawEth) { + printf(" -O, --dualport "); + printf(" Run test in dual-port mode.\n"); + } + + printf(" -p, --port= "); + printf(" Listen on/connect to port (default %d)\n",DEF_PORT); + + if (tst == BW ) { + printf(" -q, --qp= Num of qp's(default %d)\n", DEF_NUM_QPS); + } + + if (tst == BW) { + printf(" -Q, --cq-mod "); + printf(" Generate Cqe only after <--cq-mod> completion\n"); + } + + if (verb == SEND && tst != FS_RATE) { + printf(" -r, --rx-depth= "); + printf(" Rx queue size (default %d).",DEF_RX_SEND); + printf(" If using srq, rx-depth controls max-wr size of the srq\n"); + } + + if (connection_type != RawEth) { + printf(" -R, --rdma_cm "); + printf(" Connect QPs with rdma_cm and run test on those QPs\n"); + } + + if (verb != ATOMIC) { + printf(" -s, --size= "); + printf(" Size of message to exchange (default %d)\n", tst == LAT ? DEF_SIZE_LAT : DEF_SIZE_BW); + } + + if (tst != FS_RATE) { + printf(" -S, --sl= "); + printf(" SL (default %d)\n",DEF_SL); + + if (tst == BW || tst == LAT_BY_BW) { + printf(" -t, --tx-depth= "); + printf(" Size of tx queue (default %d)\n", tst == LAT ? DEF_TX_LAT : DEF_TX_BW); + } + + printf(" -T, --tos= "); + printf(" Set to RDMA-CM QPs. available only with -R flag. 
values 0-256 (default off)\n"); + } + + printf(" -u, --qp-timeout= "); + printf(" QP timeout, timeout value is 4 usec * 2 ^(timeout), default %d\n",DEF_QP_TIME); + + if (tst == LAT || tst == LAT_BY_BW || tst == FS_RATE) { + printf(" -U, --report-unsorted "); + printf(" (implies -H) print out unsorted results (default sorted)\n"); + } + + printf(" -V, --version "); + printf(" Display version number\n"); + + if (tst == BW) { + printf(" -w, --limit_bw= "); + printf(" Set verifier limit for bandwidth\n"); + } + + if (connection_type != RawEth) { + printf(" -x, --gid-index= "); + printf(" Test uses GID with GID index (Default : IB - no gid . ETH - 0)\n"); + } + + if (tst == BW) { + printf(" -y, --limit_msgrate= "); + printf(" Set verifier limit for Msg Rate\n"); + } + + if (connection_type != RawEth) { + printf(" -z, --com_rdma_cm "); + printf(" Communicate with rdma_cm module to exchange data - use regular QPs\n"); + } + + /*Long flags*/ + putchar('\n'); + + printf(" --cpu_util "); + printf(" Show CPU Utilization in report, valid only in Duration mode \n"); + + if (tst != FS_RATE) { + printf(" --dlid "); + printf(" Set a Destination LID instead of getting it from the other side.\n"); + } + + if (connection_type != RawEth) { + printf(" --dont_xchg_versions "); + printf(" Do not exchange versions and MTU with other side \n"); + } + + if (tst != FS_RATE) { + printf(" --force-link= "); + printf(" Force the link(s) to a specific type: IB or Ethernet.\n"); + } + + if (verb != WRITE) { + printf(" --inline_recv= "); + printf(" Max size of message to be sent in inline receive\n"); + } + + if (connection_type != RawEth) { + printf(" --ipv6 "); + printf(" Use IPv6 GID. 
Default is IPv4\n"); + } + + if (tst == LAT) { + printf(" --latency_gap= "); + printf(" delay time between each post send\n"); + } + + if (connection_type != RawEth) { + printf(" --mmap=file "); + printf(" Use an mmap'd file as the buffer for testing P2P transfers.\n"); + printf(" --mmap-offset= "); + printf(" Use an mmap'd file as the buffer for testing P2P transfers.\n"); + } + + if (tst == BW) { + printf(" --mr_per_qp "); + printf(" Create memory region for each qp.\n"); + } + + #if defined HAVE_EX_ODP || defined HAVE_EXP_ODP + printf(" --odp "); + printf(" Use On Demand Paging instead of Memory Registration.\n"); + #endif + + printf(" --output="); + printf(" Set verbosity output level: bandwidth , message_rate, latency \n"); + printf(" Latency measurement is Average calculation \n"); + + if (tst != FS_RATE) { + printf(" --perform_warm_up"); + printf(" Perform some iterations before start measuring in order to warming-up memory cache, valid in Atomic, Read and Write BW tests\n"); + + printf(" --pkey_index= PKey index to use for QP\n"); + } + + if ( tst == BW ) { + printf(" --report-both "); + printf(" Report RX & TX results separately on Bidirectinal BW tests\n"); + + printf(" --report_gbits "); + printf(" Report Max/Average BW of test in Gbit/sec (instead of MB/sec)\n"); + + if (connection_type != RawEth) { + printf(" --report-per-port "); + printf(" Report BW data on both ports when running Dualport and Duration mode\n"); + } + + printf(" --reversed "); + printf(" Reverse traffic direction - Server send to client\n"); + + printf(" --run_infinitely "); + printf(" Run test forever, print results every seconds\n"); + } + + if (connection_type != RawEth) { + printf(" --retry_count= "); + printf(" Set retry count value in rdma_cm mode\n"); + } + + if (tst != FS_RATE) { + printf(" --tclass= "); + printf(" Set the Traffic Class in GRH (if GRH is in use)\n"); + + #ifdef HAVE_CUDA + printf(" --use_cuda "); + printf(" Use CUDA lib for GPU-Direct testing.\n"); + #endif + 
+ #ifdef HAVE_VERBS_EXP + printf(" --use_exp "); + printf(" Use Experimental verbs in data path. Default is OFF.\n"); + #endif + + printf(" --use_hugepages "); + printf(" Use Hugepages instead of contig, memalign allocations.\n"); + + + #ifdef HAVE_ACCL_VERBS + printf(" --use_res_domain "); + printf(" Use shared resource domain\n"); + + printf(" --verb_type=