#include #include #include #include #include #include #include #include "perftest_parameters_write_bw.h" #define MAC_LEN (17) #define ETHERTYPE_LEN (6) #define MAC_ARR_LEN (6) #define HEX_BASE (16) #define inet_pton(af, src, dst) \ (((af) == AF_INET) ? ip4addr_aton((src), (ip4_addr_t*)(dst)) : 0) static const char *connStr[] = {"RC", "UC", "UD", "RawEth", "XRC", "DC"}; static const char *testsStr[] = {"Send", "RDMA_Write", "RDMA_Read", "Atomic"}; static const char *portStates[] = {"Nop", "Down", "Init", "Armed", "", "Active Defer"}; static const char *qp_state[] = {"OFF", "ON"}; static const char *exchange_state[] = {"Ethernet", "rdma_cm"}; static const char *atomicTypesStr[] = {"CMP_AND_SWAP", "FETCH_AND_ADD"}; // ----------------------------------------------------------------------------- const int str_link_layer(const char *str) { if (strncasecmp("IB", str, 2) == 0) return IBV_LINK_LAYER_INFINIBAND; else if (strncasecmp("Ethernet", str, 8) == 0) return IBV_LINK_LAYER_ETHERNET; else return LINK_FAILURE; } static int parse_mac_from_str(char *mac, u_int8_t *addr) { char tmpMac[MAC_LEN+1]; char *tmpField; int fieldNum = 0; if (strlen(mac) != MAC_LEN) { fprintf(stderr, "invalid MAC length\n"); return FAILURE; } if (addr == NULL) { fprintf(stderr, "invalid output addr array\n"); return FAILURE; } strcpy(tmpMac, mac); tmpField = strtok(tmpMac, ":"); while (tmpField != NULL && fieldNum < MAC_ARR_LEN) { char *chk; int tmpVal; tmpVal = strtoul(tmpField, &chk, HEX_BASE); if (tmpVal > 0xff) { fprintf(stderr, "field %d value %X out of range\n", fieldNum, tmpVal); return FAILURE; } if (*chk != 0) { fprintf(stderr, "Non-digit character %c (%0x) detected in field %d\n", *chk, *chk, fieldNum); return FAILURE; } addr[fieldNum++] = (u_int8_t) tmpVal; tmpField = strtok(NULL, ":"); } if (tmpField != NULL || fieldNum != MAC_ARR_LEN) { fprintf(stderr, "MAC address longer than six fields\n"); return FAILURE; } return SUCCESS; } int parse_ip_from_str(char *ip, u_int32_t *addr) { return inet_pton(AF_INET, ip, addr); } const char *link_layer_str(int8_t link_layer) { switch (link_layer) { case IBV_LINK_LAYER_UNSPECIFIED: case IBV_LINK_LAYER_INFINIBAND: return "IB"; case IBV_LINK_LAYER_ETHERNET: return "Ethernet"; default: return "Unknown"; } } void flow_rules_force_dependecies(struct perftest_parameters *user_param) { int min_iter_req = 0; if (user_param->flows != DEF_FLOWS) { if (user_param->is_server_port == OFF) { fprintf(stderr, " Flows feature works with UDP/TCP packets only for now\n"); exit(1); } if (user_param->test_type == ITERATIONS) { min_iter_req = user_param->flows * user_param->flows_burst; if (user_param->iters / min_iter_req < 1) { fprintf(stderr, " Current iteration number will not complete full cycle on all flows, it need to be multiple of the product between flows and flows_burst\n"); fprintf(stderr, " Set N*%d Iterations \n", user_param->flows * user_param->flows_burst); exit(FAILURE); } } if (user_param->tst == FS_RATE) { fprintf(stderr, "FS rate test not requiring flows parameter\n"); exit(FAILURE); } if (user_param->duplex) { fprintf(stderr, " Flows is currently designed to work with unidir tests only\n"); exit(FAILURE); } } else { if (user_param->flows_burst > 1) { fprintf(stderr, " Flows burst is designed to work with more then single flow\n"); exit(FAILURE); } } return; } void get_gbps_str_by_ibv_rate(char *rate_input_value, int *rate) { int i; for (i = 0; i < RATE_VALUES_COUNT; i++) { if (strcmp(rate_input_value, RATE_VALUES[i].rate_gbps_str) == 0) { *rate = (int)RATE_VALUES[i].rate_gbps_enum; return; } } printf("\x1b[31mThe input value for hw rate limit is not supported\x1b[0m\n"); /* print_supported_ibv_rate_values(); */ } static void change_conn_type(int *cptr, VerbType verb, const char *optarg) { if (*cptr == RawEth) return; if (strcmp(connStr[0], optarg) == 0) *cptr = RC; else if (strcmp(connStr[1], optarg) == 0) { *cptr = UC; if (verb == READ || verb == ATOMIC) { fprintf(stderr, " UC connection not possible in READ/ATOMIC verbs\n"); exit(1); } } else if (strcmp(connStr[2], optarg)==0) { *cptr = UD; if (verb != SEND) { fprintf(stderr, " UD connection only possible in SEND verb\n"); exit(1); } } else if(strcmp(connStr[3], optarg)==0) { *cptr = RawEth; } else if(strcmp(connStr[4], optarg)==0) { /* #ifdef HAVE_XRCD */ /* *cptr = XRC; */ /* #else */ fprintf(stderr, " XRC not detected in libibverbs\n"); exit(1); /* #endif */ } else if (strcmp(connStr[5], optarg)==0) { /* #ifdef HAVE_DC */ /* *cptr = DC; */ /* #else */ fprintf(stderr, " DC not detected in libibverbs\n"); exit(1); /* #endif */ } else { fprintf(stderr, " Invalid Connection type . please choose from {RC, UC, UD}\n"); exit(1); } } // ----------------------------------------------------------------------------- static int get_cache_line_size() { // HermitCore's cache line size is 64 (== DEF_CACHE_LINE_SIZE) int size = DEF_CACHE_LINE_SIZE; /* int size = 0; */ /* size = sysconf(_SC_LEVEL1_DCACHE_LINESIZE); */ /* if (size == 0) { */ /* #if defined(__sparc__) && defined(__arch64__) */ /* char* file_name = */ /* "/sys/devices/system/cpu/cpu0/l2_cache_line_size"; */ /* #else */ /* char* file_name = */ /* "/sys/devices/system/cpu/cpu0/cache/index0/coherency_line_size"; */ /* #endif */ /* FILE *fp; */ /* char line[10]; */ /* fp = fopen(file_name, "r"); */ /* if (fp == NULL) { */ /* return DEF_CACHE_LINE_SIZE; */ /* } */ /* if(fgets(line, 10, fp) != NULL) { */ /* size = atoi(line); */ /* fclose(fp); */ /* } */ /* } */ /* if (size <= 0) */ /* size = DEF_CACHE_LINE_SIZE; */ return size; } static void usage(const char *argv0, VerbType verb, TestType tst, int connection_type) { printf("Usage:\n"); if (tst != FS_RATE) { printf(" %s start a server and wait for connection\n", argv0); printf(" %s connect to server at \n", argv0); } else printf(" %s run a server to measure FS rate \n", argv0); printf("\n"); printf("Options:\n"); if (verb != ATOMIC && connection_type != RawEth) { printf(" -a, --all "); printf(" Run sizes from 2 till 2^23\n"); } if (verb == ATOMIC) { printf(" -A, --atomic_type= "); printf(" type of atomic operation from {CMP_AND_SWAP, FETCH_AND_ADD} (default FETCH_AND_ADD)\n"); } if (tst == BW) { printf(" -b, --bidirectional "); printf(" Measure bidirectional bandwidth (default unidirectional)\n"); } if (connection_type != RawEth) { if (verb == SEND) { printf(" -c, --connection= "); printf(" Connection type RC/XRC/UC/UD/DC (default RC)\n"); } else if (verb == WRITE) { printf(" -c, --connection= "); printf(" Connection type RC/XRC/UC/DC (default RC)\n"); } else if (verb == READ || verb == ATOMIC) { printf(" -c, --connection= "); printf(" Connection type RC/XRC/DC (default RC)\n"); } } if (tst == LAT) { printf(" -C, --report-cycles "); printf(" report times in cpu cycle units (default microseconds)\n"); } printf(" -d, --ib-dev= "); printf(" Use IB device (default first device found)\n"); printf(" -D, --duration "); printf(" Run test for a customized period of seconds.\n"); if (verb != WRITE && connection_type != RawEth) { printf(" -e, --events "); printf(" Sleep on CQ events (default poll)\n"); printf(" -X, --vector= "); printf(" Set used for events\n"); } printf(" -f, --margin "); printf(" measure results within margins. (default=2sec)\n"); printf(" -F, --CPU-freq "); printf(" Do not show a warning even if cpufreq_ondemand module is loaded, and cpu-freq is not on max.\n"); if (verb == SEND && tst != FS_RATE) { printf(" -g, --mcg "); printf(" Send messages to multicast group with 1 QP attached to it.\n"); } printf(" -h, --help "); printf(" Show this help screen.\n"); if (tst == LAT || tst == LAT_BY_BW || tst == FS_RATE) { printf(" -H, --report-histogram "); printf(" Print out all results (default print summary only)\n"); } printf(" -i, --ib-port= "); printf(" Use port of IB device (default %d)\n", DEF_IB_PORT); if (verb != READ && verb != ATOMIC) { printf(" -I, --inline_size= "); printf(" Max size of message to be sent in inline\n"); } if (tst == BW || tst == LAT_BY_BW) { printf(" -l, --post_list="); printf(" Post list of WQEs of size (instead of single post)\n"); } if (tst != FS_RATE) { if (connection_type == RawEth) { printf(" -m, --mtu= "); printf(" MTU size : 64 - 9600 (default port mtu)\n"); } else { printf(" -m, --mtu= "); printf(" MTU size : 256 - 4096 (default port mtu)\n"); } if (verb == SEND) { printf(" -M, --MGID= "); printf(" In multicast, uses as the group MGID.\n"); } } printf(" -n, --iters= "); printf(" Number of exchanges (at least %d, default %d)\n", MIN_ITER, ((verb == WRITE) && (tst == BW)) ? DEF_ITERS_WB : DEF_ITERS); if (tst == BW) { printf(" -N, --noPeak"); printf(" Cancel peak-bw calculation (default with peak up to iters=20000)\n"); } if (verb == READ || verb == ATOMIC) { printf(" -o, --outs= "); printf(" num of outstanding read/atom(default max of device)\n"); } if (tst == BW && connection_type != RawEth) { printf(" -O, --dualport "); printf(" Run test in dual-port mode.\n"); } printf(" -p, --port= "); printf(" Listen on/connect to port (default %d)\n", DEF_PORT); if (tst == BW ) { printf(" -q, --qp= Num of qp's(default %d)\n", DEF_NUM_QPS); } if (tst == BW) { printf(" -Q, --cq-mod "); printf(" Generate Cqe only after <--cq-mod> completion\n"); } if (verb == SEND && tst != FS_RATE) { printf(" -r, --rx-depth= "); printf(" Rx queue size (default %d).", DEF_RX_SEND); printf(" If using srq, rx-depth controls max-wr size of the srq\n"); } if (connection_type != RawEth) { printf(" -R, --rdma_cm "); printf(" Connect QPs with rdma_cm and run test on those QPs\n"); } if (verb != ATOMIC) { printf(" -s, --size= "); printf(" Size of message to exchange (default %d)\n", tst == LAT ? DEF_SIZE_LAT : DEF_SIZE_BW); } if (tst != FS_RATE) { printf(" -S, --sl= "); printf(" SL (default %d)\n", DEF_SL); if (tst == BW || tst == LAT_BY_BW) { printf(" -t, --tx-depth= "); printf(" Size of tx queue (default %d)\n", tst == LAT ? DEF_TX_LAT : DEF_TX_BW); } printf(" -T, --tos= "); printf(" Set to RDMA-CM QPs. available only with -R flag. values 0-256 (default off)\n"); } printf(" -u, --qp-timeout= "); printf(" QP timeout, timeout value is 4 usec * 2 ^(timeout), default %d\n", DEF_QP_TIME); if (tst == LAT || tst == LAT_BY_BW || tst == FS_RATE) { printf(" -U, --report-unsorted "); printf(" (implies -H) print out unsorted results (default sorted)\n"); } printf(" -V, --version "); printf(" Display version number\n"); if (tst == BW) { printf(" -w, --limit_bw= "); printf(" Set verifier limit for bandwidth\n"); } if (connection_type != RawEth) { printf(" -x, --gid-index= "); printf(" Test uses GID with GID index (Default : IB - no gid . ETH - 0)\n"); } if (tst == BW) { printf(" -y, --limit_msgrate= "); printf(" Set verifier limit for Msg Rate\n"); } if (connection_type != RawEth) { printf(" -z, --com_rdma_cm "); printf(" Communicate with rdma_cm module to exchange data - use regular QPs\n"); } /*Long flags*/ putchar('\n'); printf(" --cpu_util "); printf(" Show CPU Utilization in report, valid only in Duration mode \n"); if (tst != FS_RATE) { printf(" --dlid "); printf(" Set a Destination LID instead of getting it from the other side.\n"); } if (connection_type != RawEth) { printf(" --dont_xchg_versions "); printf(" Do not exchange versions and MTU with other side \n"); } if (tst != FS_RATE) { printf(" --force-link= "); printf(" Force the link(s) to a specific type: IB or Ethernet.\n"); } if (verb != WRITE) { printf(" --inline_recv= "); printf(" Max size of message to be sent in inline receive\n"); } if (connection_type != RawEth) { printf(" --ipv6 "); printf(" Use IPv6 GID. Default is IPv4\n"); } if (tst == LAT) { printf(" --latency_gap= "); printf(" delay time between each post send\n"); } if (connection_type != RawEth) { printf(" --mmap=file "); printf(" Use an mmap'd file as the buffer for testing P2P transfers.\n"); printf(" --mmap-offset= "); printf(" Use an mmap'd file as the buffer for testing P2P transfers.\n"); } if (tst == BW) { printf(" --mr_per_qp "); printf(" Create memory region for each qp.\n"); } #if defined HAVE_EX_ODP || defined HAVE_EXP_ODP printf(" --odp "); printf(" Use On Demand Paging instead of Memory Registration.\n"); #endif printf(" --output="); printf(" Set verbosity output level: bandwidth , message_rate, latency \n"); printf(" Latency measurement is Average calculation \n"); if (tst != FS_RATE) { printf(" --perform_warm_up"); printf(" Perform some iterations before start measuring in order to warming-up memory cache, valid in Atomic, Read and Write BW tests\n"); printf(" --pkey_index= PKey index to use for QP\n"); } if ( tst == BW ) { printf(" --report-both "); printf(" Report RX & TX results separately on Bidirectinal BW tests\n"); printf(" --report_gbits "); printf(" Report Max/Average BW of test in Gbit/sec (instead of MB/sec)\n"); if (connection_type != RawEth) { printf(" --report-per-port "); printf(" Report BW data on both ports when running Dualport and Duration mode\n"); } printf(" --reversed "); printf(" Reverse traffic direction - Server send to client\n"); printf(" --run_infinitely "); printf(" Run test forever, print results every seconds\n"); } if (connection_type != RawEth) { printf(" --retry_count= "); printf(" Set retry count value in rdma_cm mode\n"); } if (tst != FS_RATE) { printf(" --tclass= "); printf(" Set the Traffic Class in GRH (if GRH is in use)\n"); #ifdef HAVE_CUDA printf(" --use_cuda "); printf(" Use CUDA lib for GPU-Direct testing.\n"); #endif #ifdef HAVE_VERBS_EXP printf(" --use_exp "); printf(" Use Experimental verbs in data path. Default is OFF.\n"); #endif printf(" --use_hugepages "); printf(" Use Hugepages instead of contig, memalign allocations.\n"); #ifdef HAVE_ACCL_VERBS printf(" --use_res_domain "); printf(" Use shared resource domain\n"); printf(" --verb_type=