diff --git a/Makefile b/Makefile index d34b1fa..a4ccfb9 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ TARGETS = netem mark.so -OBJS = main.o probe.o emulate.o timing.o hist.o utils.o ts.o tc.o tcp.o dist.o +OBJS = main.o probe.o emulate.o timing.o hist.o utils.o ts.o tc.o tcp.o dist.o dist-maketable.o CC = gcc @@ -8,7 +8,6 @@ CFLAGS = -g -lrt -std=c99 -Wall CFLAGS += -I/usr/local/include/libnl3 CFLAGS += -I/usr/include/libnl3 -CFLAGS += -I./libnl/include LDLIBS = -lnl-3 -lnl-route-3 -lm diff --git a/README.md b/README.md index c182a5e..59ceebc 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,13 @@ # Netem Tool -This tool uses a 3-way TCP handshake to measure the real RTT of a TCP connection. -The gathered information can be used to configure the Linux network emulation queing discipline (see tc-netem(8)). +*Note:* This tool is in the alpha stage! -It is possible to directly pass the results of the RTT probes to the Kernel. -Therefore it allows to simulate an existing network connection in realtime. -Alternatively, the measurements can be stored and replayed later. +This tool uses a 3-way TCP handshake to measure the _real_ round-trip-time of a TCP connection. +The gathered information can be used to configure the Linux network emulation queuing discipline (see [tc-netem(8)](http://man7.org/linux/man-pages/man8/tc-netem.8.html)). -One use case might be to test your application, equipement, protocols against sudden changes of the link quality. -It also allows to generate custom delay distributions which can be used with tc-netem(8). +It is possible to directly pass the results of the RTT probes to the Kernel by using a netlink socket and [libnl](http://www.infradead.org/~tgr/libnl/). +Therefore it allows to simulate an existing network connection on-the-fly. +Alternatively, the measurements can be stored in a file and replayed later using Bash's IO redirection. 
### Usage @@ -18,16 +17,24 @@ Run TCP SYN/ACK probes to measure round-trip-time (RTT): ./netem probe 8.8.8.8 53 > measurements.dat +The `probe` sub-command returns the following fields per line on STDOUT: + + current_rtt, mean, sigma + ###### Use case 2a: convert measurements into delay distribution table -Collect measurements to build a tc-netem(8) delay distribution table +Collect measurements to build a [tc-netem(8)](http://man7.org/linux/man-pages/man8/tc-netem.8.html) delay distribution table ./netem dist generate < measurements.dat > google_dns.dist +*Please note:* you might have to change the scaling by adjusting the compile time constants in `dist-maketable.h`! + ###### Use case 2b: generate distribution from measurements and load it to the Kernel ./netem dist load < probing.dat +*Please note:* you might have to change the scaling by adjusting the compile time constants in `dist-maketable.h`! + ###### Use case 3: on-the-fly link simulation The output of this command and be stored in a file or directly passed to the `emulate` subcommand: @@ -38,26 +45,77 @@ or ./netem emulate < measurements.dat +The emulate sub-command expects the following fields on STDIN separated by whitespace: + + current_rtt, mean, sigma, gap, loss_prob, loss_corr, reorder_prob, reorder_corr, corruption_prob, corruption_corr, duplication_prob, duplication_corr; + +At least the first three fields have to be given. The remaining ones are optional. + ###### Use case 4: Limit the effect of the network emulation to a specific application -To apply the network emulation only to a limit stream of packets, you can use the `mark` tool: +To apply the network emulation only to a limited stream of packets, you can use the `mark` tool. ./netem -m 0xCD dist load < measurements.dat sudo LD_PRELOAD=${PWD}/mark.so MARK=0xCD ping google.de -Please make sure the specify the environmental variables after the sudo command! 
+This tool uses the dynamic linker to hook into the `socket()` wrapper-function of libc (see `mark.c`). +Usually, the hook will simply call the original `socket(2)` syscall for non-AF_INET sockets. +But for AF_INET sockets, the hook will additionally call `setsockopt(sd, SOL_SOCKET, SO_MARK, ...)` after the socket has been created. + +Later on, the `netem` tool will use a combination of the classful `prio` qdisc and the `fw` classifier to limit the network emulation only to the _marked_ application (see use case 5, below). + +*Note:* Please make sure to specify the environment variables after the sudo command! This is necessary, as `ping` is a SUID program. The dynamic linker strips certain enviromental variables (as `LD_PRELOAD`) for security reasons! +###### Use case 5: Show the current Traffic Controller setup + + ./tcdump.sh eth0 + + ======= eth0: qdisc ======== + qdisc prio 1: root refcnt 2 bands 4 priomap 2 3 3 3 2 3 1 1 2 2 2 2 2 2 2 2 + Sent 17304 bytes 126 pkt (dropped 0, overlimits 0 requeues 0) + backlog 0b 0p requeues 0 + qdisc netem 2: parent 1:1 limit 1000 delay 3.3ms 9.1ms + Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) + backlog 0b 0p requeues 0 + ======= eth0: filter ======== + filter parent 1: protocol all pref 49152 fw + filter parent 1: protocol all pref 49152 fw handle 0xcd classid 1:1 + ======= eth0: class ======== + class prio 1:1 parent 1: leaf 2: + Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) + backlog 0b 0p requeues 0 + class prio 1:2 parent 1: + Sent 15126 bytes 115 pkt (dropped 0, overlimits 0 requeues 0) + backlog 0b 0p requeues 0 + class prio 1:3 parent 1: + Sent 3270 bytes 17 pkt (dropped 0, overlimits 0 requeues 0) + backlog 0b 0p requeues 0 + class prio 1:4 parent 1: + Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) + backlog 0b 0p requeues 0 + ### ToDo -Add more metrics: +##### More metrics: + +Add more metrics to the probing system: - loss - duplication - corruption - reordering + +##### Hardware 
Timestamping Support: + +There is experimental support for using Linux' HW / Kernelspace timestamping support (see `ts.c`). +This allows to measure the RTT by using the arrival / departure times of packets in the NIC or in the Kernel, instead of relying on the inaccurate user space. + +Unfortunately, this hardware timestamping support requires special driver support. +Therefore it's still disabled. + ### Building ##### Install libnl diff --git a/dist-maketable.c b/dist-maketable.c new file mode 100644 index 0000000..386b405 --- /dev/null +++ b/dist-maketable.c @@ -0,0 +1,224 @@ +/* + * Experimental data distribution table generator + * Taken from the uncopyrighted NISTnet code (public domain). + * + * Read in a series of "random" data values, either + * experimentally or generated from some probability distribution. + * From this, create the inverse distribution table used to approximate + * the distribution. + * + * @link https://github.com/shemminger/iproute2/blob/master/netem/maketable.c + */ + +#define _POSIX_C_SOURCE 200809L + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "dist-maketable.h" + +double * readdoubles(FILE *fp, int *number) +{ + struct stat info; + double *x; + int limit; + int n=0, i=0; + + fstat(fileno(fp), &info); + if (info.st_size > 0) + limit = 2 * info.st_size / sizeof(double); /* @@ approximate */ + else + limit = 10000; + + x = calloc(limit, sizeof(double)); + if (!x) + error(-1, 0, "Alloc"); + + size_t linelen = 0; + char *line; + + for (i = 0; i < limit; i++) { + if (getline(&line, &linelen, fp) > 0) { + if (line[0] == '#' || line[0] == '\r' || line[0] == '\n') + continue; + + fscanf(fp, "%lf", &x[i]); + + ++n; + } + else if (feof(fp)) + break; + } + + *number = n; + + return x; +} + +void arraystats(double *x, int limit, double *mu, double *sigma, double *rho) +{ + int n=0, i; + double sumsquare=0.0, sum=0.0, top=0.0; + double sigma2=0.0; + + for (i=0; i= DISTTABLESIZE) index = 
DISTTABLESIZE-1; + ++table[index]; + if (index > last) + last = index +1; + if (index < first) + first = index; + } + return table; +} + +/* replace an array by its cumulative distribution */ +void cumulativedist(int *table, int limit, int *total) +{ + int accum=0; + + while (--limit >= 0) { + accum += *table; + *table++ = accum; + } + *total = accum; +} + +short * inverttable(int *table, int inversesize, int tablesize, int cumulative) +{ + int i, inverseindex, inversevalue; + short *inverse; + double findex, fvalue; + + inverse = (short *)malloc(inversesize*sizeof(short)); + for (i=0; i < inversesize; ++i) { + inverse[i] = MINSHORT; + } + for (i=0; i < tablesize; ++i) { + findex = ((double)i/(double)DISTTABLEGRANULARITY) - DISTTABLEDOMAIN; + fvalue = (double)table[i]/(double)cumulative; + inverseindex = (int)rint(fvalue*inversesize); + inversevalue = (int)rint(findex*TABLEFACTOR); + if (inversevalue <= MINSHORT) inversevalue = MINSHORT+1; + if (inversevalue > MAXSHORT) inversevalue = MAXSHORT; + inverse[inverseindex] = inversevalue; + } + return inverse; + +} + +/* Run simple linear interpolation over the table to fill in missing entries */ +void interpolatetable(short *table, int limit) +{ + int i, j, last, lasti = -1; + + last = MINSHORT; + for (i=0; i < limit; ++i) { + if (table[i] == MINSHORT) { + for (j=i; j < limit; ++j) + if (table[j] != MINSHORT) + break; + if (j < limit) { + table[i] = last + (i-lasti)*(table[j]-last)/(j-lasti); + } else { + table[i] = last + (i-lasti)*(MAXSHORT-last)/(limit-lasti); + } + } else { + last = table[i]; + lasti = i; + } + } +} + +void printtable(const short *table, int limit) +{ + int i; + + printf("# This is the distribution table for the experimental distribution.\n"); + + for (i=0 ; i < limit; ++i) { + printf("%d%c", table[i], + (i % 8) == 7 ? 
'\n' : ' '); + } +} + +/*int +main(int argc, char **argv) +{ + FILE *fp; + double *x; + double mu, sigma, rho; + int limit; + int *table; + short *inverse; + int total; + + if (argc > 1) { + if (!(fp = fopen(argv[1], "r"))) { + perror(argv[1]); + exit(1); + } + } else { + fp = stdin; + } + x = readdoubles(fp, &limit); + if (limit <= 0) { + fprintf(stderr, "Nothing much read!\n"); + exit(2); + } + arraystats(x, limit, &mu, &sigma, &rho); +#ifdef DEBUG + fprintf(stderr, "%d values, mu %10.4f, sigma %10.4f, rho %10.4f\n", + limit, mu, sigma, rho); +#endif + + table = makedist(x, limit, mu, sigma); + free((void *) x); + cumulativedist(table, DISTTABLESIZE, &total); + inverse = inverttable(table, TABLESIZE, DISTTABLESIZE, total); + interpolatetable(inverse, TABLESIZE); + printtable(inverse, TABLESIZE); + return 0; +}*/ \ No newline at end of file diff --git a/dist-maketable.h b/dist-maketable.h new file mode 100644 index 0000000..8a24ef9 --- /dev/null +++ b/dist-maketable.h @@ -0,0 +1,53 @@ +/* + * Experimental data distribution table generator + * Taken from the uncopyrighted NISTnet code (public domain). + * + * Read in a series of "random" data values, either + * experimentally or generated from some probability distribution. + * From this, create the inverse distribution table used to approximate + * the distribution. + * + * @link https://github.com/shemminger/iproute2/blob/master/netem/maketable.c + */ + +#ifndef _DIST_MAKETABLE_H_ +#define _DIST_MAKETABLE_H_ + +#include + +/* Create a (normalized) distribution table from a set of observed + * values. The table is fixed to run from (as it happens) -4 to +4, + * with granularity .00002. 
+ */ + +#define TABLESIZE 16384/4 +#define TABLEFACTOR 8192 +#ifndef MINSHORT +#define MINSHORT -32768 +#define MAXSHORT 32767 +#endif + +/* Since entries in the inverse are scaled by TABLEFACTOR, and can't be bigger + * than MAXSHORT, we don't bother looking at a larger domain than this: + */ +#define DISTTABLEDOMAIN ((MAXSHORT/TABLEFACTOR)+1) +#define DISTTABLEGRANULARITY 50000 +#define DISTTABLESIZE (DISTTABLEDOMAIN*DISTTABLEGRANULARITY*2) + +double * readdoubles(FILE *fp, int *number); + +void arraystats(double *x, int limit, double *mu, double *sigma, double *rho); + +int * makedist(double *x, int limit, double mu, double sigma); + +/* replace an array by its cumulative distribution */ +void cumulativedist(int *table, int limit, int *total); + +short * inverttable(int *table, int inversesize, int tablesize, int cumulative); + +/* Run simple linear interpolation over the table to fill in missing entries */ +void interpolatetable(short *table, int limit); + +void printtable(const short *table, int limit); + +#endif \ No newline at end of file diff --git a/dist.c b/dist.c index 27cf428..784eacf 100644 --- a/dist.c +++ b/dist.c @@ -9,20 +9,31 @@ * @license GPLv3 *********************************************************************************/ +#define _POSIX_C_SOURCE 1 +#include + #include #include +#include #include -#include #include #include +#include + +#include "netlink-private.h" +#include "dist-maketable.h" +#include "tc.h" +#include "config.h" + +#define SCH_NETEM_ATTR_DIST 0x2000 /** * Set the delay distribution. Latency/jitter must be set before applying. * @arg qdisc Netem qdisc. * @return 0 on success, error code on failure. 
*/ -int rtnl_netem_set_delay_distribution_data(struct rtnl_qdisc *qdisc, double *data, size_t len) { +int rtnl_netem_set_delay_distribution_data(struct rtnl_qdisc *qdisc, short *data, size_t len) { struct rtnl_netem *netem; if (!(netem = rtnl_tc_data(TC_CAST(qdisc)))) @@ -35,7 +46,7 @@ int rtnl_netem_set_delay_distribution_data(struct rtnl_qdisc *qdisc, double *dat size_t i; for (i = 0; i < len; i++) - netem->qnm_dist.dist_data[n++] = data[i]; + netem->qnm_dist.dist_data[i] = data[i]; netem->qnm_dist.dist_size = len; netem->qnm_mask |= SCH_NETEM_ATTR_DIST; @@ -43,125 +54,113 @@ int rtnl_netem_set_delay_distribution_data(struct rtnl_qdisc *qdisc, double *dat return 0; } -/* Create a (normalized) distribution table from a set of observed - * values. The table is fixed to run from (as it happens) -4 to +4, - * with granularity .00002. - */ - -#define TABLESIZE 16384/4 -#define TABLEFACTOR 8192 -#ifndef MINSHORT -#define MINSHORT -32768 -#define MAXSHORT 32767 -#endif - -/* Since entries in the inverse are scaled by TABLEFACTOR, and can't be bigger - * than MAXSHORT, we don't bother looking at a larger domain than this: - */ -#define DISTTABLEDOMAIN ((MAXSHORT/TABLEFACTOR)+1) -#define DISTTABLEGRANULARITY 50000 -#define DISTTABLESIZE (DISTTABLEDOMAIN*DISTTABLEGRANULARITY*2) - -static int * makedist(double *x, int limit, double mu, double sigma) +static short * dist_make(FILE *fp, double *mu, double *sigma, double *rho) { + int limit; + double *x; int *table; - int i, index, first=DISTTABLESIZE, last=0; - double input; - - table = calloc(DISTTABLESIZE, sizeof(int)); - if (!table) { - perror("table alloc"); - exit(3); - } - - for (i=0; i < limit; ++i) { - /* Normalize value */ - input = (x[i]-mu)/sigma; - - index = (int)rint((input+DISTTABLEDOMAIN)*DISTTABLEGRANULARITY); - if (index < 0) index = 0; - if (index >= DISTTABLESIZE) index = DISTTABLESIZE-1; - ++table[index]; - if (index > last) - last = index +1; - if (index < first) - first = index; - } - return table; -} - 
-/* replace an array by its cumulative distribution */ -static void cumulativedist(int *table, int limit, int *total) -{ - int accum=0; - - while (--limit >= 0) { - accum += *table; - *table++ = accum; - } - *total = accum; -} - -static short * inverttable(int *table, int inversesize, int tablesize, int cumulative) -{ - int i, inverseindex, inversevalue; short *inverse; - double findex, fvalue; + int total; - inverse = (short *) malloc(inversesize * sizeof(short)); - for (i=0; i < inversesize; ++i) - inverse[i] = MINSHORT; + x = readdoubles(fp, &limit); + if (limit <= 0) + error(-1, 0, "Nothing much read!"); - for (i=0; i < tablesize; ++i) { - findex = ((double)i/(double)DISTTABLEGRANULARITY) - DISTTABLEDOMAIN; - fvalue = (double)table[i]/(double)cumulative; - inverseindex = (int)rint(fvalue*inversesize); - inversevalue = (int)rint(findex*TABLEFACTOR); - - if (inversevalue <= MINSHORT) - inversevalue = MINSHORT+1; - if (inversevalue > MAXSHORT) - inversevalue = MAXSHORT; - - inverse[inverseindex] = inversevalue; - } + arraystats(x, limit, mu, sigma, rho); + fprintf(stderr, "Read %d values, mu %10.4f, sigma %10.4f, rho %10.4f\n", + limit, *mu, *sigma, *rho); + + table = makedist(x, limit, *mu, *sigma); + free((void *) x); + + cumulativedist(table, DISTTABLESIZE, &total); + inverse = inverttable(table, TABLESIZE, DISTTABLESIZE, total); + interpolatetable(inverse, TABLESIZE); + return inverse; } -/* Run simple linear interpolation over the table to fill in missing entries */ -static void interpolatetable(short *table, int limit) +static int dist_generate(int argc, char *argv[]) { - int i, j, last, lasti = -1; + FILE *fp; + double mu, sigma, rho; - last = MINSHORT; - - for (i=0; i < limit; ++i) { - if (table[i] == MINSHORT) { - for (j=i; j < limit; ++j) { - if (table[j] != MINSHORT) - break; - } - - if (j < limit) - table[i] = last + (i-lasti)*(table[j]-last)/(j-lasti); - else - table[i] = last + (i-lasti)*(MAXSHORT-last)/(limit-lasti); - } - else { - last = table[i]; 
- lasti = i; - } + if (argc == 1) { + if (!(fp = fopen(argv[0], "r"))) + error(-1, errno, "Failed to open file: %s", argv[0]); } -} + else + fp = stdin; + + short *inverse = dist_make(fp, &mu, &sigma, &rho); + if (!inverse) + error(-1, 0, "Failed to generate distribution"); + + printtable(inverse, TABLESIZE); -int dist_generate(int argc, char *argv[]) -{ return 0; } -int dist_load(int argc, char *argv[]) +static int dist_load(int argc, char *argv[]) { + FILE *fp; + double mu, sigma, rho; + + if (argc == 1) { + if (!(fp = fopen(argv[0], "r"))) + error(-1, errno, "Failed to open file: %s", argv[0]); + } + else + fp = stdin; + + short *inverse = dist_make(fp, &mu, &sigma, &rho); + if (!inverse) + error(-1, 0, "Failed to generate distribution"); + + int ret; + + struct nl_sock *sock; + + struct rtnl_link *link; + struct rtnl_tc *qdisc_prio = NULL; + struct rtnl_tc *qdisc_netem = NULL; + struct rtnl_tc *cls_fw = NULL; + + /* Create connection to netlink */ + sock = nl_socket_alloc(); + nl_connect(sock, NETLINK_ROUTE); + + /* Get interface */ + link = tc_get_link(sock, cfg.dev); + if (!link) + error(-1, 0, "Interface does not exist: %s", cfg.dev); + + /* Reset TC subsystem */ + ret = tc_reset(sock, link); + if (ret && ret != -NLE_OBJ_NOTFOUND) + error(-1, 0, "Failed to reset TC: %s", nl_geterror(ret)); + + /* Setup TC subsystem */ + if ((ret = tc_prio(sock, link, &qdisc_prio))) + error(-1, 0, "Failed to setup TC: prio qdisc: %s", nl_geterror(ret)); + + if ((ret = tc_classifier(sock, link, &cls_fw, cfg.mark, cfg.mask))) + error(-1, 0, "Failed to setup TC: fw filter: %s", nl_geterror(ret)); + + if ((ret = tc_netem(sock, link, &qdisc_netem))) + error(-1, 0, "Failed to setup TC: netem qdisc: %s", nl_geterror(ret)); + + /* We will use the default normal distribution for now */ + if (rtnl_netem_set_delay_distribution_data((struct rtnl_qdisc *) qdisc_netem, inverse, TABLESIZE)) + error(-1, 0, "Failed to set netem delay distrubtion: %s", nl_geterror(ret)); + + 
rtnl_netem_set_delay((struct rtnl_qdisc *) qdisc_netem, mu); + rtnl_netem_set_jitter((struct rtnl_qdisc *) qdisc_netem, sigma); + + nl_close(sock); + nl_socket_free(sock); + return 0; } @@ -176,4 +175,6 @@ int dist(int argc, char *argv[]) return dist_generate(argc-1, argv+1); else if (!strcmp(subcmd, "load")) return dist_load(argc-1, argv+1); + else + return -1; } \ No newline at end of file diff --git a/emulate.c b/emulate.c index cbc6a29..15e2a05 100644 --- a/emulate.c +++ b/emulate.c @@ -12,6 +12,8 @@ #include #include +#include + #include #include @@ -19,24 +21,87 @@ #include "config.h" #include "timing.h" +enum input_fields { + CURRENT_RTT, + MEAN, + SIGMA, + GAP, + LOSS_PROB, + LOSS_CORR, + REORDER_PROB, + REORDER_CORR, + CORRUPTION_PROB, + CORRUPTION_CORR, + DUPLICATION_PROB, + DUPLICATION_CORR, + MAXFIELDS +}; + +static int emulate_parse_line(char *line, struct rtnl_tc *tc) +{ + double val; + char *cur, *end = line; + int i = 0; + + struct rtnl_qdisc *ne = (struct rtnl_qdisc *) tc; + + do { + cur = end; + val = strtod(cur, &end); + + switch (i) { + case CURRENT_RTT: + rtnl_netem_set_delay(ne, val * 1e6 / 2); + break; /* we approximate: delay = RTT / 2 */ + case MEAN: + break; /* ignored */ + case SIGMA: + rtnl_netem_set_jitter(ne, val * 1e6 + 1); + break; + case GAP: + rtnl_netem_set_gap(ne, val); + break; + case LOSS_PROB: + rtnl_netem_set_loss(ne, val); + break; + case LOSS_CORR: + rtnl_netem_set_loss_correlation(ne, val); + break; + case REORDER_PROB: + rtnl_netem_set_reorder_probability(ne, val); + break; + case REORDER_CORR: + rtnl_netem_set_reorder_correlation(ne, val); + break; + case CORRUPTION_PROB: + rtnl_netem_set_corruption_probability(ne, val); + break; + case CORRUPTION_CORR: + rtnl_netem_set_corruption_correlation(ne, val); + break; + case DUPLICATION_PROB: + rtnl_netem_set_duplicate(ne, val); + break; + case DUPLICATION_CORR: + rtnl_netem_set_duplicate_correlation(ne, val); + break; + } + } while (cur != end && ++i < MAXFIELDS); + + + return 
(i >= 3) ? 0 : -1; /* we need at least 3 fields: rtt + jitter */ +} + int emulate(int argc, char *argv[]) { - int ret; + int ret, tfd, run = 0; struct nl_sock *sock; - struct nl_dump_params dp_param = { - .dp_type = NL_DUMP_STATS, - .dp_fd = stdout - }; + struct rtnl_link *link; struct rtnl_tc *qdisc_prio = NULL; struct rtnl_tc *qdisc_netem = NULL; struct rtnl_tc *cls_fw = NULL; - - struct tc_netem netem = { - .limit = cfg.limit, - .delay = 100000 - }; /* Create connection to netlink */ sock = nl_socket_alloc(); @@ -53,70 +118,66 @@ int emulate(int argc, char *argv[]) error(-1, 0, "Failed to reset TC: %s", nl_geterror(ret)); /* Setup TC subsystem */ - ret = tc_prio(sock, link, &qdisc_prio); - if (ret) + if ((ret = tc_prio(sock, link, &qdisc_prio))) error(-1, 0, "Failed to setup TC: prio qdisc: %s", nl_geterror(ret)); - ret = tc_classifier(sock, link, &cls_fw, cfg.mark, cfg.mask); - if (ret) + if ((ret = tc_classifier(sock, link, &cls_fw, cfg.mark, cfg.mask))) error(-1, 0, "Failed to setup TC: fw filter: %s", nl_geterror(ret)); - ret = tc_netem(sock, link, &qdisc_netem, &netem); - if (ret) + if ((ret = tc_netem(sock, link, &qdisc_netem))) error(-1, 0, "Failed to setup TC: netem qdisc: %s", nl_geterror(ret)); /* We will use the default normal distribution for now */ - rtnl_netem_set_delay_distribution(qdisc_netem, "normal"); + if (rtnl_netem_set_delay_distribution((struct rtnl_qdisc *) qdisc_netem, "normal")) + error(-1, 0, "Failed to set netem delay distrubtion: %s", nl_geterror(ret)); + + rtnl_netem_set_limit((struct rtnl_qdisc *) qdisc_netem, 0); /* Start timer */ - struct itimerspec its = { - .it_interval = time_from_double(1 / cfg.rate), - .it_value = { 1, 0 } - }; - - int tfd = timerfd_create(CLOCK_REALTIME, 0); - if (tfd < 0) - error(-1, errno, "Failed to create timer"); - - if (timerfd_settime(tfd, 0, &its, NULL)) - error(-1, errno, "Failed to start timer"); + if ((tfd = timerfd_init(cfg.rate)) < 0) + error(-1, errno, "Failed to initilize timer"); char *line 
= NULL; size_t linelen = 0; + ssize_t len; - unsigned run = 0; - while (!cfg.limit || run < cfg.limit) { - float rtt, mu, sigma; + do { +#if 0 + struct nl_dump_params dp_param = { + .dp_type = NL_DUMP_DETAILS, + .dp_fd = stdout + }; - /* Show queuing discipline statistics */ - rtnl_tc_dump_stats(qdisc_netem, &dp_param); + nl_object_dump((struct nl_object *) qdisc_netem, &dp_param); + nl_object_dump((struct nl_object *) qdisc_prio, &dp_param); + nl_object_dump((struct nl_object *) cls_fw, &dp_param); +#endif + tc_print_netem(qdisc_netem); - /* Parse new data */ - if (feof(stdin) || getline(&line, &linelen, stdin) == -1) +next_line: len = getline(&line, &linelen, stdin); + if (len < 0 && errno == ENOENT) + break; /* EOF => quit */ + else if (len < 0) error(-1, errno, "Failed to read data from stdin"); - if (line[0] == '#') - continue; + if (line[0] == '#' || line[0] == '\r' || line[0] == '\n') + goto next_line; - if (sscanf(line, "%f %f %f ", &rtt, &mu, &sigma) != 3) - error(-1, 0, "Invalid data format"); - - /* Update the netem config according to the measurements */ - /* TODO: Add more characteristics */ - netem.delay = (mu / 2) * 1e6; - netem.jitter = sigma * 1e6; - - ret = tc_netem(sock, link, &qdisc_netem, &netem); + if (emulate_parse_line(line, qdisc_netem)) + error(-1, 0, "Failed to parse stdin"); + + ret = tc_netem(sock, link, &qdisc_netem); if (ret) error(-1, 0, "Failed to update TC: netem qdisc: %s", nl_geterror(ret)); - timerfd_wait(tfd); - run++; - } + run = timerfd_wait(tfd); + } while (!cfg.limit || run < cfg.limit); /* Shutdown */ + free(line); + nl_close(sock); nl_socket_free(sock); - + return 0; } \ No newline at end of file diff --git a/google_dns.data b/google_dns.data new file mode 100644 index 0000000..c0e14e7 --- /dev/null +++ b/google_dns.data @@ -0,0 +1,85 @@ +0.007793 0.007793 0.000000 +0.012111 0.009952 0.003053 +0.007713 0.009206 0.002516 +0.007720 0.008834 0.002185 +0.010580 0.009183 0.002047 +0.006892 0.008801 0.002056 +0.007658 0.008638 
0.001926 +0.021541 0.010251 0.004898 +0.006646 0.009850 0.004737 +0.006649 0.009530 0.004579 +0.007001 0.009300 0.004411 +0.006629 0.009078 0.004275 +0.022691 0.010125 0.005569 +0.006482 0.009865 0.005438 +0.006838 0.009663 0.005298 +0.007332 0.009517 0.005152 +0.007372 0.009391 0.005015 +0.006467 0.009229 0.004914 +0.007457 0.009135 0.004793 +0.006506 0.009004 0.004702 +# Probing: 8.8.8.8 on port 53 +# Started: Wed, 22 Jul 2015 16:26:39 +0200 +# RTT mu sigma (units in S) +0.006968 0.000000 0.000000 +0.007741 0.007741 0.000000 +0.010410 0.009076 0.001887 +0.007241 0.008464 0.001704 +0.006460 0.007963 0.001715 +0.007150 0.007801 0.001529 +0.010674 0.008280 0.001802 +0.006617 0.008042 0.001761 +0.006874 0.007896 0.001681 +0.007532 0.007856 0.001578 +0.006529 0.007723 0.001545 +0.040176 0.010673 0.009894 +0.007170 0.010381 0.009488 +0.006634 0.010093 0.009143 +0.007105 0.009880 0.008821 +0.006902 0.009681 0.008535 +0.006268 0.009468 0.008289 +0.007247 0.009337 0.008044 +0.006843 0.009199 0.007826 +0.007542 0.009112 0.007615 +0.007043 0.009008 0.007426 +0.007207 0.008922 0.007249 +0.006725 0.008822 0.007090 +0.007246 0.008754 0.006935 +0.025455 0.009450 0.007591 +0.011946 0.009550 0.007448 +0.006728 0.009441 0.007318 +0.006563 0.009335 0.007197 +0.007007 0.009251 0.007077 +0.007522 0.009192 0.006956 +0.006391 0.009098 0.006855 +0.006703 0.009021 0.006753 +0.010147 0.009056 0.006646 +0.006508 0.008979 0.006557 +0.008520 0.008966 0.006457 +0.006604 0.008898 0.006374 +0.007099 0.008848 0.006289 +0.007183 0.008803 0.006207 +0.062674 0.010221 0.010671 +0.006785 0.010133 0.010544 +0.007262 0.010061 0.010417 +0.007390 0.009996 0.010295 +0.006266 0.009907 0.010185 +0.007565 0.009853 0.010069 +0.007859 0.009807 0.009956 +0.006778 0.009740 0.009852 +0.026690 0.010108 0.010058 +0.006794 0.010038 0.009960 +0.006620 0.009967 0.009865 +0.007763 0.009922 0.009767 +0.006820 0.009860 0.009677 +0.006866 0.009801 0.009589 +0.023273 0.010060 0.009676 +0.007387 0.010010 0.009590 +0.006900 
0.009952 0.009509 +0.008651 0.009928 0.009422 +0.010079 0.009931 0.009336 +0.039580 0.010451 0.010051 +0.006095 0.010376 0.009979 +0.007297 0.010324 0.009900 +0.006600 0.010262 0.009828 +Goodbye! diff --git a/hist.c b/hist.c index 803f6a6..84adef8 100644 --- a/hist.c +++ b/hist.c @@ -99,29 +99,29 @@ double hist_stddev(struct hist *h) return sqrt(hist_var(h)); } -void hist_print(struct hist *h) +void hist_print(struct hist *h, FILE *f) { - printf("Total: %u values\n", h->total); - printf("Highest value: %f\n", h->highest); - printf("Lowest value: %f\n", h->lowest); - printf("Mean: %f\n", hist_mean(h)); - printf("Variance: %f\n", hist_var(h)); - printf("Standard derivation: %f\n", hist_stddev(h)); + fprintf(f, "Total: %u values\n", h->total); + fprintf(f, "Highest value: %f\n", h->highest); + fprintf(f, "Lowest value: %f\n", h->lowest); + fprintf(f, "Mean: %f\n", hist_mean(h)); + fprintf(f, "Variance: %f\n", hist_var(h)); + fprintf(f, "Standard derivation: %f\n", hist_stddev(h)); if (h->higher > 0) - printf("Missed: %u values above %f\n", h->higher, h->high); + fprintf(f, "Missed: %u values above %f\n", h->higher, h->high); if (h->lower > 0) - printf("Missed: %u values below %f\n", h->lower, h->low); + fprintf(f, "Missed: %u values below %f\n", h->lower, h->low); if (h->total - h->higher - h->lower > 0) { - hist_plot(h); - char buf[(h->length + 1) * 8]; hist_dump(h, buf, sizeof(buf)); - printf(buf); + fprintf(f, "Matlab data: %s\n", buf); + + hist_plot(h, f); } } -void hist_plot(struct hist *h) +void hist_plot(struct hist *h, FILE *f) { char buf[HIST_HEIGHT]; memset(buf, '#', sizeof(buf)); @@ -135,13 +135,13 @@ void hist_plot(struct hist *h) } /* Print plot */ - printf("%3s | %9s | %5s | %s\n", "#", "Value", "Occur", "Plot"); - printf("--------------------------------------------------------------------------------\n"); + fprintf(f, "%3s | %9s | %5s | %s\n", "#", "Value", "Occur", "Plot"); + fprintf(f, 
"--------------------------------------------------------------------------------\n"); for (int i = 0; i < h->length; i++) { int bar = HIST_HEIGHT * ((double) h->data[i] / max); - printf("%3u | %+5.2e | " "%5u" " | %.*s\n", i, VAL(h, i), h->data[i], bar, buf); + fprintf(f, "%3u | %+5.2e | " "%5u" " | %.*s\n", i, VAL(h, i), h->data[i], bar, buf); } } diff --git a/hist.h b/hist.h index 6b1541c..ae0f95b 100644 --- a/hist.h +++ b/hist.h @@ -70,10 +70,10 @@ double hist_mean(struct hist *h); double hist_stddev(struct hist *h); /** Print all statistical properties of distribution including a graphilcal plot of the histogram. */ -void hist_print(struct hist *h); +void hist_print(struct hist *h, FILE *f); /** Print ASCII style plot of histogram */ -void hist_plot(struct hist *h); +void hist_plot(struct hist *h, FILE *f); /** Dump histogram data in Matlab format to buf */ void hist_dump(struct hist *h, char *buf, int len); diff --git a/main.c b/main.c index 32c4d8d..e6a2e38 100644 --- a/main.c +++ b/main.c @@ -34,6 +34,7 @@ struct config cfg = { int probe(int argc, char *argv[]); int emulate(int argc, char *argv[]); +int dist(int argc, char *argv[]); void quit(int sig, siginfo_t *si, void *ptr) { @@ -47,7 +48,7 @@ int main(int argc, char *argv[]) printf( "usage: %s CMD [OPTIONS]\n" " CMD can be one of:\n\n" " probe IP PORT Start TCP SYN+ACK RTT probes and write measurements data to STDOUT\n" - " live Read measurement data from STDIN and configure Kernel (tc-netem(8)) on-the-fly.\n" + " emulate Read measurement data from STDIN and configure Kernel (tc-netem(8)) on-the-fly.\n" " This mode only uses the mean and standard deviation of of the previous samples\n" " to configure the netem qdisc. 
This can be used to interactively replicate a network link.\n" "\n" @@ -129,7 +130,7 @@ check: if (!strcmp(cmd, "probe")) return probe(argc-optind-1, argv+optind+1); - else if (!strcmp(cmd, "live")) + else if (!strcmp(cmd, "emulate")) return emulate(argc-optind-1, argv+optind+1); else if (!strcmp(cmd, "dist")) return dist(argc-optind-1, argv+optind+1); diff --git a/netlink-private.h b/netlink-private.h new file mode 100644 index 0000000..367e75a --- /dev/null +++ b/netlink-private.h @@ -0,0 +1,66 @@ +/** Some private libnl3 headers + * + * Those are required for: + * + * Based on libnl3 3.2.26 + * + * @author Steffen Vogel + * @copyright 2014-2015, Steffen Vogel + * @license GPLv3 + *********************************************************************************/ + +#ifndef _NETLINK_PRIVATE_H_ +#define _NETLINK_PRIVATE_H_ + +#include + +struct rtnl_netem_corr +{ + uint32_t nmc_delay; + uint32_t nmc_loss; + uint32_t nmc_duplicate; +}; + +struct rtnl_netem_reo +{ + uint32_t nmro_probability; + uint32_t nmro_correlation; +}; + +struct rtnl_netem_crpt +{ + uint32_t nmcr_probability; + uint32_t nmcr_correlation; +}; + +struct rtnl_netem_dist +{ + int16_t * dist_data; + size_t dist_size; +}; + +struct rtnl_netem +{ + uint32_t qnm_latency; + uint32_t qnm_limit; + uint32_t qnm_loss; + uint32_t qnm_gap; + uint32_t qnm_duplicate; + uint32_t qnm_jitter; + uint32_t qnm_mask; + struct rtnl_netem_corr qnm_corr; + struct rtnl_netem_reo qnm_ro; + struct rtnl_netem_crpt qnm_crpt; + struct rtnl_netem_dist qnm_dist; +}; + +void *rtnl_tc_data(struct rtnl_tc *tc); + +#define BUG() \ + do { \ + fprintf(stderr, "BUG at file position %s:%d:%s\n", \ + __FILE__, __LINE__, __PRETTY_FUNCTION__); \ + assert(0); \ + } while (0) + +#endif \ No newline at end of file diff --git a/probe.c b/probe.c index 6cc0e97..ecb7484 100644 --- a/probe.c +++ b/probe.c @@ -108,6 +108,8 @@ retry: len = ts_recvmsg(sd, &msgh, 0, &ts_ack); int probe(int argc, char *argv[]) { + int run = 0, tfd; + /* Parse address 
*/ struct nl_addr *addr; struct sockaddr_in sin; @@ -138,9 +140,9 @@ int probe(int argc, char *argv[]) /* Enable Kernel TS support */ if (ts_enable_if("lo")) - perror("Failed to enable timestamping"); + fprintf(stderr, "Failed to enable timestamping: %s\n", strerror(errno)); if (ts_enable_sd(sd)) - perror("Failed to set SO_TIMESTAMPING"); + fprintf(stderr, "Failed to set SO_TIMESTAMPING: %s\n", strerror(errno)); /* Prepare payload */ struct timespec ts; @@ -150,20 +152,10 @@ int probe(int argc, char *argv[]) hist_create(&hist, 0, 0, 1); /* Start timer */ - struct itimerspec its = { - .it_interval = time_from_double(1 / cfg.rate), - .it_value = { 1, 0 } - }; - - int tfd = timerfd_create(CLOCK_REALTIME, 0); - if (tfd < 0) - error(-1, errno, "Failed to create timer"); - - if (timerfd_settime(tfd, 0, &its, NULL)) - error(-1, errno, "Failed to start timer"); + if ((tfd = timerfd_init(cfg.rate)) < 0) + error(-1, errno, "Failed to initilize timer"); - unsigned run = 0; - while (cfg.limit && run < cfg.limit) { + do { probe_tcp(sd, dport, &ts); double rtt = time_to_double(&ts); @@ -177,7 +169,7 @@ int probe(int argc, char *argv[]) double span = hist.highest - hist.lowest; hist_destroy(&hist); hist_create(&hist, MAX(0, hist.lowest - span * 0.1), hist.highest + span * 0.2, span / 20); - printf("Created new histogram: high=%f, low=%f, buckets=%u\n", + fprintf(stderr, "Created new histogram: high=%f, low=%f, buckets=%u\n", hist.high, hist.low, hist.length); /* Print header for output */ @@ -192,14 +184,14 @@ int probe(int argc, char *argv[]) printf("# Started: %s\n", date); printf("# RTT mu sigma (units in S)\n"); } - //else if (run > 20) - printf("%f %f %f\n", rtt, hist_mean(&hist), hist_stddev(&hist)); - - timerfd_wait(tfd); - run++; - } + + printf("%f %f %f\n", rtt, hist_mean(&hist), hist_stddev(&hist)); + fflush(stdout); + + run += timerfd_wait(tfd); + } while (cfg.limit && run < cfg.limit); - hist_print(&hist); + hist_print(&hist, stderr); hist_destroy(&hist); return 0; 
diff --git a/tc.c b/tc.c
index f9c1034..8ddfff8 100644
--- a/tc.c
+++ b/tc.c
@@ -5,6 +5,9 @@
  * @license GPLv3
  *********************************************************************************/
 
+#define _POSIX_C_SOURCE 1
+#include
+
 #include
 #include
 #include
@@ -50,7 +53,7 @@ int tc_prio(struct nl_sock *sock, struct rtnl_link *link, struct rtnl_tc **tc)
 	return ret;
 }
 
-int tc_netem(struct nl_sock *sock, struct rtnl_link *link, struct rtnl_tc **tc, struct tc_netem *ne)
+int tc_netem(struct nl_sock *sock, struct rtnl_link *link, struct rtnl_tc **tc)
 {
 	struct rtnl_qdisc *q;
 
@@ -65,21 +68,6 @@ int tc_netem(struct nl_sock *sock, struct rtnl_link *link, struct rtnl_tc **tc,
 	else
 		q = (struct rtnl_qdisc *) (*tc);
 
-	rtnl_netem_set_limit(q, ne->limit);
-	rtnl_netem_set_gap(q, ne->gap);
-	rtnl_netem_set_reorder_probability(q, ne->reorder_prob);
-	rtnl_netem_set_reorder_correlation(q, ne->reorder_corr);
-	rtnl_netem_set_corruption_probability(q, ne->corruption_prob);
-	rtnl_netem_set_corruption_correlation(q, ne->corruption_corr);
-	rtnl_netem_set_loss(q, ne->loss_prob);
-	rtnl_netem_set_loss_correlation(q, ne->loss_corr);
-	rtnl_netem_set_duplicate(q, ne->duplication_prob);
-	rtnl_netem_set_duplicate_correlation(q, ne->duplication_corr);
-	rtnl_netem_set_delay(q, ne->delay);
-	rtnl_netem_set_jitter(q, ne->jitter);
-	rtnl_netem_set_delay_correlation(q, ne->delay_corr);
-	//rtnl_netem_set_delay_distribution(q, ne->delay_distr);
-
 	int ret = rtnl_qdisc_add(sock, q, NLM_F_CREATE);
 
 	*tc = TC_CAST(q);
@@ -141,3 +129,68 @@ int tc_print_stats(struct tc_stats *stats)
 {
 	return 0;
 }
+
+/** Print a probability and its optional correlation as "<name> <prob>% [<corr>%] ".
+ *
+ * Nothing is printed unless the probability is greater than zero.
+ */
+static void tc_print_prob(const char *name, int prob, int corr)
+{
+	if (prob <= 0)
+		return;
+
+	printf("%s %d%% ", name, prob);
+
+	if (corr > 0)
+		printf("%d%% ", corr);
+}
+
+/** Print the parameters of a netem qdisc as a single line on STDOUT.
+ *
+ * A parameter is only printed if its libnl getter returns a value greater than
+ * zero. Jitter is only printed together with a delay, and a correlation is only
+ * printed together with the parameter it belongs to.
+ *
+ * @param tc The traffic control object; it is cast to a struct rtnl_qdisc.
+ * @return Always 0.
+ */
+int tc_print_netem(struct rtnl_tc *tc)
+{
+	struct rtnl_qdisc *ne = (struct rtnl_qdisc *) tc;
+
+	/* libnl's netem getters return int; every value is fetched once and printed as such. */
+	int limit = rtnl_netem_get_limit(ne);
+	if (limit > 0)
+		printf("limit %dpkts ", limit);
+
+	int delay = rtnl_netem_get_delay(ne);
+	if (delay > 0) {
+		printf("delay %fms ", delay / 1000.0);
+
+		int jitter = rtnl_netem_get_jitter(ne);
+		if (jitter > 0) {
+			printf("jitter %fms ", jitter / 1000.0);
+
+			int corr = rtnl_netem_get_delay_correlation(ne);
+			if (corr > 0)
+				printf("%d%% ", corr);
+		}
+	}
+
+	tc_print_prob("loss",
+		rtnl_netem_get_loss(ne),
+		rtnl_netem_get_loss_correlation(ne));
+	tc_print_prob("reorder",
+		rtnl_netem_get_reorder_probability(ne),
+		rtnl_netem_get_reorder_correlation(ne));
+	tc_print_prob("corruption",
+		rtnl_netem_get_corruption_probability(ne),
+		rtnl_netem_get_corruption_correlation(ne));
+	tc_print_prob("duplication",
+		rtnl_netem_get_duplicate(ne),
+		rtnl_netem_get_duplicate_correlation(ne));
+
+	printf("\n");
+
+	return 0;
+}
diff --git a/tc.h b/tc.h
index 15caf3e..7a698b5 100644
--- a/tc.h
+++ b/tc.h
@@ -12,23 +12,6 @@
 #include
 #include
 
-struct tc_netem {
-	int limit;
-	int gap;
-	int reorder_prob;
-	int reorder_corr;
-	int corruption_prob;
-	int corruption_corr;
-	int loss_prob;
-	int loss_corr;
-	int duplication_prob;
-	int duplication_corr;
-	int jitter;
-	int delay;
-	int delay_corr;
-	char *delay_distr;
-};
-
 /*struct tc_stats {
 	uint64_t packets;	// Number of packets seen.
 	uint64_t bytes;		// Total bytes seen.
@@ -45,7 +28,7 @@ struct rtnl_link * tc_get_link(struct nl_sock *sock, const char *dev);
 
 int tc_prio(struct nl_sock *sock, struct rtnl_link *link, struct rtnl_tc **tc);
 
-int tc_netem(struct nl_sock *sock, struct rtnl_link *link, struct rtnl_tc **tc, struct tc_netem *ne);
+int tc_netem(struct nl_sock *sock, struct rtnl_link *link, struct rtnl_tc **tc);
 
 int tc_classifier(struct nl_sock *sock, struct rtnl_link *link, struct rtnl_tc **tc, int mark, int mask);
 
@@ -55,4 +38,6 @@ int tc_get_stats(struct nl_sock *sock, struct rtnl_tc *tc, struct tc_stats *stat
 
 int tc_print_stats(struct tc_stats *stats);
 
+int tc_print_netem(struct rtnl_tc *tc);
+
 #endif
\ No newline at end of file
diff --git a/dump.sh b/tcdump.sh
similarity index 100%
rename from dump.sh
rename to tcdump.sh
diff --git a/timing.c b/timing.c
index ac7498a..5380d10 100644
--- a/timing.c
+++ b/timing.c
@@ -14,6 +14,48 @@
 
 #include "timing.h"
+#include <errno.h>
+#include <unistd.h>
 
+/** Create a timerfd which expires periodically.
+ *
+ * The first expiration is scheduled after one second. The period of all
+ * following expirations is time_from_double(1 / rate).
+ *
+ * @param rate Expirations per time unit; has to be greater than zero.
+ * @return A file descriptor of the armed timer, or -1 with errno set if the
+ *         rate is invalid or the timer could not be created or armed.
+ */
+int timerfd_init(double rate)
+{
+	/* Also catches NaN; there is no valid period for such rates. */
+	if (!(rate > 0)) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	struct itimerspec its = {
+		.it_interval = time_from_double(1 / rate),
+		.it_value = { 1, 0 }
+	};
+
+	int tfd = timerfd_create(CLOCK_REALTIME, 0);
+	if (tfd < 0)
+		return -1;
+
+	if (timerfd_settime(tfd, 0, &its, NULL)) {
+		/* Close the descriptor instead of leaking it, but keep the errno */
+		/* of timerfd_settime() for the caller. */
+		int err = errno;
+
+		close(tfd);
+		errno = err;
+
+		return -1;
+	}
+
+	return tfd;
+}
+
 uint64_t timerfd_wait(int fd)
 {
 	uint64_t runs;
diff --git a/timing.h b/timing.h
index afbb3f8..d590d37 100644
--- a/timing.h
+++ b/timing.h
@@ -11,6 +11,13 @@
 #include
 #include
 
+/** Create a timerfd which expires periodically.
+ *
+ * @param rate Expirations per time unit; has to be greater than zero.
+ * @return A file descriptor of the armed timer, or -1 on failure.
+ */
+int timerfd_init(double rate);
+
 /** Wait until timer elapsed
  *
  * @param fd A file descriptor which was created by timerfd_create(3).