VILLASnode/tests/benchmarks/evaluate_logs.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Benchmark"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Settings\n",
    "root_dir = '/home/dennis/VILLASnode/tests/benchmarks'\n",
    "benchmark_dir = 'benchmarks_20180803_11-43-33'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load all files\n",
    "### Results\n",
    "...\n",
    "\n",
    "### Source log\n",
    "..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loaded input file: /home/dennis/VILLASnode/tests/benchmarks/benchmarks_20180803_11-43-33/0_TCP-3-50000-500000_input.csv\n",
      "Loaded input file: /home/dennis/VILLASnode/tests/benchmarks/benchmarks_20180803_11-43-33/1_UDP-3-50000-500000_input.csv\n",
      "Loaded output file: /home/dennis/VILLASnode/tests/benchmarks/benchmarks_20180803_11-43-33/0_TCP-3-50000-500000_output.csv\n",
      "Loaded output file: /home/dennis/VILLASnode/tests/benchmarks/benchmarks_20180803_11-43-33/1_UDP-3-50000-500000_output.csv\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "import os\n",
    "import re\n",
    "\n",
    "# First, source log\n",
    "\n",
    "# Initialize arrays\n",
    "files_array = []\n",
    "input_paths = []\n",
    "input_array = []\n",
    "output_paths = []\n",
    "output_array = []\n",
    "\n",
    "# Save complete path of files in an array\n",
    "for subdir, dirs, files in os.walk(root_dir+'/'+benchmark_dir):\n",
    "    for file in files:\n",
    "        # Regex to match .csv files\n",
    "        if re.match(r'.*?_input.csv', file, re.M|re.I):\n",
    "            input_paths.append(os.path.join(subdir, file))\n",
    "        # Regex to match .log files\n",
    "        elif re.match(r'.*?_output.csv', file, re.M|re.I):\n",
    "            output_paths.append(os.path.join(subdir, file))\n",
    "        \n",
    "    filles_array = files\n",
    "\n",
    "# INPUT\n",
    "# Loop through array with result files and save the comma separated data into a new array\n",
    "for file in input_paths:\n",
    "    print(\"Loaded input file: {}\".format(file))\n",
    "    \n",
    "    # Load file \n",
    "    input_array.append(np.genfromtxt(file, delimiter=','))\n",
    "    \n",
    "# OUTPUT\n",
    "# Loop through array with result files and save the comma separated data into a new array\n",
    "for file in output_paths:\n",
    "    print(\"Loaded output file: {}\".format(file))\n",
    "    \n",
    "    # Load file \n",
    "    output_array.append(np.genfromtxt(file, delimiter=','))\n",
    "\n",
    "# Small sanity check, are arrays of the same size?\n",
    "if len(input_array) != len(output_array):\n",
    "    print(\"Error: There should be as many input files as there are output files!\")\n",
    "    exit();"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Save characteristics of tests\n",
    "All important settings are contained in the name of the file. We will save them in a separate array. The structure of the name is as follows:\n",
    "\n",
    "```bash\n",
    "root_dir/benchmarks_${DATE}/${ID}_${MODE}-${VALUES IN SMP}-${RATE}-${SENT SMPS}\n",
    "```\n",
    "\n",
    "Thus, we will structure it in the settings_array as follows:\n",
    "\n",
    "* `settings_array[*][0] = ID`\n",
    "* `settings_array[*][1] = MODE`\n",
    "* `settings_array[*][2] = VALUES IN SAMPLE`\n",
    "* `settings_array[*][3] = RATE`\n",
    "* `settings_array[*][4] = TOTAL NUMBER OF SAMPLES`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['0', 'TCP', '3', '50000', '500000']\n",
      "['1', 'UDP', '3', '50000', '500000']\n"
     ]
    }
   ],
   "source": [
    "# Array with settings\n",
    "settings_array = []\n",
    "\n",
    "for file in output_paths:\n",
    "    settings = []\n",
    "    \n",
    "    matchObj = re.match(r'.*?(\\d*)_(\\w*)-(\\d*)-(\\d*)-(\\d*)_output.csv', file, re.M|re.I)\n",
    "\n",
    "    # Fill values to array\n",
    "    if matchObj:\n",
    "        for i in range(0,5):\n",
    "            settings.append(matchObj.group(i+1))\n",
    "    \n",
    "        # Append array to big array\n",
    "        settings_array.append(settings)\n",
    "        \n",
    "        print(settings)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Get missed steps from source node\n",
    "..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Test 0 missed 1784 (0.36%) of 500000 samples that were intended to send at the send side.\n",
      "Test 1 missed 1930 (0.39%) of 500000 samples that were intended to send at the send side.\n"
     ]
    }
   ],
   "source": [
    "# Number of missing samples at receive side\n",
    "missed_send_arr = []\n",
    "# Percentage of missed samples\n",
    "perc_miss_send_arr = []\n",
    "\n",
    "# Generate real total and number of missing samples.\n",
    "# Print percentage of missed samples\n",
    "for (i, csv_vec) in enumerate(input_array):\n",
    "    # Get number of missing samples\n",
    "    missed_send_arr.append(int(settings_array[i][4]) - len(csv_vec))\n",
    "    \n",
    "    # Take percentage\n",
    "    perc_miss_send_arr.append(round(missed_send_arr[i] / int(settings_array[i][4]) * 100, 2))\n",
    "    \n",
    "    print(\"Test {} missed {} ({}%) of {} samples that were intended to send at the send side.\"\n",
    "          .format(settings_array[i][0], missed_send_arr[i], perc_miss_send_arr[i], settings_array[i][4]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Get missed steps from destination node\n",
    "..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Test 0 missed 2010 (0.4%) of 500000 samples that were intended to send.\n",
      "Test 1 missed 2125 (0.43%) of 500000 samples that were intended to send.\n"
     ]
    }
   ],
   "source": [
    "# Number of missing samples at receive side\n",
    "missed_recv_arr = []\n",
    "# Percentage of missed samples\n",
    "perc_miss_recv_arr = []\n",
    "\n",
    "# Generate real total and number of missing samples.\n",
    "# Print percentage of missed samples\n",
    "for (i, csv_vec) in enumerate(output_array):\n",
    "    # Get number of missing samples\n",
    "    missed_recv_arr.append(int(settings_array[i][4]) - len(csv_vec))\n",
    "    \n",
    "    # Take percentage\n",
    "    perc_miss_recv_arr.append(round(missed_recv_arr[i] / int(settings_array[i][4]) * 100, 2))\n",
    "    \n",
    "    print(\"Test {} missed {} ({}%) of {} samples that were intended to send.\"\n",
    "          .format(settings_array[i][0], missed_recv_arr[i], perc_miss_recv_arr[i], settings_array[i][4]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Check first and second sample from receive & destination node\n",
    "..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "First and second sample of test 0: 0.0 and 1728.0, respectively\n",
      "First and second sample of test 1: 0.0 and 1904.0, respectively\n",
      "\n",
      "First and second sample of test 0: 1978.0 and 1979.0, respectively\n",
      "First and second sample of test 1: 2123.0 and 2124.0, respectively\n"
     ]
    }
   ],
   "source": [
    "# Check first and second sample\n",
    "# INPUT\n",
    "first_second_smp_input = []\n",
    "\n",
    "for (i, csv_vec) in enumerate(input_array):\n",
    "    first_second_smp_input.append([csv_vec[0][3], csv_vec[1][3]])\n",
    "    print(\"First and second sample of test {}: {} and {}, respectively\".format(settings_array[i][0],\n",
    "                                                                               first_second_smp_input[i][0],\n",
    "                                                                               first_second_smp_input[i][1]))\n",
    "\n",
    "print(\"\")\n",
    "# OUTPUT\n",
    "first_second_smp_output = []\n",
    "\n",
    "for (i, csv_vec) in enumerate(output_array):\n",
    "    first_second_smp_output.append([csv_vec[0][3], csv_vec[1][3]])\n",
    "    print(\"First and second sample of test {}: {} and {}, respectively\".format(settings_array[i][0],\n",
    "                                                                               first_second_smp_output[i][0],\n",
    "                                                                               first_second_smp_output[i][1]))\n",
    "    "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Compare input and output data\n",
    "..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 186,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "226 samples were never transferred\n",
      "0 of these errors occured after the first sample arrived at the receive side.\n",
      "\n",
      "195 samples were never transferred\n",
      "0 of these errors occured after the first sample arrived at the receive side.\n",
      "\n"
     ]
    }
   ],
   "source": [
    "missing_seq = []\n",
    "never_trans_total_arr = []\n",
    "never_trans_after_arr = []\n",
    "\n",
    "# Small check if formats of arrays make sense\n",
    "if len(input_array) < len(output_array):\n",
    "    print(\"Error: Something went wrong! The length of the input array should always be bigger than or equal to the length of the output array\")\n",
    "    exit();\n",
    "    \n",
    "# Loop through input_array, since this is always bigger or equal to output array\n",
    "for (i, csv_vec) in enumerate(input_array):\n",
    "    k = 0\n",
    "    missing_seq.append([])\n",
    "    for (j, line) in enumerate(csv_vec):\n",
    "        if line[3] != output_array[i][k][3]:\n",
    "            missing_seq[i].append(line[3])\n",
    "        else:\n",
    "            k += 1\n",
    "            \n",
    "    never_trans_total_arr.append(len(missing_seq[i]))\n",
    "    \n",
    "    never_trans_after_arr.append(np.sum(missing_seq[i] > first_second_smp_output[i][0]))\n",
    "            \n",
    "    print(\"{} samples were never transferred\".format(never_trans_total_arr[i]))\n",
    "    print(\"{} of these errors occured after the first sample arrived at the receive side.\".format(never_trans_after_arr[i]))\n",
    "\n",
    "    print(\"\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "## Plot data\n",
    "..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 244,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Define Fancy Box function we use\n",
    "def plot_fancy_box(bottom, height):\n",
    "    top = bottom + height\n",
    "    \n",
    "    p = FancyBboxPatch((left, bottom),\n",
    "                       width,\n",
    "                       height,\n",
    "                       boxstyle=\"round, pad=0.005\",\n",
    "                       \n",
    "                       ec=\"#dbdbdb\", \n",
    "                       fc=\"white\", \n",
    "                       alpha=0.85,\n",
    "                       transform=ax.transAxes\n",
    "                      )\n",
    "    \n",
    "    ax.add_patch(p)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "from matplotlib.font_manager import FontProperties\n",
    "from matplotlib.patches import FancyBboxPatch\n",
    "import os\n",
    "\n",
    "# Start creating plots\n",
    "for (i, csv_vec) in enumerate(output_array):\n",
    "    # Create figure\n",
    "    fig = plt.figure(num=None, figsize=(12, 8), dpi=90, facecolor='w', edgecolor='k')\n",
    "\n",
    "    # Add plot and set title\n",
    "    ax = fig.add_subplot(111)\n",
    "    \n",
    "    # Set subtitle \n",
    "    title = \"Test {} — Mode: {}, Values in Sample: {}, Rate: {}, Total number of Samples: {}\".format(settings_array[i][0],\n",
    "                                                                                                     settings_array[i][1], \n",
    "                                                                                                     settings_array[i][2], \n",
    "                                                                                                     settings_array[i][3], \n",
    "                                                                                                     settings_array[i][4])\n",
    "    ax.set_title(title, fontsize=12, pad=12, loc='center')\n",
    "    \n",
    "    # Set grid\n",
    "    ax.set_axisbelow(True)\n",
    "    ax.grid(True, linestyle='--')\n",
    "\n",
    "    bins = 10000\n",
    "    x_limit=0.00005\n",
    "    \n",
    "    # Data in plot\n",
    "    # http://www.color-hex.com/color-palette/33602\n",
    "    csv_vec_t = csv_vec.transpose()\n",
    "    \n",
    "    ax.hist(csv_vec_t[2] * 1e6, label='t_sent -> t_recv offset            ',\n",
    "            edgecolor='black',\n",
    "            bins=bins,\n",
    "            color='#00549f')\n",
    "\n",
    "    # Set axis and calculate values above limit\n",
    "    plt.xlim([0,x_limit])\n",
    "       \n",
    "\n",
    "    plt.xticks(np.arange(0, 50, 2), fontsize=10)\n",
    "    from matplotlib.ticker import MultipleLocator\n",
    "    minorLocator = MultipleLocator(1)\n",
    "    ax.xaxis.set_minor_locator(minorLocator)\n",
    "        \n",
    "    plt.yticks(fontsize=11)\n",
    "\n",
    "    # Labels\n",
    "    ax.set_xlabel('time [µs]', fontsize=13, labelpad=20)\n",
    "    ax.set_ylabel('frequency', fontsize=13, labelpad=20)\n",
    "    ax.set_yscale('log')\n",
    "\n",
    "    # Create text for offset\n",
    "    off_smaller_2p5us  = round((np.size(csv_vec_t[2][csv_vec_t[2] < 0.0000025]) / np.size(csv_vec_t[2])) * 100, 2)\n",
    "    off_smaller_5us = round((np.size(csv_vec_t[2][csv_vec_t[2] < 0.000005]) / np.size(csv_vec_t[2])) * 100, 2)\n",
    "    off_bigger_50us = round((np.size(csv_vec_t[2][csv_vec_t[2] > x_limit]) / np.size(csv_vec_t[2])) * 100, 2)\n",
    "    \n",
    "    offset_text  = 'offset <  {0}: {1:2.2f}% of samples\\n'.format('2.5µs', off_smaller_2p5us)\n",
    "    offset_text += 'offset <     {0}: {1:2.2f}% of samples\\n'.format('5µs', off_smaller_5us)\n",
    "    offset_text += 'offset >   {0}: {1:2.2f}% of samples'.format('50µs', off_bigger_50us)\n",
    "    \n",
    "    \n",
    "    start_nr_text  = '{0: <25}  {1: <8}    {2: <9}\\n'.format(\"1st, 2nd (input):\",\n",
    "                                                           int(first_second_smp_input[i][0]),\n",
    "                                                           int(first_second_smp_input[i][1]))\n",
    "    start_nr_text += '{0: <24}  {1: <8} {2: <9}'.format(\"1st, 2nd (output):\",\n",
    "                                                         int(first_second_smp_output[i][0]),\n",
    "                                                         int(first_second_smp_output[i][1]))\n",
    "    \n",
    "       \n",
    "    \n",
    "    # Create text for missed steps\n",
    "    missed_text  = '{0} {1:>5} ({2:>2.2f}%)\\n'.format(\"missing at send side:\",\n",
    "                                                           missed_send_arr[i],\n",
    "                                                           perc_miss_send_arr[i])\n",
    "    missed_text += '{0} {1:>6} ({2:>2.2f}%)'.format(\"missing at recv side:\",\n",
    "                                                         missed_recv_arr[i],\n",
    "                                                         perc_miss_recv_arr[i])\n",
    "    \n",
    "    # Create text for missed steps\n",
    "    never_transferred  = '{} {}\\n'.format(\"total:\", never_trans_total_arr[i])\n",
    "    never_transferred += '{0} {1:<28}'.format(\"after connection established:\", never_trans_after_arr[i])\n",
    "    \n",
    "    # Create text for extrema\n",
    "    extrema_text_column_1  = 'min: {0:4.2f}{1:<8}\\n'.format(round(np.min(csv_vec_t[2]) * 1e6, 2), 'µs')\n",
    "    extrema_text_column_1 += 'max: {0:4.2f}{1:<8}'.format(round(np.max(csv_vec_t[2]) * 1e6, 2), 'µs')\n",
    "    \n",
    "    extrema_text_2  = 'average:   {0:4.2f}{1:>}\\n'.format(round(np.average(csv_vec_t[2]) * 1e6, 2), 'µs')\n",
    "    extrema_text_2 += 'median :    {0:4.2f}{1:>}'.format(round(np.median(csv_vec_t[2]) * 1e6, 2), 'µs') \n",
    "           \n",
    "    # bbox accepts FancyBboxPatch prop dict\n",
    "    font_header = FontProperties()\n",
    "    font_header.set_weight('bold')\n",
    "    font_header.set_size(10)\n",
    "    \n",
    "    \n",
    "    # Set box constraints\n",
    "    left, width = .705, .295\n",
    "    right = left + width\n",
    "    \n",
    "    plot_fancy_box(bottom = 0.28, height = 0.72)\n",
    "\n",
    "    # Set box constraints\n",
    "    left, width = .715, .275\n",
    "    right = left + width\n",
    "    \n",
    "    #Create legend\n",
    "    ax.text(0.986, 0.985, \"Legend\",\n",
    "            verticalalignment='top', horizontalalignment='right',\n",
    "            transform=ax.transAxes,\n",
    "            color='black', fontproperties = font_header)\n",
    "\n",
    "        \n",
    "    ax.legend(loc=1, bbox_to_anchor=(1, 0.965), fontsize = 10)\n",
    "\n",
    "    \n",
    "    # Offset boxes\n",
    "    plot_fancy_box(bottom = 0.7775, height = 0.0825)\n",
    "    \n",
    "    ax.text(0.986, 0.892, \"Offset Ranges\",\n",
    "            verticalalignment='top', horizontalalignment='right',\n",
    "            transform=ax.transAxes,\n",
    "            color='black', fontproperties = font_header)\n",
    "    ax.text(left, 0.857, offset_text,\n",
    "            verticalalignment='top', horizontalalignment='left',\n",
    "            transform=ax.transAxes,\n",
    "            color='black', fontsize=10)\n",
    "    \n",
    "    # Start Nr\n",
    "    plot_fancy_box(bottom = 0.6575, height = 0.055)\n",
    "    \n",
    "    ax.text(0.986, 0.7475, \"Sequence numbers in CSV files\",\n",
    "            verticalalignment='top', horizontalalignment='right',\n",
    "            transform=ax.transAxes,\n",
    "            color='black', fontproperties = font_header)\n",
    "    ax.text(left, 0.7075, start_nr_text,\n",
    "            verticalalignment='top', horizontalalignment='left',\n",
    "            transform=ax.transAxes,\n",
    "            color='black', fontsize=10)\n",
    "    \n",
    "    # Missed steps\n",
    "    plot_fancy_box(bottom = 0.5375, height = 0.055)\n",
    "    \n",
    "    ax.text(0.986, 0.6275, \"Missed steps\",\n",
    "            verticalalignment='top', horizontalalignment='right',\n",
    "            transform=ax.transAxes,\n",
    "            color='black', fontproperties = font_header)\n",
    "    ax.text(left, 0.5875, missed_text,\n",
    "            verticalalignment='top', horizontalalignment='left',\n",
    "            transform=ax.transAxes,\n",
    "            color='black', fontsize=10)\n",
    "    \n",
    "    # Never transferred\n",
    "    plot_fancy_box(bottom = 0.4175, height = 0.055)\n",
    "    \n",
    "    ax.text(0.986, 0.5075, \"Samples not transferred\",\n",
    "            verticalalignment='top', horizontalalignment='right',\n",
    "            transform=ax.transAxes,\n",
    "            color='black', fontproperties = font_header)\n",
    "    ax.text(left, 0.4675, never_transferred,\n",
    "            verticalalignment='top', \n",
    "            horizontalalignment='left',\n",
    "            transform=ax.transAxes,\n",
    "            color='black', fontsize=10)\n",
    "    \n",
    "    # Extrema\n",
    "    plot_fancy_box(bottom = 0.2975, height = 0.055)\n",
    "    \n",
    "    \n",
    "    ax.text(0.986, 0.3875, \"Extrema\",\n",
    "            verticalalignment='top', horizontalalignment='right',\n",
    "            transform=ax.transAxes,\n",
    "            color='black', fontproperties = font_header)\n",
    "    ax.text(left, 0.3475, extrema_text_1,\n",
    "            verticalalignment='top', horizontalalignment='left',\n",
    "            transform=ax.transAxes,\n",
    "            color='black', fontsize=10)\n",
    "    ax.text(0.85, 0.3475, extrema_text_2,\n",
    "            verticalalignment='top', horizontalalignment='left',\n",
    "            transform=ax.transAxes,\n",
    "            color='black', fontsize=10)\n",
    "   \n",
    "\n",
    "    #Show plot\n",
    "    plt.show()\n",
    "\n",
    "    \n",
    "    #Save plot\n",
    "    fig.savefig('./plots/{}_{}.png'.format(settings_array[i][0], settings_array[i][1]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}