Hop Count Stability

Note: Some fields in the tb-v5b files are incorrect, so the IP lists extracted from tb-v5b are mapped onto the corresponding tb-v5c files.

In [1]:
# Inject a CSS rule that sets the notebook's `.container` element to 90% width.
# HTML is imported from the public IPython.display module rather than the
# internal IPython.core.display path.
from IPython.display import HTML
HTML("<style>.container { width:90% !important; }</style>")
Out[1]:
In [2]:
# Imports
%matplotlib inline
import matplotlib.lines as mlines
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import csv
from numpy import cumsum
from numpy import genfromtxt
import matplotlib.dates as mdate
import datetime as dt

def vectortoccdf (invar):
    """Return the complementary CDF of the counts in column 0 of ``invar``.

    ``invar`` is indexed as ``invar[:,0]``, so it must be a 2-D array whose
    first column holds the per-bin counts.  Entry ``k`` of the result is the
    share of the total count found in bin ``k`` and all later bins; for a
    non-empty input the first entry is therefore 1.  The result has one
    entry per row of ``invar``.
    """
    counts = invar[:,0]
    share = 1.0*counts/(1.0*sum(counts))
    # 1 - running share is the mass strictly after each bin; putting a 1 in
    # front and dropping the last entry shifts that by one position.
    shifted = np.insert(1.0-cumsum(share), 0, 1)
    return np.delete(shifted, -1)

def vectortocdf (invar):
    """Return the empirical CDF of the counts in column 0 of ``invar``.

    ``invar`` is indexed as ``invar[:,0]``, so it must be a 2-D array whose
    first column holds the per-bin counts.  Entry ``k`` of the result is the
    share of the total count found in bins ``0..k``; the last entry is 1.
    """
    counts = invar[:,0]
    total = 1.0*sum(counts)
    return cumsum(1.0*counts/total)

def vectortocdfnoz (invar):
    """Return the running share of the total for the counts in ``invar[:,0]``.

    The computation is the same as in ``vectortocdf``: each count is divided
    by the column total and the shares are accumulated, so the last entry
    is 1.
    """
    counts = invar[:,0]
    total = 1.0*sum(counts)
    return cumsum(1.0*counts/total)

Prepare IPv6 files v5b

In [20]:
%%bash
# Report the line count of the IPv6 HC subset file, print its existing
# field-9 histogram, pass the histogram to ../process.sh and list it.
ip=ipv6
file="${ip}hc.csv.gr1pkt.gr1ttl"
hist="${file}.cut9.sortn.uniqc"
wc -l "$file"
# The histogram is not rebuilt here; the generating command is left disabled:
#   cut -d, -f9 $file | sort -n | uniq -c > $file.cut9.sortn.uniqc
cat "$hist"
../process.sh "$hist"
ls "$hist"
307442 ipv6hc.csv.gr1pkt.gr1ttl
131332,0
155178,1
10303,2
4256,3
2373,4
1908,5
846,6
349,7
339,8
46,9
31,10
225,11
65,12
14,13
19,14
6,15
9,16
9,17
16,18
18,19
4,20
3,21
5,22
2,23
8,24
1,25
8,26
4,27
4,28
1,29
8,31
25,33
1,39
2,48
1,50
3,54
5,55
7,56
2,57
1,71
1,80
1,101
1,105
1,117
1,119
ipv6hc.csv.gr1pkt.gr1ttl.cut9.sortn.uniqc

Prepare IPv6 HC files v5c

In [ ]:
%%bash
# For each IPv6 HC subset file, write its first comma-separated field to
# <file>.ips, skipping every line that contains "IP" (presumably the header
# row).  The four extractions run in parallel.
base="ipv6hc.csv.gr1pkt.gr1ttl"
for suffix in "" ".eq1ab" ".gr1ab.no-overlaps" ".gr1ab.overlaps"
do
    src="${base}${suffix}"
    cut -d, -f1 "$src" | grep -v "IP" > "${src}.ips" &
done
# Block until every background pipeline has finished.
wait
In [172]:
%%bash 
# Select from the tb-v5c IPv6 HC file the rows matching each tb-v5b IP list
# (grepcidr runs in the background) and print the size of each list.
base="ipv6hc.csv.gr1pkt.gr1ttl"
target="../tb-v5c/ipv6hc.csv"
for suffix in "" ".eq1ab" ".gr1ab.no-overlaps" ".gr1ab.overlaps"
do
    ip_list="${base}${suffix}.ips"
    grepcidr -f "$ip_list" "$target" > "${target}-grepcidr-${ip_list}" &
    wc -l "$ip_list"
done
# Block until every background grepcidr has finished.
wait
307442 ipv6hc.csv.gr1pkt.gr1ttl.ips
118933 ipv6hc.csv.gr1pkt.gr1ttl.eq1ab.ips
89148 ipv6hc.csv.gr1pkt.gr1ttl.gr1ab.no-overlaps.ips
99361 ipv6hc.csv.gr1pkt.gr1ttl.gr1ab.overlaps.ips
In [173]:
%%bash
# Line counts of the grepcidr outputs written to ../tb-v5c/ for the IPv6 HC
# IP lists; these can be compared with the sizes of the .ips lists.
wc -l ../tb-v5c/ipv6hc*grepcidr*ips
  118933 ../tb-v5c/ipv6hc.csv-grepcidr-ipv6hc.csv.gr1pkt.gr1ttl.eq1ab.ips
   89148 ../tb-v5c/ipv6hc.csv-grepcidr-ipv6hc.csv.gr1pkt.gr1ttl.gr1ab.no-overlaps.ips
   99361 ../tb-v5c/ipv6hc.csv-grepcidr-ipv6hc.csv.gr1pkt.gr1ttl.gr1ab.overlaps.ips
  307442 ../tb-v5c/ipv6hc.csv-grepcidr-ipv6hc.csv.gr1pkt.gr1ttl.ips
  614884 total
In [ ]:
%%bash
# Build the field-9 histogram for every tb-v5c IPv6 HC file, post-process it
# with ../process.sh and stage the results in git.
#
# Stop right away if the directory is missing; otherwise the redirections
# below would create files in the wrong directory.
cd ../tb-v5c/ || exit 1
ext="cut9.sortn.uniqc"
for src in ipv6hc*grepcidr*ips ipv6hc.csv
do
    hist="$src.$ext"
    # Lines containing "amplitude" (presumably the header token) are dropped.
    cut -d, -f9 "$src" | sort -n | uniq -c | grep -v "amplitude" > "$hist"
    ../process.sh "$hist"
done
# No `wait` is needed (nothing runs in the background) and no `cd` back:
# a %%bash cell runs in its own process, so the directory change ends here.
git add ../tb-v5c/ipv6hc*"$ext"

Plot IPv6 HC v5c

In [175]:
%%bash
# Show the first three rows of the IPv6 HC histogram.
# NOTE(review): the path is relative, so this reads the copy in the notebook's
# working directory, not the one under ../tb-v5c/ - confirm which is intended.
head -n 3 ipv6hc.csv.cut9.sortn.uniqc
1449685,0
155178,1
10303,2
In [23]:
# old keep for ref
import numpy as np
from matplotlib import rc
rc('text', usetex=False)

# One entry per ECDF curve, in legend order:
# (histogram file, line format, extra plot() keyword arguments, legend label).
# Each file is read as two integer columns: column 0 is the count and
# column 1 the amplitude value used on the x axis.
hc6_curves = [
    ("../tb-v5c/ipv6hc.csv.cut9.sortn.uniqc",
     'r-', {}, "(A) all IPs"),
    ("../tb-v5c/ipv6hc.csv-grepcidr-ipv6hc.csv.gr1pkt.gr1ttl.ips.cut9.sortn.uniqc",
     'g-o', {'markevery': slice(1,11,2)}, "(C2) IPs $>$ 1ttl"),
    ("../tb-v5c/ipv6hc.csv-grepcidr-ipv6hc.csv.gr1pkt.gr1ttl.eq1ab.ips.cut9.sortn.uniqc",
     'b-s', {'markevery': 2}, "(D1) $>$ 1ttl, =1 bin"),
    ("../tb-v5c/ipv6hc.csv-grepcidr-ipv6hc.csv.gr1pkt.gr1ttl.gr1ab.no-overlaps.ips.cut9.sortn.uniqc",
     'k--', {}, "(E1)$>$1ttl, $>$1 bin, no overlaps"),
    ("../tb-v5c/ipv6hc.csv-grepcidr-ipv6hc.csv.gr1pkt.gr1ttl.gr1ab.overlaps.ips.cut9.sortn.uniqc",
     'k-x', {}, "(E2) $>$1ttl, $>$1 bin, overlaps"),
]

for hist_path, line_fmt, plot_kwargs, legend_label in hc6_curves:
    hist = genfromtxt(hist_path, delimiter=',', skip_header=0, dtype='int64')
    ecdf = vectortocdf(hist)
    print(sum(hist[:,0]))   # total count over all amplitude values
    print(ecdf[0:10])       # first ten ECDF values
    plt.plot(hist[:,1], ecdf, line_fmt, label=legend_label, **plot_kwargs)

ax = plt.gca()
ax.set_xlim(xmin=0,xmax=10)
ax.set_ylim(ymax=1)
plt.xticks(np.arange(0,16,1),np.arange(0,16,1))
# Raw string: '\l' is not a valid escape sequence in a normal string literal.
plt.ylabel(r'ECDF: P[x$\leq$X]')
plt.xlabel('HC amplitude (max - min) per IPv6 address')
plt.legend(loc="lower right",prop={'size':10})
plt.gcf().subplots_adjust(bottom=0.11,right=0.97,left=0.11,top=0.96)
plt.savefig('../../figures/amplitude-hc-ipv6.pdf', format='pdf', dpi=2000)
plt.show()
1625795
[ 0.87034528  0.9795417   0.98961985  0.99396972  0.99620432  0.99790133
  0.99855148  0.99908537  0.99943843  0.9995024 ]
307442
[ 0.31436824  0.89181374  0.94510835  0.96811106  0.97992792  0.98890197
  0.99234002  0.99516332  0.99703033  0.99736861]
118933
[ 0.3026578   0.96217198  0.98555489  0.99084358  0.99144897  0.9989574
  0.9991676   0.99955437  0.99963845  0.99966368]
89148
[ 0.3759703   0.85438821  0.9322475   0.96442994  0.97466012  0.98545116
  0.98960156  0.99488491  0.9972742   0.99783506]
99361
[ 0.27311521  0.84117511  0.90823361  0.94420346  0.97086382  0.97996196
  0.98662453  0.9901571   0.99368968  0.99420296]
In [24]:
%%bash
# Crop the saved figure, stage the cropped copy and remove the uncropped PDF.
fig="../../figures/amplitude-hc-ipv6"
# Stop if cropping fails so the only copy of the figure is not deleted below.
pdfcrop "${fig}.pdf" || exit 1
git add "${fig}-crop.pdf"
rm "${fig}.pdf"
PDFCROP 1.33, 2012/02/01 - Copyright (c) 2002-2012 by Heiko Oberdiek.
==> 1 page written on `../../figures/amplitude-hc-ipv6-crop.pdf'.

Pre-Process files - IPv6 TTL

In [ ]:
%%bash
# Build the field-9 histogram for the IPv6 TTL file and its four subsets,
# post-process each one with ../process.sh and stage the results in git.
ext="cut9.sortn.uniqc"
subset="ipv6ttl.csv.gr1pkt.gr1ttl"
for src in "ipv6ttl.csv" "${subset}" "${subset}.eq1ab" "${subset}.gr1ab.no-overlaps" "${subset}.gr1ab.overlaps"
do
    hist="${src}.${ext}"
    # Lines containing "amplitude" (presumably the header token) are dropped.
    cut -d, -f9 "$src" | sort -n | uniq -c | grep -v "amplitude" > "$hist"
    ../process.sh "$hist"
done
# Nothing was started in the background, so this returns immediately.
wait
git add ipv6ttl*"$ext"
In [ ]:
%%bash
# For each IPv6 TTL subset file, write its first comma-separated field to
# <file>.ips, skipping every line that contains "IP" (presumably the header
# row).  The four extractions run in parallel.
base="ipv6ttl.csv.gr1pkt.gr1ttl"
for suffix in "" ".eq1ab" ".gr1ab.no-overlaps" ".gr1ab.overlaps"
do
    src="${base}${suffix}"
    cut -d, -f1 "$src" | grep -v "IP" > "${src}.ips" &
done
# Block until every background pipeline has finished.
wait
In [134]:
%%bash 
# Select from the tb-v5c IPv6 TTL file the rows matching each tb-v5b IP list
# (grepcidr runs in the background) and print the size of each list.
base="ipv6ttl.csv.gr1pkt.gr1ttl"
target="../tb-v5c/ipv6ttl.csv"
for suffix in "" ".eq1ab" ".gr1ab.no-overlaps" ".gr1ab.overlaps"
do
    ip_list="${base}${suffix}.ips"
    grepcidr -f "$ip_list" "$target" > "${target}-grepcidr-${ip_list}" &
    wc -l "$ip_list"
done
# Block until every background grepcidr has finished.
wait
307442 ipv6ttl.csv.gr1pkt.gr1ttl.ips
118933 ipv6ttl.csv.gr1pkt.gr1ttl.eq1ab.ips
89148 ipv6ttl.csv.gr1pkt.gr1ttl.gr1ab.no-overlaps.ips
99361 ipv6ttl.csv.gr1pkt.gr1ttl.gr1ab.overlaps.ips
In [136]:
%%bash
# Line counts of the grepcidr outputs for the IPv6 TTL IP lists.  The glob is
# limited to ipv6ttl* so that other grepcidr outputs in ../tb-v5c/ (e.g. the
# ipv6hc ones) are not counted as well.
wc -l ../tb-v5c/ipv6ttl*grepcidr*ips
  118933 ../tb-v5c/ipv6ttl.csv-grepcidr-ipv6ttl.csv.gr1pkt.gr1ttl.eq1ab.ips
   89148 ../tb-v5c/ipv6ttl.csv-grepcidr-ipv6ttl.csv.gr1pkt.gr1ttl.gr1ab.no-overlaps.ips
   99361 ../tb-v5c/ipv6ttl.csv-grepcidr-ipv6ttl.csv.gr1pkt.gr1ttl.gr1ab.overlaps.ips
  307442 ../tb-v5c/ipv6ttl.csv-grepcidr-ipv6ttl.csv.gr1pkt.gr1ttl.ips
  614884 total
In [ ]:
%%bash
# Build the field-9 histogram for every tb-v5c IPv6 TTL file, post-process it
# with ../process.sh and stage the results in git.
#
# Stop right away if the directory is missing; otherwise the redirections
# below would create files in the wrong directory.
cd ../tb-v5c/ || exit 1
ext="cut9.sortn.uniqc"
for src in ipv6ttl*grepcidr*ips ipv6ttl.csv
do
    hist="$src.$ext"
    # Lines containing "amplitude" (presumably the header token) are dropped.
    cut -d, -f9 "$src" | sort -n | uniq -c | grep -v "amplitude" > "$hist"
    ../process.sh "$hist"
done
# No `wait` is needed (nothing runs in the background) and no `cd` back:
# a %%bash cell runs in its own process, so the directory change ends here.
git add ../tb-v5c/ipv6ttl*"$ext"

Plot IPv6 TTL amplitude

In [25]:
import numpy as np
from matplotlib import rc
rc('text', usetex=False)

# Set to True to print, for every curve, the total count and the first ten
# ECDF values.
printit=False

# One entry per ECDF curve, in legend order:
# (histogram file, line format, extra plot() keyword arguments, legend label).
# Each file is read as two integer columns: column 0 is the count and
# column 1 the amplitude value used on the x axis.
ttl6_curves = [
    ("../tb-v5c/ipv6ttl.csv.cut9.sortn.uniqc",
     'r-', {}, "all IPs"),
    ("../tb-v5c/ipv6ttl.csv-grepcidr-ipv6ttl.csv.gr1pkt.gr1ttl.ips.cut9.sortn.uniqc",
     'g-o', {'markevery': [0,1,2]}, "IPs $>$ 1ttl"),
    ("../tb-v5c/ipv6ttl.csv-grepcidr-ipv6ttl.csv.gr1pkt.gr1ttl.eq1ab.ips.cut9.sortn.uniqc",
     'b-^', {'markevery': [0,1,2]}, "$>$ 1ttl,$=$1 bin"),
    ("../tb-v5c/ipv6ttl.csv-grepcidr-ipv6ttl.csv.gr1pkt.gr1ttl.gr1ab.no-overlaps.ips.cut9.sortn.uniqc",
     'k--', {}, "$>$1ttl,$>$1 bin, no overlaps"),
    ("../tb-v5c/ipv6ttl.csv-grepcidr-ipv6ttl.csv.gr1pkt.gr1ttl.gr1ab.overlaps.ips.cut9.sortn.uniqc",
     'k-x', {'markevery': [0,1,2]}, "$>$1ttl,$>$1 bin, overlaps"),
]

for hist_path, line_fmt, plot_kwargs, legend_label in ttl6_curves:
    hist = genfromtxt(hist_path, delimiter=',', skip_header=0, dtype='int64')
    ecdf = vectortocdf(hist)
    if printit:
        print(sum(hist[:,0]))
        print(ecdf[0:10])
    plt.plot(hist[:,1], ecdf, line_fmt, label=legend_label, **plot_kwargs)

ax = plt.gca()

# NOTE(review): Matplotlib 3.3 renamed these keywords to base= / linthresh=;
# the spellings the notebook was written with are kept - confirm against the
# installed Matplotlib version before re-running.
ax.set_xscale('symlog',basex=2,linthreshx=2)
ax.set_xlim(xmin=0,xmax=255)
ax.set_ylim(ymin=0,ymax=1)
xtick_values = [0,1,2,4,8,16,32,64,128,192]
plt.xticks(xtick_values,xtick_values)
# Format the y labels explicitly: passing the float array itself as labels
# shows values such as 0.30000000000000004 with current NumPy versions.
ytick_values = np.arange(0,1.1,0.1)
plt.yticks(ytick_values,['%.1f' % v for v in ytick_values])

# Raw string: '\l' is not a valid escape sequence in a normal string literal.
plt.ylabel(r'ECDF: P[x$\leq$X]')
plt.xlabel('TTL amplitude (max - min) per IPv6 address')

plt.legend(loc="lower right",prop={'size':10})
plt.gcf().subplots_adjust(bottom=0.11,right=0.97,left=0.11,top=0.96)
plt.savefig('../../figures/amplitude-ttl-ipv6.pdf', format='pdf', dpi=2000)
In [26]:
%%bash
# Crop the saved figure, stage the cropped copy and remove the uncropped PDF.
fig="../../figures/amplitude-ttl-ipv6"
# Stop if cropping fails so the only copy of the figure is not deleted below.
pdfcrop "${fig}.pdf" || exit 1
git add "${fig}-crop.pdf"
rm "${fig}.pdf"
PDFCROP 1.33, 2012/02/01 - Copyright (c) 2002-2012 by Heiko Oberdiek.
==> 1 page written on `../../figures/amplitude-ttl-ipv6-crop.pdf'.

Prepare IPv4 HC files

In [27]:
%%bash
ip=ipv4

# Subset file: count lines, build the field-9 histogram, preview it and
# post-process it.
subset="${ip}hc.csv.gr1pkt.gr1ttl"
subset_hist="${subset}.cut9.sortn.uniqc"
wc -l "$subset"
cut -d, -f9 "$subset" | sort -n | uniq -c > "$subset_hist"
head "$subset_hist"
../process.sh "$subset_hist"

# Full file: same steps, additionally dropping lines that contain
# "amplitudeHC" (presumably the header token).
full="${ip}hc.csv"
full_hist="${full}.cut9.sortn.uniqc"
wc -l "$full"
cut -d, -f9 "$full" | sort -n | uniq -c | grep -v amplitudeHC > "$full_hist"
head "$full_hist"
../process.sh "$full_hist"

# Post-process the eq1ab histogram too; it is not generated in this cell.
../process.sh ipv4hc.csv.gr1pkt.gr1ttl.eq1ab.cut9.sortn.uniqc
17569119 ipv4hc.csv.gr1pkt.gr1ttl
10915573,0
4364059,1
766674,2
397600,3
245631,4
98607,5
87423,6
62722,7
123038,8
51121,9
85986816 ipv4hc.csv

Prepare IPv4 HC files for v5c

In [ ]:
%%bash
# Build the field-9 histogram for the IPv4 HC file and its four subsets,
# post-process each one with ../process.sh, then list and stage the results.
ext="cut9.sortn.uniqc"
subset="ipv4hc.csv.gr1pkt.gr1ttl"
for src in "ipv4hc.csv" "${subset}" "${subset}.eq1ab" "${subset}.gr1ab.no-overlaps" "${subset}.gr1ab.overlaps"
do
    hist="${src}.${ext}"
    # Lines containing "amplitude" (presumably the header token) are dropped.
    cut -d, -f9 "$src" | sort -n | uniq -c | grep -v "amplitude" > "$hist"
    ../process.sh "$hist"
done
# Nothing was started in the background, so this returns immediately.
wait
ls -l ipv4hc*"$ext"
git add ipv4hc*"$ext"
In [ ]:
%%bash
# For each IPv4 HC subset file, write its first comma-separated field to
# <file>.ips, skipping every line that contains "IP" (presumably the header
# row).  The four extractions run in parallel.
base="ipv4hc.csv.gr1pkt.gr1ttl"
for suffix in "" ".eq1ab" ".gr1ab.no-overlaps" ".gr1ab.overlaps"
do
    src="${base}${suffix}"
    cut -d, -f1 "$src" | grep -v "IP" > "${src}.ips" &
done
# Block until every background pipeline has finished.
wait
In [183]:
%%bash 
# Print the size of each tb-v5b IP list and select the matching rows from the
# tb-v5c IPv4 HC file (grepcidr runs in the background).
base="ipv4hc.csv.gr1pkt.gr1ttl"
target="../tb-v5c/ipv4hc.csv"
for suffix in "" ".eq1ab" ".gr1ab.no-overlaps" ".gr1ab.overlaps"
do
    ip_list="${base}${suffix}.ips"
    wc -l "$ip_list"
    grepcidr -f "$ip_list" "$target" > "${target}-grepcidr-${ip_list}" &
done
# Block until every background grepcidr has finished.
wait
17569119 ipv4hc.csv.gr1pkt.gr1ttl.ips
1467531 ipv4hc.csv.gr1pkt.gr1ttl.eq1ab.ips
12923493 ipv4hc.csv.gr1pkt.gr1ttl.gr1ab.no-overlaps.ips
3178095 ipv4hc.csv.gr1pkt.gr1ttl.gr1ab.overlaps.ips
In [184]:
%%bash
# Line counts of the grepcidr outputs written to ../tb-v5c/ for the IPv4 HC
# IP lists; these can be compared with the sizes of the .ips lists.
wc -l ../tb-v5c/ipv4hc*grepcidr*ips
   1467531 ../tb-v5c/ipv4hc.csv-grepcidr-ipv4hc.csv.gr1pkt.gr1ttl.eq1ab.ips
  12923493 ../tb-v5c/ipv4hc.csv-grepcidr-ipv4hc.csv.gr1pkt.gr1ttl.gr1ab.no-overlaps.ips
   3178095 ../tb-v5c/ipv4hc.csv-grepcidr-ipv4hc.csv.gr1pkt.gr1ttl.gr1ab.overlaps.ips
  17569119 ../tb-v5c/ipv4hc.csv-grepcidr-ipv4hc.csv.gr1pkt.gr1ttl.ips
  35138238 total
In [ ]:
%%bash
# Build the field-9 histogram for every tb-v5c IPv4 HC file, post-process all
# of them with ../process.sh, then list and stage the results.
#
# Stop right away if the directory is missing; otherwise the redirections
# below would create files in the wrong directory.
cd ../tb-v5c/ || exit 1
ext="cut9.sortn.uniqc"
for src in ipv4hc*grepcidr*ips ipv4hc.csv
do
    # Lines containing "amplitude" (presumably the header token) are dropped.
    cut -d, -f9 "$src" | sort -n | uniq -c | grep -v "amplitude" > "$src.$ext"
done
for src in ipv4hc*grepcidr*ips ipv4hc.csv
do
    ../process.sh "$src.$ext"
done
# No `wait` is needed (nothing runs in the background) and no `cd` back:
# a %%bash cell runs in its own process, so the directory change ends here.
ls -l ../tb-v5c/ipv4hc*"$ext"
git add ../tb-v5c/ipv4hc*"$ext"

Plot IPv4 HC

In [28]:
# new
import numpy as np
from matplotlib import rc
rc('text', usetex=False)

# One entry per ECDF curve, in legend order:
# (histogram file, line format, extra plot() keyword arguments, legend label).
# Each file is read as two integer columns: column 0 is the count and
# column 1 the amplitude value used on the x axis.
hc4_curves = [
    ("../tb-v5c/ipv4hc.csv.cut9.sortn.uniqc",
     'r-', {}, "(A) all IPs"),
    ("../tb-v5c/ipv4hc.csv-grepcidr-ipv4hc.csv.gr1pkt.gr1ttl.ips.cut9.sortn.uniqc",
     'g-o', {'markevery': slice(1,11,2)}, "(C2) IPs $>$ 1ttl"),
    ("../tb-v5c/ipv4hc.csv-grepcidr-ipv4hc.csv.gr1pkt.gr1ttl.eq1ab.ips.cut9.sortn.uniqc",
     'b-s', {'markevery': 2}, "(D1) $>$ 1ttl, =1 bin"),
    ("../tb-v5c/ipv4hc.csv-grepcidr-ipv4hc.csv.gr1pkt.gr1ttl.gr1ab.no-overlaps.ips.cut9.sortn.uniqc",
     'k--', {}, "(E1) $>$1ttl, $>$1 bin, no overlaps"),
    ("../tb-v5c/ipv4hc.csv-grepcidr-ipv4hc.csv.gr1pkt.gr1ttl.gr1ab.overlaps.ips.cut9.sortn.uniqc",
     'k-x', {}, "(E2) $>$1ttl, $>$1 bin, overlaps"),
]

for hist_path, line_fmt, plot_kwargs, legend_label in hc4_curves:
    hist = genfromtxt(hist_path, delimiter=',', skip_header=0, dtype='int64')
    ecdf = vectortocdf(hist)
    print(sum(hist[:,0]))   # total count over all amplitude values
    print(ecdf[0:10])       # first ten ECDF values
    plt.plot(hist[:,1], ecdf, line_fmt, label=legend_label, **plot_kwargs)

ax = plt.gca()
ax.set_xlim(xmin=0,xmax=15)

ax.set_ylim(ymax=1)
plt.xticks(np.arange(0,16,1),np.arange(0,16,1))
# Format the y labels explicitly: passing the float array itself as labels
# shows values such as 0.30000000000000004 with current NumPy versions.
ytick_values = np.arange(0,1.1,.1)
plt.yticks(ytick_values,['%.1f' % v for v in ytick_values])

# Raw string: '\l' is not a valid escape sequence in a normal string literal.
plt.ylabel(r'ECDF: P[x$\leq$X]')
plt.xlabel('HopCount amplitude (max - min) per IPv4 address')

plt.legend(loc="lower right",prop={'size':10})
plt.gcf().subplots_adjust(bottom=0.11,right=0.97,left=0.11,top=0.96)
plt.savefig('../../figures/amplitude-hc-ipv4.pdf', format='pdf', dpi=2000)
85986815
[ 0.81805171  0.92140821  0.94500886  0.95629851  0.96590004  0.9696774
  0.97313544  0.97594146  0.98068574  0.98274862]
17569119
[ 0.10950834  0.6153559   0.73086226  0.78611608  0.83310791  0.85159506
  0.86851942  0.88225266  0.9054721   0.91556828]
1467531
[ 0.03335534  0.8151821   0.88510839  0.92456173  0.94935644  0.95644181
  0.96236059  0.96669713  0.97522028  0.97680049]
12923493
[ 0.13862785  0.61716859  0.7261926   0.77552888  0.82459471  0.84271853
  0.85960707  0.8742795   0.89932946  0.90906514]
3178095
[ 0.0262607   0.51571209  0.67862572  0.76523892  0.81404678  0.83927636
  0.86142831  0.8756815   0.89824344  0.91373795]
In [29]:
%%bash
# Crop the saved figure, force-stage the cropped copy and remove the
# uncropped PDF.
fig="../../figures/amplitude-hc-ipv4"
# Stop if cropping fails so the only copy of the figure is not deleted below.
pdfcrop "${fig}.pdf" || exit 1
git add -f "${fig}-crop.pdf"
rm "${fig}.pdf"
PDFCROP 1.33, 2012/02/01 - Copyright (c) 2002-2012 by Heiko Oberdiek.
==> 1 page written on `../../figures/amplitude-hc-ipv4-crop.pdf'.

Process files (IPv4 TTL)

In [ ]:
%%bash
# Build the field-9 histogram for the IPv4 TTL file and its four subsets in
# parallel, post-process every histogram with ../process.sh in parallel, and
# stage the results once all post-processing has finished.
ext="cut9.sortn.uniqc"
subset="ipv4ttl.csv.gr1pkt.gr1ttl"
for src in "ipv4ttl.csv" "${subset}" "${subset}.eq1ab" "${subset}.gr1ab.no-overlaps" "${subset}.gr1ab.overlaps"
do
    # Lines containing "amplitude" (presumably the header token) are dropped.
    cut -d, -f9 "$src" | sort -n | uniq -c | grep -v amplitude > "$src.$ext" &
done
wait
for hist in ipv4ttl*."$ext"
do
    ../process.sh "$hist" &
done
# Stage only after every background process.sh has exited; running `git add`
# while process.sh is still working on the same file could stage it in an
# intermediate state.
wait
git add ipv4ttl*."$ext"
In [ ]:
%%bash
# For each IPv4 TTL subset file, write its first comma-separated field to
# <file>.ips, skipping every line that contains "IP" (presumably the header
# row).  The four extractions run in parallel.
base="ipv4ttl.csv.gr1pkt.gr1ttl"
for suffix in "" ".eq1ab" ".gr1ab.no-overlaps" ".gr1ab.overlaps"
do
    src="${base}${suffix}"
    cut -d, -f1 "$src" | grep -v "IP" > "${src}.ips" &
done
# Block until every background pipeline has finished.
wait
In [155]:
%%bash 
# Select from the tb-v5c IPv4 TTL file the rows matching each tb-v5b IP list
# (grepcidr runs in the background) and print the size of each list.
base="ipv4ttl.csv.gr1pkt.gr1ttl"
target="../tb-v5c/ipv4ttl.csv"
for suffix in "" ".eq1ab" ".gr1ab.no-overlaps" ".gr1ab.overlaps"
do
    ip_list="${base}${suffix}.ips"
    grepcidr -f "$ip_list" "$target" > "${target}-grepcidr-${ip_list}" &
    wc -l "$ip_list"
done
# Block until every background grepcidr has finished.
wait
17569119 ipv4ttl.csv.gr1pkt.gr1ttl.ips
1467531 ipv4ttl.csv.gr1pkt.gr1ttl.eq1ab.ips
12923493 ipv4ttl.csv.gr1pkt.gr1ttl.gr1ab.no-overlaps.ips
3178095 ipv4ttl.csv.gr1pkt.gr1ttl.gr1ab.overlaps.ips
In [156]:
%%bash
# Line counts of the grepcidr outputs for the IPv4 TTL IP lists.  The glob is
# limited to ipv4ttl* so that the ipv4hc grepcidr outputs in ../tb-v5c/ are
# not counted as well.
wc -l ../tb-v5c/ipv4ttl*grepcidr*ips
   1467531 ../tb-v5c/ipv4ttl.csv-grepcidr-ipv4ttl.csv.gr1pkt.gr1ttl.eq1ab.ips
  12923493 ../tb-v5c/ipv4ttl.csv-grepcidr-ipv4ttl.csv.gr1pkt.gr1ttl.gr1ab.no-overlaps.ips
   3178095 ../tb-v5c/ipv4ttl.csv-grepcidr-ipv4ttl.csv.gr1pkt.gr1ttl.gr1ab.overlaps.ips
  17569119 ../tb-v5c/ipv4ttl.csv-grepcidr-ipv4ttl.csv.gr1pkt.gr1ttl.ips
  35138238 total
In [ ]:
%%bash
# Build the field-9 histogram for every tb-v5c IPv4 TTL file in parallel,
# post-process all of them with ../process.sh in parallel, and stage the
# results in git.
#
# Stop right away if the directory is missing; otherwise the redirections
# below would create files in the wrong directory.
cd ../tb-v5c/ || exit 1
ext="cut9.sortn.uniqc"
for src in ipv4ttl*grepcidr*ips ipv4ttl.csv
do
    # Lines containing "amplitude" (presumably the header token) are dropped.
    cut -d, -f9 "$src" | sort -n | uniq -c | grep -v "amplitude" > "$src.$ext" &
done
# All histograms must exist before post-processing starts.
wait
for src in ipv4ttl*grepcidr*ips ipv4ttl.csv
do
    ../process.sh "$src.$ext" &
done
# Stage only after every background process.sh has exited.
wait
# No `cd` back is needed: a %%bash cell runs in its own process, so the
# directory change ends here.
git add ../tb-v5c/ipv4ttl*"$ext"
In [95]:
%%bash
# Show the first two rows of the IPv4 TTL histogram.
# NOTE(review): the path is relative, so this reads the copy in the notebook's
# working directory, not the one under ../tb-v5c/ - confirm which is intended.
head -n 2  ipv4ttl.csv.cut9.sortn.uniqc
78945594,0
3962004,1

Plot IPv4 TTL amplitude

In [30]:
import numpy as np
from matplotlib import rc
rc('text', usetex=False)

# Set to True to print, for every curve, the total count and the first ten
# ECDF values.
printit=False

# One entry per ECDF curve, in legend order:
# (histogram file, line format, extra plot() keyword arguments, legend label).
# Each file is read as two integer columns: column 0 is the count and
# column 1 the amplitude value used on the x axis.
ttl4_curves = [
    ("../tb-v5c/ipv4ttl.csv.cut9.sortn.uniqc",
     'r-', {}, "all IPs"),
    ("../tb-v5c/ipv4ttl.csv-grepcidr-ipv4ttl.csv.gr1pkt.gr1ttl.ips.cut9.sortn.uniqc",
     'g-o', {'markevery': [1,2,4,8,16,32]}, "IPs $>$ 1ttl"),
    ("../tb-v5c/ipv4ttl.csv-grepcidr-ipv4ttl.csv.gr1pkt.gr1ttl.eq1ab.ips.cut9.sortn.uniqc",
     'b-^', {'markevery': [1,2,4,8,16,32]}, "$>$ 1ttl,$=$1 bin"),
    ("../tb-v5c/ipv4ttl.csv-grepcidr-ipv4ttl.csv.gr1pkt.gr1ttl.gr1ab.no-overlaps.ips.cut9.sortn.uniqc",
     'k--', {}, "$>$1ttl,$>$1 bin, no overlaps"),
    ("../tb-v5c/ipv4ttl.csv-grepcidr-ipv4ttl.csv.gr1pkt.gr1ttl.gr1ab.overlaps.ips.cut9.sortn.uniqc",
     'k-x', {'markevery': [1,2,4,8,16,32]}, "$>$1ttl,$>$1 bin, overlaps"),
]

for hist_path, line_fmt, plot_kwargs, legend_label in ttl4_curves:
    hist = genfromtxt(hist_path, delimiter=',', skip_header=0, dtype='int64')
    ecdf = vectortocdf(hist)
    if printit:
        print(sum(hist[:,0]))
        print(ecdf[0:10])
    plt.plot(hist[:,1], ecdf, line_fmt, label=legend_label, **plot_kwargs)

ax = plt.gca()
# Format the y labels explicitly: passing the float array itself as labels
# shows values such as 0.30000000000000004 with current NumPy versions.
ytick_values = np.arange(0,1.1,0.1)
plt.yticks(ytick_values,['%.1f' % v for v in ytick_values])

# Raw string: '\l' is not a valid escape sequence in a normal string literal.
plt.ylabel(r'ECDF: P[x$\leq$X]')
plt.xlabel('TTL amplitude (max - min) per IPv4 address')

# NOTE(review): Matplotlib 3.3 renamed these keywords to base= / linthresh=;
# the spellings the notebook was written with are kept - confirm against the
# installed Matplotlib version before re-running.
ax.set_xscale('symlog',basex=2,linthreshx=2)
ax.set_xlim(xmin=0,xmax=255)
ax.set_ylim(ymin=0,ymax=1)
# The x ticks are set once, after the scale and limits are in place.
xtick_values = [0,1,2,4,8,16,32,64,128,192]
plt.xticks(xtick_values,xtick_values)

plt.legend(loc="lower right",prop={'size':10})
plt.gcf().subplots_adjust(bottom=0.11,right=0.97,left=0.11,top=0.96)
plt.savefig('../../figures/amplitude-ttl-ipv4.pdf', format='pdf', dpi=2000)
In [31]:
%%bash
# Crop the saved figure, stage the cropped copy and remove the uncropped PDF.
fig="../../figures/amplitude-ttl-ipv4"
# Stop if cropping fails so the only copy of the figure is not deleted below.
pdfcrop "${fig}.pdf" || exit 1
git add "${fig}-crop.pdf"
rm "${fig}.pdf"
PDFCROP 1.33, 2012/02/01 - Copyright (c) 2002-2012 by Heiko Oberdiek.
==> 1 page written on `../../figures/amplitude-ttl-ipv4-crop.pdf'.