from IPython.core.display import HTML
# Build an HTML snippet whose CSS forces the `.container` element to 90% width
# (presumably to widen the notebook page — rendered only when shown as cell output).
HTML("<style>.container { width:90% !important; }</style>")
# Imports
%matplotlib inline
import matplotlib.lines as mlines
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import csv
from numpy import cumsum
from numpy import genfromtxt
import matplotlib.dates as mdate
import datetime as dt
def vectortoccdf(invar):
    """Return the empirical complementary CDF of a histogram table.

    Parameters
    ----------
    invar : array-like, 2-D
        Table whose column 0 holds the number of observations per row.
        Any further columns are ignored here.

    Returns
    -------
    numpy.ndarray
        One value per input row: the fraction of all observations that fall
        in this row or any later row. The first element is therefore 1.0.
        If the counts sum to zero the division yields NaN, as it did before.
    """
    counts = np.asarray(invar)[:, 0]
    # ndarray.sum() runs in native code; the builtin sum() walks the array
    # element by element in the interpreter.
    cdf = np.cumsum(counts / float(counts.sum()))
    # Shift "1 - CDF" right by one slot: prepend 1.0 and drop the trailing 0.0,
    # turning P[X > x] into P[X >= x].
    return np.insert(1.0 - cdf, 0, 1.0)[:-1]
def vectortocdf(invar):
    """Return the empirical CDF of a histogram table.

    Parameters
    ----------
    invar : array-like, 2-D
        Table whose column 0 holds the number of observations per row.
        Any further columns are ignored here.

    Returns
    -------
    numpy.ndarray
        One value per input row: the cumulative fraction of observations up
        to and including that row, so the last element is 1.0. If the counts
        sum to zero the division yields NaN, as it did before.
    """
    counts = np.asarray(invar)[:, 0]
    # ndarray.sum() runs in native code; the builtin sum() walks the array
    # element by element in the interpreter.
    return np.cumsum(counts / float(counts.sum()))
def vectortocdfnoz(invar):
    """Return the empirical CDF of a histogram table.

    This performs the same computation as ``vectortocdf``; the "noz" suffix
    presumably stands for "no leading zero" (TODO confirm).

    Parameters
    ----------
    invar : array-like, 2-D
        Table whose column 0 holds the number of observations per row.
        Any further columns are ignored here.

    Returns
    -------
    numpy.ndarray
        One value per input row: the cumulative fraction of observations up
        to and including that row, so the last element is 1.0.
    """
    counts = np.asarray(invar)[:, 0]
    # ndarray.sum() runs in native code; the builtin sum() walks the array
    # element by element in the interpreter.
    return np.cumsum(counts / float(counts.sum()))
%%bash
# Inspect the field-9 histogram of the filtered IPv6 hop-count table.
ip=ipv6
file="${ip}hc.csv.gr1pkt.gr1ttl"
wc -l $file
# Histogram generation is disabled here, so the .uniqc file used below must
# already exist when this cell runs.
#cut -d, -f9 $file | sort -n | uniq -c > $file.cut9.sortn.uniqc
cat $file.cut9.sortn.uniqc
# process.sh is not part of this file; presumably it rewrites the `uniq -c`
# output in place as comma-separated "count,value" rows — TODO confirm.
../process.sh $file.cut9.sortn.uniqc
ls $file.cut9.sortn.uniqc
%%bash
# For each filtered IPv6 hop-count table, write CSV field 1 to <table>.ips.
# `grep -v "IP"` drops every line containing "IP" (presumably the header row).
ext="ips"
for i in "ipv6hc.csv.gr1pkt.gr1ttl" "ipv6hc.csv.gr1pkt.gr1ttl.eq1ab" "ipv6hc.csv.gr1pkt.gr1ttl.gr1ab.no-overlaps" "ipv6hc.csv.gr1pkt.gr1ttl.gr1ab.overlaps"
do
# Each extraction runs in the background so the four tables are handled in parallel.
cut -d, -f1 $i | grep -v "IP" > $i.$ext &
done
# Block until all background extractions have finished.
wait
%%bash
# Select from ../tb-v5c/ipv6hc.csv the rows matching each address list and
# store them as ../tb-v5c/ipv6hc.csv-grepcidr-<list>.
for i in "ipv6hc.csv.gr1pkt.gr1ttl.ips" "ipv6hc.csv.gr1pkt.gr1ttl.eq1ab.ips" "ipv6hc.csv.gr1pkt.gr1ttl.gr1ab.no-overlaps.ips" "ipv6hc.csv.gr1pkt.gr1ttl.gr1ab.overlaps.ips"
do
# grepcidr runs in the background; the patterns are read from the list file.
grepcidr -f $i ../tb-v5c/ipv6hc.csv > ../tb-v5c/ipv6hc.csv-grepcidr-$i &
# Print the number of entries in the address list.
wc -l $i
done
# Block until all background grepcidr jobs have finished.
wait
%%bash
# Print the number of rows in each IPv6 hop-count grepcidr result file.
wc -l ../tb-v5c/ipv6hc*grepcidr*ips
%%bash
# Build field-9 histograms for the IPv6 hop-count grepcidr results and the
# full table in ../tb-v5c, then stage them in git.
cd ../tb-v5c/
ext="cut9.sortn.uniqc"
for i in ipv6hc*grepcidr*ips ipv6hc.csv
do
# Count rows per distinct field-9 value (numerically sorted); drop lines
# containing "amplitude" (presumably the count of the header row).
cut -d, -f9 $i | sort -n | uniq -c | grep -v "amplitude" > $i.$ext
# process.sh is not part of this file; presumably it converts the histogram
# in place to comma-separated rows — TODO confirm.
../process.sh $i.$ext
done
# No background jobs were started in this cell, so this returns immediately.
wait
git add ../tb-v5c/ipv6hc*$ext
# Only changes the directory of this cell's own shell process.
cd ../tb-v5b
%%bash
# Show the first three rows of the IPv6 hop-count histogram in the current directory.
head -n 3 ipv6hc.csv.cut9.sortn.uniqc
# old keep for ref
# Figure: ECDF of the hop-count amplitude per IPv6 address, one curve per
# address subset. Each input file is read as integer rows whose column 0 is
# used as the count and column 1 as the x value.
import numpy as np
from matplotlib import rc
rc('text', usetex=False)

# (A) every address in the table.
file = "../tb-v5c/ipv6hc.csv.cut9.sortn.uniqc"
array = genfromtxt(file, delimiter=',', skip_header=0, dtype='int64')
plt.plot(array[:, 1], vectortocdf(array), 'r-', label="(A) all IPs")
print(array[:, 0].sum())
print(vectortocdf(array)[0:10])

# (C2) addresses taken from the gr1pkt.gr1ttl list.
file = "../tb-v5c/ipv6hc.csv-grepcidr-ipv6hc.csv.gr1pkt.gr1ttl.ips.cut9.sortn.uniqc"
array61 = genfromtxt(file, delimiter=',', skip_header=0, dtype='int64')
print(array61[:, 0].sum())
print(vectortocdf(array61)[0:10])
plt.plot(array61[:, 1], vectortocdf(array61), 'g-o', markevery=slice(1, 11, 2), label="(C2) IPs $>$ 1ttl")

# (D1) addresses taken from the eq1ab list.
file = "../tb-v5c/ipv6hc.csv-grepcidr-ipv6hc.csv.gr1pkt.gr1ttl.eq1ab.ips.cut9.sortn.uniqc"
array62 = genfromtxt(file, delimiter=',', skip_header=0, dtype='int64')
plt.plot(array62[:, 1], vectortocdf(array62), 'b-s', markevery=2, label="(D1) $>$ 1ttl, =1 bin")
print(array62[:, 0].sum())
print(vectortocdf(array62)[0:10])

# (E1) addresses taken from the gr1ab.no-overlaps list.
file = "../tb-v5c/ipv6hc.csv-grepcidr-ipv6hc.csv.gr1pkt.gr1ttl.gr1ab.no-overlaps.ips.cut9.sortn.uniqc"
array = genfromtxt(file, delimiter=',', skip_header=0, dtype='int64')
print(array[:, 0].sum())
print(vectortocdf(array)[0:10])
plt.plot(array[:, 1], vectortocdf(array), 'k--', label="(E1)$>$1ttl, $>$1 bin, no overlaps")

# (E2) addresses taken from the gr1ab.overlaps list.
file = "../tb-v5c/ipv6hc.csv-grepcidr-ipv6hc.csv.gr1pkt.gr1ttl.gr1ab.overlaps.ips.cut9.sortn.uniqc"
array = genfromtxt(file, delimiter=',', skip_header=0, dtype='int64')
print(array[:, 0].sum())
print(vectortocdf(array)[0:10])
plt.plot(array[:, 1], vectortocdf(array), 'k-x', label="(E2) $>$1ttl, $>$1 bin, overlaps")

ax = plt.gca()
ax.set_xlim(xmin=0, xmax=10)
ax.set_ylim(ymax=1)
# NOTE(review): the ticks run to 15 although xmax is 10; placing ticks outside
# the limits may widen the visible range — confirm which range is intended.
plt.xticks(np.arange(0, 16, 1), np.arange(0, 16, 1))
# Raw string: '\l' is not a valid Python escape and warns in a normal literal.
plt.ylabel(r'ECDF: P[x$\leq$X]')
plt.xlabel('HC amplitude (max - min) per IPv6 address')
plt.legend(loc="lower right", prop={'size': 10})
plt.gcf().subplots_adjust(bottom=0.11, right=0.97, left=0.11, top=0.96)
plt.savefig('../../figures/amplitude-hc-ipv6.pdf', format='pdf', dpi=2000)
plt.show()
%%bash
# Crop the saved figure; the cropped copy (the "-crop.pdf" file staged below)
# is kept under version control and the uncropped original is deleted.
pdfcrop ../../figures/amplitude-hc-ipv6.pdf
git add ../../figures/amplitude-hc-ipv6-crop.pdf
rm ../../figures/amplitude-hc-ipv6.pdf
%%bash
# Build field-9 histograms for the IPv6 TTL table and its filtered variants
# in the current directory, then stage them in git.
ext="cut9.sortn.uniqc"
for i in "ipv6ttl.csv" "ipv6ttl.csv.gr1pkt.gr1ttl" "ipv6ttl.csv.gr1pkt.gr1ttl.eq1ab" "ipv6ttl.csv.gr1pkt.gr1ttl.gr1ab.no-overlaps" "ipv6ttl.csv.gr1pkt.gr1ttl.gr1ab.overlaps"
do
# Count rows per distinct field-9 value (numerically sorted); drop lines
# containing "amplitude" (presumably the count of the header row).
cut -d, -f9 $i | sort -n | uniq -c | grep -v "amplitude" > $i.$ext
# process.sh is not part of this file; presumably it converts the histogram
# in place to comma-separated rows — TODO confirm.
../process.sh $i.$ext
done
# No background jobs were started in this cell, so this returns immediately.
wait
git add ipv6ttl*$ext
%%bash
# For each filtered IPv6 TTL table, write CSV field 1 to <table>.ips.
# `grep -v "IP"` drops every line containing "IP" (presumably the header row).
ext="ips"
for i in "ipv6ttl.csv.gr1pkt.gr1ttl" "ipv6ttl.csv.gr1pkt.gr1ttl.eq1ab" "ipv6ttl.csv.gr1pkt.gr1ttl.gr1ab.no-overlaps" "ipv6ttl.csv.gr1pkt.gr1ttl.gr1ab.overlaps"
do
# Each extraction runs in the background so the four tables are handled in parallel.
cut -d, -f1 $i | grep -v "IP" > $i.$ext &
done
# Block until all background extractions have finished.
wait
%%bash
# Select from ../tb-v5c/ipv6ttl.csv the rows matching each address list and
# store them as ../tb-v5c/ipv6ttl.csv-grepcidr-<list>.
for i in "ipv6ttl.csv.gr1pkt.gr1ttl.ips" "ipv6ttl.csv.gr1pkt.gr1ttl.eq1ab.ips" "ipv6ttl.csv.gr1pkt.gr1ttl.gr1ab.no-overlaps.ips" "ipv6ttl.csv.gr1pkt.gr1ttl.gr1ab.overlaps.ips"
do
# grepcidr runs in the background; the patterns are read from the list file.
grepcidr -f $i ../tb-v5c/ipv6ttl.csv > ../tb-v5c/ipv6ttl.csv-grepcidr-$i &
# Print the number of entries in the address list.
wc -l $i
done
# Block until all background grepcidr jobs have finished.
wait
%%bash
# Print the number of rows in every grepcidr result file in ../tb-v5c
# (the glob is not restricted to the ipv6ttl files).
wc -l ../tb-v5c/*grepcidr*ips
%%bash
# Build field-9 histograms for the IPv6 TTL grepcidr results and the full
# table in ../tb-v5c, then stage them in git.
cd ../tb-v5c/
ext="cut9.sortn.uniqc"
for i in ipv6ttl*grepcidr*ips ipv6ttl.csv
do
# Count rows per distinct field-9 value (numerically sorted); drop lines
# containing "amplitude" (presumably the count of the header row).
cut -d, -f9 $i | sort -n | uniq -c | grep -v "amplitude" > $i.$ext
# process.sh is not part of this file; presumably it converts the histogram
# in place to comma-separated rows — TODO confirm.
../process.sh $i.$ext
done
# No background jobs were started in this cell, so this returns immediately.
wait
git add ../tb-v5c/ipv6ttl*$ext
# Only changes the directory of this cell's own shell process.
cd ../tb-v5b
# Figure: ECDF of the TTL amplitude per IPv6 address, one curve per address
# subset. Each input file is read as integer rows whose column 0 is used as
# the count and column 1 as the x value.
import numpy as np
from matplotlib import rc
rc('text', usetex=False)
# Set to True to print the total count and the first ECDF values per subset.
printit = False

# Every address in the table.
file = "../tb-v5c/ipv6ttl.csv.cut9.sortn.uniqc"
array = genfromtxt(file, delimiter=',', skip_header=0, dtype='int64')
if printit:
    print(array[:, 0].sum())
    print(vectortocdf(array)[0:10])
plt.plot(array[:, 1], vectortocdf(array), 'r-', label="all IPs")

# Addresses taken from the gr1pkt.gr1ttl list.
file = "../tb-v5c/ipv6ttl.csv-grepcidr-ipv6ttl.csv.gr1pkt.gr1ttl.ips.cut9.sortn.uniqc"
array61 = genfromtxt(file, delimiter=',', skip_header=0, dtype='int64')
if printit:
    print(array61[:, 0].sum())
    print(vectortocdf(array61)[0:10])
plt.plot(array61[:, 1], vectortocdf(array61), 'g-o', markevery=[0, 1, 2], label="IPs $>$ 1ttl")

# Addresses taken from the eq1ab list.
file = "../tb-v5c/ipv6ttl.csv-grepcidr-ipv6ttl.csv.gr1pkt.gr1ttl.eq1ab.ips.cut9.sortn.uniqc"
array62 = genfromtxt(file, delimiter=',', skip_header=0, dtype='int64')
plt.plot(array62[:, 1], vectortocdf(array62), 'b-^', markevery=[0, 1, 2], label="$>$ 1ttl,$=$1 bin")
if printit:
    print(array62[:, 0].sum())
    print(vectortocdf(array62)[0:10])

# Addresses taken from the gr1ab.no-overlaps list.
file = "../tb-v5c/ipv6ttl.csv-grepcidr-ipv6ttl.csv.gr1pkt.gr1ttl.gr1ab.no-overlaps.ips.cut9.sortn.uniqc"
array = genfromtxt(file, delimiter=',', skip_header=0, dtype='int64')
if printit:
    print(array[:, 0].sum())
    print(vectortocdf(array)[0:10])
plt.plot(array[:, 1], vectortocdf(array), 'k--', label="$>$1ttl,$>$1 bin, no overlaps")

# Addresses taken from the gr1ab.overlaps list.
file = "../tb-v5c/ipv6ttl.csv-grepcidr-ipv6ttl.csv.gr1pkt.gr1ttl.gr1ab.overlaps.ips.cut9.sortn.uniqc"
array = genfromtxt(file, delimiter=',', skip_header=0, dtype='int64')
if printit:
    print(array[:, 0].sum())
    print(vectortocdf(array))
plt.plot(array[:, 1], vectortocdf(array), 'k-x', markevery=[0, 1, 2], label="$>$1ttl,$>$1 bin, overlaps")

ax = plt.gca()
# NOTE(review): newer Matplotlib releases spell these keywords `base` and
# `linthresh`; adjust when upgrading — confirm against the installed version.
ax.set_xscale('symlog', basex=2, linthreshx=2)
ax.set_xlim(xmin=0, xmax=255)
ax.set_ylim(ymin=0, ymax=1)
labelsy = [0, 1, 2, 4, 8, 16, 32, 64, 128, 192]
plt.xticks(labelsy, labelsy)
plt.yticks(np.arange(0, 1.1, 0.1), np.arange(0, 1.1, .1))
# Raw string: '\l' is not a valid Python escape and warns in a normal literal.
plt.ylabel(r'ECDF: P[x$\leq$X]')
plt.xlabel('TTL amplitude (max - min) per IPv6 address')
plt.legend(loc="lower right", prop={'size': 10})
plt.gcf().subplots_adjust(bottom=0.11, right=0.97, left=0.11, top=0.96)
plt.savefig('../../figures/amplitude-ttl-ipv6.pdf', format='pdf', dpi=2000)
%%bash
# Crop the saved figure; the cropped copy (the "-crop.pdf" file staged below)
# is kept under version control and the uncropped original is deleted.
pdfcrop ../../figures/amplitude-ttl-ipv6.pdf
git add ../../figures/amplitude-ttl-ipv6-crop.pdf
rm ../../figures/amplitude-ttl-ipv6.pdf
%%bash
# Build and inspect field-9 histograms for the IPv4 hop-count tables.
ip=ipv4
file="${ip}hc.csv.gr1pkt.gr1ttl"
wc -l $file
# Count rows per distinct field-9 value, sorted numerically.
cut -d, -f9 $file | sort -n | uniq -c > $file.cut9.sortn.uniqc
head $file.cut9.sortn.uniqc
# process.sh is not part of this file; presumably it converts the histogram
# in place to comma-separated rows — TODO confirm.
../process.sh $file.cut9.sortn.uniqc
file="${ip}hc.csv"
wc -l $file
# Same histogram for the unfiltered table; lines containing "amplitudeHC"
# (presumably the count of the header row) are dropped.
cut -d, -f9 $file | sort -n | uniq -c | grep -v amplitudeHC > $file.cut9.sortn.uniqc
head $file.cut9.sortn.uniqc
../process.sh $file.cut9.sortn.uniqc
# This histogram is not created in this cell, so it must already exist.
../process.sh ipv4hc.csv.gr1pkt.gr1ttl.eq1ab.cut9.sortn.uniqc
%%bash
# Build field-9 histograms for the IPv4 hop-count table and its filtered
# variants in the current directory, list them, and stage them in git.
ext="cut9.sortn.uniqc"
for i in "ipv4hc.csv" "ipv4hc.csv.gr1pkt.gr1ttl" "ipv4hc.csv.gr1pkt.gr1ttl.eq1ab" "ipv4hc.csv.gr1pkt.gr1ttl.gr1ab.no-overlaps" "ipv4hc.csv.gr1pkt.gr1ttl.gr1ab.overlaps"
do
# Count rows per distinct field-9 value (numerically sorted); drop lines
# containing "amplitude" (presumably the count of the header row).
cut -d, -f9 $i | sort -n | uniq -c | grep -v "amplitude" > $i.$ext
# process.sh is not part of this file; presumably it converts the histogram
# in place to comma-separated rows — TODO confirm.
../process.sh $i.$ext
done
# No background jobs were started in this cell, so this returns immediately.
wait
ls -l ipv4hc*$ext
git add ipv4hc*$ext
%%bash
# For each filtered IPv4 hop-count table, write CSV field 1 to <table>.ips.
# `grep -v "IP"` drops every line containing "IP" (presumably the header row).
ext="ips"
for i in "ipv4hc.csv.gr1pkt.gr1ttl" "ipv4hc.csv.gr1pkt.gr1ttl.eq1ab" "ipv4hc.csv.gr1pkt.gr1ttl.gr1ab.no-overlaps" "ipv4hc.csv.gr1pkt.gr1ttl.gr1ab.overlaps"
do
# Each extraction runs in the background so the four tables are handled in parallel.
cut -d, -f1 $i | grep -v "IP" > $i.$ext &
done
# Block until all background extractions have finished.
wait
%%bash
# Select from ../tb-v5c/ipv4hc.csv the rows matching each address list and
# store them as ../tb-v5c/ipv4hc.csv-grepcidr-<list>.
for i in "ipv4hc.csv.gr1pkt.gr1ttl.ips" "ipv4hc.csv.gr1pkt.gr1ttl.eq1ab.ips" "ipv4hc.csv.gr1pkt.gr1ttl.gr1ab.no-overlaps.ips" "ipv4hc.csv.gr1pkt.gr1ttl.gr1ab.overlaps.ips"
do
# Print the number of entries in the address list.
wc -l $i
# grepcidr runs in the background; the patterns are read from the list file.
grepcidr -f $i ../tb-v5c/ipv4hc.csv > ../tb-v5c/ipv4hc.csv-grepcidr-$i &
done
# Block until all background grepcidr jobs have finished.
wait
%%bash
# Print the number of rows in each IPv4 hop-count grepcidr result file.
wc -l ../tb-v5c/ipv4hc*grepcidr*ips
%%bash
# Build field-9 histograms for the IPv4 hop-count grepcidr results and the
# full table in ../tb-v5c, post-process them, then list and stage them.
cd ../tb-v5c/
ext="cut9.sortn.uniqc"
# Pass 1: write one histogram per input file.
for i in ipv4hc*grepcidr*ips ipv4hc.csv
do
# Count rows per distinct field-9 value (numerically sorted); drop lines
# containing "amplitude" (presumably the count of the header row).
cut -d, -f9 $i | sort -n | uniq -c | grep -v "amplitude" > $i.$ext
done
# No background jobs were started in this cell, so this returns immediately.
wait
# Pass 2: run process.sh on each histogram. The script is not part of this
# file; presumably it converts the histogram in place to comma-separated rows
# — TODO confirm.
for i in ipv4hc*grepcidr*ips ipv4hc.csv
do
../process.sh $i.$ext
done
ls -l ../tb-v5c/ipv4hc*$ext
git add ../tb-v5c/ipv4hc*$ext
#git add ipv6ttl*$ext
# Only changes the directory of this cell's own shell process.
cd ../tb-v5b
# new
# Figure: ECDF of the hop-count amplitude per IPv4 address, one curve per
# address subset. Each input file is read as integer rows whose column 0 is
# used as the count and column 1 as the x value.
import numpy as np
from matplotlib import rc
rc('text', usetex=False)

# (A) every address in the table.
file = "../tb-v5c/ipv4hc.csv.cut9.sortn.uniqc"
array = genfromtxt(file, delimiter=',', skip_header=0, dtype='int64')
print(array[:, 0].sum())
print(vectortocdf(array)[0:10])
plt.plot(array[:, 1], vectortocdf(array), 'r-', label="(A) all IPs")

# (C2) addresses taken from the gr1pkt.gr1ttl list.
file = "../tb-v5c/ipv4hc.csv-grepcidr-ipv4hc.csv.gr1pkt.gr1ttl.ips.cut9.sortn.uniqc"
array61 = genfromtxt(file, delimiter=',', skip_header=0, dtype='int64')
print(array61[:, 0].sum())
print(vectortocdf(array61)[0:10])
plt.plot(array61[:, 1], vectortocdf(array61), 'g-o', markevery=slice(1, 11, 2), label="(C2) IPs $>$ 1ttl")

# (D1) addresses taken from the eq1ab list.
file = "../tb-v5c/ipv4hc.csv-grepcidr-ipv4hc.csv.gr1pkt.gr1ttl.eq1ab.ips.cut9.sortn.uniqc"
array62 = genfromtxt(file, delimiter=',', skip_header=0, dtype='int64')
plt.plot(array62[:, 1], vectortocdf(array62), 'b-s', markevery=2, label="(D1) $>$ 1ttl, =1 bin")
print(array62[:, 0].sum())
print(vectortocdf(array62)[0:10])

# (E1) addresses taken from the gr1ab.no-overlaps list.
file = "../tb-v5c/ipv4hc.csv-grepcidr-ipv4hc.csv.gr1pkt.gr1ttl.gr1ab.no-overlaps.ips.cut9.sortn.uniqc"
array = genfromtxt(file, delimiter=',', skip_header=0, dtype='int64')
print(array[:, 0].sum())
print(vectortocdf(array)[0:10])
plt.plot(array[:, 1], vectortocdf(array), 'k--', label="(E1) $>$1ttl, $>$1 bin, no overlaps")

# (E2) addresses taken from the gr1ab.overlaps list.
file = "../tb-v5c/ipv4hc.csv-grepcidr-ipv4hc.csv.gr1pkt.gr1ttl.gr1ab.overlaps.ips.cut9.sortn.uniqc"
array = genfromtxt(file, delimiter=',', skip_header=0, dtype='int64')
print(array[:, 0].sum())
print(vectortocdf(array)[0:10])
plt.plot(array[:, 1], vectortocdf(array), 'k-x', label="(E2) $>$1ttl, $>$1 bin, overlaps")

ax = plt.gca()
ax.set_xlim(xmin=0, xmax=15)
ax.set_ylim(ymax=1)
plt.xticks(np.arange(0, 16, 1), np.arange(0, 16, 1))
plt.yticks(np.arange(0, 1.1, .1), np.arange(0, 1.1, .1))
# Raw string: '\l' is not a valid Python escape and warns in a normal literal.
plt.ylabel(r'ECDF: P[x$\leq$X]')
plt.xlabel('HopCount amplitude (max - min) per IPv4 address')
plt.legend(loc="lower right", prop={'size': 10})
plt.gcf().subplots_adjust(bottom=0.11, right=0.97, left=0.11, top=0.96)
plt.savefig('../../figures/amplitude-hc-ipv4.pdf', format='pdf', dpi=2000)
%%bash
# Crop the saved figure; the cropped copy (the "-crop.pdf" file staged below)
# is force-added to git (-f) and the uncropped original is deleted.
pdfcrop ../../figures/amplitude-hc-ipv4.pdf
git add -f ../../figures/amplitude-hc-ipv4-crop.pdf
rm ../../figures/amplitude-hc-ipv4.pdf
%%bash
# Build field-9 histograms for the IPv4 TTL table and its filtered variants
# in the current directory, post-process them, and stage them in git.
ext="cut9.sortn.uniqc"
for i in "ipv4ttl.csv" "ipv4ttl.csv.gr1pkt.gr1ttl" "ipv4ttl.csv.gr1pkt.gr1ttl.eq1ab" "ipv4ttl.csv.gr1pkt.gr1ttl.gr1ab.no-overlaps" "ipv4ttl.csv.gr1pkt.gr1ttl.gr1ab.overlaps"
do
    # Count rows per distinct field-9 value (numerically sorted); drop lines
    # containing "amplitude" (presumably the count of the header row).
    # Runs in the background so all tables are handled in parallel.
    cut -d, -f9 "$i" | sort -n | uniq -c | grep -v amplitude > "$i.$ext" &
done
# All histograms must be complete before they are post-processed.
wait
# Freeze the file list once so processing and staging act on the same files.
files=(ipv4ttl*."$ext")
for i in "${files[@]}"
do
    # process.sh is not part of this file; presumably it rewrites the
    # histogram in place — TODO confirm.
    ../process.sh "$i" &
done
# Stage only after every background process.sh has finished; staging while a
# job is still running could capture a partially rewritten file.
wait
git add "${files[@]}"
%%bash
# For each filtered IPv4 TTL table, write CSV field 1 to <table>.ips.
# `grep -v "IP"` drops every line containing "IP" (presumably the header row).
ext="ips"
for i in "ipv4ttl.csv.gr1pkt.gr1ttl" "ipv4ttl.csv.gr1pkt.gr1ttl.eq1ab" "ipv4ttl.csv.gr1pkt.gr1ttl.gr1ab.no-overlaps" "ipv4ttl.csv.gr1pkt.gr1ttl.gr1ab.overlaps"
do
# Each extraction runs in the background so the four tables are handled in parallel.
cut -d, -f1 $i | grep -v "IP" > $i.$ext &
done
# Block until all background extractions have finished.
wait
%%bash
# Select from ../tb-v5c/ipv4ttl.csv the rows matching each address list and
# store them as ../tb-v5c/ipv4ttl.csv-grepcidr-<list>.
for i in "ipv4ttl.csv.gr1pkt.gr1ttl.ips" "ipv4ttl.csv.gr1pkt.gr1ttl.eq1ab.ips" "ipv4ttl.csv.gr1pkt.gr1ttl.gr1ab.no-overlaps.ips" "ipv4ttl.csv.gr1pkt.gr1ttl.gr1ab.overlaps.ips"
do
# grepcidr runs in the background; the patterns are read from the list file.
grepcidr -f $i ../tb-v5c/ipv4ttl.csv > ../tb-v5c/ipv4ttl.csv-grepcidr-$i &
# Print the number of entries in the address list.
wc -l $i
done
# Block until all background grepcidr jobs have finished.
wait
%%bash
# Print the number of rows in every IPv4 grepcidr result file in ../tb-v5c
# (the glob covers all files starting with "ipv4", not only ipv4ttl).
wc -l ../tb-v5c/ipv4*grepcidr*ips
%%bash
# Build field-9 histograms for the IPv4 TTL grepcidr results and the full
# table in ../tb-v5c, post-process them, then stage them in git.
cd ../tb-v5c/
ext="cut9.sortn.uniqc"
# Pass 1: write one histogram per input file, all in parallel.
for i in ipv4ttl*grepcidr*ips ipv4ttl.csv
do
# Count rows per distinct field-9 value (numerically sorted); drop lines
# containing "amplitude" (presumably the count of the header row).
cut -d, -f9 $i | sort -n | uniq -c | grep -v "amplitude" > $i.$ext &
done
# Histograms must be complete before they are post-processed.
wait
# Pass 2: run process.sh on each histogram in parallel. The script is not part
# of this file; presumably it converts the histogram in place to
# comma-separated rows — TODO confirm.
for i in ipv4ttl*grepcidr*ips ipv4ttl.csv
do
../process.sh $i.$ext &
done
# Post-processing must be complete before the files are staged.
wait
git add ../tb-v5c/ipv4ttl*$ext
# Only changes the directory of this cell's own shell process.
cd ../tb-v5b
%%bash
# Show the first two rows of the IPv4 TTL histogram in the current directory.
head -n 2 ipv4ttl.csv.cut9.sortn.uniqc
# Figure: ECDF of the TTL amplitude per IPv4 address, one curve per address
# subset. Each input file is read as integer rows whose column 0 is used as
# the count and column 1 as the x value.
import numpy as np
from matplotlib import rc
rc('text', usetex=False)
# Set to True to print the total count and the first ECDF values per subset.
printit = False

# Every address in the table.
file = "../tb-v5c/ipv4ttl.csv.cut9.sortn.uniqc"
array = genfromtxt(file, delimiter=',', skip_header=0, dtype='int64')
if printit:
    print(array[:, 0].sum())
    print(vectortocdf(array)[0:10])
plt.plot(array[:, 1], vectortocdf(array), 'r-', label="all IPs")

# Addresses taken from the gr1pkt.gr1ttl list.
file = "../tb-v5c/ipv4ttl.csv-grepcidr-ipv4ttl.csv.gr1pkt.gr1ttl.ips.cut9.sortn.uniqc"
array61 = genfromtxt(file, delimiter=',', skip_header=0, dtype='int64')
if printit:
    print(array61[:, 0].sum())
    print(vectortocdf(array61)[0:10])
plt.plot(array61[:, 1], vectortocdf(array61), 'g-o', markevery=[1, 2, 4, 8, 16, 32], label="IPs $>$ 1ttl")

# Addresses taken from the eq1ab list.
file = "../tb-v5c/ipv4ttl.csv-grepcidr-ipv4ttl.csv.gr1pkt.gr1ttl.eq1ab.ips.cut9.sortn.uniqc"
array62 = genfromtxt(file, delimiter=',', skip_header=0, dtype='int64')
plt.plot(array62[:, 1], vectortocdf(array62), 'b-^', markevery=[1, 2, 4, 8, 16, 32], label="$>$ 1ttl,$=$1 bin")
if printit:
    print(array62[:, 0].sum())
    print(vectortocdf(array62)[0:10])

# Addresses taken from the gr1ab.no-overlaps list.
file = "../tb-v5c/ipv4ttl.csv-grepcidr-ipv4ttl.csv.gr1pkt.gr1ttl.gr1ab.no-overlaps.ips.cut9.sortn.uniqc"
array = genfromtxt(file, delimiter=',', skip_header=0, dtype='int64')
if printit:
    print(array[:, 0].sum())
    print(vectortocdf(array)[0:10])
plt.plot(array[:, 1], vectortocdf(array), 'k--', label="$>$1ttl,$>$1 bin, no overlaps")

# Addresses taken from the gr1ab.overlaps list.
file = "../tb-v5c/ipv4ttl.csv-grepcidr-ipv4ttl.csv.gr1pkt.gr1ttl.gr1ab.overlaps.ips.cut9.sortn.uniqc"
array = genfromtxt(file, delimiter=',', skip_header=0, dtype='int64')
if printit:
    print(array[:, 0].sum())
    print(vectortocdf(array)[0:10])
plt.plot(array[:, 1], vectortocdf(array), 'k-x', markevery=[1, 2, 4, 8, 16, 32], label="$>$1ttl,$>$1 bin, overlaps")

ax = plt.gca()
plt.yticks(np.arange(0, 1.1, 0.1), np.arange(0, 1.1, 0.1))
# Raw string: '\l' is not a valid Python escape and warns in a normal literal.
plt.ylabel(r'ECDF: P[x$\leq$X]')
plt.xlabel('TTL amplitude (max - min) per IPv4 address')
# NOTE(review): newer Matplotlib releases spell these keywords `base` and
# `linthresh`; adjust when upgrading — confirm against the installed version.
ax.set_xscale('symlog', basex=2, linthreshx=2)
ax.set_xlim(xmin=0, xmax=255)
ax.set_ylim(ymin=0, ymax=1)
# The x ticks are set once, after the scale change. An earlier
# plt.xticks([0,32,64,128,192,255]) call was dropped because the scale change
# and this call replaced its ticks anyway.
labelsy = [0, 1, 2, 4, 8, 16, 32, 64, 128, 192]
plt.xticks(labelsy, labelsy)
plt.legend(loc="lower right", prop={'size': 10})
plt.gcf().subplots_adjust(bottom=0.11, right=0.97, left=0.11, top=0.96)
plt.savefig('../../figures/amplitude-ttl-ipv4.pdf', format='pdf', dpi=2000)
%%bash
# Crop the saved figure; the cropped copy (the "-crop.pdf" file staged below)
# is kept under version control and the uncropped original is deleted.
pdfcrop ../../figures/amplitude-ttl-ipv4.pdf
git add ../../figures/amplitude-ttl-ipv4-crop.pdf
rm ../../figures/amplitude-ttl-ipv4.pdf