import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Plot setup. NOTE: the "%matplotlib inline" line below is an IPython/Jupyter
# magic command, not plain Python -- this file is meant to be run as a
# notebook cell (or via IPython), not with the bare `python` interpreter.
sns.set(color_codes=True)
%matplotlib inline
# Violin plot of the mean hop count per prefix against the AS path length,
# drawn separately for IPv4 and IPv6.
#
# Input files: one row per prefix, with the columns
#   [prefix, mean(hopcount), ASPathLength, min(hopcount), max(hopcount),
#    var(hopcount), stdev(hopcount)]
# Files at leavenworth:/srv/2016-ba-wickenheiser-scheitle/bgpttl/result

# IPv6 file
filename2 = "bgpttl_20160319-073130.csv"
# IPv4 file
filename1 = "bgpttl_20160321-102030.csv"

# Column names used to overwrite the headers read from the CSV files, which
# are a bit messed up in the original files (optional clean-up).
COLUMN_NAMES = [
    "prefix",
    "mean(hopcount)",
    "ASPathLength",
    "min(hopcount)",
    "max(hopcount)",
    "var(hopcount)",
    "stdev(hopcount)",
]

# Create the dataframes (df1: IPv4, df2: IPv6).
df1 = pd.read_csv(filename1)
df2 = pd.read_csv(filename2)

# Assign a fresh list to each frame so the two column indexes are independent.
df1.columns = list(COLUMN_NAMES)
df2.columns = list(COLUMN_NAMES)

# Tag every row with its IP version; this column is the plot's hue.
df1["IP Version"] = "IPv4"
df2["IP Version"] = "IPv6"

# Combine both frames so one violinplot call can draw both IP versions.
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# replacement. ignore_index=True gives the combined frame unique row labels
# (the plot only depends on column values, not on the index).
combined = pd.concat([df1, df2], ignore_index=True)

fig, ax = plt.subplots()
# split=False draws the IPv4 and IPv6 violins side by side for each AS path
# length (not as two halves of one violin); inner="box" adds a box plot
# inside each violin. Passing scale="count" was left disabled by the author.
ax = sns.violinplot(
    x="ASPathLength",
    y="mean(hopcount)",
    hue="IP Version",
    data=combined,
    split=False,
    inner="box",
    ax=ax,
)
ax.set(xlabel='AS Path Length', ylabel='Mean Hop Count per Prefix (capped at 40)')

# Optional: uncomment to overlay one regression line per IP version.
#sns.regplot(x="ASPathLength", y="mean(hopcount)", ci=None, scatter=False, data=df1, color="g")
#sns.regplot(x="ASPathLength", y="mean(hopcount)", ci=None, scatter=False, data=df2, color="b")

# Restrict the visible hop-count range to 0..40 (matches the y-axis label).
# The trailing semicolon suppresses the echoed return value in a notebook.
ax.set_ylim(bottom=0, top=40);