graph.py (1746B)
"""Plot per-engine averages of the search-quality metrics stored in search.csv.

For every engine found in the CSV this script averages three metrics
(slop sites, answer index, unrelated sites), shows one bar chart per
metric, and finally prints the averaged values for the known engines.
"""

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

df = pd.read_csv('search.csv')

# Bar colour used for each known engine.
engines = {
    'startpage': 'blue',
    'ddg': 'yellow',
    'brave_search': 'red',
}

# Colour for an engine that appears in the CSV but has no entry in `engines`,
# so an unexpected engine name no longer aborts the script with a KeyError.
FALLBACK_COLOR = 'gray'

# Expected column layout of search.csv:
# query,engine,time,captcha_hit,captcha_time,pow_captcha,slop_sites_top_5,answer_index,unrelated_sites
# Columns are selected by position, so the header names themselves are not
# relied upon.
COLUMN_POSITIONS = {
    'engine': 1,
    'slop_sites': 6,
    'answer_index': 7,
    'unrelated_sites': 8,
}

METRICS = ('slop_sites', 'answer_index', 'unrelated_sites')


def compute_means(frame):
    """Return ``{engine: {metric: mean, ..., 'count': rows}}`` for *frame*.

    Rows whose unrelated-sites value is missing are skipped entirely.
    Within the remaining rows a missing value in one metric is ignored for
    that metric only (pandas' default NaN handling) instead of turning the
    whole average into NaN.  ``count`` is the number of rows kept for the
    engine.  Engines are returned in order of first appearance.
    """
    data = frame.iloc[:, list(COLUMN_POSITIONS.values())].copy()
    data.columns = list(COLUMN_POSITIONS)

    # If unrelated sites is NaN, skip the row.
    data = data[data['unrelated_sites'].notna()]

    result = {}
    # sort=False keeps first-appearance order, which fixes the bar order.
    for engine, group in data.groupby('engine', sort=False):
        stats = {metric: float(group[metric].mean()) for metric in METRICS}
        stats['count'] = len(group)
        result[engine] = stats
    return result


def plot_metric(stats, metric, title):
    """Show a bar chart of *metric* for every engine in *stats*."""
    names = list(stats)
    # Start a fresh figure so charts never pile up on the same axes when
    # plt.show() does not block (non-interactive or interactive-mode backends).
    plt.figure()
    plt.bar(
        names,
        [stats[name][metric] for name in names],
        color=[engines.get(name, FALLBACK_COLOR) for name in names],
    )
    plt.title(title)
    plt.show()


means = compute_means(df)

plot_metric(means, 'slop_sites', 'Slop Sites')
plot_metric(means, 'answer_index', 'Answer Index')
plot_metric(means, 'unrelated_sites', 'Unrelated Sites')

# Print the averages for the known engines; an engine with no usable rows is
# reported instead of raising a KeyError.
for engine in ('startpage', 'brave_search', 'ddg'):
    if engine in means:
        print(means[engine])
    else:
        print(f'{engine}: no rows with an unrelated_sites value')