blog

Personal blog
git clone git://git.laack.co/blog.git
Log | Files | Refs

graph.py (1746B)


      1 import matplotlib.pyplot as plt
      2 import pandas as pd
      3 import numpy as np
      4 
      5 df = pd.read_csv('search.csv')
      6 
      7 engines = {
      8     'startpage': 'blue',
      9     'ddg': 'yellow',
     10     'brave_search': 'red',
     11 }
     12 
     13 # query,engine,time,captcha_hit,captcha_time,pow_captcha,slop_sites_top_5,answer_index,unrelated_sites
     14 records = df.to_records(index=False).tolist()
     15 
     16 means = {}
     17 
     18 for record in records:
     19     engine = record[1]
     20     slop_sites = record[6]
     21     answer_index = record[7]
     22     unrelated_sites = record[8]
     23 
     24 
     25     # if unrealted sites is nan, skip
     26     if np.isnan(unrelated_sites):
     27         continue
     28 
     29     if engine not in means:
     30         means[engine] = {
     31             'slop_sites': 0,
     32             'answer_index': 0,
     33             'unrelated_sites': 0,
     34             'count' : 0
     35         }
     36 
     37     means[engine]["count"] += 1
     38     means[engine]["slop_sites"] += slop_sites
     39     means[engine]["answer_index"] += answer_index
     40     means[engine]["unrelated_sites"] += unrelated_sites
     41 
     42 for engine in means:
     43     means[engine]["slop_sites"] /= means[engine]["count"]
     44     means[engine]["answer_index"] /= means[engine]["count"]
     45     means[engine]["unrelated_sites"] /= means[engine]["count"]
     46 
     47 
     48 plt.bar(means.keys(), [means[engine]["slop_sites"] for engine in means], color=[engines[engine] for engine in means], label='Slop Sites')
     49 plt.title('Slop Sites')
     50 plt.show()
     51 
     52 plt.bar(means.keys(), [means[engine]["answer_index"] for engine in means], color=[engines[engine] for engine in means], label='Answer Index')
     53 plt.title('Answer Index')
     54 plt.show()
     55 
     56 plt.bar(means.keys(), [means[engine]["unrelated_sites"] for engine in means], color=[engines[engine] for engine in means])
     57 plt.title('Unrelated Sites')
     58 plt.show()
     59 
     60 
     61 print(means['startpage'])
     62 print(means['brave_search'])
     63 print(means['ddg'])