commit c27c4ce1f3bcc2d4ace8c1829b7398b59ea13d7e
parent ac1b912b333a7ab20950568d030e66da45a7f118
Author: Andrew Laack <andrew@laack.co>
Date: Thu, 4 Jun 2026 01:20:11 -0500
Updated scoring (-1 -> 6 for answer index to make graphs nicer for now), added graphing script, updated query wording
Diffstat:
3 files changed, 81 insertions(+), 15 deletions(-)
diff --git a/python/search-engines/graph.py b/python/search-engines/graph.py
@@ -0,0 +1,63 @@
+"""Plot per-engine mean result-quality metrics from search.csv as bar charts."""
+import matplotlib.pyplot as plt
+import pandas as pd
+import numpy as np
+
+df = pd.read_csv('search.csv')
+
+# Bar colour for each engine; engines not listed here get DEFAULT_COLOR.
+engines = {
+    'startpage': 'blue',
+    'ddg': 'yellow',
+    'brave_search': 'red',
+}
+DEFAULT_COLOR = 'gray'
+
+# Key used in `means` -> (CSV column, chart title), in display order.
+metrics = {
+    'slop_sites': ('slop_sites_top_5', 'Slop Sites'),
+    'answer_index': ('answer_index', 'Answer Index'),
+    'unrelated_sites': ('unrelated_sites', 'Unrelated Sites'),
+}
+
+# Rows as tuples of plain Python values; each is paired with the header below
+# so fields are looked up by column name rather than by position.
+records = df.to_records(index=False).tolist()
+
+# engine -> running sum per metric (turned into a mean below) plus row count.
+means = {}
+
+for record in records:
+    row = dict(zip(df.columns, record))
+
+    # Rows without an unrelated_sites value are read as NaN; skip them so
+    # every mean is taken over the same set of rows.
+    if np.isnan(row['unrelated_sites']):
+        continue
+
+    engine = row['engine']
+    if engine not in means:
+        means[engine] = {**dict.fromkeys(metrics, 0), 'count': 0}
+
+    totals = means[engine]
+    totals['count'] += 1
+    for key, (column, _title) in metrics.items():
+        totals[key] += row[column]
+
+# Divide each sum by its row count; count is at least 1 for any stored engine.
+for totals in means.values():
+    for key in metrics:
+        totals[key] /= totals['count']
+
+# One chart per metric, one bar per engine in order of first appearance.
+colors = [engines.get(engine, DEFAULT_COLOR) for engine in means]
+for key, (_column, title) in metrics.items():
+    heights = [means[engine][key] for engine in means]
+    plt.bar(list(means), heights, color=colors, label=title)
+    plt.title(title)
+    plt.show()
+
+# Print the numbers behind the charts, skipping engines with no usable rows.
+for engine in ('startpage', 'brave_search', 'ddg'):
+    if engine in means:
+        print(means[engine])
diff --git a/python/search-engines/query.py b/python/search-engines/query.py
@@ -52,7 +52,7 @@ for engine in Engine:
captcha = input("Did you see a captcha? (y/n): ")
if captcha == 'y':
- query.captcha_time = float(input("How many seconds did it take to solve (-1 means failed to solve): "))
+ query.captcha_time = float(input("How many seconds did it take to solve (6 means failed to solve): "))
query.captcha_hit = True
query.pow_captcha = input("Was the captcha a PoW captcha? (y/n): ") == "y"
else:
diff --git a/python/search-engines/search.csv b/python/search-engines/search.csv
@@ -1,19 +1,19 @@
query,engine,time,captcha_hit,captcha_time,pow_captcha,slop_sites_top_5,answer_index,unrelated_sites
"fim language models ""open weight"" code completion",startpage,1780355824.6081214,False,0,False,0,2
-"fim language models ""open weight"" code completion",brave_search,1780355824.6081214,False,0,False,0,-1
-"fim language models ""open weight"" code completion",ddg,1780355824.6081214,False,0,False,0,-1
+"fim language models ""open weight"" code completion",brave_search,1780355824.6081214,False,0,False,0,6
+"fim language models ""open weight"" code completion",ddg,1780355824.6081214,False,0,False,0,6
Granite4.0 code completion model,startpage,1780356056.7660637,False,0,False,0,1
Granite4.0 code completion model,brave_search,1780356056.7660637,False,0,False,0,1
Granite4.0 code completion model,ddg,1780356056.7660637,False,0,False,0,1
mellum fim vs qwen2.5-coder-3b code completion model benchmarks,startpage,1780356578.8307145,False,0,False,0,2
mellum fim vs qwen2.5-coder-3b code completion model benchmarks,brave_search,1780356578.8307145,False,0,False,1,5
-mellum fim vs qwen2.5-coder-3b code completion model benchmarks,ddg,1780356578.8307145,False,0,False,2,-1
-supermaven / cursor models vs mellum,startpage,1780357180.0816693,False,0,False,1,-1
-supermaven / cursor models vs mellum,brave_search,1780357180.0816693,False,0,False,2,-1
-supermaven / cursor models vs mellum,ddg,1780357180.0816693,False,0,False,4,-1
+mellum fim vs qwen2.5-coder-3b code completion model benchmarks,ddg,1780356578.8307145,False,0,False,2,6
+supermaven / cursor models vs mellum,startpage,1780357180.0816693,False,0,False,1,6
+supermaven / cursor models vs mellum,brave_search,1780357180.0816693,False,0,False,2,6
+supermaven / cursor models vs mellum,ddg,1780357180.0816693,False,0,False,4,6
code completion models economic impact,startpage,1780357606.6343193,False,0,False,1,1
-code completion models economic impact,brave_search,1780357606.6343193,False,0,False,0,-1
-code completion models economic impact,ddg,1780357606.6343193,False,0,False,2,-1
+code completion models economic impact,brave_search,1780357606.6343193,False,0,False,0,6
+code completion models economic impact,ddg,1780357606.6343193,False,0,False,2,6
tagging markdown files,startpage,1780367477.1498914,False,0,False,1,1
tagging markdown files,brave_search,1780367477.1498914,False,0,False,2,1
tagging markdown files,ddg,1780367477.1498914,False,0,False,1,3
@@ -32,15 +32,15 @@ mellum 2,ddg,1780414675.3085477,False,0,False,1,1,0
jetbrains coding models,startpage,1780424305.6205204,False,0,False,0,2,0
jetbrains coding models,brave_search,1780424305.6205204,False,0,False,0,3,0
jetbrains coding models,ddg,1780424305.6205204,False,0,False,2,2,0
-jetbrains valuation,startpage,1780425787.8044605,False,0,False,2,-1,0
-jetbrains valuation,brave_search,1780425787.8044605,False,0,False,3,-1,0
-jetbrains valuation,ddg,1780425787.8044605,False,0,False,2,-1,0
-don't call iti freedom,startpage,1780426156.7134058,False,0,False,3,-1,5
-don't call iti freedom,brave_search,1780426156.7134058,False,0,False,2,-1,5
+jetbrains valuation,startpage,1780425787.8044605,False,0,False,2,6,0
+jetbrains valuation,brave_search,1780425787.8044605,False,0,False,3,6,0
+jetbrains valuation,ddg,1780425787.8044605,False,0,False,2,6,0
+don't call iti freedom,startpage,1780426156.7134058,False,0,False,3,6,5
+don't call iti freedom,brave_search,1780426156.7134058,False,0,False,2,6,5
don't call iti freedom,ddg,1780426156.7134058,False,0,False,1,1,3
"inference cost for running small LLMs (""4b""|""8b""|""3b""|""1.5b"")",startpage,1780437841.4090471,False,0,False,1,2,4
"inference cost for running small LLMs (""4b""|""8b""|""3b""|""1.5b"")",brave_search,1780437841.4090471,False,0,False,2,1,4
-"inference cost for running small LLMs (""4b""|""8b""|""3b""|""1.5b"")",ddg,1780437841.4090471,False,0,False,2,-1,5
+"inference cost for running small LLMs (""4b""|""8b""|""3b""|""1.5b"")",ddg,1780437841.4090471,False,0,False,2,6,5
code-completion-infill benchmarks,startpage,1780440226.2587073,False,0,False,0,1,0
code-completion-infill benchmarks,brave_search,1780440226.2587073,False,0,False,0,1,0
code-completion-infill benchmarks,ddg,1780440226.2587073,False,0,False,2,2,2
@@ -59,3 +59,6 @@ librewolf anti-fingerprinting,ddg,1780499870.63678,False,0,False,3,1,3
browser fingerprinting privacy guides,startpage,1780500464.2163224,False,0,False,1,1,1
browser fingerprinting privacy guides,brave_search,1780500464.2163224,False,0,False,2,2,3
browser fingerprinting privacy guides,ddg,1780500464.2163224,False,0,False,1,2,1
+context length qwen 2.5 coder 7b,startpage,1780551836.1093357,True,6.0,False,0,1,0
+context length qwen 2.5 coder 7b,brave_search,1780551836.1093357,False,0,False,0,1,0
+context length qwen 2.5 coder 7b,ddg,1780551836.1093357,False,0,False,2,1,0