commit d4ba2fb57c92450400e753c64235fb5e0ec3896c
parent 00097e7b854af12fc4eca332cf5662a76f13a37f
Author: Andrew Laack <andrew@laack.co>
Date: Thu, 18 Sep 2025 23:57:32 -0500
Merge branch 'master' of ssh://brgr:/home/shared/git/public-repos/blog
Diffstat:
1 file changed, 26 insertions(+), 6 deletions(-)
diff --git a/python/youtube/analysis/size_analysis.py b/python/youtube/analysis/size_analysis.py
@@ -44,10 +44,9 @@ def convert_to_bytes(str_size : str):
return size
sizes = df['size'].apply(convert_to_bytes)
-
+urls = df['url']
sizes = sizes.to_list()
-
def bytes_to_larger(count_bytes : float):
cuts = 0
while count_bytes > 1000:
@@ -71,22 +70,43 @@ def bytes_to_larger(count_bytes : float):
return str(count_bytes) + " " + suffix
+# Per-URL aggregates. A URL may occur on several rows (presumably one row per
+# downloadable format -- confirm against the CSV), so keep both the largest
+# single size and the total size seen for each URL.
+largest_size_by_url = {}
+sum_size_by_url = {}
+
+# Pair rows by position with zip(): `urls` is a pandas Series, and
+# `urls[index]` is a label lookup that only coincides with the row position
+# when the frame has a default RangeIndex. `sizes` is already a plain list.
+for url, size in zip(urls, sizes):
+    # -1 is the "nothing seen yet" sentinel; assumes sizes are non-negative.
+    if size > largest_size_by_url.get(url, -1):
+        largest_size_by_url[url] = size
+    sum_size_by_url[url] = sum_size_by_url.get(url, 0) + size
+
+print(len(largest_size_by_url))
def mean(sizes):
    """Return the arithmetic mean of *sizes*.

    Args:
        sizes: Any iterable of numbers. It is copied into a list once, so
            one-shot iterables such as generators are accepted alongside
            lists and ``dict.values()`` views.

    Returns:
        The sum of the values divided by their count (true division).

    Raises:
        ZeroDivisionError: If *sizes* contains no values.
    """
    values = list(sizes)
    if not values:
        # Same exception type the bare division raised, with a usable message.
        raise ZeroDivisionError("mean() requires at least one value")
    return sum(values) / len(values)
-
-
sum_bytes = sum(sizes)
sum_str = bytes_to_larger(sum_bytes)
mean_val = mean(sizes)
mean_str = bytes_to_larger(mean_val)
+mean_sum_size = mean(sum_size_by_url.values())
+
+mean_val_largest = mean(largest_size_by_url.values())
+mean_str_largest = bytes_to_larger(mean_val_largest)
+mean_sum_str = bytes_to_larger(mean_sum_size)
+
+
+print("Average video size across all resolutions for each video: " + mean_str)
+print("Average largest video size across videos: " + mean_str_largest)
-print("Average video size: " + mean_str)
+print("Average sum size across all videos and formats: " + mean_sum_str)
print("Sum of all videos queried: " + sum_str)
@@ -102,4 +122,4 @@ for size in sizes:
itr += 1
plt.plot(itrs, running_means)
-plt.show()
+#plt.show()