blog

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs

commit d4ba2fb57c92450400e753c64235fb5e0ec3896c
parent 00097e7b854af12fc4eca332cf5662a76f13a37f
Author: Andrew Laack <andrew@laack.co>
Date:   Thu, 18 Sep 2025 23:57:32 -0500

Merge branch 'master' of ssh://brgr:/home/shared/git/public-repos/blog

Diffstat:
M python/youtube/analysis/size_analysis.py | 32 ++++++++++++++++++++++++++------
1 file changed, 26 insertions(+), 6 deletions(-)

diff --git a/python/youtube/analysis/size_analysis.py b/python/youtube/analysis/size_analysis.py @@ -44,10 +44,9 @@ def convert_to_bytes(str_size : str): return size sizes = df['size'].apply(convert_to_bytes) - +urls = df['url'] sizes = sizes.to_list() - def bytes_to_larger(count_bytes : float): cuts = 0 while count_bytes > 1000: @@ -71,22 +70,43 @@ def bytes_to_larger(count_bytes : float): return str(count_bytes) + " " + suffix +largest_size_by_url = {} + +for index in range(len(sizes)): + current_largest = largest_size_by_url.get(urls[index], -1) + current = sizes[index] + if current > current_largest: + largest_size_by_url[urls[index]] = current +sum_size_by_url = {} +for index in range(len(sizes)): + current = sum_size_by_url.get(urls[index], 0) + current += sizes[index] + sum_size_by_url[urls[index]] = current + +print(len(largest_size_by_url)) def mean(sizes): mean_val = sum(sizes) / len(sizes) return mean_val - - sum_bytes = sum(sizes) sum_str = bytes_to_larger(sum_bytes) mean_val = mean(sizes) mean_str = bytes_to_larger(mean_val) +mean_sum_size = mean(sum_size_by_url.values()) + +mean_val_largest = mean(largest_size_by_url.values()) +mean_str_largest = bytes_to_larger(mean_val_largest) +mean_sum_str = bytes_to_larger(mean_sum_size) + + +print("Average video size across all resolutions for each video: " + mean_str) +print("Average largest video size across videos: " + mean_str_largest) -print("Average video size: " + mean_str) +print("Average sum size across all videos and formats: " + mean_sum_str) print("Sum of all videos queried: " + sum_str) @@ -102,4 +122,4 @@ for size in sizes: itr += 1 plt.plot(itrs, running_means) -plt.show() +#plt.show()