blog

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs

commit 00f4e2398d8e92054d06f99cc87354b6c3e6ae12
parent 751210ca6a7e034224677b112802e1fddd5d54c4
Author: andrew.laack <andrew.laack@imbue.com>
Date:   Wed, 17 Sep 2025 10:03:52 -0700

Added more scripts, did some more analysis, started probing for all bitrates.

Diffstat:
M.gitignore | 1+
Apython/youtube/analysis/urls.py | 30++++++++++++++++++++++++++++++
Rpython/youtube/bitrate.txt -> python/youtube/findings/bitrate.txt | 0
Apython/youtube/findings/bitrate_summary.txt | 12++++++++++++
Rpython/youtube/duration.txt -> python/youtube/findings/duration.txt | 0
Apython/youtube/findings/eval_bitrate.py | 828+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Apython/youtube/findings/math.md | 20++++++++++++++++++++
Apython/youtube/urls/read_and_check.py | 67+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Apython/youtube/urls/start.sh | 15+++++++++++++++
9 files changed, 973 insertions(+), 0 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -3,3 +3,4 @@ *.png *.pdf *.jpg +python/youtube/urls/urls.txt diff --git a/python/youtube/analysis/urls.py b/python/youtube/analysis/urls.py @@ -0,0 +1,30 @@ +import pandas as pd +import matplotlib.pyplot as plt +import os +from collections import Counter + +# Find all CSV files recursively under "results" +files = [os.path.join(dp, f) for dp, _, filenames in os.walk("results") for f in filenames if os.path.splitext(f)[1] == '.csv'] + +urls = set() + +for file in files: + print(f'Processing {file}') + df = pd.read_csv(file, low_memory=False) + + if 'url' not in df.columns: + print("Warning: 'url' column not found in this file") + continue + + df = df[~df['url'].isin(urls)] + df = df.drop_duplicates(subset=['url'], keep='first') + + new_urls = df['url'].dropna() + urls.update(new_urls) + + print(f"Added {len(new_urls)} more urls.") + +print(f"Total unique URL processed: {len(urls)}") + +for url in urls: + print(url) diff --git a/python/youtube/bitrate.txt b/python/youtube/findings/bitrate.txt diff --git a/python/youtube/findings/bitrate_summary.txt b/python/youtube/findings/bitrate_summary.txt @@ -0,0 +1,12 @@ + bitrate_kbps count +count 810.000000 810.000000 +mean 520.169136 6.241975 +std 253.956343 13.759122 +min 62.000000 1.000000 +25% 311.250000 2.000000 +50% 513.500000 4.000000 +75% 715.750000 7.000000 +max 1376.000000 162.000000 +Most common bitrate: bitrate_kbps 597 +count 162 +Name: 0, dtype: int64 diff --git a/python/youtube/duration.txt b/python/youtube/findings/duration.txt diff --git a/python/youtube/findings/eval_bitrate.py b/python/youtube/findings/eval_bitrate.py @@ -0,0 +1,828 @@ + +import re +import pandas as pd +import matplotlib.pyplot as plt + +data_str = """ +597 kbps: 162 +598 kbps: 154 +596 kbps: 143 +599 kbps: 140 +600 kbps: 121 +601 kbps: 110 +595 kbps: 108 +602 kbps: 98 +594 kbps: 93 +603 kbps: 63 +604 kbps: 46 +593 kbps: 45 +605 kbps: 38 +606 kbps: 38 +592 kbps: 27 +607 kbps: 27 +608 kbps: 27 +610 kbps: 26 +591 kbps: 24 +609 kbps: 19 +611 kbps: 18 +383 kbps: 16 +585 kbps: 16 +309 kbps: 15 +590 kbps: 14 +207 kbps: 13 +310 kbps: 13 +373 kbps: 13 +484 kbps: 13 +588 kbps: 13 +612 kbps: 13 +399 kbps: 12 +423 kbps: 12 +536 kbps: 12 +575 kbps: 12 +738 kbps: 12 +321 kbps: 11 +350 kbps: 11 +371 kbps: 11 +384 kbps: 11 +415 kbps: 11 +419 kbps: 11 +422 kbps: 11 +476 kbps: 11 +526 kbps: 11 +541 kbps: 11 +550 kbps: 11 +574 kbps: 11 +587 kbps: 11 +613 kbps: 11 +617 kbps: 11 +620 kbps: 11 +624 kbps: 11 +285 kbps: 10 +322 kbps: 10 +327 kbps: 10 +336 kbps: 10 +358 kbps: 10 +380 kbps: 10 +388 kbps: 10 +393 kbps: 10 +397 kbps: 10 +429 kbps: 10 +441 kbps: 10 +456 kbps: 10 +497 kbps: 10 +507 kbps: 10 +513 kbps: 10 +514 kbps: 10 +551 kbps: 10 +553 kbps: 10 +554 kbps: 10 +564 kbps: 10 +580 kbps: 10 +650 kbps: 10 +209 kbps: 9 +296 kbps: 9 +325 kbps: 9 +328 kbps: 9 +349 kbps: 9 +357 kbps: 9 +363 kbps: 9 +366 kbps: 9 +394 kbps: 9 +425 kbps: 9 +427 kbps: 9 +431 kbps: 9 +433 kbps: 9 +437 kbps: 9 +463 kbps: 9 +485 kbps: 9 +491 kbps: 9 +503 kbps: 9 +505 kbps: 9 +508 kbps: 9 +511 kbps: 9 +523 kbps: 9 +543 kbps: 9 +545 kbps: 9 +548 kbps: 9 +569 kbps: 9 +589 kbps: 9 +618 kbps: 9 +621 kbps: 9 +629 kbps: 9 +631 kbps: 9 +646 kbps: 9 +688 kbps: 9 +208 kbps: 8 +256 kbps: 8 +257 kbps: 8 +286 kbps: 8 +287 kbps: 8 +295 kbps: 8 +306 kbps: 8 +334 kbps: 8 +335 kbps: 8 +337 kbps: 8 +338 kbps: 8 +356 kbps: 8 +360 kbps: 8 +362 kbps: 8 +387 kbps: 8 +389 kbps: 8 +391 kbps: 8 +400 kbps: 8 +403 kbps: 8 +411 kbps: 8 +421 kbps: 8 +426 kbps: 8 +430 kbps: 8 +444 kbps: 8 +445 kbps: 8 +455 kbps: 8 +487 kbps: 8 +490 kbps: 8 +493 kbps: 8 +498 kbps: 8 +517 kbps: 8 +531 kbps: 8 +542 kbps: 8 +544 kbps: 8 +546 kbps: 8 +547 kbps: 8 +568 kbps: 8 +622 kbps: 8 +626 kbps: 8 +632 kbps: 8 +644 kbps: 8 +671 kbps: 8 +692 kbps: 8 +733 kbps: 8 +734 kbps: 8 +248 kbps: 7 +260 kbps: 7 +273 kbps: 7 +294 kbps: 7 +300 kbps: 7 +302 kbps: 7 +307 kbps: 7 +313 kbps: 7 +314 kbps: 7 +316 kbps: 7 +317 kbps: 7 +323 kbps: 7 +326 kbps: 7 +342 kbps: 7 +367 kbps: 7 +376 kbps: 7 +392 kbps: 7 +398 kbps: 7 +407 kbps: 7 +409 kbps: 7 +428 kbps: 7 +436 kbps: 7 +442 kbps: 7 +443 kbps: 7 +448 kbps: 7 +452 kbps: 7 +458 kbps: 7 +465 kbps: 7 +467 kbps: 7 +470 kbps: 7 +471 kbps: 7 +473 kbps: 7 +474 kbps: 7 +479 kbps: 7 +480 kbps: 7 +481 kbps: 7 +494 kbps: 7 +504 kbps: 7 +510 kbps: 7 +516 kbps: 7 +518 kbps: 7 +521 kbps: 7 +527 kbps: 7 +528 kbps: 7 +538 kbps: 7 +559 kbps: 7 +560 kbps: 7 +582 kbps: 7 +615 kbps: 7 +623 kbps: 7 +628 kbps: 7 +634 kbps: 7 +640 kbps: 7 +647 kbps: 7 +649 kbps: 7 +677 kbps: 7 +687 kbps: 7 +843 kbps: 7 +204 kbps: 6 +245 kbps: 6 +279 kbps: 6 +298 kbps: 6 +315 kbps: 6 +329 kbps: 6 +330 kbps: 6 +333 kbps: 6 +340 kbps: 6 +344 kbps: 6 +346 kbps: 6 +348 kbps: 6 +355 kbps: 6 +361 kbps: 6 +364 kbps: 6 +370 kbps: 6 +379 kbps: 6 +382 kbps: 6 +395 kbps: 6 +401 kbps: 6 +402 kbps: 6 +405 kbps: 6 +408 kbps: 6 +412 kbps: 6 +414 kbps: 6 +416 kbps: 6 +420 kbps: 6 +424 kbps: 6 +438 kbps: 6 +439 kbps: 6 +440 kbps: 6 +446 kbps: 6 +453 kbps: 6 +462 kbps: 6 +466 kbps: 6 +468 kbps: 6 +478 kbps: 6 +486 kbps: 6 +496 kbps: 6 +499 kbps: 6 +500 kbps: 6 +501 kbps: 6 +506 kbps: 6 +524 kbps: 6 +535 kbps: 6 +537 kbps: 6 +539 kbps: 6 +552 kbps: 6 +557 kbps: 6 +561 kbps: 6 +562 kbps: 6 +571 kbps: 6 +572 kbps: 6 +581 kbps: 6 +583 kbps: 6 +584 kbps: 6 +586 kbps: 6 +619 kbps: 6 +625 kbps: 6 +638 kbps: 6 +642 kbps: 6 +654 kbps: 6 +656 kbps: 6 +673 kbps: 6 +680 kbps: 6 +691 kbps: 6 +698 kbps: 6 +701 kbps: 6 +703 kbps: 6 +714 kbps: 6 +717 kbps: 6 +728 kbps: 6 +729 kbps: 6 +730 kbps: 6 +737 kbps: 6 +739 kbps: 6 +746 kbps: 6 +761 kbps: 6 +772 kbps: 6 +845 kbps: 6 +103 kbps: 5 +146 kbps: 5 +159 kbps: 5 +182 kbps: 5 +199 kbps: 5 +201 kbps: 5 +202 kbps: 5 +205 kbps: 5 +222 kbps: 5 +231 kbps: 5 +243 kbps: 5 +247 kbps: 5 +255 kbps: 5 +266 kbps: 5 +269 kbps: 5 +274 kbps: 5 +275 kbps: 5 +280 kbps: 5 +293 kbps: 5 +301 kbps: 5 +303 kbps: 5 +304 kbps: 5 +311 kbps: 5 +319 kbps: 5 +324 kbps: 5 +347 kbps: 5 +353 kbps: 5 +365 kbps: 5 +368 kbps: 5 +369 kbps: 5 +372 kbps: 5 +381 kbps: 5 +390 kbps: 5 +396 kbps: 5 +404 kbps: 5 +410 kbps: 5 +418 kbps: 5 +432 kbps: 5 +434 kbps: 5 +435 kbps: 5 +447 kbps: 5 +449 kbps: 5 +450 kbps: 5 +454 kbps: 5 +460 kbps: 5 +464 kbps: 5 +469 kbps: 5 +472 kbps: 5 +475 kbps: 5 +482 kbps: 5 +492 kbps: 5 +509 kbps: 5 +525 kbps: 5 +532 kbps: 5 +549 kbps: 5 +556 kbps: 5 +565 kbps: 5 +567 kbps: 5 +573 kbps: 5 +579 kbps: 5 +616 kbps: 5 +627 kbps: 5 +633 kbps: 5 +636 kbps: 5 +637 kbps: 5 +648 kbps: 5 +651 kbps: 5 +655 kbps: 5 +658 kbps: 5 +670 kbps: 5 +672 kbps: 5 +674 kbps: 5 +696 kbps: 5 +699 kbps: 5 +713 kbps: 5 +719 kbps: 5 +723 kbps: 5 +732 kbps: 5 +836 kbps: 5 +839 kbps: 5 +844 kbps: 5 +854 kbps: 5 +856 kbps: 5 +108 kbps: 4 +123 kbps: 4 +138 kbps: 4 +163 kbps: 4 +175 kbps: 4 +188 kbps: 4 +211 kbps: 4 +213 kbps: 4 +215 kbps: 4 +220 kbps: 4 +226 kbps: 4 +229 kbps: 4 +235 kbps: 4 +236 kbps: 4 +237 kbps: 4 +242 kbps: 4 +254 kbps: 4 +258 kbps: 4 +264 kbps: 4 +271 kbps: 4 +283 kbps: 4 +291 kbps: 4 +312 kbps: 4 +331 kbps: 4 +341 kbps: 4 +352 kbps: 4 +359 kbps: 4 +377 kbps: 4 +406 kbps: 4 +417 kbps: 4 +451 kbps: 4 +457 kbps: 4 +459 kbps: 4 +489 kbps: 4 +502 kbps: 4 +512 kbps: 4 +515 kbps: 4 +522 kbps: 4 +555 kbps: 4 +558 kbps: 4 +566 kbps: 4 +570 kbps: 4 +576 kbps: 4 +577 kbps: 4 +614 kbps: 4 +630 kbps: 4 +635 kbps: 4 +643 kbps: 4 +653 kbps: 4 +660 kbps: 4 +661 kbps: 4 +669 kbps: 4 +675 kbps: 4 +678 kbps: 4 +693 kbps: 4 +694 kbps: 4 +695 kbps: 4 +706 kbps: 4 +708 kbps: 4 +711 kbps: 4 +731 kbps: 4 +757 kbps: 4 +758 kbps: 4 +762 kbps: 4 +774 kbps: 4 +788 kbps: 4 +803 kbps: 4 +811 kbps: 4 +830 kbps: 4 +847 kbps: 4 +850 kbps: 4 +871 kbps: 4 +874 kbps: 4 +109 kbps: 3 +114 kbps: 3 +129 kbps: 3 +136 kbps: 3 +139 kbps: 3 +140 kbps: 3 +141 kbps: 3 +164 kbps: 3 +165 kbps: 3 +167 kbps: 3 +173 kbps: 3 +174 kbps: 3 +179 kbps: 3 +180 kbps: 3 +181 kbps: 3 +185 kbps: 3 +192 kbps: 3 +193 kbps: 3 +195 kbps: 3 +216 kbps: 3 +224 kbps: 3 +233 kbps: 3 +234 kbps: 3 +240 kbps: 3 +241 kbps: 3 +251 kbps: 3 +259 kbps: 3 +261 kbps: 3 +263 kbps: 3 +270 kbps: 3 +277 kbps: 3 +281 kbps: 3 +282 kbps: 3 +288 kbps: 3 +305 kbps: 3 +308 kbps: 3 +318 kbps: 3 +339 kbps: 3 +343 kbps: 3 +354 kbps: 3 +375 kbps: 3 +378 kbps: 3 +413 kbps: 3 +477 kbps: 3 +483 kbps: 3 +519 kbps: 3 +520 kbps: 3 +530 kbps: 3 +533 kbps: 3 +563 kbps: 3 +578 kbps: 3 +645 kbps: 3 +663 kbps: 3 +665 kbps: 3 +667 kbps: 3 +668 kbps: 3 +679 kbps: 3 +681 kbps: 3 +682 kbps: 3 +684 kbps: 3 +685 kbps: 3 +690 kbps: 3 +700 kbps: 3 +704 kbps: 3 +705 kbps: 3 +71 kbps: 3 +710 kbps: 3 +712 kbps: 3 +718 kbps: 3 +727 kbps: 3 +735 kbps: 3 +736 kbps: 3 +743 kbps: 3 +755 kbps: 3 +756 kbps: 3 +769 kbps: 3 +777 kbps: 3 +780 kbps: 3 +787 kbps: 3 +792 kbps: 3 +800 kbps: 3 +801 kbps: 3 +808 kbps: 3 +823 kbps: 3 +827 kbps: 3 +834 kbps: 3 +840 kbps: 3 +841 kbps: 3 +848 kbps: 3 +849 kbps: 3 +853 kbps: 3 +861 kbps: 3 +867 kbps: 3 +869 kbps: 3 +894 kbps: 3 +113 kbps: 2 +116 kbps: 2 +119 kbps: 2 +134 kbps: 2 +135 kbps: 2 +1375 kbps: 2 +142 kbps: 2 +144 kbps: 2 +149 kbps: 2 +153 kbps: 2 +155 kbps: 2 +156 kbps: 2 +157 kbps: 2 +166 kbps: 2 +177 kbps: 2 +183 kbps: 2 +186 kbps: 2 +189 kbps: 2 +194 kbps: 2 +200 kbps: 2 +203 kbps: 2 +206 kbps: 2 +210 kbps: 2 +212 kbps: 2 +214 kbps: 2 +217 kbps: 2 +225 kbps: 2 +228 kbps: 2 +238 kbps: 2 +239 kbps: 2 +246 kbps: 2 +249 kbps: 2 +250 kbps: 2 +262 kbps: 2 +267 kbps: 2 +268 kbps: 2 +272 kbps: 2 +276 kbps: 2 +278 kbps: 2 +289 kbps: 2 +292 kbps: 2 +297 kbps: 2 +299 kbps: 2 +320 kbps: 2 +351 kbps: 2 +374 kbps: 2 +385 kbps: 2 +386 kbps: 2 +461 kbps: 2 +488 kbps: 2 +495 kbps: 2 +529 kbps: 2 +534 kbps: 2 +540 kbps: 2 +641 kbps: 2 +652 kbps: 2 +659 kbps: 2 +666 kbps: 2 +676 kbps: 2 +686 kbps: 2 +689 kbps: 2 +707 kbps: 2 +709 kbps: 2 +715 kbps: 2 +721 kbps: 2 +724 kbps: 2 +725 kbps: 2 +726 kbps: 2 +741 kbps: 2 +742 kbps: 2 +748 kbps: 2 +750 kbps: 2 +754 kbps: 2 +759 kbps: 2 +763 kbps: 2 +765 kbps: 2 +766 kbps: 2 +768 kbps: 2 +771 kbps: 2 +775 kbps: 2 +779 kbps: 2 +786 kbps: 2 +789 kbps: 2 +790 kbps: 2 +793 kbps: 2 +794 kbps: 2 +799 kbps: 2 +804 kbps: 2 +805 kbps: 2 +806 kbps: 2 +807 kbps: 2 +81 kbps: 2 +815 kbps: 2 +816 kbps: 2 +817 kbps: 2 +818 kbps: 2 +824 kbps: 2 +825 kbps: 2 +828 kbps: 2 +837 kbps: 2 +838 kbps: 2 +842 kbps: 2 +846 kbps: 2 +851 kbps: 2 +852 kbps: 2 +855 kbps: 2 +86 kbps: 2 +863 kbps: 2 +870 kbps: 2 +872 kbps: 2 +884 kbps: 2 +898 kbps: 2 +902 kbps: 2 +904 kbps: 2 +907 kbps: 2 +922 kbps: 2 +927 kbps: 2 +93 kbps: 2 +933 kbps: 2 +953 kbps: 2 +101 kbps: 1 +1018 kbps: 1 +1031 kbps: 1 +104 kbps: 1 +106 kbps: 1 +1060 kbps: 1 +1076 kbps: 1 +1090 kbps: 1 +112 kbps: 1 +1147 kbps: 1 +115 kbps: 1 +117 kbps: 1 +1183 kbps: 1 +120 kbps: 1 +122 kbps: 1 +124 kbps: 1 +1257 kbps: 1 +126 kbps: 1 +1269 kbps: 1 +127 kbps: 1 +1298 kbps: 1 +130 kbps: 1 +1306 kbps: 1 +1319 kbps: 1 +137 kbps: 1 +1371 kbps: 1 +1376 kbps: 1 +143 kbps: 1 +147 kbps: 1 +151 kbps: 1 +152 kbps: 1 +154 kbps: 1 +158 kbps: 1 +160 kbps: 1 +162 kbps: 1 +168 kbps: 1 +172 kbps: 1 +176 kbps: 1 +178 kbps: 1 +184 kbps: 1 +187 kbps: 1 +190 kbps: 1 +196 kbps: 1 +197 kbps: 1 +198 kbps: 1 +218 kbps: 1 +223 kbps: 1 +227 kbps: 1 +230 kbps: 1 +232 kbps: 1 +244 kbps: 1 +252 kbps: 1 +253 kbps: 1 +284 kbps: 1 +290 kbps: 1 +332 kbps: 1 +345 kbps: 1 +62 kbps: 1 +639 kbps: 1 +657 kbps: 1 +66 kbps: 1 +662 kbps: 1 +664 kbps: 1 +683 kbps: 1 +69 kbps: 1 +697 kbps: 1 +70 kbps: 1 +702 kbps: 1 +716 kbps: 1 +720 kbps: 1 +722 kbps: 1 +740 kbps: 1 +747 kbps: 1 +749 kbps: 1 +751 kbps: 1 +752 kbps: 1 +753 kbps: 1 +767 kbps: 1 +770 kbps: 1 +773 kbps: 1 +776 kbps: 1 +78 kbps: 1 +781 kbps: 1 +782 kbps: 1 +783 kbps: 1 +785 kbps: 1 +791 kbps: 1 +796 kbps: 1 +797 kbps: 1 +798 kbps: 1 +80 kbps: 1 +802 kbps: 1 +809 kbps: 1 +810 kbps: 1 +814 kbps: 1 +819 kbps: 1 +820 kbps: 1 +821 kbps: 1 +822 kbps: 1 +826 kbps: 1 +829 kbps: 1 +831 kbps: 1 +833 kbps: 1 +835 kbps: 1 +85 kbps: 1 +857 kbps: 1 +858 kbps: 1 +859 kbps: 1 +860 kbps: 1 +862 kbps: 1 +864 kbps: 1 +866 kbps: 1 +87 kbps: 1 +875 kbps: 1 +876 kbps: 1 +877 kbps: 1 +88 kbps: 1 +880 kbps: 1 +883 kbps: 1 +89 kbps: 1 +891 kbps: 1 +892 kbps: 1 +893 kbps: 1 +895 kbps: 1 +897 kbps: 1 +90 kbps: 1 +900 kbps: 1 +903 kbps: 1 +905 kbps: 1 +906 kbps: 1 +909 kbps: 1 +910 kbps: 1 +912 kbps: 1 +913 kbps: 1 +917 kbps: 1 +918 kbps: 1 +921 kbps: 1 +925 kbps: 1 +930 kbps: 1 +937 kbps: 1 +941 kbps: 1 +947 kbps: 1 +955 kbps: 1 +963 kbps: 1 +966 kbps: 1 +971 kbps: 1 +981 kbps: 1 +994 kbps: 1 +""" +pattern = r"(\d+)\s+kbps:\s+(\d+)" +data = re.findall(pattern, data_str) + +df = pd.DataFrame(data, columns=["bitrate_kbps", "count"]) +df["bitrate_kbps"] = df["bitrate_kbps"].astype(int) +df["count"] = df["count"].astype(int) + +df = df.sort_values(by="bitrate_kbps") + +print(df.describe()) +print("Most common bitrate:", df.loc[df["count"].idxmax()]) diff --git a/python/youtube/findings/math.md b/python/youtube/findings/math.md @@ -0,0 +1,20 @@ +I think my bitrate is too low, need more samples, also, YouTube doesn't have every encoding right away, it can take some time. + +--- + +669.6658929891203 kbps * 520.169136 seconds + +~ 348,339.529 kilobits + +~ 43.54 MB + +--- + + +43.54 * 20,000,000 + += 870,800,000 MB/Day + += 317,842,000,000.0 MB/Year + += 317.84 PB/Year diff --git a/python/youtube/urls/read_and_check.py b/python/youtube/urls/read_and_check.py @@ -0,0 +1,67 @@ +import subprocess +import re +from pathlib import Path +import sys + +lower = int(float(sys.argv[1])) +upper = int(float(sys.argv[2])) + +size_pattern = re.compile(r"([\d.]+)(MiB|GiB)") + +done = set() + +if Path.exists(Path('urls_searched.csv')): + with open('urls_searched.csv', 'r') as urls_file: + for url in urls_file: + done.add(url) + + +itr = 1 + +inc = 0 + +with open('urls.txt') as file: + with open('urls_with_size.csv', 'a') as write_file: + with open('urls_searched.csv', 'a') as urls_file: + for url in file: + if inc < lower: + inc += 1 + continue + + inc += 1 + + if inc > upper: + print("Done with chunk") + exit() + + if url in done: + continue + try: + url = url.strip() + result = subprocess.run( + ["yt-dlp", "-F", url], + capture_output=True, + text=True, + check=True + ) + output = result.stdout + + matches = size_pattern.findall(output) + + n = 0 + for size, unit in matches: + if n == 0: + print(f"======================== {itr} ========================") + urls_file.writelines(f"{url}\n") + urls_file.flush() + n = 1 + + print(f"{url}, {size}{unit}") + + + write_file.writelines(f"{url}, {size}{unit}\n") + write_file.flush() + itr += 1 + + except subprocess.CalledProcessError as e: + print(e) diff --git a/python/youtube/urls/start.sh b/python/youtube/urls/start.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +START=0 +END=8000000 +STEP=50000 + +while [ $START -lt $END ]; do + NEXT=$((START + STEP)) + printf -v START_PADDED "%07d" $START + echo "Running: python3 read_and_check.py $START_PADDED $NEXT &" + python3 read_and_check.py "$START_PADDED" "$NEXT" & + START=$NEXT +done + +wait