read_and_check.py (1848B)
"""Record the format sizes yt-dlp reports for a slice of the URLs in urls.txt.

Usage:
    python read_and_check.py LOWER UPPER

Lines of ``urls.txt`` whose zero-based index is in ``[LOWER, UPPER)`` are
processed.  For each URL, ``yt-dlp -F`` is run and every ``<number>MiB`` /
``<number>GiB`` token in its output is appended to ``urls_with_size.csv`` as
``url, size``.  URLs that yielded at least one size are appended to
``urls_searched.csv`` and skipped on later runs.
"""
import subprocess
import re
from pathlib import Path
import sys

URLS_FILE = 'urls.txt'
SIZES_FILE = 'urls_with_size.csv'
SEARCHED_FILE = 'urls_searched.csv'

# A decimal number immediately followed by a MiB/GiB unit.
size_pattern = re.compile(r"([\d.]+)(MiB|GiB)")


def load_done_urls(path=SEARCHED_FILE):
    """Return the set of URLs already recorded in *path*.

    Each line is stripped so that the entries compare equal to the stripped
    URLs read from the input file, regardless of trailing newlines or
    whitespace.  A missing file yields an empty set.
    """
    searched = Path(path)
    if not searched.exists():
        return set()
    with searched.open('r') as handle:
        stripped = (line.strip() for line in handle)
        return {entry for entry in stripped if entry}


def parse_sizes(output):
    """Return every ``(size, unit)`` pair found in yt-dlp's *output* text."""
    return size_pattern.findall(output)


def main(argv=None):
    """Process the requested chunk of ``urls.txt``.

    Args:
        argv: Argument vector shaped like ``sys.argv`` (program name, LOWER,
            UPPER).  Defaults to ``sys.argv``.  LOWER and UPPER may be written
            as floats (e.g. ``1e3``); they are truncated to integers.
    """
    args = sys.argv if argv is None else argv
    if len(args) < 3:
        sys.exit(f"usage: {args[0] if args else 'read_and_check.py'} LOWER UPPER")

    lower = int(float(args[1]))
    upper = int(float(args[2]))

    done = load_done_urls(SEARCHED_FILE)

    # Banner counter: number of URLs queried successfully so far, plus one.
    itr = 1

    with open(URLS_FILE) as file, \
            open(SIZES_FILE, 'a') as write_file, \
            open(SEARCHED_FILE, 'a') as urls_file:
        for index, line in enumerate(file):
            if index < lower:
                continue

            if index >= upper:
                print("Done with chunk")
                return

            # Strip before the membership test so the comparison matches the
            # stripped form that is written to (and loaded from) the
            # searched-URLs file.
            url = line.strip()
            if not url or url in done:
                continue

            try:
                # -F asks yt-dlp to list the available formats; the sizes are
                # scraped from that listing.
                result = subprocess.run(
                    ["yt-dlp", "-F", url],
                    capture_output=True,
                    text=True,
                    check=True
                )
            except subprocess.CalledProcessError as e:
                # Best effort: report the failure and move on to the next URL.
                print(e)
                continue

            matches = parse_sizes(result.stdout)

            if matches:
                print(f"======================== {itr} ========================")
                # Only URLs that produced at least one size are marked as
                # searched; failed or size-less URLs are retried next run.
                urls_file.write(f"{url}\n")
                urls_file.flush()

            for size, unit in matches:
                print(f"{url}, {size}{unit}")
                write_file.write(f"{url}, {size}{unit}\n")
                # Flush per row so progress survives an interrupted run.
                write_file.flush()

            itr += 1


if __name__ == "__main__":
    main()