blog

Personal blog
git clone git://git.laack.co/blog.git
Log | Files | Refs

read_and_check.py (1848B)


      1 import subprocess
      2 import re
      3 from pathlib import Path
      4 import sys
      5 
      6 lower = int(float(sys.argv[1]))
      7 upper = int(float(sys.argv[2]))
      8 
      9 size_pattern = re.compile(r"([\d.]+)(MiB|GiB)")
     10 
     11 done = set()
     12 
     13 if Path.exists(Path('urls_searched.csv')):
     14     with open('urls_searched.csv', 'r') as urls_file:
     15         for url in urls_file:
     16             done.add(url)
     17 
     18 
     19 itr = 1
     20 
     21 inc = 0
     22 
     23 with open('urls.txt') as file:
     24     with open('urls_with_size.csv', 'a') as write_file:
     25         with open('urls_searched.csv', 'a') as urls_file:
     26             for url in file:
     27                 if inc < lower:
     28                     inc += 1
     29                     continue
     30 
     31                 inc += 1
     32 
     33                 if inc > upper:
     34                     print("Done with chunk")
     35                     exit()
     36 
     37                 if url in done:
     38                     continue
     39                 try:
     40                     url = url.strip()
     41                     result = subprocess.run(
     42                         ["yt-dlp", "-F", url],
     43                         capture_output=True,
     44                         text=True,
     45                         check=True
     46                     )
     47                     output = result.stdout
     48 
     49                     matches = size_pattern.findall(output)
     50 
     51                     n = 0
     52                     for size, unit in matches:
     53                         if n == 0:
     54                             print(f"======================== {itr} ========================")
     55                             urls_file.writelines(f"{url}\n")
     56                             urls_file.flush()
     57                             n = 1
     58 
     59                         print(f"{url}, {size}{unit}")
     60 
     61 
     62                         write_file.writelines(f"{url}, {size}{unit}\n")
     63                         write_file.flush()
     64                     itr += 1
     65 
     66                 except subprocess.CalledProcessError as e:
     67                     print(e)