clean.py (893B)
"""Cleanup script: remove the crawl cache directory and drop the queue tables.

Running this module as a script deletes the on-disk crawl cache, then drops
the ``queued_site`` table from the crawling database and the
``indexing_queue`` table from the indexing database.
"""

import shutil

from crawling.spider import (
    get_crawling_db_connection,
    get_indexing_db_connection,
)

CRAWL_CACHE_DIR = "/var/lib/search/crawl_cache"


def _run_statement(conn, statement):
    """Execute ``statement`` on ``conn``, commit, and close the connection.

    The cursor and the connection are always closed, even when executing or
    committing raises, so a failed cleanup does not leave them open. Any
    exception from the database driver propagates to the caller.
    """
    try:
        cur = conn.cursor()
        try:
            cur.execute(statement)
        finally:
            cur.close()
        conn.commit()
    finally:
        conn.close()


def main():
    """Delete the crawl cache directory and drop both queue tables."""
    try:
        shutil.rmtree(CRAWL_CACHE_DIR)
    except FileNotFoundError:
        # A missing cache directory is fine: there is nothing to delete.
        print("Crawl cache directory doesn't exist, continuing with cleanup")

    _run_statement(get_crawling_db_connection(), "DROP TABLE queued_site;")
    print("Crawling database cleaned")

    # The indexing connection is only opened once the crawling cleanup is done.
    _run_statement(get_indexing_db_connection(), "DROP TABLE indexing_queue;")
    print("Indexing database cleaned")


if __name__ == "__main__":
    main()