information-retrieval

Exploration of information retrieval topics
git clone git://git.laack.co/information-retrieval.git
Log | Files | Refs

idf.py (740B)


      1 import sqlite3
      2 from utils import get_log_doc_freqs
      3 from utils import get_filepaths
      4 
      5 if __name__ == "__main__":
      6     filepaths = get_filepaths('sites')
      7     print(f'Found {len(filepaths)} files')
      8     log_doc_freq = get_log_doc_freqs(filepaths)
      9     print(f"There are {len(log_doc_freq)}")
     10 
     11     con = sqlite3.connect('database/manifest.db', timeout=60)
     12     con.execute('PRAGMA journal_mode=WAL')
     13     cur = con.cursor()
     14     cur.execute("CREATE TABLE IF NOT EXISTS term(name, idf)")
     15     cur.execute("DELETE FROM term")
     16 
     17     for term in log_doc_freq:
     18         cur.execute("INSERT INTO term VALUES (?, ?)", (term, log_doc_freq[term]))
     19     con.commit()
     20     print(f"All {len(log_doc_freq)} terms inserted into the db.")
     21     cur.close()
     22     con.close()