information-retrieval

Exploration of information retrieval topics
git clone git://git.laack.co/information-retrieval.git
Log | Files | Refs

ModeNameSize
-rw-r--r--.gitignore10L
-rw-r--r--embeddings/ai_detection/process.py35L
-rw-r--r--embeddings/ai_detection/train.py29L
-rw-r--r--embeddings/docs/index.py44L
-rw-r--r--embeddings/docs/similarity.py39L
-rw-r--r--llm-search/CMakeLists.txt25L
-rw-r--r--llm-search/include/nlohmann/json.hpp24640L
-rw-r--r--llm-search/include/openai.hpp1119L
-rw-r--r--llm-search/src/main.cpp195L
-rw-r--r--llm-search/src/prompts.cpp55L
-rw-r--r--nli/nli-test.py82L
-rw-r--r--search-engine-postgres/Makefile3L
-rw-r--r--search-engine-postgres/TODO.md21L
-rw-r--r--search-engine-postgres/crawling/README.md13L
-rw-r--r--search-engine-postgres/crawling/__init__.py0B
-rw-r--r--search-engine-postgres/crawling/__pycache__/__init__.cpython-313.pyc161B
-rw-r--r--search-engine-postgres/crawling/__pycache__/clean_cache.cpython-313.pyc2680B
-rw-r--r--search-engine-postgres/crawling/__pycache__/constants.cpython-313.pyc427B
-rw-r--r--search-engine-postgres/crawling/__pycache__/spider.cpython-313.pyc16965B
-rw-r--r--search-engine-postgres/crawling/clean.py32L
-rw-r--r--search-engine-postgres/crawling/clean_cache.py58L
-rw-r--r--search-engine-postgres/crawling/constants.py8L
-rw-r--r--search-engine-postgres/crawling/spider.py474L
-rw-r--r--search-engine-postgres/indexing/README.md51L
-rw-r--r--search-engine-postgres/indexing/__init__.py0B
-rw-r--r--search-engine-postgres/indexing/__pycache__/__init__.cpython-313.pyc161B
-rw-r--r--search-engine-postgres/indexing/__pycache__/clean.cpython-313.pyc682B
-rw-r--r--search-engine-postgres/indexing/__pycache__/page_parsing.cpython-313.pyc13616B
-rw-r--r--search-engine-postgres/indexing/__pycache__/terms.cpython-313.pyc2783B
-rw-r--r--search-engine-postgres/indexing/__pycache__/utils.cpython-313.pyc3335B
-rw-r--r--search-engine-postgres/indexing/clean.py17L
-rw-r--r--search-engine-postgres/indexing/page_parsing.py386L
-rw-r--r--search-engine-postgres/indexing/utils.py79L
-rwxr-xr-xsearch-engine-postgres/run.sh11L
-rw-r--r--search-engine-postgres/search/__pycache__/query.cpython-313.pyc3855B
-rw-r--r--search-engine-postgres/search/query.py108L
-rw-r--r--search-engine-postgres/seeds/code.txt3L
-rw-r--r--search-engine-postgres/seeds/dictionaries.txt4L
-rw-r--r--search-engine-postgres/seeds/music.txt5L
-rw-r--r--search-engine-postgres/seeds/otr.txt8L
-rw-r--r--search-engine-postgres/seeds/piracy.txt6L
-rw-r--r--search-engine-postgres/seeds/research.txt7L
-rw-r--r--search-engine-postgres/seeds/wikis.txt6L
-rwxr-xr-xsearch-engine-postgres/setup.sh12L
-rw-r--r--smol/web/__pycache__/web_search.cpython-313.pyc5299B
-rw-r--r--smol/web/search.py72L
-rw-r--r--smol/web/web_search.py84L
-rw-r--r--sqlite-tfidf/TODO.md186L
-rw-r--r--sqlite-tfidf/collection/__pycache__/prune.cpython-313.pyc1740B
-rw-r--r--sqlite-tfidf/collection/prune.py35L
-rw-r--r--sqlite-tfidf/collection/spider.py256L
-rw-r--r--sqlite-tfidf/indexing/__init__.py0B
-rw-r--r--sqlite-tfidf/indexing/__pycache__/__init__.cpython-313.pyc161B
-rw-r--r--sqlite-tfidf/indexing/__pycache__/utils.cpython-313.pyc5219B
-rw-r--r--sqlite-tfidf/indexing/idf.py22L
-rw-r--r--sqlite-tfidf/indexing/lang-detect.py41L
-rw-r--r--sqlite-tfidf/indexing/tf.py43L
-rw-r--r--sqlite-tfidf/indexing/utils.py123L
-rw-r--r--sqlite-tfidf/metrics/cosine-similarity.py26L
-rw-r--r--sqlite-tfidf/metrics/tf-idf.py121L
-rw-r--r--sqlite-tfidf/pyproject.toml8L
-rw-r--r--sqlite-tfidf/search/query.py54L
-rw-r--r--sqlite-tfidf/seeds/code.txt7L
-rw-r--r--sqlite-tfidf/seeds/dictionaries.txt4L
-rw-r--r--sqlite-tfidf/seeds/music.txt5L
-rw-r--r--sqlite-tfidf/seeds/otr.txt8L
-rw-r--r--sqlite-tfidf/seeds/piracy.txt6L
-rw-r--r--sqlite-tfidf/seeds/research.txt7L
-rw-r--r--sqlite-tfidf/seeds/wikis.txt6L
-rw-r--r--web-research/CMakeLists.txt28L
-rw-r--r--web-research/include/nlohmann/json.hpp24640L
-rw-r--r--web-research/include/openai.hpp1119L
-rw-r--r--web-research/include/researcher.hpp23L
-rw-r--r--web-research/src/main.cpp78L
-rw-r--r--web-research/src/researcher.cpp186L
-rw-r--r--web-research/src/summary.cpp59L