information-retrieval

Exploration of information retrieval topics
git clone git://git.laack.co/information-retrieval.git
Log | Files | Refs

similarity.py (875B)


import heapq
import json
import os
from pathlib import Path

from sentence_transformers import SentenceTransformer

      6 EMBEDDING_PATH = 'embeddings.json'
      7 NAME = 'Qwen/Qwen3-Embedding-0.6B'
      8 PATH = Path(str(os.path.expanduser('~')) + '/models/' + NAME)
      9 
     10 doc_dict = {}
     11 
     12 with open(EMBEDDING_PATH, 'r') as f:
     13     doc_dict = json.load(f)
     14 
     15 model = SentenceTransformer(str(PATH))
     16 print('loaded model')
     17 
     18 query = input('Enter your query: ')
     19 
     20 embedded_query = model.encode([query]).tolist()[0]
     21 
     22 
     23 similarities = {}
     24 
     25 current_idx = 0
     26 for pth in doc_dict:
     27     document_embedding = doc_dict[pth]
     28     similarity = model.similarity(document_embedding, embedded_query)
     29     similarities[pth] = similarity
     30 
     31 
     32 k = 10
     33 count = 0
     34 
     35 for path in sorted(similarities, key=similarities.get, reverse=True):
     36     if count == k:
     37         break
     38     print(f"Location: {path}\t\t\tScore {similarities[path][0][0]}")
     39     count += 1