information-retrieval

Exploration of information retrieval topics
git clone git://git.laack.co/information-retrieval.git
Log | Files | Refs

cosine-similarity.py (465B)


      1 import math
      2 
      3 def magnitude(v):
      4     sq = 0
      5     for i in range(len(v)):
      6         sq += v[i] ** 2
      7     return math.sqrt(sq)
      8 
      9 def dp(A,B):
     10     result = 0
     11     for i in range(len(A)):
     12         result += A[i] * B[i]
     13     return result
     14 
     15 def cosine_similarity(A,B):
     16     
     17     dp_AB = dp(A,B)
     18     a_l = magnitude(A)
     19     b_l = magnitude(B)
     20     return dp_AB / (a_l * b_l)
     21 
     22 
     23 if __name__ == "__main__":
     24     A = [0, 4873, 823]
     25     B = [0, 487, 48988]
     26     print(cosine_similarity(A,B))