Skip to content

Commit

Permalink
Migrating to gensim >= 4.0.0
Browse files (browse the repository at this point in the history)
  • Loading branch information
silviatti committed Jul 26, 2021
1 parent 4939c09 commit 057418e
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 7 deletions.
4 changes: 2 additions & 2 deletions octis/evaluation_metrics/coherence_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def score(self, model_output):
if len(topic) > 0:
local_simi = []
for w1, w2 in itertools.combinations(topic[0:self.topk], 2):
if w1 in self._wv.vocab and w2 in self._wv.vocab:
if w1 in self._wv.key_to_index.keys() and w2 in self._wv.key_to_index.keys():
local_simi.append(self._wv.similarity(w1, w2))
arrays.append(np.mean(local_simi))
return np.mean(arrays)
Expand Down Expand Up @@ -167,7 +167,7 @@ def score(self, model_output):
for topic in topics:
topic_coherence = 0
for w1, w2 in itertools.combinations(topic, 2):
if w1 in self._wv.vocab and w2 in self._wv.vocab:
if w1 in self._wv.key_to_index.keys() and w2 in self._wv.key_to_index.keys():
distance = spatial.distance.cosine(self._wv.__getitem__(w1), self._wv.__getitem__(w2))
topic_coherence += distance - 1
count = count + 1
Expand Down
6 changes: 3 additions & 3 deletions octis/evaluation_metrics/similarity_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def score(self, model_output):
sim = 0
for word1 in list1[:self.topk]:
for word2 in list2[:self.topk]:
if word1 in self.wv.wv.vocab and word2 in self.wv.wv.vocab:
if word1 in self.wv.key_to_index.keys() and word2 in self.wv.key_to_index.keys():
sim = sim + self.wv.similarity(word1, word2)
word_counts = word_counts + 1
sim = sim / word_counts
Expand Down Expand Up @@ -134,11 +134,11 @@ def score(self, model_output):
centroid2 = np.zeros(self.wv.vector_size)
count1, count2 = 0, 0
for word1 in list1[:self.topk]:
if word1 in self.wv.wv.vocab:
if word1 in self.wv.key_to_index.keys():
centroid1 = centroid1 + self.wv[word1]
count1 += 1
for word2 in list2[:self.topk]:
if word2 in self.wv.wv.vocab:
if word2 in self.wv.key_to_index.keys():
centroid2 = centroid2 + self.wv[word2]
count2 += 1
centroid1 = centroid1 / count1
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
gensim==3.8.3
gensim>=4.0.0
nltk
pandas
spacy
Expand Down
8 changes: 7 additions & 1 deletion tests/test_evaluation_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from octis.evaluation_metrics.diversity_metrics import TopicDiversity, InvertedRBO, KLDivergence, LogOddsRatio, \
WordEmbeddingsInvertedRBO
from octis.evaluation_metrics.similarity_metrics import WordEmbeddingsRBOMatch, PairwiseJaccardSimilarity, RBO, \
WordEmbeddingsCentroidSimilarity
WordEmbeddingsCentroidSimilarity, WordEmbeddingsPairwiseSimilarity

from octis.evaluation_metrics.coherence_metrics import *
from octis.dataset.dataset import Dataset
Expand Down Expand Up @@ -135,6 +135,12 @@ def test_similarity_measures(dataset, model_output):
assert type(score) == np.float64 or type(score) == float
assert 0 <= score <= 1

metric = WordEmbeddingsPairwiseSimilarity(topk=10)
score = metric.score(model_output)
assert type(score) == np.float64 or type(score) == float
assert 0 <= score <= 1



def test_irbo(dataset, model_output):
metric = InvertedRBO(topk=10)
Expand Down

0 comments on commit 057418e

Please sign in to comment.