Skip to content

Commit

Permalink
Bug fixes
Browse files: browse the repository at this point in the history
  • Loading branch information
louislefevre committed Mar 27, 2021
1 parent d699c0c commit 3e05189
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 3 deletions.
5 changes: 4 additions & 1 deletion retrieval/models/QueryLikelihood.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -19,7 +19,10 @@ def _score_passage(self, pid: int, query_words: list[str]) -> float:
probabilities = []
for word in query_words:
probabilities.append(self._probability(pid, word))
return math.log(np.prod(probabilities))
try:
return math.log(np.prod(probabilities))
except ValueError:
return 0.0

def _probability(self, pid: int, word: str) -> float:
tf = self._index[word].get_posting(pid).freq if word in self._collection[pid] else 0
Expand Down
2 changes: 2 additions & 0 deletions retrieval/models/VectorSpace.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -29,6 +29,8 @@ def _similarity(self, pid: int, query_words: list[str]) -> float:
counter = Counter(query_words)
query_length = sum(counter.values())
for word in query_words:
if word not in self._index:
continue
tfidf = tf_idf(counter[word], query_length, self._index[word].doc_freq,
self._collection_length)
if word in vocab:
Expand Down
6 changes: 4 additions & 2 deletions start.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -30,8 +30,10 @@ def main():
data = ''
for qid, passages in results.items():
for rank, (pid, score) in enumerate(passages.items()):
data += f"{qid}\t{'A1'}\t{pid}\t{rank}\t{format(score, '.2f')}\t{model}{smoothing}\n"
write_txt(f'results/{model}.txt', data)
if rank >= 100:
break
data += f"{qid}\t{'A1'}\t{pid}\t{rank+1}\t{format(score, '.2f')}\t{model}{smoothing}\n"
write_txt(f'results/{model}{smoothing}.txt', data)


if __name__ == '__main__':
Expand Down

0 comments on commit 3e05189

Please sign in to comment.