Skip to content

Commit

Permalink
Move data/ contents
Browse files Browse the repository at this point in the history
- Move InvertedIndex.py to util/
- Move Dataset class to DatasetParser.py
  • Loading branch information
louislefevre committed Sep 20, 2021
1 parent 433b171 commit 24bd25e
Show file tree
Hide file tree
Showing 7 changed files with 26 additions and 28 deletions.
25 changes: 22 additions & 3 deletions retrieval/DatasetParser.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,30 @@
import os

from retrieval.data.Dataset import Dataset
from retrieval.data.InvertedIndex import InvertedIndex
from retrieval.util.InvertedIndex import InvertedIndex
from retrieval.models.BM25 import BM25
from retrieval.models.QueryLikelihood import QueryLikelihood
from retrieval.models.VectorSpace import VectorSpace
from retrieval.util.FileManager import read_pickle, write_pickle
from retrieval.util.FileManager import read_pickle, write_pickle, read_tsv


class Dataset:
    """In-memory view over the rows of a TSV file of query/passage pairs.

    Each row is indexed positionally by the accessors below: column 0 is
    parsed as an integer query id, column 1 as an integer passage id,
    column 2 is used as the query text and column 3 as the passage text.
    """

    def __init__(self, file_name: str):
        """Load the rows of ``file_name`` through ``read_tsv``.

        NOTE(review): the rows are iterated once per accessor call, so this
        presumably relies on ``read_tsv`` returning a re-iterable sequence
        (e.g. a list) rather than a one-shot iterator -- confirm.
        """
        self._rows = read_tsv(file_name)

    def id_mapping(self) -> dict[int, list[int]]:
        """Group passage ids by query id.

        Returns:
            A dict mapping each query id to the list of passage ids that
            appear with it, in row order (duplicates are kept).
        """
        mapping: dict[int, list[int]] = {}
        for row in self._rows:
            qid, pid = int(row[0]), int(row[1])
            # setdefault creates the bucket on first sight of qid and appends
            # in place, avoiding a temporary one-element list per row.
            mapping.setdefault(qid, []).append(pid)
        return mapping

    def queries(self) -> dict[int, str]:
        """Map each query id to its query text.

        When a query id occurs on several rows, the text from the last such
        row is the one retained.
        """
        return {int(row[0]): row[2] for row in self._rows}

    def passages(self) -> dict[int, str]:
        """Map each passage id to its passage text.

        When a passage id occurs on several rows, the text from the last such
        row is the one retained.
        """
        return {int(row[1]): row[3] for row in self._rows}


class DatasetParser:
Expand Down
21 changes: 0 additions & 21 deletions retrieval/data/Dataset.py

This file was deleted.

2 changes: 1 addition & 1 deletion retrieval/models/BM25.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from math import log

from retrieval.data.InvertedIndex import InvertedIndex
from retrieval.util.InvertedIndex import InvertedIndex
from retrieval.models.Model import Model


Expand Down
2 changes: 1 addition & 1 deletion retrieval/models/Model.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from retrieval.data.InvertedIndex import InvertedIndex
from retrieval.util.InvertedIndex import InvertedIndex
from retrieval.util.TextProcessor import clean


Expand Down
2 changes: 1 addition & 1 deletion retrieval/models/QueryLikelihood.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import numpy as np

from retrieval.data.InvertedIndex import InvertedIndex
from retrieval.util.InvertedIndex import InvertedIndex
from retrieval.models.Model import Model


Expand Down
2 changes: 1 addition & 1 deletion retrieval/models/VectorSpace.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import numpy as np
import numpy.linalg as npl

from retrieval.data.InvertedIndex import InvertedIndex
from retrieval.util.InvertedIndex import InvertedIndex
from retrieval.models.Model import Model


Expand Down
File renamed without changes.

0 comments on commit 24bd25e

Please sign in to comment.