Skip to content

Commit

Permalink
refactor and reorganize test data
Browse files Browse the repository at this point in the history
  • Loading branch information
geritwagner committed Mar 12, 2024
1 parent d675e9a commit 35b47db
Show file tree
Hide file tree
Showing 38 changed files with 118 additions and 147 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ repos:
- id: trailing-whitespace
exclude: colrev/template/ops/status.txt|commit_report_details.txt|commit_report_header.txt|pdf_get_man_mail.txt|^tests/data|docs/source/resources/extensions_index/
- id: end-of-file-fixer
exclude: ^tests/data/
exclude: ^tests/2_loader/data/|^tests/data
- id: check-docstring-first
- id: check-json
- id: check-yaml
Expand Down
2 changes: 1 addition & 1 deletion colrev/loader/bib.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from colrev.constants import Fields
from colrev.constants import FieldValues

if TYPE_CHECKING:
if TYPE_CHECKING: # pragma: no cover
import typing
from typing import Optional
from typing import Callable
Expand Down
2 changes: 1 addition & 1 deletion colrev/loader/enl.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

import colrev.loader.loader

if TYPE_CHECKING:
if TYPE_CHECKING: # pragma: no cover
import typing
from typing import Callable

Expand Down
32 changes: 17 additions & 15 deletions colrev/loader/load_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@
import colrev.loader.ris
import colrev.loader.table

if TYPE_CHECKING:
if TYPE_CHECKING: # pragma: no cover
from typing import Callable

# pylint: disable=too-many-arguments
Expand Down Expand Up @@ -149,12 +149,14 @@ def load( # type: ignore
else:
raise NotImplementedError

kw["filename"] = filename
kw["entrytype_setter"] = entrytype_setter
kw["field_mapper"] = field_mapper
kw["id_labeler"] = id_labeler
kw["unique_id_field"] = unique_id_field
return parser(**kw).load()
return parser(
filename=filename,
entrytype_setter=entrytype_setter,
field_mapper=field_mapper,
id_labeler=id_labeler,
unique_id_field=unique_id_field,
**kw,
).load()


def loads( # type: ignore
Expand Down Expand Up @@ -187,11 +189,11 @@ def loads( # type: ignore
temp_file.write(load_string.encode("utf-8"))
temp_file_path = Path(temp_file.name)

kw["filename"] = temp_file_path
kw["entrytype_setter"] = entrytype_setter
kw["field_mapper"] = field_mapper
kw["id_labeler"] = id_labeler
kw["unique_id_field"] = unique_id_field

# return parser(**kw).load()
return load(**kw)
return load(
filename=temp_file_path,
entrytype_setter=entrytype_setter,
field_mapper=field_mapper,
id_labeler=id_labeler,
unique_id_field=unique_id_field,
**kw,
)
2 changes: 1 addition & 1 deletion colrev/loader/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def _set_fields(self, records_dict: dict) -> None:
def load_records_list(self) -> list:
"""The load_records_list must be implemented by the inheriting class
(e.g., for ris/bib/...)"""
raise NotImplementedError
raise NotImplementedError # pragma: no cover

def load(self) -> dict:
"""Load table entries from the source"""
Expand Down
2 changes: 1 addition & 1 deletion colrev/loader/md.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import colrev.review_manager
from colrev.constants import Fields

if TYPE_CHECKING:
if TYPE_CHECKING: # pragma: no cover
from typing import Callable

# pylint: disable=too-few-public-methods
Expand Down
2 changes: 1 addition & 1 deletion colrev/loader/nbib.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

import colrev.loader.loader

if TYPE_CHECKING:
if TYPE_CHECKING: # pragma: no cover
import typing
from typing import Callable

Expand Down
50 changes: 25 additions & 25 deletions colrev/loader/ris.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

import colrev.loader.loader

if TYPE_CHECKING:
if TYPE_CHECKING: # pragma: no cover
import typing
from typing import Callable

Expand Down Expand Up @@ -122,27 +122,27 @@ def load_records_list(self, *, content: str = "") -> list:

return records_list

def apply_ris_fixes(self) -> None:
"""Fix common defects in RIS files"""

# Error to fix: for lists of keywords, each line should start with the KW tag

with open(self.filename, encoding="UTF-8") as file:
lines = [line.rstrip("\n") for line in file]
# add missing start tags in lists (like KW)
processing_tag = ""
for i, line in enumerate(lines):
tag_match = re.match(r"^[A-Z][A-Z0-9]+(\s+)-", line) # |^ER\s?|^EF\s?
if tag_match:
processing_tag = tag_match.group()
elif line == "":
processing_tag = ""
continue
elif processing_tag == "":
continue
else:
lines[i] = f"{processing_tag} {line}"

with open(self.filename, "w", encoding="utf-8") as file:
for line in lines:
file.write(f"{line}\n")
# def apply_ris_fixes(self) -> None:
# """Fix common defects in RIS files"""

# # Error to fix: for lists of keywords, each line should start with the KW tag

# with open(self.filename, encoding="UTF-8") as file:
# lines = [line.rstrip("\n") for line in file]
# # add missing start tags in lists (like KW)
# processing_tag = ""
# for i, line in enumerate(lines):
# tag_match = re.match(r"^[A-Z][A-Z0-9]+(\s+)-", line) # |^ER\s?|^EF\s?
# if tag_match:
# processing_tag = tag_match.group()
# elif line == "":
# processing_tag = ""
# continue
# elif processing_tag == "":
# continue
# else:
# lines[i] = f"{processing_tag} {line}"

# with open(self.filename, "w", encoding="utf-8") as file:
# for line in lines:
# file.write(f"{line}\n")
2 changes: 1 addition & 1 deletion colrev/loader/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import colrev.exceptions as colrev_exceptions
import colrev.loader.loader

if TYPE_CHECKING:
if TYPE_CHECKING: # pragma: no cover
from typing import Callable

# pylint: disable=too-few-public-methods
Expand Down
2 changes: 2 additions & 0 deletions colrev/writer/write_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@


def write_file(records_dict: dict, filename: Path, **kw) -> dict: # type: ignore
"""Write a file (BiBTex, RIS, or other) from a dictionary of records."""
if filename.suffix == ".bib":
writer = colrev.writer.bib.write_file # type: ignore

Expand All @@ -24,6 +25,7 @@ def write_file(records_dict: dict, filename: Path, **kw) -> dict: # type: ignor


def to_string(*, records_dict: dict, implementation: str, **kw) -> str: # type: ignore
"""Write a string (BiBTex, RIS, or other) from a dictionary of records."""
if implementation == "bib":
writer = colrev.writer.bib.to_string # type: ignore

Expand Down
6 changes: 3 additions & 3 deletions tests/0_core/colrev_pdf_id_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@
"pdf_path, expected_result",
[
(
Path("WagnerLukyanenkoParEtAl2022.pdf"),
Path("data/WagnerLukyanenkoParEtAl2022.pdf"),
"cpid2:87ffff1fffffff1ff47fff7fe0000307e000071fffffff07f1603f0ffd67fffff7ffffff"
"e0000007e0000007e0000007fc6d59b7e3ffffffe03fffffffffffffe1ff0007e0000007"
"e0000007e00080ffe0008007e0000007e0000007e0000007e0008007e000fdffe0008fff"
"e000000ff00087ffffffffffffffffffffffffff",
),
(Path("zero-size-pdf.pdf"), "InvalidPDFException"),
(Path("data/zero-size-pdf.pdf"), "InvalidPDFException"),
],
)
def test_pdf_hash( # type: ignore
Expand Down Expand Up @@ -54,7 +54,7 @@ def test_open_pdf_invalid_path(helpers, tmp_path): # type: ignore
"""Test the open pdf with invalid path"""
os.chdir(tmp_path)

pdf_path = Path("WagnerLukyanenkoParEtAl2022.pdf")
pdf_path = Path("data/WagnerLukyanenkoParEtAl2022.pdf")
helpers.retrieve_test_file(
source=pdf_path,
target=pdf_path,
Expand Down
13 changes: 6 additions & 7 deletions tests/0_core/record_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1272,12 +1272,12 @@ def test_extract_text_by_page( # type: ignore
) -> None:
"""Test record.extract_text_by_page()"""
helpers.retrieve_test_file(
source=Path("WagnerLukyanenkoParEtAl2022.pdf"),
source=Path("data/WagnerLukyanenkoParEtAl2022.pdf"),
target=Path("data/pdfs/WagnerLukyanenkoParEtAl2022.pdf"),
)

expected = (
helpers.test_data_path / Path("WagnerLukyanenkoParEtAl2022_content.txt")
helpers.test_data_path / Path("data/WagnerLukyanenkoParEtAl2022_content.txt")
).read_text(encoding="utf-8")
actual = record_with_pdf.extract_text_by_page(pages=[0])
actual = actual.rstrip()
Expand All @@ -1287,7 +1287,7 @@ def test_extract_text_by_page( # type: ignore
def test_set_nr_pages_in_pdf(helpers, record_with_pdf: colrev.record.Record) -> None: # type: ignore
"""Test record.set_pages_in_pdf()"""
helpers.retrieve_test_file(
source=Path("WagnerLukyanenkoParEtAl2022.pdf"),
source=Path("data/WagnerLukyanenkoParEtAl2022.pdf"),
target=Path("data/pdfs/WagnerLukyanenkoParEtAl2022.pdf"),
)
expected = 18
Expand All @@ -1299,12 +1299,12 @@ def test_set_nr_pages_in_pdf(helpers, record_with_pdf: colrev.record.Record) ->
def test_set_text_from_pdf(helpers, record_with_pdf: colrev.record.Record) -> None: # type: ignore
"""Test record.set_text_from_pdf()"""
helpers.retrieve_test_file(
source=Path("WagnerLukyanenkoParEtAl2022.pdf"),
source=Path("data/WagnerLukyanenkoParEtAl2022.pdf"),
target=Path("data/pdfs/WagnerLukyanenkoParEtAl2022.pdf"),
)

expected = (
(helpers.test_data_path / Path("WagnerLukyanenkoParEtAl2022_content.txt"))
(helpers.test_data_path / Path("data/WagnerLukyanenkoParEtAl2022_content.txt"))
.read_text(encoding="utf-8")
.replace("\n", " ")
)
Expand Down Expand Up @@ -1448,9 +1448,8 @@ def test_get_pdf_hash(helpers) -> None: # type: ignore
data={"file": Path("WagnerLukyanenkoParEtAl2022.pdf")}
).get_pdf_hash(page_nr=1)

pdf_path = Path("WagnerLukyanenkoParEtAl2022.pdf")
helpers.retrieve_test_file(
source=pdf_path,
source=Path("data/WagnerLukyanenkoParEtAl2022.pdf"),
target=pdf_path,
)
pdf_hash = colrev.record.PrepRecord(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,12 @@ def test_load(tmp_path, helpers) -> None: # type: ignore
)

helpers.retrieve_test_file(
source=Path("load_utils/") / Path("bib_tests.bib"),
target=Path("data/search/") / Path("bib_tests.bib"),
source=Path("2_loader/data/bib_data.bib"),
target=Path("data/search/bib_data.bib"),
)

records = colrev.loader.load_utils.load(
filename=Path("data/search/bib_tests.bib"),
filename=Path("data/search/bib_data.bib"),
)

assert records == {
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ AU - Smith, Tom
AU - Hunter, Shawn
OT - Keyword 1
OT - Keyword 2
OT - Keyword 3
JT - Journal Name
SO - v10 n1 p1-10 2000
AID - http://dx.doi.org/10.1000/123456789
Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -100,19 +100,19 @@ def field_mapper(record_dict: dict) -> None:
record_dict[key] = str(value)

helpers.retrieve_test_file(
source=Path("load_utils/") / Path("ais.txt"),
target=Path("ais.txt"),
source=Path("2_loader/data/enl_data.enl"),
target=Path("enl_data.enl"),
)

records = colrev.loader.load_utils.load(
filename=Path("ais.txt"),
filename=Path("enl_data.enl"),
unique_id_field="INCREMENTAL",
entrytype_setter=entrytype_setter,
field_mapper=field_mapper,
)

expected = (
helpers.test_data_path / Path("load_utils/") / Path("ais_expected.bib")
helpers.test_data_path / Path("2_loader/data/enl_data_expected.bib")
).read_text(encoding="utf-8")

actual = to_string(records_dict=records, implementation="bib")
Expand Down
File renamed without changes.
12 changes: 6 additions & 6 deletions tests/2_ops/load_utils_test.py → tests/2_loader/loader_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@ def test_load(tmp_path, helpers) -> None: # type: ignore

with pytest.raises(colrev_exceptions.ImportException):
colrev.loader.load_utils.load(
filename=Path("data/search/bib_tests.bib"),
filename=Path("data/search/bib_data.bib"),
logger=logging.getLogger(__name__),
)
helpers.retrieve_test_file(
source=Path("load_utils/") / Path("bib_tests.bib"),
target=Path("data/search/") / Path("bib_tests.xy"),
source=Path("2_loader/data/bib_data.bib"),
target=Path("data/search/bib_tests.xy"),
)
with pytest.raises(NotImplementedError):
colrev.loader.load_utils.load(
Expand All @@ -32,12 +32,12 @@ def test_load(tmp_path, helpers) -> None: # type: ignore
)

helpers.retrieve_test_file(
source=Path("load_utils/") / Path("bib_tests.bib"),
target=Path("data/search/") / Path("bib_tests.bib"),
source=Path("2_loader/data/bib_data.bib"),
target=Path("data/search/bib_data.bib"),
)

colrev.loader.load_utils.load(
filename=Path("data/search/bib_tests.bib"), logger=logging.getLogger(__name__)
filename=Path("data/search/bib_data.bib"), logger=logging.getLogger(__name__)
)

with pytest.raises(NotImplementedError):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,15 @@ def test_load_md( # type: ignore

search_source = colrev.settings.SearchSource(
endpoint="colrev.unknown_source",
filename=Path("data/search/references.md"),
filename=Path("data/search/md_data.md"),
search_type=colrev.settings.SearchType.OTHER,
search_parameters={},
comment="",
)

helpers.retrieve_test_file(
source=Path("load_utils/") / Path("references.md"),
target=Path("data/search/") / Path("references.md"),
source=Path("2_loader/data/md_data.md"),
target=Path("data/search/md_data.md"),
)

records = colrev.loader.load_utils.load(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,22 +87,22 @@ def field_mapper(record_dict: dict) -> None:
)

helpers.retrieve_test_file(
source=Path("load_utils/nbib_test.nbib"),
target=Path("test.nbib"),
source=Path("2_loader/data/nbib_data.nbib"),
target=Path("bib_data.nbib"),
)

entries = colrev.loader.load_utils.load(
filename=Path("test.nbib"),
filename=Path("bib_data.nbib"),
unique_id_field="INCREMENTAL",
entrytype_setter=entrytype_setter,
field_mapper=field_mapper,
)

assert len(entries) == 1
print(entries)

assert entries["000001"][Fields.TITLE] == "Paper title"
assert entries["000001"][Fields.AUTHOR] == "Smith, Tom and Hunter, Shawn"
assert entries["000001"][Fields.KEYWORDS] == "Keyword 1, Keyword 2"
assert entries["000001"][Fields.KEYWORDS] == "Keyword 1, Keyword 2, Keyword 3"
assert entries["000001"][Fields.JOURNAL] == "Journal Name"
assert entries["000001"][Fields.DOI] == "http://dx.doi.org/10.1000/123456789"
assert entries["000001"]["eric_id"] == "EJ1131633"
Expand Down
Loading

0 comments on commit 35b47db

Please sign in to comment.