refactor and reorganize test data

olgagirona · Mar 12, 2024 · 35b47db · 35b47db
1 parent d675e9a
commit 35b47db
Show file tree

Hide file tree

Showing 38 changed files with 118 additions and 147 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -17,7 +17,7 @@ repos:
     -   id: trailing-whitespace
         exclude: colrev/template/ops/status.txt|commit_report_details.txt|commit_report_header.txt|pdf_get_man_mail.txt|^tests/data|docs/source/resources/extensions_index/
     -   id: end-of-file-fixer
-        exclude: ^tests/data/
+        exclude: ^tests/2_loader/data/|^tests/data
     -   id: check-docstring-first
     -   id: check-json
     -   id: check-yaml

diff --git a/colrev/loader/bib.py b/colrev/loader/bib.py
@@ -21,7 +21,7 @@
 from colrev.constants import Fields
 from colrev.constants import FieldValues
 
-if TYPE_CHECKING:
+if TYPE_CHECKING:  # pragma: no cover
     import typing
     from typing import Optional
     from typing import Callable

diff --git a/colrev/loader/enl.py b/colrev/loader/enl.py
@@ -9,7 +9,7 @@
 
 import colrev.loader.loader
 
-if TYPE_CHECKING:
+if TYPE_CHECKING:  # pragma: no cover
     import typing
     from typing import Callable
 

diff --git a/colrev/loader/load_utils.py b/colrev/loader/load_utils.py
@@ -111,7 +111,7 @@
 import colrev.loader.ris
 import colrev.loader.table
 
-if TYPE_CHECKING:
+if TYPE_CHECKING:  # pragma: no cover
     from typing import Callable
 
 # pylint: disable=too-many-arguments
@@ -149,12 +149,14 @@ def load(  # type: ignore
     else:
         raise NotImplementedError
 
-    kw["filename"] = filename
-    kw["entrytype_setter"] = entrytype_setter
-    kw["field_mapper"] = field_mapper
-    kw["id_labeler"] = id_labeler
-    kw["unique_id_field"] = unique_id_field
-    return parser(**kw).load()
+    return parser(
+        filename=filename,
+        entrytype_setter=entrytype_setter,
+        field_mapper=field_mapper,
+        id_labeler=id_labeler,
+        unique_id_field=unique_id_field,
+        **kw,
+    ).load()
 
 
 def loads(  # type: ignore
@@ -187,11 +189,11 @@ def loads(  # type: ignore
         temp_file.write(load_string.encode("utf-8"))
         temp_file_path = Path(temp_file.name)
 
-    kw["filename"] = temp_file_path
-    kw["entrytype_setter"] = entrytype_setter
-    kw["field_mapper"] = field_mapper
-    kw["id_labeler"] = id_labeler
-    kw["unique_id_field"] = unique_id_field
-
-    # return parser(**kw).load()
-    return load(**kw)
+    return load(
+        filename=temp_file_path,
+        entrytype_setter=entrytype_setter,
+        field_mapper=field_mapper,
+        id_labeler=id_labeler,
+        unique_id_field=unique_id_field,
+        **kw,
+    )
diff --git a/colrev/loader/loader.py b/colrev/loader/loader.py
@@ -90,7 +90,7 @@ def _set_fields(self, records_dict: dict) -> None:
     def load_records_list(self) -> list:
         """The load_records_list must be implemented by the inheriting class
         (e.g., for ris/bib/...)"""
-        raise NotImplementedError
+        raise NotImplementedError  # pragma: no cover
 
     def load(self) -> dict:
         """Load table entries from the source"""

diff --git a/colrev/loader/md.py b/colrev/loader/md.py
@@ -13,7 +13,7 @@
 import colrev.review_manager
 from colrev.constants import Fields
 
-if TYPE_CHECKING:
+if TYPE_CHECKING:  # pragma: no cover
     from typing import Callable
 
 # pylint: disable=too-few-public-methods

diff --git a/colrev/loader/nbib.py b/colrev/loader/nbib.py
@@ -9,7 +9,7 @@
 
 import colrev.loader.loader
 
-if TYPE_CHECKING:
+if TYPE_CHECKING:  # pragma: no cover
     import typing
     from typing import Callable
 

diff --git a/colrev/loader/ris.py b/colrev/loader/ris.py
@@ -9,7 +9,7 @@
 
 import colrev.loader.loader
 
-if TYPE_CHECKING:
+if TYPE_CHECKING:  # pragma: no cover
     import typing
     from typing import Callable
 
@@ -122,27 +122,27 @@ def load_records_list(self, *, content: str = "") -> list:
 
         return records_list
 
-    def apply_ris_fixes(self) -> None:
-        """Fix common defects in RIS files"""
-
-        # Error to fix: for lists of keywords, each line should start with the KW tag
-
-        with open(self.filename, encoding="UTF-8") as file:
-            lines = [line.rstrip("\n") for line in file]
-            # add missing start tags in lists (like KW)
-            processing_tag = ""
-            for i, line in enumerate(lines):
-                tag_match = re.match(r"^[A-Z][A-Z0-9]+(\s+)-", line)  # |^ER\s?|^EF\s?
-                if tag_match:
-                    processing_tag = tag_match.group()
-                elif line == "":
-                    processing_tag = ""
-                    continue
-                elif processing_tag == "":
-                    continue
-                else:
-                    lines[i] = f"{processing_tag} {line}"
-
-        with open(self.filename, "w", encoding="utf-8") as file:
-            for line in lines:
-                file.write(f"{line}\n")
+    # def apply_ris_fixes(self) -> None:
+    #     """Fix common defects in RIS files"""
+
+    #     # Error to fix: for lists of keywords, each line should start with the KW tag
+
+    #     with open(self.filename, encoding="UTF-8") as file:
+    #         lines = [line.rstrip("\n") for line in file]
+    #         # add missing start tags in lists (like KW)
+    #         processing_tag = ""
+    #         for i, line in enumerate(lines):
+    #             tag_match = re.match(r"^[A-Z][A-Z0-9]+(\s+)-", line)  # |^ER\s?|^EF\s?
+    #             if tag_match:
+    #                 processing_tag = tag_match.group()
+    #             elif line == "":
+    #                 processing_tag = ""
+    #                 continue
+    #             elif processing_tag == "":
+    #                 continue
+    #             else:
+    #                 lines[i] = f"{processing_tag} {line}"
+
+    #     with open(self.filename, "w", encoding="utf-8") as file:
+    #         for line in lines:
+    #             file.write(f"{line}\n")
diff --git a/colrev/loader/table.py b/colrev/loader/table.py
@@ -11,7 +11,7 @@
 import colrev.exceptions as colrev_exceptions
 import colrev.loader.loader
 
-if TYPE_CHECKING:
+if TYPE_CHECKING:  # pragma: no cover
     from typing import Callable
 
 # pylint: disable=too-few-public-methods

diff --git a/colrev/writer/write_utils.py b/colrev/writer/write_utils.py
@@ -14,6 +14,7 @@
 
 
 def write_file(records_dict: dict, filename: Path, **kw) -> dict:  # type: ignore
+    """Write a file (BiBTex, RIS, or other) from a dictionary of records."""
     if filename.suffix == ".bib":
         writer = colrev.writer.bib.write_file  # type: ignore
 
@@ -24,6 +25,7 @@ def write_file(records_dict: dict, filename: Path, **kw) -> dict:  # type: ignor
 
 
 def to_string(*, records_dict: dict, implementation: str, **kw) -> str:  # type: ignore
+    """Write a string (BiBTex, RIS, or other) from a dictionary of records."""
     if implementation == "bib":
         writer = colrev.writer.bib.to_string  # type: ignore
 

diff --git a/tests/0_core/colrev_pdf_id_test.py b/tests/0_core/colrev_pdf_id_test.py
@@ -17,13 +17,13 @@
     "pdf_path, expected_result",
     [
         (
-            Path("WagnerLukyanenkoParEtAl2022.pdf"),
+            Path("data/WagnerLukyanenkoParEtAl2022.pdf"),
             "cpid2:87ffff1fffffff1ff47fff7fe0000307e000071fffffff07f1603f0ffd67fffff7ffffff"
             "e0000007e0000007e0000007fc6d59b7e3ffffffe03fffffffffffffe1ff0007e0000007"
             "e0000007e00080ffe0008007e0000007e0000007e0000007e0008007e000fdffe0008fff"
             "e000000ff00087ffffffffffffffffffffffffff",
         ),
-        (Path("zero-size-pdf.pdf"), "InvalidPDFException"),
+        (Path("data/zero-size-pdf.pdf"), "InvalidPDFException"),
     ],
 )
 def test_pdf_hash(  # type: ignore
@@ -54,7 +54,7 @@ def test_open_pdf_invalid_path(helpers, tmp_path):  # type: ignore
     """Test the open pdf with invalid path"""
     os.chdir(tmp_path)
 
-    pdf_path = Path("WagnerLukyanenkoParEtAl2022.pdf")
+    pdf_path = Path("data/WagnerLukyanenkoParEtAl2022.pdf")
     helpers.retrieve_test_file(
         source=pdf_path,
         target=pdf_path,

diff --git a/tests/0_core/record_test.py b/tests/0_core/record_test.py
@@ -1272,12 +1272,12 @@ def test_extract_text_by_page(  # type: ignore
 ) -> None:
     """Test record.extract_text_by_page()"""
     helpers.retrieve_test_file(
-        source=Path("WagnerLukyanenkoParEtAl2022.pdf"),
+        source=Path("data/WagnerLukyanenkoParEtAl2022.pdf"),
         target=Path("data/pdfs/WagnerLukyanenkoParEtAl2022.pdf"),
     )
 
     expected = (
-        helpers.test_data_path / Path("WagnerLukyanenkoParEtAl2022_content.txt")
+        helpers.test_data_path / Path("data/WagnerLukyanenkoParEtAl2022_content.txt")
     ).read_text(encoding="utf-8")
     actual = record_with_pdf.extract_text_by_page(pages=[0])
     actual = actual.rstrip()
@@ -1287,7 +1287,7 @@ def test_extract_text_by_page(  # type: ignore
 def test_set_nr_pages_in_pdf(helpers, record_with_pdf: colrev.record.Record) -> None:  # type: ignore
     """Test record.set_pages_in_pdf()"""
     helpers.retrieve_test_file(
-        source=Path("WagnerLukyanenkoParEtAl2022.pdf"),
+        source=Path("data/WagnerLukyanenkoParEtAl2022.pdf"),
         target=Path("data/pdfs/WagnerLukyanenkoParEtAl2022.pdf"),
     )
     expected = 18
@@ -1299,12 +1299,12 @@ def test_set_nr_pages_in_pdf(helpers, record_with_pdf: colrev.record.Record) ->
 def test_set_text_from_pdf(helpers, record_with_pdf: colrev.record.Record) -> None:  # type: ignore
     """Test record.set_text_from_pdf()"""
     helpers.retrieve_test_file(
-        source=Path("WagnerLukyanenkoParEtAl2022.pdf"),
+        source=Path("data/WagnerLukyanenkoParEtAl2022.pdf"),
         target=Path("data/pdfs/WagnerLukyanenkoParEtAl2022.pdf"),
     )
 
     expected = (
-        (helpers.test_data_path / Path("WagnerLukyanenkoParEtAl2022_content.txt"))
+        (helpers.test_data_path / Path("data/WagnerLukyanenkoParEtAl2022_content.txt"))
         .read_text(encoding="utf-8")
         .replace("\n", " ")
     )
@@ -1448,9 +1448,8 @@ def test_get_pdf_hash(helpers) -> None:  # type: ignore
             data={"file": Path("WagnerLukyanenkoParEtAl2022.pdf")}
         ).get_pdf_hash(page_nr=1)
 
-    pdf_path = Path("WagnerLukyanenkoParEtAl2022.pdf")
     helpers.retrieve_test_file(
-        source=pdf_path,
+        source=Path("data/WagnerLukyanenkoParEtAl2022.pdf"),
         target=pdf_path,
     )
     pdf_hash = colrev.record.PrepRecord(

diff --git a/tests/2_ops/load_utils_bib_test.py → tests/2_loader/bib_test.py b/tests/2_ops/load_utils_bib_test.py → tests/2_loader/bib_test.py
@@ -35,12 +35,12 @@ def test_load(tmp_path, helpers) -> None:  # type: ignore
         )
 
     helpers.retrieve_test_file(
-        source=Path("load_utils/") / Path("bib_tests.bib"),
-        target=Path("data/search/") / Path("bib_tests.bib"),
+        source=Path("2_loader/data/bib_data.bib"),
+        target=Path("data/search/bib_data.bib"),
     )
 
     records = colrev.loader.load_utils.load(
-        filename=Path("data/search/bib_tests.bib"),
+        filename=Path("data/search/bib_data.bib"),
     )
 
     assert records == {

diff --git a/tests/data/load_utils/bib_tests.bib → tests/2_loader/data/bib_data.bib b/tests/data/load_utils/bib_tests.bib → tests/2_loader/data/bib_data.bib
diff --git a/tests/data/load_utils/table.csv → tests/2_loader/data/csv_data.csv b/tests/data/load_utils/table.csv → tests/2_loader/data/csv_data.csv
diff --git a/tests/data/load_utils/ais.txt → tests/2_loader/data/enl_data.enl b/tests/data/load_utils/ais.txt → tests/2_loader/data/enl_data.enl
diff --git a/tests/data/load_utils/ais_expected.bib → tests/2_loader/data/enl_data_expected.bib b/tests/data/load_utils/ais_expected.bib → tests/2_loader/data/enl_data_expected.bib
diff --git a/tests/data/load_utils/references.md → tests/2_loader/data/md_data.md b/tests/data/load_utils/references.md → tests/2_loader/data/md_data.md
diff --git a/tests/data/load_utils/nbib_test.nbib → tests/2_loader/data/nbib_data.nbib b/tests/data/load_utils/nbib_test.nbib → tests/2_loader/data/nbib_data.nbib
@@ -5,6 +5,7 @@ AU  - Smith, Tom
 AU  - Hunter, Shawn
 OT  - Keyword 1
 OT  - Keyword 2
+OT  - Keyword 3
 JT  - Journal Name
 SO  - v10 n1 p1-10 2000
 AID - http://dx.doi.org/10.1000/123456789

diff --git a/tests/data/load_utils/ris_test.ris → tests/2_loader/data/ris_data.ris b/tests/data/load_utils/ris_test.ris → tests/2_loader/data/ris_data.ris
diff --git a/tests/data/load_utils/table.xlsx → tests/2_loader/data/xlsx_data.xlsx b/tests/data/load_utils/table.xlsx → tests/2_loader/data/xlsx_data.xlsx
diff --git a/tests/2_ops/load_utils_enl_test.py → tests/2_loader/enl_test.py b/tests/2_ops/load_utils_enl_test.py → tests/2_loader/enl_test.py
@@ -100,19 +100,19 @@ def field_mapper(record_dict: dict) -> None:
             record_dict[key] = str(value)
 
     helpers.retrieve_test_file(
-        source=Path("load_utils/") / Path("ais.txt"),
-        target=Path("ais.txt"),
+        source=Path("2_loader/data/enl_data.enl"),
+        target=Path("enl_data.enl"),
     )
 
     records = colrev.loader.load_utils.load(
-        filename=Path("ais.txt"),
+        filename=Path("enl_data.enl"),
         unique_id_field="INCREMENTAL",
         entrytype_setter=entrytype_setter,
         field_mapper=field_mapper,
     )
 
     expected = (
-        helpers.test_data_path / Path("load_utils/") / Path("ais_expected.bib")
+        helpers.test_data_path / Path("2_loader/data/enl_data_expected.bib")
     ).read_text(encoding="utf-8")
 
     actual = to_string(records_dict=records, implementation="bib")

diff --git a/tests/2_ops/load_utils_formatter_test.py → tests/2_loader/formatter_test.py b/tests/2_ops/load_utils_formatter_test.py → tests/2_loader/formatter_test.py
diff --git a/tests/2_ops/load_utils_test.py → tests/2_loader/loader_test.py b/tests/2_ops/load_utils_test.py → tests/2_loader/loader_test.py
@@ -18,12 +18,12 @@ def test_load(tmp_path, helpers) -> None:  # type: ignore
 
     with pytest.raises(colrev_exceptions.ImportException):
         colrev.loader.load_utils.load(
-            filename=Path("data/search/bib_tests.bib"),
+            filename=Path("data/search/bib_data.bib"),
             logger=logging.getLogger(__name__),
         )
     helpers.retrieve_test_file(
-        source=Path("load_utils/") / Path("bib_tests.bib"),
-        target=Path("data/search/") / Path("bib_tests.xy"),
+        source=Path("2_loader/data/bib_data.bib"),
+        target=Path("data/search/bib_tests.xy"),
     )
     with pytest.raises(NotImplementedError):
         colrev.loader.load_utils.load(
@@ -32,12 +32,12 @@ def test_load(tmp_path, helpers) -> None:  # type: ignore
         )
 
     helpers.retrieve_test_file(
-        source=Path("load_utils/") / Path("bib_tests.bib"),
-        target=Path("data/search/") / Path("bib_tests.bib"),
+        source=Path("2_loader/data/bib_data.bib"),
+        target=Path("data/search/bib_data.bib"),
     )
 
     colrev.loader.load_utils.load(
-        filename=Path("data/search/bib_tests.bib"), logger=logging.getLogger(__name__)
+        filename=Path("data/search/bib_data.bib"), logger=logging.getLogger(__name__)
     )
 
     with pytest.raises(NotImplementedError):

diff --git a/tests/2_ops/load_utils_md_test.py → tests/2_loader/md_test.py b/tests/2_ops/load_utils_md_test.py → tests/2_loader/md_test.py
@@ -32,15 +32,15 @@ def test_load_md(  # type: ignore
 
     search_source = colrev.settings.SearchSource(
         endpoint="colrev.unknown_source",
-        filename=Path("data/search/references.md"),
+        filename=Path("data/search/md_data.md"),
         search_type=colrev.settings.SearchType.OTHER,
         search_parameters={},
         comment="",
     )
 
     helpers.retrieve_test_file(
-        source=Path("load_utils/") / Path("references.md"),
-        target=Path("data/search/") / Path("references.md"),
+        source=Path("2_loader/data/md_data.md"),
+        target=Path("data/search/md_data.md"),
     )
 
     records = colrev.loader.load_utils.load(

diff --git a/tests/2_ops/load_utils_nbib_test.py → tests/2_loader/nbib_test.py b/tests/2_ops/load_utils_nbib_test.py → tests/2_loader/nbib_test.py
@@ -87,22 +87,22 @@ def field_mapper(record_dict: dict) -> None:
         )
 
     helpers.retrieve_test_file(
-        source=Path("load_utils/nbib_test.nbib"),
-        target=Path("test.nbib"),
+        source=Path("2_loader/data/nbib_data.nbib"),
+        target=Path("bib_data.nbib"),
     )
 
     entries = colrev.loader.load_utils.load(
-        filename=Path("test.nbib"),
+        filename=Path("bib_data.nbib"),
         unique_id_field="INCREMENTAL",
         entrytype_setter=entrytype_setter,
         field_mapper=field_mapper,
     )
 
     assert len(entries) == 1
-    print(entries)
+
     assert entries["000001"][Fields.TITLE] == "Paper title"
     assert entries["000001"][Fields.AUTHOR] == "Smith, Tom and Hunter, Shawn"
-    assert entries["000001"][Fields.KEYWORDS] == "Keyword 1, Keyword 2"
+    assert entries["000001"][Fields.KEYWORDS] == "Keyword 1, Keyword 2, Keyword 3"
     assert entries["000001"][Fields.JOURNAL] == "Journal Name"
     assert entries["000001"][Fields.DOI] == "http://dx.doi.org/10.1000/123456789"
     assert entries["000001"]["eric_id"] == "EJ1131633"