feat(non_annotated_t4_tlr_to_deepen): add conversion tool (#142)

* feat(non_annotated_t4_tlr_to_deepen): add conversion tool Signed-off-by: kminoda <[email protected]> * update version Signed-off-by: kminoda <[email protected]> * remove camera_channels Signed-off-by: kminoda <[email protected]> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update tests Signed-off-by: kminoda <[email protected]> * update test Signed-off-by: kminoda <[email protected]> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update Signed-off-by: kminoda <[email protected]> * dummy fix Signed-off-by: kminoda <[email protected]> * dummy commit Signed-off-by: kminoda <[email protected]> * update pyproject.toml Signed-off-by: kminoda <[email protected]> --------- Signed-off-by: kminoda <[email protected]> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
tier4 · Aug 26, 2024 · 0cc8566 · 0cc8566
1 parent 9429830
commit 0cc8566
Show file tree

Hide file tree

Showing 7 changed files with 160 additions and 5 deletions.
diff --git a/config/convert_non_annotated_t4_tlr_to_deepen_sample.yaml b/config/convert_non_annotated_t4_tlr_to_deepen_sample.yaml
@@ -0,0 +1,4 @@
+task: convert_non_annotated_t4_tlr_to_deepen
+conversion:
+  input_base: ./data/non_annotated_t4_format
+  output_base: ./data/deepen_format
diff --git a/docs/tools_overview.md b/docs/tools_overview.md
@@ -64,6 +64,13 @@ python -m perception_dataset.convert --config config/convert_rosbag2_to_non_anno
 # if you want to overwrite t4-format data, use --overwrite option
 ```
 
+For a traffic light dataset, use the following command:
+
+```bash
+python -m perception_dataset.convert --config config/convert_rosbag2_to_non_annotated_t4_tlr_sample.yaml
+# if you want to overwrite t4-format data, use --overwrite option
+```
+
 ### confirm non-annotated format data
 
 Verify that the following directories have the same number of files:
@@ -87,6 +94,12 @@ output: deepen-format data
 python -m perception_dataset.convert --config config/convert_non_annotated_t4_to_deepen_sample.yaml
 ```
 
+If you are converting a T4 TLR (traffic light recognition) dataset, use the following command instead:
+
+```bash
+python -m perception_dataset.convert --config config/convert_non_annotated_t4_tlr_to_deepen_sample.yaml
+```
+
 ### Create and update dataset
 
 Log in to deepen, create a dataset, and upload the file `deepen_format/${DATASET_NAME}.zip`

diff --git a/perception_dataset/convert.py b/perception_dataset/convert.py
@@ -81,6 +81,26 @@ def main():
             workers_number=workers_number,
         )
 
+        logger.info(
+            f"[BEGIN] Converting T4 dataset ({input_base}) to deepen format dataset ({output_base})"
+        )
+        converter.convert()
+        logger.info(
+            f"[Done] Converting T4 dataset ({input_base}) to deepen format dataset ({output_base})"
+        )
+    elif task == "convert_non_annotated_t4_tlr_to_deepen":
+        from perception_dataset.deepen.non_annotated_t4_tlr_to_deepen_converter import (
+            NonAnnotatedT4TlrToDeepenConverter,
+        )
+
+        input_base = config_dict["conversion"]["input_base"]
+        output_base = config_dict["conversion"]["output_base"]
+
+        converter = NonAnnotatedT4TlrToDeepenConverter(
+            input_base=input_base,
+            output_base=output_base,
+        )
+
         logger.info(
             f"[BEGIN] Converting T4 dataset ({input_base}) to deepen format dataset ({output_base})"
         )

diff --git a/perception_dataset/deepen/non_annotated_t4_tlr_to_deepen_converter.py b/perception_dataset/deepen/non_annotated_t4_tlr_to_deepen_converter.py
@@ -0,0 +1,55 @@
+import glob
+import os
+import os.path as osp
+from pathlib import Path
+import shutil
+import time
+
+from nuscenes.nuscenes import NuScenes
+
+from perception_dataset.abstract_converter import AbstractConverter
+from perception_dataset.utils.logger import configure_logger
+
+logger = configure_logger(modname=__name__)
+
+
+class NonAnnotatedT4TlrToDeepenConverter(AbstractConverter):
+    """Export non-annotated T4 scenes as flat, zipped directories of their sample data files.
+
+    Each directory found directly under ``input_base`` is handled as one scene: the files
+    referenced by its samples are copied into a single flat directory under ``output_base``,
+    and that directory is then archived as a zip file next to it.
+    """
+
+    def __init__(
+        self,
+        input_base: str,
+        output_base: str,
+    ):
+        # Both paths are handed to the base class; convert() reads them back through
+        # self._input_base / self._output_base (presumably set by AbstractConverter, which is
+        # not visible here - confirm).
+        super().__init__(input_base, output_base)
+
+    def convert(self):
+        """Convert every scene directory under the input base and zip each result.
+
+        Entries of the input base that are not directories are skipped. The output directory
+        of a scene is named after the scene directory, with every "." replaced by "-".
+        The total elapsed time is logged at the end.
+        """
+        start_time = time.time()
+
+        for scene_dir in glob.glob(osp.join(self._input_base, "*")):
+            if not osp.isdir(scene_dir):
+                continue
+
+            out_dir = osp.join(self._output_base, osp.basename(scene_dir).replace(".", "-"))
+            self._convert_one_scene(
+                scene_dir,
+                out_dir,
+            )
+            # Writes "<out_dir>.zip" containing the contents of out_dir.
+            shutil.make_archive(f"{out_dir}", "zip", root_dir=out_dir)
+
+        elapsed_time = time.time() - start_time
+        logger.info(f"Elapsed time: {elapsed_time:.1f} [sec]")
+
+    def _convert_one_scene(self, input_dir: str, output_dir: str):
+        """Copy every file referenced by the samples of one scene into ``output_dir``.
+
+        Args:
+            input_dir: Root directory of the scene; loaded with NuScenes using
+                ``version="annotation"``.
+            output_dir: Destination directory; created if it does not exist.
+        """
+        os.makedirs(output_dir, exist_ok=True)
+        nusc = NuScenes(version="annotation", dataroot=input_dir, verbose=False)
+
+        logger.info(f"Converting {input_dir} to {output_dir}")
+        for sample in nusc.sample:
+            for sample_data_token in sample["data"].values():
+                # Note: no filtering by sensor is done here - every sample_data entry referenced
+                # by the sample is copied (for a TLR dataset these are presumably camera images
+                # only - TODO confirm).
+                sample_data = nusc.get("sample_data", sample_data_token)
+                original_filename = sample_data["filename"]
+                input_path: Path = Path(input_dir) / original_filename
+                # Flatten the relative path into a single file name by turning "/" into "_".
+                output_path: Path = Path(output_dir) / original_filename.replace("/", "_")
+                shutil.copy(input_path, output_path)
+
+        logger.info(f"Done Conversion: {input_dir} to {output_dir}")
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "perception-dataset"
-version = "1.0.13"
+version = "1.0.14"
 description = "TIER IV Perception dataset has modules to convert dataset from rosbag to t4_dataset"
 authors = [
     "Yusuke Muramatsu <[email protected]>",

diff --git a/tests/config/convert_non_annotated_t4_tlr_to_deepen.yaml b/tests/config/convert_non_annotated_t4_tlr_to_deepen.yaml
@@ -0,0 +1,4 @@
+task: convert_non_annotated_t4_tlr_to_deepen
+conversion:
+  input_base: ./data/non_annotated_t4_format
+  output_base: ./data/deepen_format
diff --git a/tests/test_tlr_dataset_conversion.py b/tests/test_tlr_dataset_conversion.py
@@ -8,17 +8,22 @@
 
 from perception_dataset.constants import SENSOR_ENUM
 from perception_dataset.deepen.deepen_to_t4_converter import DeepenToT4Converter
+from perception_dataset.deepen.non_annotated_t4_tlr_to_deepen_converter import (
+    NonAnnotatedT4TlrToDeepenConverter,
+)
 from perception_dataset.rosbag2.converter_params import Rosbag2ConverterParams
 from perception_dataset.rosbag2.rosbag2_to_non_annotated_t4_converter import (
     Rosbag2ToNonAnnotatedT4Converter,
 )
 from perception_dataset.utils.rosbag2 import get_topic_count
 from tests.constants import TEST_CONFIG_ROOT_DIR, TEST_ROOT_DIR
 
+# Downloaded rosbag name
+TEST_ROSBAG_NAME = "traffic_light_sample_tf"
+
 
 @pytest.fixture(scope="module")
-def t4_dataset_path():
-    test_rosbag_name = "traffic_light_sample_tf"
+def non_annotated_t4_dataset_path():
     # before test - convert rosbag2 to t4
     with open(TEST_CONFIG_ROOT_DIR / "convert_rosbag2_to_non_annotated_t4_tlr_test.yaml") as f:
         param_args = yaml.safe_load(f)
@@ -40,6 +45,15 @@ def t4_dataset_path():
     converter = Rosbag2ToNonAnnotatedT4Converter(converter_params)
     converter.convert()
 
+    # provide a path to converted non-annotated t4 dataset
+    yield osp.join(r2t4_output_base, TEST_ROSBAG_NAME, "non_annotated_t4_format")
+
+    # after test - remove resource
+    shutil.rmtree(r2t4_output_base, ignore_errors=True)
+
+
+@pytest.fixture(scope="module")
+def t4_dataset_path(non_annotated_t4_dataset_path):
     # before test - convert deepen to t4
     with open(TEST_CONFIG_ROOT_DIR / "convert_deepen_to_t4_tlr_test.yaml") as f:
         config_dict = yaml.safe_load(f)
@@ -69,13 +83,34 @@ def t4_dataset_path():
     converter.convert()
 
     # provide a path to converted t4_dataset
-    yield osp.join(d2t4_output_base, test_rosbag_name, "t4_dataset")
+    yield osp.join(d2t4_output_base, TEST_ROSBAG_NAME, "t4_dataset")
 
     # after test - remove resource
-    shutil.rmtree(r2t4_output_base, ignore_errors=True)
     shutil.rmtree(d2t4_output_base, ignore_errors=True)
 
 
+@pytest.fixture(scope="module")
+def deepen_dataset_path(non_annotated_t4_dataset_path):
+    """Run the non-annotated T4 (TLR) to deepen conversion and yield the output directory.
+
+    ``non_annotated_t4_dataset_path`` is not read in the body; requesting it makes pytest set
+    up that fixture first (presumably so the input data exists - confirm that the config's
+    ``input_base`` matches that fixture's output). The output base directory is removed
+    during fixture teardown.
+    """
+    # before test - convert non-annotated t4 to deepen
+    with open(TEST_CONFIG_ROOT_DIR / "convert_non_annotated_t4_tlr_to_deepen.yaml") as f:
+        config_dict = yaml.safe_load(f)
+
+    t4_to_deepen_input_base = osp.join(TEST_ROOT_DIR, config_dict["conversion"]["input_base"])
+    t4_to_deepen_output_base = osp.join(TEST_ROOT_DIR, config_dict["conversion"]["output_base"])
+
+    converter = NonAnnotatedT4TlrToDeepenConverter(
+        input_base=t4_to_deepen_input_base,
+        output_base=t4_to_deepen_output_base,
+    )
+    converter.convert()
+
+    # provide a path to the converted deepen-format dataset
+    yield osp.join(t4_to_deepen_output_base, TEST_ROSBAG_NAME)
+
+    # after test - remove resource
+    shutil.rmtree(t4_to_deepen_output_base, ignore_errors=True)
+
+
 @pytest.fixture
 def attribute_list():
     with open(TEST_CONFIG_ROOT_DIR / "label/attribute.yaml") as f:
@@ -289,3 +324,27 @@ def test_directory_structure(t4_dataset_path):
     ), "velocity_status is not in input_bag"
     assert "/tf" in topic_count_dict.keys(), "tf is not in input_bag"
     assert "/tf_static" in topic_count_dict.keys(), "tf_static is not in input_bag"
+
+
+def test_deepen_dataset_image_exists(deepen_dataset_path, allowed_extensions=[".jpg", ".png"]):
+    """
+    Check that the given directory contains at least one file with an allowed extension.
+
+    Only the top level of the directory is listed; sub-directories are not searched.
+
+    Args:
+        deepen_dataset_path (str): Path to the deepen dataset directory.
+        allowed_extensions (list): List of allowed file extensions. The default list is
+            only read here, never mutated.
+    """
+    # List all files in the directory
+    dir_images = os.listdir(deepen_dataset_path)
+
+    # Filter files by allowed extensions
+    image_files = [
+        file for file in dir_images if any(file.endswith(ext) for ext in allowed_extensions)
+    ]
+
+    # Print the found image files
+    print(f"Found image files: {image_files}")
+
+    # Check if any image files exist
+    assert len(image_files) > 0, "No image files found in the dataset path."