Depth data loading for Polycam LiDAR data (nerfstudio-project#1253)
* Add depth data loading for polycam datasets

* misc: missing arguments in process_data_utils.downscale_images

* Move processing to polycam_utils

* delete unused imports

* process_data_utils: patch for frame%05d issue

* accidentally deleted neighbor flag from ffmpeg

* wrong ffmpeg command

* upscale directly

* redundant comment

* remove unused imports

* black formatting
ManuConcepBrito authored Jan 24, 2023
1 parent bc65639 commit 828d6bc
Showing 3 changed files with 210 additions and 29 deletions.
132 changes: 131 additions & 1 deletion nerfstudio/process_data/polycam_utils.py
@@ -17,10 +17,11 @@
import json
import sys
from pathlib import Path
from typing import List
from typing import List, Tuple

from rich.console import Console

from nerfstudio.process_data import process_data_utils
from nerfstudio.process_data.process_data_utils import CAMERA_MODELS
from nerfstudio.utils import io

@@ -29,6 +30,7 @@

def polycam_to_json(
image_filenames: List[Path],
depth_filenames: List[Path],
cameras_dir: Path,
output_dir: Path,
min_blur_score: float = 0.0,
@@ -38,6 +40,7 @@ def polycam_to_json(
Args:
image_filenames: List of paths to the original images.
depth_filenames: List of paths to the original depth maps.
cameras_dir: Path to the polycam cameras directory.
output_dir: Path to the output directory.
min_blur_score: Minimum blur score to use an image. Images below this value will be skipped.
@@ -46,6 +49,7 @@
Returns:
Summary of the conversion.
"""
use_depth = len(image_filenames) == len(depth_filenames)
data = {}
data["camera_model"] = CAMERA_MODELS["perspective"].value
# Needs to be a string for camera_utils.auto_orient_and_center_poses
@@ -67,6 +71,8 @@
frame["w"] = frame_json["width"] - crop_border_pixels * 2
frame["h"] = frame_json["height"] - crop_border_pixels * 2
frame["file_path"] = f"./images/frame_{i+1:05d}{image_filename.suffix}"
if use_depth:
frame["depth_map_path"] = f"./depth/frame_{i+1:05d}{depth_filenames[i].suffix}"
# Transform matrix to nerfstudio format. Please refer to the documentation for coordinate system conventions.
frame["transform_matrix"] = [
[frame_json["t_20"], frame_json["t_21"], frame_json["t_22"], frame_json["t_23"]],
@@ -90,3 +96,127 @@
sys.exit(1)

return summary
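
Downstream of this change, each frame entry in the exported JSON can carry a depth_map_path next to file_path. A minimal sketch of reading those pairs back out, assuming the standard transforms.json output name (the helper itself is illustrative, not part of this commit):

```python
import json
from pathlib import Path
from typing import List, Optional, Tuple


def load_rgb_depth_pairs(output_dir: Path) -> List[Tuple[Path, Optional[Path]]]:
    """Pair each RGB frame with its depth map, if one was written."""
    with open(output_dir / "transforms.json", "r", encoding="utf-8") as f:
        meta = json.load(f)
    pairs = []
    for frame in meta["frames"]:
        rgb = output_dir / frame["file_path"]
        # polycam_to_json only writes depth_map_path when every image had a matching depth map.
        depth = output_dir / frame["depth_map_path"] if "depth_map_path" in frame else None
        pairs.append((rgb, depth))
    return pairs
```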


def process_images(
polycam_image_dir: Path,
image_dir: Path,
crop_border_pixels: int = 15,
max_dataset_size: int = 600,
num_downscales: int = 3,
verbose: bool = True,
) -> Tuple[List[str], List[Path]]:
"""
Process RGB images only.
Args:
polycam_image_dir: Path to the directory containing RGB images
image_dir: Output directory for processed images
crop_border_pixels: Number of pixels to crop from each border of the image. Useful as borders may be
black due to undistortion.
max_dataset_size: Max number of images to train on. If the dataset has more, images will be sampled
approximately evenly. If -1, use all images.
num_downscales: Number of times to downscale the images. Downscales by 2 each time. For example a value of 3
will downscale the images by 2x, 4x, and 8x.
verbose: If True, print extra logging.
Returns:
summary_log: Summary of the processing.
polycam_image_filenames: List of processed image paths
"""
summary_log = []
polycam_image_filenames, num_orig_images = process_data_utils.get_image_filenames(
polycam_image_dir, max_dataset_size
)

# Copy images to output directory
copied_image_paths = process_data_utils.copy_images_list(
polycam_image_filenames,
image_dir=image_dir,
crop_border_pixels=crop_border_pixels,
verbose=verbose,
)
num_frames = len(copied_image_paths)

copied_image_paths = [Path("images/" + copied_image_path.name) for copied_image_path in copied_image_paths]

if max_dataset_size > 0 and num_frames != num_orig_images:
summary_log.append(f"Started with {num_frames} images out of {num_orig_images} total")
summary_log.append(
"To change the size of the dataset add the argument --max_dataset_size to larger than the "
f"current value ({max_dataset_size}), or -1 to use all images."
)
else:
summary_log.append(f"Started with {num_frames} images")

# Downscale images
summary_log.append(process_data_utils.downscale_images(image_dir, num_downscales, verbose=verbose))

# Abort if no images were found
if num_frames == 0:
CONSOLE.print("[bold red]No images found, exiting")
sys.exit(1)

return summary_log, polycam_image_filenames
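
A hypothetical call, with paths mirroring the Polycam keyframe layout used in scripts/process_data.py below (both paths are illustrative):

```python
from pathlib import Path

summary_log, image_names = process_images(
    polycam_image_dir=Path("poly_export/keyframes/corrected_images"),
    image_dir=Path("outputs/my-scene/images"),
    crop_border_pixels=15,  # trim the black undistortion borders
    max_dataset_size=600,
    num_downscales=3,  # also writes images_2, images_4, images_8
    verbose=False,
)
```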


def process_depth_maps(
polycam_depth_dir: Path,
depth_dir: Path,
num_processed_images: int,
crop_border_pixels: int = 15,
max_dataset_size: int = 600,
num_downscales: int = 3,
verbose: bool = True,
) -> Tuple[List[str], List[Path]]:
"""
Process depth maps from Polycam only.
Args:
polycam_depth_dir: Path to the directory containing depth maps
depth_dir: Output directory for processed depth maps
num_processed_images: Number of processed RGB images; must match the number of depth maps
crop_border_pixels: Number of pixels to crop from each border of the image. Useful as borders may be
black due to undistortion.
max_dataset_size: Max number of images to train on. If the dataset has more, images will be sampled
approximately evenly. If -1, use all images.
num_downscales: Number of times to downscale the images. Downscales by 2 each time. For example a value of 3
will downscale the images by 2x, 4x, and 8x.
verbose: If True, print extra logging.
Returns:
summary_log: Summary of the processing.
polycam_depth_maps_filenames: List of processed depth map paths
"""
summary_log = []
polycam_depth_maps_filenames, num_orig_depth_maps = process_data_utils.get_image_filenames(
polycam_depth_dir, max_dataset_size
)

# Copy depth images to output directory
copied_depth_maps_paths = process_data_utils.copy_and_upscale_polycam_depth_maps_list(
polycam_depth_maps_filenames, depth_dir=depth_dir, crop_border_pixels=crop_border_pixels, verbose=verbose
)

num_processed_depth_maps = len(copied_depth_maps_paths)

# assert same number of images as depth maps
if num_processed_images != num_processed_depth_maps:
raise ValueError(
f"Expected same amount of depth maps as images. "
f"Instead got {num_processed_images} images and {num_processed_depth_maps} depth maps"
)

if max_dataset_size > 0 and num_processed_depth_maps != num_orig_depth_maps:
summary_log.append(f"Started with {num_processed_depth_maps} depth maps out of {num_orig_depth_maps} total")
summary_log.append(
"To change the size of the dataset, set --max_dataset_size to a value larger than the "
f"current one ({max_dataset_size}), or to -1 to use all images."
)
else:
summary_log.append(f"Started with {num_processed_depth_maps} depth maps")

# Downscale depth maps
summary_log.append(
process_data_utils.downscale_images(depth_dir, num_downscales, folder_name="depth", verbose=verbose)
)

return summary_log, polycam_depth_maps_filenames
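
The num_processed_images argument enforces the one-depth-map-per-image contract that polycam_to_json relies on (its use_depth check compares the two list lengths). A sketch of the intended call order, continuing the hypothetical paths above:

```python
from pathlib import Path

depth_log, depth_names = process_depth_maps(
    Path("poly_export/keyframes/depth"),
    Path("outputs/my-scene/depth"),
    num_processed_images=len(image_names),  # raises ValueError on a count mismatch
    crop_border_pixels=15,
)
```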
54 changes: 52 additions & 2 deletions nerfstudio/process_data/process_data_utils.py
@@ -30,6 +30,7 @@
from nerfstudio.utils.scripts import run_command

CONSOLE = Console(width=120)
POLYCAM_UPSCALING_TIMES = 2


class CameraModel(Enum):
@@ -180,6 +181,54 @@ def copy_images_list(
return copied_image_paths


def copy_and_upscale_polycam_depth_maps_list(
polycam_depth_image_filenames: List[Path],
depth_dir: Path,
crop_border_pixels: Optional[int] = None,
verbose: bool = False,
) -> List[Path]:
"""
Copy depth maps to the working location, upscale them to match the RGB image dimensions,
and finally crop them the same way as the RGB images.
Args:
polycam_depth_image_filenames: List of Paths of images to copy to a new directory.
depth_dir: Path to the output directory.
crop_border_pixels: If not None, crops each edge by the specified number of pixels.
verbose: If True, print extra logging.
Returns:
A list of the copied depth maps paths.
"""
depth_dir.mkdir(parents=True, exist_ok=True)

# Copy and upscale depth maps into the new directory
with status(msg="[bold yellow] Upscaling depth maps...", spinner="growVertical", verbose=verbose):
upscale_factor = 2**POLYCAM_UPSCALING_TIMES
assert upscale_factor > 1
assert isinstance(upscale_factor, int)

copied_depth_map_paths = []
for idx, depth_map in enumerate(polycam_depth_image_filenames):
destination = depth_dir / f"frame_{idx + 1:05d}{depth_map.suffix}"
ffmpeg_cmd = [
f"ffmpeg -y -i {depth_map} ",
f"-q:v 2 -vf scale=iw*{upscale_factor}:ih*{upscale_factor}:flags=neighbor ",
f"{destination}",
]
ffmpeg_cmd = " ".join(ffmpeg_cmd)
run_command(ffmpeg_cmd, verbose=verbose)
copied_depth_map_paths.append(destination)

if crop_border_pixels is not None:
file_type = next(depth_dir.glob("frame_*")).suffix
filename = f"frame_%05d{file_type}"
crop = f"crop=iw-{crop_border_pixels * 2}:ih-{crop_border_pixels * 2}"
ffmpeg_cmd = f"ffmpeg -y -i {depth_dir / filename} -q:v 2 -vf {crop} {depth_dir / filename}"
run_command(ffmpeg_cmd, verbose=verbose)

CONSOLE.log("[bold green]:tada: Done upscaling depth maps.")
return copied_depth_map_paths
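
For intuition: with POLYCAM_UPSCALING_TIMES = 2 the scale factor is 2**2 = 4, presumably because Polycam exports LiDAR depth at a quarter of the RGB resolution (an assumption; the diff itself does not say). For a map depth/example.png, the joined command comes out roughly as

ffmpeg -y -i depth/example.png -q:v 2 -vf scale=iw*4:ih*4:flags=neighbor depth_out/frame_00001.png

The flags=neighbor part is load-bearing: nearest-neighbor keeps every output pixel an exact copy of some source depth value, while bilinear or bicubic filtering would blend depths across object edges and invent values that exist nowhere in the scene; hence the "accidentally deleted neighbor flag from ffmpeg" entry in the commit history above.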


def copy_images(data: Path, image_dir: Path, verbose) -> int:
"""Copy images from a directory to a new directory.
@@ -203,14 +252,15 @@ def copy_images(data: Path, image_dir: Path, verbose) -> int:
return num_frames


def downscale_images(image_dir: Path, num_downscales: int, verbose: bool = False) -> str:
def downscale_images(image_dir: Path, num_downscales: int, folder_name: str = "images", verbose: bool = False) -> str:
"""Downscales the images in the directory. Uses FFMPEG.
Assumes images are named frame_00001.png, frame_00002.png, etc.
Args:
image_dir: Path to the directory containing the images.
num_downscales: Number of times to downscale the images. Downscales by 2 each time.
folder_name: Name of the output folder
verbose: If True, logs the output of the command.
Returns:
@@ -225,7 +275,7 @@ def downscale_images(image_dir: Path, num_downscales: int, verbose: bool = False
for downscale_factor in downscale_factors:
assert downscale_factor > 1
assert isinstance(downscale_factor, int)
downscale_dir = image_dir.parent / f"images_{downscale_factor}"
downscale_dir = image_dir.parent / f"{folder_name}_{downscale_factor}"
downscale_dir.mkdir(parents=True, exist_ok=True)
# Using %05d ffmpeg commands appears to be unreliable (skips images), so use scandir.
files = os.scandir(image_dir)
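
The new folder_name parameter is what lets depth maps reuse this helper. A hypothetical call:

```python
from pathlib import Path

# Downscale depth maps instead of RGB images; writes depth_2, depth_4, and depth_8
# next to the full-resolution depth folder, mirroring the images_* convention.
downscale_images(Path("outputs/my-scene/depth"), num_downscales=3, folder_name="depth")
```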
53 changes: 27 additions & 26 deletions scripts/process_data.py
@@ -526,7 +526,8 @@ class ProcessPolycam:
"""Minimum blur score to use an image. If the blur score is below this value, the image will be skipped."""
crop_border_pixels: int = 15
"""Number of pixels to crop from each border of the image. Useful as borders may be black due to undistortion."""

use_depth: bool = False
"""If True, processes the generated depth maps from Polycam"""
verbose: bool = False
"""If True, print extra logging."""

@@ -551,47 +552,47 @@ def main(self) -> None:
else:
polycam_image_dir = self.data / "keyframes" / "images"
polycam_cameras_dir = self.data / "keyframes" / "cameras"
self.crop_border_pixels = 0
if not self.use_uncorrected_images:
CONSOLE.print("[bold yellow]Corrected images not found, using raw images.")

if not polycam_image_dir.exists():
raise ValueError(f"Image directory {polycam_image_dir} doesn't exist")

# Copy images to output directory
polycam_image_filenames, num_orig_images = process_data_utils.get_image_filenames(
polycam_image_dir, self.max_dataset_size
)
depth_dir = self.data / "keyframes" / "depth"
if not depth_dir.exists():
raise ValueError(f"Depth map directory {depth_dir} doesn't exist")

copied_image_paths = process_data_utils.copy_images_list(
polycam_image_filenames,
image_dir=image_dir,
(image_processing_log, polycam_image_filenames) = polycam_utils.process_images(
polycam_image_dir,
image_dir,
crop_border_pixels=self.crop_border_pixels,
max_dataset_size=self.max_dataset_size,
num_downscales=self.num_downscales,
verbose=self.verbose,
)
num_frames = len(copied_image_paths)

copied_image_paths = [Path("images/" + copied_image_path.name) for copied_image_path in copied_image_paths]

if self.max_dataset_size > 0 and num_frames != num_orig_images:
summary_log.append(f"Started with {num_frames} images out of {num_orig_images} total")
summary_log.append(
"To change the size of the dataset add the argument [yellow]--max_dataset_size[/yellow] to "
f"larger than the current value ({self.max_dataset_size}), or -1 to use all images."
summary_log.extend(image_processing_log)

polycam_depth_filenames = []
if self.use_depth:
polycam_depth_image_dir = self.data / "keyframes" / "depth"
depth_dir = self.output_dir / "depth"
depth_dir.mkdir(parents=True, exist_ok=True)
(depth_processing_log, polycam_depth_filenames) = polycam_utils.process_depth_maps(
polycam_depth_image_dir,
depth_dir,
num_processed_images=len(polycam_image_filenames),
crop_border_pixels=self.crop_border_pixels,
max_dataset_size=self.max_dataset_size,
num_downscales=self.num_downscales,
verbose=self.verbose,
)
else:
summary_log.append(f"Started with {num_frames} images")

# Downscale images
summary_log.append(process_data_utils.downscale_images(image_dir, self.num_downscales, verbose=self.verbose))
summary_log.extend(depth_processing_log)

# Save json
if num_frames == 0:
CONSOLE.print("[bold red]No images found, exiting")
sys.exit(1)
summary_log.extend(
polycam_utils.polycam_to_json(
image_filenames=polycam_image_filenames,
depth_filenames=polycam_depth_filenames,
cameras_dir=polycam_cameras_dir,
output_dir=self.output_dir,
min_blur_score=self.min_blur_score,
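
End to end, the new use_depth field is exposed as a CLI flag through the dataclass above, so a processing run would look something like the following (the exact flag spelling depends on the CLI wrapper and is an assumption here):

ns-process-data polycam --data poly_export --output-dir outputs/my-scene --use-depth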
