Depth data loading for Polycam LiDAR data (nerfstudio-project#1253)
* Add depth data loading for polycam datasets

* misc: missing arguments in process_data_utils.downscale_images

* Move processing to polycam_utils

* delete unused imports

* process_data_utils: patch for frame%05d issue

* accidentally deleted neighbor flag from ffmpeg

* wrong ffmpeg command

* upscale directly

* redundant comment

* remove unused imports

* black formatting
ManuConcepBrito authored Jan 24, 2023
1 parent bc65639 commit 828d6bc
Showing 3 changed files with 210 additions and 29 deletions.
132 changes: 131 additions & 1 deletion nerfstudio/process_data/polycam_utils.py
@@ -17,10 +17,11 @@
import json
import sys
from pathlib import Path
from typing import List
from typing import List, Tuple

from rich.console import Console

from nerfstudio.process_data import process_data_utils
from nerfstudio.process_data.process_data_utils import CAMERA_MODELS
from nerfstudio.utils import io

@@ -29,6 +30,7 @@

def polycam_to_json(
image_filenames: List[Path],
depth_filenames: List[Path],
cameras_dir: Path,
output_dir: Path,
min_blur_score: float = 0.0,
@@ -38,6 +40,7 @@ def polycam_to_json(
Args:
image_filenames: List of paths to the original images.
depth_filenames: List of paths to the original depth maps.
cameras_dir: Path to the polycam cameras directory.
output_dir: Path to the output directory.
min_blur_score: Minimum blur score to use an image. Images below this value will be skipped.
@@ -46,6 +49,7 @@
Returns:
Summary of the conversion.
"""
use_depth = len(image_filenames) == len(depth_filenames)
data = {}
data["camera_model"] = CAMERA_MODELS["perspective"].value
# Needs to be a string for camera_utils.auto_orient_and_center_poses
@@ -67,6 +71,8 @@
frame["w"] = frame_json["width"] - crop_border_pixels * 2
frame["h"] = frame_json["height"] - crop_border_pixels * 2
frame["file_path"] = f"./images/frame_{i+1:05d}{image_filename.suffix}"
if use_depth:
frame["depth_map_path"] = f"./depth/frame_{i+1:05d}{depth_filenames[i].suffix}"
# Transform matrix to nerfstudio format. Please refer to the documentation for coordinate system conventions.
frame["transform_matrix"] = [
[frame_json["t_20"], frame_json["t_21"], frame_json["t_22"], frame_json["t_23"]],
@@ -90,3 +96,127 @@
sys.exit(1)

return summary
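
Downstream of this change, each frame entry in the exported JSON can carry a depth_map_path next to file_path. A minimal sketch of reading those pairs back out, assuming the standard transforms.json output name (the helper itself is illustrative, not part of this commit):

```python
import json
from pathlib import Path
from typing import List, Optional, Tuple


def load_rgb_depth_pairs(output_dir: Path) -> List[Tuple[Path, Optional[Path]]]:
    """Pair each RGB frame with its depth map, if one was written."""
    with open(output_dir / "transforms.json", "r", encoding="utf-8") as f:
        meta = json.load(f)
    pairs = []
    for frame in meta["frames"]:
        rgb = output_dir / frame["file_path"]
        # polycam_to_json only writes depth_map_path when every image had a matching depth map.
        depth = output_dir / frame["depth_map_path"] if "depth_map_path" in frame else None
        pairs.append((rgb, depth))
    return pairs
```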


def process_images(
polycam_image_dir: Path,
image_dir: Path,
crop_border_pixels: int = 15,
max_dataset_size: int = 600,
num_downscales: int = 3,
verbose: bool = True,
) -> Tuple[List[str], List[Path]]:
"""
Process RGB images only.
Args:
polycam_image_dir: Path to the directory containing RGB images
image_dir: Output directory for processed images
crop_border_pixels: Number of pixels to crop from each border of the image. Useful as borders may be
black due to undistortion.
max_dataset_size: Max number of images to train on. If the dataset has more, images will be sampled
approximately evenly. If -1, use all images.
num_downscales: Number of times to downscale the images. Downscales by 2 each time. For example a value of 3
will downscale the images by 2x, 4x, and 8x.
verbose: If True, print extra logging.
Returns:
summary_log: Summary of the processing.
polycam_image_filenames: List of processed image paths
"""
summary_log = []
polycam_image_filenames, num_orig_images = process_data_utils.get_image_filenames(
polycam_image_dir, max_dataset_size
)

# Copy images to output directory
copied_image_paths = process_data_utils.copy_images_list(
polycam_image_filenames,
image_dir=image_dir,
crop_border_pixels=crop_border_pixels,
verbose=verbose,
)
num_frames = len(copied_image_paths)

copied_image_paths = [Path("images/" + copied_image_path.name) for copied_image_path in copied_image_paths]

if max_dataset_size > 0 and num_frames != num_orig_images:
summary_log.append(f"Started with {num_frames} images out of {num_orig_images} total")
summary_log.append(
"To change the size of the dataset add the argument --max_dataset_size to larger than the "
f"current value ({max_dataset_size}), or -1 to use all images."
)
else:
summary_log.append(f"Started with {num_frames} images")

# Downscale images
summary_log.append(process_data_utils.downscale_images(image_dir, num_downscales, verbose=verbose))

# Abort if no images were found
if num_frames == 0:
CONSOLE.print("[bold red]No images found, exiting")
sys.exit(1)

return summary_log, polycam_image_filenames
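
A hypothetical call, with paths mirroring the Polycam keyframe layout used in scripts/process_data.py below (both paths are illustrative):

```python
from pathlib import Path

summary_log, image_names = process_images(
    polycam_image_dir=Path("poly_export/keyframes/corrected_images"),
    image_dir=Path("outputs/my-scene/images"),
    crop_border_pixels=15,  # trim the black undistortion borders
    max_dataset_size=600,
    num_downscales=3,  # also writes images_2, images_4, images_8
    verbose=False,
)
```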


def process_depth_maps(
polycam_depth_dir: Path,
depth_dir: Path,
num_processed_images: int,
crop_border_pixels: int = 15,
max_dataset_size: int = 600,
num_downscales: int = 3,
verbose: bool = True,
) -> Tuple[List[str], List[Path]]:
"""
Process depth maps from Polycam only.
Args:
polycam_depth_dir: Path to the directory containing depth maps
depth_dir: Output directory for processed depth maps
num_processed_images: Number of processed RGB images; must match the number of depth maps
crop_border_pixels: Number of pixels to crop from each border of the image. Useful as borders may be
black due to undistortion.
max_dataset_size: Max number of images to train on. If the dataset has more, images will be sampled
approximately evenly. If -1, use all images.
num_downscales: Number of times to downscale the images. Downscales by 2 each time. For example a value of 3
will downscale the images by 2x, 4x, and 8x.
verbose: If True, print extra logging.
Returns:
summary_log: Summary of the processing.
polycam_depth_maps_filenames: List of processed depth map paths
"""
summary_log = []
polycam_depth_maps_filenames, num_orig_depth_maps = process_data_utils.get_image_filenames(
polycam_depth_dir, max_dataset_size
)

# Copy depth images to output directory
copied_depth_maps_paths = process_data_utils.copy_and_upscale_polycam_depth_maps_list(
polycam_depth_maps_filenames, depth_dir=depth_dir, crop_border_pixels=crop_border_pixels, verbose=verbose
)

num_processed_depth_maps = len(copied_depth_maps_paths)

# assert same number of images as depth maps
if num_processed_images != num_processed_depth_maps:
raise ValueError(
f"Expected same amount of depth maps as images. "
f"Instead got {num_processed_images} images and {num_processed_depth_maps} depth maps"
)

if max_dataset_size > 0 and num_processed_depth_maps != num_orig_depth_maps:
summary_log.append(f"Started with {num_processed_depth_maps} depth maps out of {num_orig_depth_maps} total")
summary_log.append(
"To change the size of the dataset, set --max_dataset_size to a value larger than the "
f"current one ({max_dataset_size}), or to -1 to use all images."
)
else:
summary_log.append(f"Started with {num_processed_depth_maps} depth maps")

# Downscale depth maps
summary_log.append(
process_data_utils.downscale_images(depth_dir, num_downscales, folder_name="depth", verbose=verbose)
)

return summary_log, polycam_depth_maps_filenames
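
The num_processed_images argument enforces the one-depth-map-per-image contract that polycam_to_json relies on (its use_depth check compares the two list lengths). A sketch of the intended call order, continuing the hypothetical paths above:

```python
from pathlib import Path

depth_log, depth_names = process_depth_maps(
    Path("poly_export/keyframes/depth"),
    Path("outputs/my-scene/depth"),
    num_processed_images=len(image_names),  # raises ValueError on a count mismatch
    crop_border_pixels=15,
)
```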
54 changes: 52 additions & 2 deletions nerfstudio/process_data/process_data_utils.py
@@ -30,6 +30,7 @@
from nerfstudio.utils.scripts import run_command

CONSOLE = Console(width=120)
POLYCAM_UPSCALING_TIMES = 2


class CameraModel(Enum):
@@ -180,6 +181,54 @@ def copy_images_list(
return copied_image_paths


def copy_and_upscale_polycam_depth_maps_list(
polycam_depth_image_filenames: List[Path],
depth_dir: Path,
crop_border_pixels: Optional[int] = None,
verbose: bool = False,
) -> List[Path]:
"""
Copy depth maps to the working location, upscale them to match the RGB image dimensions,
and finally crop them the same way as the RGB images.
Args:
polycam_depth_image_filenames: List of Paths of images to copy to a new directory.
depth_dir: Path to the output directory.
crop_border_pixels: If not None, crops each edge by the specified number of pixels.
verbose: If True, print extra logging.
Returns:
A list of the copied depth maps paths.
"""
depth_dir.mkdir(parents=True, exist_ok=True)

# Copy and upscale depth maps into the new directory
with status(msg="[bold yellow] Upscaling depth maps...", spinner="growVertical", verbose=verbose):
upscale_factor = 2**POLYCAM_UPSCALING_TIMES
assert upscale_factor > 1
assert isinstance(upscale_factor, int)

copied_depth_map_paths = []
for idx, depth_map in enumerate(polycam_depth_image_filenames):
destination = depth_dir / f"frame_{idx + 1:05d}{depth_map.suffix}"
ffmpeg_cmd = [
f"ffmpeg -y -i {depth_map} ",
f"-q:v 2 -vf scale=iw*{upscale_factor}:ih*{upscale_factor}:flags=neighbor ",
f"{destination}",
]
ffmpeg_cmd = " ".join(ffmpeg_cmd)
run_command(ffmpeg_cmd, verbose=verbose)
copied_depth_map_paths.append(destination)

if crop_border_pixels is not None:
file_type = next(depth_dir.glob("frame_*")).suffix
filename = f"frame_%05d{file_type}"
crop = f"crop=iw-{crop_border_pixels * 2}:ih-{crop_border_pixels * 2}"
ffmpeg_cmd = f"ffmpeg -y -i {depth_dir / filename} -q:v 2 -vf {crop} {depth_dir / filename}"
run_command(ffmpeg_cmd, verbose=verbose)

CONSOLE.log("[bold green]:tada: Done upscaling depth maps.")
return copied_depth_map_paths
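
For intuition: with POLYCAM_UPSCALING_TIMES = 2 the scale factor is 2**2 = 4, presumably because Polycam exports LiDAR depth at a quarter of the RGB resolution (an assumption; the diff itself does not say). For a map depth/example.png, the joined command comes out roughly as

ffmpeg -y -i depth/example.png -q:v 2 -vf scale=iw*4:ih*4:flags=neighbor depth_out/frame_00001.png

The flags=neighbor part is load-bearing: nearest-neighbor keeps every output pixel an exact copy of some source depth value, while bilinear or bicubic filtering would blend depths across object edges and invent values that exist nowhere in the scene; hence the "accidentally deleted neighbor flag from ffmpeg" entry in the commit history above.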


def copy_images(data: Path, image_dir: Path, verbose) -> int:
"""Copy images from a directory to a new directory.
@@ -203,14 +252,15 @@ def copy_images(data: Path, image_dir: Path, verbose) -> int:
return num_frames


def downscale_images(image_dir: Path, num_downscales: int, verbose: bool = False) -> str:
def downscale_images(image_dir: Path, num_downscales: int, folder_name: str = "images", verbose: bool = False) -> str:
"""Downscales the images in the directory. Uses FFMPEG.
Assumes images are named frame_00001.png, frame_00002.png, etc.
Args:
image_dir: Path to the directory containing the images.
num_downscales: Number of times to downscale the images. Downscales by 2 each time.
folder_name: Name of the output folder
verbose: If True, logs the output of the command.
Returns:
@@ -225,7 +275,7 @@ def downscale_images(image_dir: Path, num_downscales: int, verbose: bool = False
for downscale_factor in downscale_factors:
assert downscale_factor > 1
assert isinstance(downscale_factor, int)
downscale_dir = image_dir.parent / f"images_{downscale_factor}"
downscale_dir = image_dir.parent / f"{folder_name}_{downscale_factor}"
downscale_dir.mkdir(parents=True, exist_ok=True)
# Using %05d ffmpeg commands appears to be unreliable (skips images), so use scandir.
files = os.scandir(image_dir)
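
The new folder_name parameter is what lets depth maps reuse this helper. A hypothetical call:

```python
from pathlib import Path

# Downscale depth maps instead of RGB images; writes depth_2, depth_4, and depth_8
# next to the full-resolution depth folder, mirroring the images_* convention.
downscale_images(Path("outputs/my-scene/depth"), num_downscales=3, folder_name="depth")
```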
53 changes: 27 additions & 26 deletions scripts/process_data.py
@@ -526,7 +526,8 @@ class ProcessPolycam:
"""Minimum blur score to use an image. If the blur score is below this value, the image will be skipped."""
crop_border_pixels: int = 15
"""Number of pixels to crop from each border of the image. Useful as borders may be black due to undistortion."""

use_depth: bool = False
"""If True, processes the generated depth maps from Polycam"""
verbose: bool = False
"""If True, print extra logging."""

@@ -551,47 +552,47 @@ def main(self) -> None:
else:
polycam_image_dir = self.data / "keyframes" / "images"
polycam_cameras_dir = self.data / "keyframes" / "cameras"
self.crop_border_pixels = 0
if not self.use_uncorrected_images:
CONSOLE.print("[bold yellow]Corrected images not found, using raw images.")

if not polycam_image_dir.exists():
raise ValueError(f"Image directory {polycam_image_dir} doesn't exist")

# Copy images to output directory
polycam_image_filenames, num_orig_images = process_data_utils.get_image_filenames(
polycam_image_dir, self.max_dataset_size
)
depth_dir = self.data / "keyframes" / "depth"
if not depth_dir.exists():
raise ValueError(f"Depth map directory {depth_dir} doesn't exist")

copied_image_paths = process_data_utils.copy_images_list(
polycam_image_filenames,
image_dir=image_dir,
(image_processing_log, polycam_image_filenames) = polycam_utils.process_images(
polycam_image_dir,
image_dir,
crop_border_pixels=self.crop_border_pixels,
max_dataset_size=self.max_dataset_size,
num_downscales=self.num_downscales,
verbose=self.verbose,
)
num_frames = len(copied_image_paths)

copied_image_paths = [Path("images/" + copied_image_path.name) for copied_image_path in copied_image_paths]

if self.max_dataset_size > 0 and num_frames != num_orig_images:
summary_log.append(f"Started with {num_frames} images out of {num_orig_images} total")
summary_log.append(
"To change the size of the dataset add the argument [yellow]--max_dataset_size[/yellow] to "
f"larger than the current value ({self.max_dataset_size}), or -1 to use all images."
summary_log.extend(image_processing_log)

polycam_depth_filenames = []
if self.use_depth:
polycam_depth_image_dir = self.data / "keyframes" / "depth"
depth_dir = self.output_dir / "depth"
depth_dir.mkdir(parents=True, exist_ok=True)
(depth_processing_log, polycam_depth_filenames) = polycam_utils.process_depth_maps(
polycam_depth_image_dir,
depth_dir,
num_processed_images=len(polycam_image_filenames),
crop_border_pixels=self.crop_border_pixels,
max_dataset_size=self.max_dataset_size,
num_downscales=self.num_downscales,
verbose=self.verbose,
)
else:
summary_log.append(f"Started with {num_frames} images")

# Downscale images
summary_log.append(process_data_utils.downscale_images(image_dir, self.num_downscales, verbose=self.verbose))
summary_log.extend(depth_processing_log)

# Save json
if num_frames == 0:
CONSOLE.print("[bold red]No images found, exiting")
sys.exit(1)
summary_log.extend(
polycam_utils.polycam_to_json(
image_filenames=polycam_image_filenames,
depth_filenames=polycam_depth_filenames,
cameras_dir=polycam_cameras_dir,
output_dir=self.output_dir,
min_blur_score=self.min_blur_score,
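
End to end, the new use_depth field is exposed as a CLI flag through the dataclass above, so a processing run would look something like the following (the exact flag spelling depends on the CLI wrapper and is an assumption here):

ns-process-data polycam --data poly_export --output-dir outputs/my-scene --use-depth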
