using .npy to save depth image

Jianxff · May 1, 2024 · 4f1bd9c · 4f1bd9c
1 parent 2933ccd
commit 4f1bd9c
Show file tree

Hide file tree

Showing 11 changed files with 293 additions and 283 deletions.
diff --git a/depth.py b/depth.py
@@ -17,7 +17,6 @@
     parser.add_argument("--focal", help='focal length', type=float, default=None)
     parser.add_argument("--calib", help='calibration file, overwrite focal', type=str, default=None)
     parser.add_argument("--out", help='dir for output depth', type=str, default='')
-    parser.add_argument("--depth-scale", help='depth scale factor', type=float, default=1000.0)
     parser.add_argument("--ckpt", type=str, default='./weights/metric_depth_vit_large_800k.pth', help='checkpoint file')
     parser.add_argument("--model-name", type=str, default='v2-L', choices=['v2-L', 'v2-S'], help='model type')
     args = parser.parse_args()
@@ -30,7 +29,6 @@
         checkpoint=args.ckpt,
         model_name=args.model_name
     )
-    d_scale = args.depth_scale
 
     image_dir = Path(args.images).resolve()
     images = list(image_dir.glob('*.[p|j][n|p]g'))
@@ -43,8 +41,7 @@
         depth = metric(image, args.focal)
 
         # save orignal depth
-        depth_u16 = (depth * d_scale).astype('uint16')
-        cv2.imwrite(str(out_dir / f'{image.stem}.png'), depth_u16)
+        np.save(str(out_dir / f'{image.stem}.npy'), depth)
 
         # save colormap
         depth_color = metric.gray_to_colormap(depth)

diff --git a/mesh.py b/mesh.py
@@ -1,5 +1,6 @@
 # standard library
 from typing import *
+from pathlib import Path
 # third party
 import argparse
 import numpy as np
@@ -15,7 +16,8 @@
     parser.add_argument("--viz", type=bool, default=False, help="visualize")
     parser.add_argument("--focal", type=float, default=None, help="focal length")
     parser.add_argument("--calib", type=str, default=None, help="calib file, overwrite focal")
-    parser.add_argument("--mesh", type=str, default=None, help="save mesh")
+    parser.add_argument("--mesh", type=str, default=None, help="save mesh", required=True)
+    parser.add_argument("--voxel_length", type=float, default=0.05, help="voxel length")
 
     args = parser.parse_args()
 
@@ -34,9 +36,9 @@
         traj_dir=args.traj,
         intrinsic=intr,
         distort=distort,
-        depth_scale=args.depth_scale,
         viz=args.viz,
-        mesh_save=args.mesh
+        voxel_length=args.voxel_length,
+        mesh_save=Path(args.mesh).parent / 'mesh_raw.ply'
     )
 
     RGBDFusion.simplify_mesh(

diff --git a/modules/__init__.py b/modules/__init__.py
@@ -2,7 +2,7 @@
 from . import utils
 from .data import PosedImageStream
 from .metric import Metric3D as Metric
-from .tsdf import RGBDFusion as RGBDFusion
+from . import fusion as RGBDFusion
 
 __ALL__ = [
     "Droid"

diff --git a/modules/data.py b/modules/data.py
@@ -15,20 +15,18 @@ def __init__(
         image_dir: Path,
         depth_dir: Optional[Path] = None,
         traje_dir: Optional[Path] = None,
-        depth_scale: float = 1000.0,
         stride: Optional[int] = 1,
         intrinsic: Optional[Union[float, np.ndarray]] = None,
         distort: Optional[np.ndarray] = None,
         resize: Optional[Tuple[int, int]] = None,
     ):
-        self.depth_scale = depth_scale
         self.distort = distort
         self.stride = stride
 
         self.rgb_list = \
             sorted(list(Path(image_dir).glob('*.[p|j][n|p]g')))[::stride]
         self.depth_list = None if not depth_dir \
-            else sorted(list(Path(depth_dir).glob('*.png')))[::stride]
+            else sorted(list(Path(depth_dir).glob('*.npy')))[::stride]
         self.pose_list = None if not traje_dir \
             else sorted(list(Path(traje_dir).glob('*.txt')))[::stride]
 
@@ -58,24 +56,22 @@ def __init__(
         else:
             self.resize = None
 
-    def read_image(self, path: Union[str, Path]) -> np.ndarray:
-        image = cv2.imread(str(path), cv2.IMREAD_UNCHANGED)
-        # # convert rgb
-        # if len(image.shape) == 3 and image.shape[-1] == 3:
-        #     image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+    def preprocess(self, frame: np.ndarray) -> np.ndarray:
         if self.distort is not None:
-            image = cv2.undistort(image, self.K_origin, self.distort)
+            frame = cv2.undistort(frame, self.K_origin, self.distort)
         if self.resize:
-            image = cv2.resize(image, self.resize)
-        return image
+            frame = cv2.resize(frame, self.resize)
+        return frame
 
     def __len__(self) -> int:
         return len(self.rgb_list)
 
     def __getitem__(self, idx) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
-        rgb = self.read_image(self.rgb_list[idx])
-        depth = None if not self.depth_list else \
-            self.read_image(self.depth_list[idx]).astype(np.float32) / self.depth_scale
+        raw_rgb = cv2.imread(str(self.rgb_list[idx]), cv2.IMREAD_UNCHANGED)
+        raw_depth = None if not self.depth_list else np.load(self.depth_list[idx])
+        # pack data
+        rgb = self.preprocess(raw_rgb)
+        depth = None if not self.depth_list else self.preprocess(raw_depth)
         pose = None if not self.pose_list else np.loadtxt(self.pose_list[idx])
 
         return rgb, depth, pose, self.intrinsic

diff --git a/modules/droid.py b/modules/droid.py
@@ -39,8 +39,8 @@ class Options:
     intrinsic: np.ndarray = None
     focal: float = None
     trajectory_path: Path = None
+    convert_pose_gl: bool = False
     poses_dir: Path = None
-    depth_scale: float = 1.0
     distort: np.ndarray = None
     global_ba_frontend: int = 0
 
@@ -55,7 +55,6 @@ def __init__(
         self,
         image_dir: Path,
         depth_dir: Optional[Path],
-        depth_scale: float = 1000.0,
         stride: Optional[int] = 1,
         intrinsic: Optional[Union[float, np.ndarray]] = None,
         distort: Optional[np.ndarray] = None,
@@ -64,7 +63,6 @@ def __init__(
         super().__init__(
             image_dir=image_dir,
             depth_dir=depth_dir,
-            depth_scale=depth_scale,
             stride=stride,
             intrinsic=intrinsic,
             distort=distort,
@@ -101,7 +99,6 @@ def run(
     dataset = RGBDStream(
         image_dir=image_dir,
         depth_dir=depth_dir,
-        depth_scale=setting.depth_scale,
         stride=setting.stride,
         intrinsic=setting.intrinsic if setting.intrinsic is not None else setting.focal,
         distort=setting.distort,
@@ -140,11 +137,13 @@ def extract_rgb_stream(stream: RGBDStream):
             yield t, im, intr
     traj_est = droid.terminate(extract_rgb_stream(dataset))
 
+    # save raw trajectory under opencv coordinate
     if setting.trajectory_path is not None:
-        np.savetxt(setting.trajectory_path, traj_est)
+        np.savetxt(str(setting.trajectory_path), traj_est)
 
+    # save pose44 matrix under opencv/opengl coordinate, ordered by frame
     if setting.poses_dir is not None:
         from .utils import trajectory_to_poses
-        trajectory_to_poses(traj_est, setting.poses_dir)
+        trajectory_to_poses(traj_est, setting.poses_dir, convert_opengl=setting.convert_pose_gl)
 
     print('finished')
diff --git a/modules/fusion.py b/modules/fusion.py
@@ -0,0 +1,193 @@
+# standard library
+from pathlib import Path
+from typing import *
+# third party
+import cv2
+import numpy as np
+import open3d as o3d
+from tqdm import tqdm
+import psutil
+# dataset
+from .data import PosedImageStream
+
+
+def fusion(
+    data_stream: Optional[PosedImageStream] = None,
+    voxel_length: Optional[float] = 0.05,
+    sdf_trunc: Optional[float] = 0.1,
+    depth_trunc: Optional[float] = 5.0,
+    colored: Optional[bool] = True
+) -> None:
+    # tsdf volume
+    color_type = o3d.pipelines.integration.TSDFVolumeColorType.RGB8 if colored \
+        else o3d.pipelines.integration.TSDFVolumeColorType.Gray32
+
+    volume = o3d.pipelines.integration.ScalableTSDFVolume(
+        voxel_length=voxel_length,
+        sdf_trunc=sdf_trunc,
+        color_type=color_type,
+    )
+    # volume = o3d.geometry.VoxelBlockGrid(
+    #     attr_names=('tsdf', 'weight', 'color'),
+    #     attr_dtypes=(o3d.core.float32, o3d.core.float32, o3d.core.float32),
+    #     attr_channels=((1), (1), (3)),
+    #     voxel_size=3.0 / 256,
+    #     block_resolution=16,
+    #     block_count=50000,
+    # )
+    # camera data
+    intr = data_stream.intrinsic
+    size = data_stream.image_size
+    intr = o3d.camera.PinholeCameraIntrinsic(
+        width=size[0],
+        height=size[1],
+        fx=intr[0],
+        fy=intr[1],
+        cx=intr[2],
+        cy=intr[3]
+    )
+    # intr_tensor = o3d.core.Tensor(
+    #     intr.intrinsic_matrix, o3d.core.Dtype.Float64
+    # )
+
+    print('[TSDF] running RGBD TSDF integrating')
+    pbar = tqdm(total=len(data_stream))
+    for _, (rgb, depth, pose, _) in enumerate(data_stream):
+        extr = np.linalg.inv(pose)
+        color = o3d.geometry.Image(cv2.cvtColor(rgb, cv2.COLOR_BGR2RGB))
+        depth = o3d.geometry.Image(depth)
+        # integrate
+        rgbd = o3d.geometry.RGBDImage.create_from_color_and_depth(
+            color=color,
+            depth=depth,
+            depth_scale=1.0,
+            depth_trunc=depth_trunc,
+            convert_rgb_to_intensity=(color_type != o3d.pipelines.integration.TSDFVolumeColorType.RGB8)
+        )
+
+        volume.integrate(
+            image=rgbd,
+            intrinsic=intr,
+            extrinsic=extr
+        )
+
+        # frustum_block_coords = \
+        #     volume.compute_unique_block_coordinates(
+        #         depth=depth, 
+        #         intrinsic=intr, 
+        #         extrinsic=extr, 
+        #         depth_scale=1.0, 
+        #         depth_max=depth_trunc
+        #     )
+        # volume.integrate(
+        #     frustum_block_coords, 
+        #     depth, color, intr, intr,
+        #     extr, 1.0, depth_trunc
+        # )
+
+        mem = psutil.virtual_memory()
+        total = mem.total / (1024 ** 3)
+        used = mem.used / (1024 ** 3)
+
+        pbar.set_description(f"[memory] {used:.2f}/{total:.0f} GB")
+
+        pbar.update()
+
+    pbar.close()
+
+    return volume
+
+
+def extract_mesh(
+    volume:o3d.pipelines.integration.ScalableTSDFVolume, 
+    save: Optional[Union[str, Path]] = None,
+    viz: Optional[bool] = False
+) -> o3d.geometry.TriangleMesh:
+    print('[TSDF] extracting mesh')
+    mesh = volume.extract_triangle_mesh()
+    # mesh.compute_vertex_normals()
+
+    if viz:
+        o3d.visualization.draw_geometries([mesh])
+    if save is not None:
+        o3d.io.write_triangle_mesh(str(save), mesh)
+
+    return mesh
+
+
+def extract_point_cloud(
+    volume:o3d.pipelines.integration.ScalableTSDFVolume,
+    save: Optional[Union[str, Path]] = None,
+    viz: Optional[bool] = False
+) -> o3d.geometry.PointCloud:
+    print('[TSDF] extracting point cloud')
+    pcd = volume.extract_point_cloud()
+
+    if viz:
+        o3d.visualization.draw_geometries([pcd])
+    if save is not None:
+        o3d.io.write_point_cloud(str(save), pcd)
+
+    return pcd
+
+
+
+def simplify_mesh(
+    mesh: Union[str, Path, o3d.geometry.TriangleMesh],
+    voxel_size: Optional[float] = 0.05,
+    save: Optional[Union[str, Path]] = None
+) -> o3d.geometry.TriangleMesh:
+    if isinstance(mesh, (str, Path)):
+        mesh = o3d.io.read_triangle_mesh(str(mesh))
+    mesh.compute_vertex_normals()
+
+    print('[TSDF] simplifying mesh')
+    # simplify
+    mesh_smp = mesh.simplify_vertex_clustering(
+        voxel_size=voxel_size,
+        contraction=o3d.geometry.SimplificationContraction.Average
+    )
+    # smooth
+    mesh_smp = mesh_smp.filter_smooth_taubin(number_of_iterations=100)
+    mesh_smp.compute_vertex_normals()
+
+    # write to file
+    if save is not None:
+        o3d.io.write_triangle_mesh(str(save), mesh_smp)
+
+    return mesh_smp
+
+
+
+def pipeline(
+    image_dir: Union[str, Path],                # rgb image directory
+    depth_dir: Optional[Union[str, Path]],      # depth image directory
+    traj_dir: Optional[Union[str, Path]],       # trajectory files directory
+    intrinsic: Optional[Union[float, np.ndarray]] = None,   # camera intrinsic
+    distort: Optional[np.ndarray] = None,                   # camera distortion
+    mesh_save: Optional[Union[str, Path]] = None,   # save mesh to file
+    viz: Optional[bool] = False,            # visualize mesh with Open3D
+    voxel_length: Optional[float] = 0.05,   # TSDF voxel length
+    sdf_trunc: Optional[float] = 0.1,       # TSDF truncation
+    depth_trunc: Optional[float] = 5.0,     # TSDF depth truncation
+    colored: Optional[bool] = True          # colored TSDF
+) -> None:
+    stream = PosedImageStream(
+        image_dir=image_dir,
+        depth_dir=depth_dir,
+        traje_dir=traj_dir,
+        intrinsic=intrinsic,
+        distort=distort,
+    )
+
+    volume = fusion(
+        data_stream=stream,
+        voxel_length=voxel_length,
+        sdf_trunc=sdf_trunc,
+        depth_trunc=depth_trunc,
+        colored=colored
+    )
+
+    mesh = extract_mesh(volume, save=mesh_save, viz=viz)
+
+    return mesh
diff --git a/modules/metric.py b/modules/metric.py
@@ -45,7 +45,7 @@ def __call__(
         self,
         rgb_image: Union[np.ndarray, Image.Image, str, Path],
         focal: Optional[float] = None,
-    ) -> None:
+    ) -> np.ndarray:
         # read image
         if isinstance(rgb_image, (str, Path)):
             rgb_image = np.array(Image.open(rgb_image))