support hypernerf dataset
ashawkey committed Jun 29, 2022
1 parent 992c625 commit 34cb257
Showing 8 changed files with 149 additions and 68 deletions.
3 changes: 3 additions & 0 deletions assets/update_logs.md
@@ -1,6 +1,9 @@
## Update logs

* 6.29: add support for HyperNeRF's dataset.
  * we use a simplified pinhole camera model, which may introduce bias (see the sketch after this list).
* 6.26: add support for D-NeRF.
  * issue: to enable `--cuda_ray` in a dynamic scene, we have to record a different density grid for each time step. This leads to a much slower `update_extra_status` and a much larger `density_grid`, since there is an additional time dimension. Current workarounds: (1) only use 64 time intervals, (2) update it every 100 steps (compared to every 16 steps in static NeRF), (3) stop updating after 100 updates since the grid should be stable by then.
* 6.16: add support for CCNeRF.
* 6.15: fixed a bug in raymarching, improved PSNR. Density thresh is directly applied on sigmas now (removed the empirical scaling factor).
* 6.6: fix gridencoder to always use more accurate float32 inputs (coords), slightly improved performance (matched with tcnn).
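
The "simplified pinhole camera model" mentioned in the 6.29 entry keeps only the focal length and principal point from each HyperNeRF camera file and drops the distortion and skew terms, which is where the possible bias comes from. A minimal sketch of that idea, assuming a Nerfies/HyperNeRF-style `camera.json` layout (the field names and helper are assumptions for illustration, not the repository's actual `hyper2nerf.py`):

```python
import json
import numpy as np

def load_pinhole_intrinsics(camera_json_path, downscale=1):
    """Build a simplified pinhole intrinsics matrix from a Nerfies-style camera.json."""
    with open(camera_json_path, "r") as f:
        cam = json.load(f)
    fl = cam["focal_length"] / downscale                      # assumed field name
    cx, cy = np.array(cam["principal_point"]) / downscale     # assumed field name
    # radial/tangential distortion and skew are simply ignored in the simplified model
    return np.array([[fl, 0.0, cx],
                     [0.0, fl, cy],
                     [0.0, 0.0, 1.0]], dtype=np.float32)
```
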
18 changes: 15 additions & 3 deletions dnerf/provider.py
@@ -115,6 +115,8 @@ def __init__(self, opt, device, type='train', downscale=1, n_test=10):
self.mode = 'colmap' # manually split, use view-interpolation for test.
elif os.path.exists(os.path.join(self.root_path, 'transforms_train.json')):
self.mode = 'blender' # provided split
else:
raise NotImplementedError(f'[NeRFDataset] Cannot find transforms*.json under {self.root_path}')

# load nerf-compatible format data.
if self.mode == 'colmap':
@@ -166,8 +168,8 @@ def __init__(self, opt, device, type='train', downscale=1, n_test=10):
f0, f1 = np.random.choice(frames, 2, replace=False)
pose0 = nerf_matrix_to_ngp(np.array(f0['transform_matrix'], dtype=np.float32), scale=self.scale, offset=self.offset) # [4, 4]
pose1 = nerf_matrix_to_ngp(np.array(f1['transform_matrix'], dtype=np.float32), scale=self.scale, offset=self.offset) # [4, 4]
time0 = f0['time'] if 'time' in f0 else 0
time1 = f1['time'] if 'time' in f1 else 0
time0 = f0['time'] if 'time' in f0 else int(os.path.basename(f0['file_path'])[:-4]) # fall back to the frame index parsed from the file name
time1 = f1['time'] if 'time' in f1 else int(os.path.basename(f1['file_path'])[:-4])
rots = Rotation.from_matrix(np.stack([pose0[:3, :3], pose1[:3, :3]]))
slerp = Slerp([0, 1], rots)

@@ -182,6 +184,13 @@ def __init__(self, opt, device, type='train', downscale=1, n_test=10):
self.poses.append(pose)
time = (1 - ratio) * time0 + ratio * time1
self.times.append(time)

# manually find max time to normalize
if 'time' not in f0:
max_time = 0
for f in frames:
max_time = max(max_time, int(os.path.basename(f['file_path'])[:-4]))
self.times = [t / max_time for t in self.times]

else:
# for colmap, manually split a valid set (the first frame).
@@ -239,7 +248,10 @@ def __init__(self, opt, device, type='train', downscale=1, n_test=10):
if self.images is not None:
self.images = torch.from_numpy(np.stack(self.images, axis=0)) # [N, H, W, C]
self.times = torch.from_numpy(np.asarray(self.times, dtype=np.float32)).view(-1, 1) # [N, 1]
self.times = self.times / self.times.max() # normalize to [0, 1]

# manually normalize if times are not already in [0, 1]
if self.times.max() > 1:
self.times = self.times / (self.times.max() + 1e-8) # normalize to [0, 1]

# calculate mean radius of all camera poses
self.radius = self.poses[:, :3, 3].norm(dim=-1).mean(0).item()
2 changes: 1 addition & 1 deletion dnerf/renderer.py
@@ -282,7 +282,7 @@ def run_cuda(self, rays_o, rays_d, time, dt_gamma=0, bg_color=None, perturb=Fals
bg_color = 1

# determine the correct frame of density grid to use
t = torch.floor(time[0][0] * self.time_size).clamp(max=self.time_size - 1).long()
t = torch.floor(time[0][0] * self.time_size).clamp(min=0, max=self.time_size - 1).long()

results = {}
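
The one-line change above maps a normalized time in [0, 1] to one of `time_size` density-grid slices; clamping to `[0, time_size - 1]` keeps `time = 1.0` (and any slightly negative time produced by interpolation) inside the valid range. A tiny mapping check, illustrative only and not part of the repository code:

```python
import torch

time_size = 64  # the update log mentions 64 time intervals for the dynamic density grid
for time in (0.0, 0.5, 1.0, -0.01):
    t = torch.floor(torch.tensor(time) * time_size).clamp(min=0, max=time_size - 1).long()
    print(f"time={time:+.2f} -> grid slice {int(t)}")  # 0, 32, 63, 0
```
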

2 changes: 2 additions & 0 deletions nerf/provider.py
@@ -116,6 +116,8 @@ def __init__(self, opt, device, type='train', downscale=1, n_test=10):
self.mode = 'colmap' # manually split, use view-interpolation for test.
elif os.path.exists(os.path.join(self.root_path, 'transforms_train.json')):
self.mode = 'blender' # provided split
else:
raise NotImplementedError(f'[NeRFDataset] Cannot find transforms*.json under {self.root_path}')

# load nerf-compatible format data.
if self.mode == 'colmap':
4 changes: 4 additions & 0 deletions readme.md
@@ -125,6 +125,7 @@ python main_nerf.py data/TanksAndTemple/Family --workspace trial_nerf_family -O
# 3. call the preprocess code: (should install ffmpeg and colmap first! refer to the file for more options)
python scripts/colmap2nerf.py --video ./data/custom/video.mp4 --run_colmap # if use video
python scripts/colmap2nerf.py --images ./data/custom/images/ --run_colmap # if use images
python scripts/colmap2nerf.py --video ./data/custom/video.mp4 --run_colmap --dynamic # if the scene is dynamic (for the D-NeRF setting), also save a time value for each frame
# 4. it should create transforms.json; you can then train with: (you'll need to try different scale, bound, and dt_gamma values so the object is correctly located in the bounding box and renders smoothly.)
python main_nerf.py data/custom --workspace trial_nerf_custom -O --gui --scale 2.0 --bound 1.0 --dt_gamma 0.02

@@ -155,6 +156,9 @@ python main_CCNeRF.py data/nerf_synthetic/hotdog --workspace trial_cc_hotdog -O
# almost the same as Instant-ngp NeRF, just replace the main script.
python main_dnerf.py data/dnerf/jumpingjacks --workspace trial_dnerf_jumpingjacks -O --bound 1.0 --scale 0.8 --dt_gamma 0
python main_dnerf.py data/dnerf/jumpingjacks --workspace trial_dnerf_jumpingjacks -O --bound 1.0 --scale 0.8 --dt_gamma 0 --gui
# for the hypernerf dataset, first convert it into nerf-compatible format:
python scripts/hyper2nerf.py data/split-cookie --downscale 2 # will generate transforms*.json
python main_dnerf.py data/split-cookie/ --workspace trial_dnerf_cookies -O --bound 1 --scale 0.3 --dt_gamma 0
```
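
After the conversion, `dnerf/provider.py` (diffed above) reads the generated `transforms*.json` and expects each frame to carry a `transform_matrix` and, ideally, a `time` value; times outside [0, 1] are normalized by their maximum. A quick sanity check, assuming the usual `transforms_train.json` name and per-frame fields (a sketch, not part of the repository):

```python
import json

with open("data/split-cookie/transforms_train.json") as f:
    meta = json.load(f)

# 'time' may be missing for static or colmap-style data; the provider then falls back to frame indices
times = [frame.get("time", 0.0) for frame in meta["frames"]]
print(len(meta["frames"]), "frames, time range:", min(times), "-", max(times))
```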

check the `scripts` directory for more examples.
45 changes: 32 additions & 13 deletions scripts/colmap2nerf.py
@@ -26,6 +26,7 @@ def parse_args():
parser.add_argument("--video", default="", help="input path to the video")
parser.add_argument("--images", default="", help="input path to the images folder, ignored if --video is provided")
parser.add_argument("--run_colmap", action="store_true", help="run colmap first on the image folder")
parser.add_argument("--dynamic", action="store_true", help="for dynamic scene, extraly save time calculated from frame index.")

parser.add_argument("--video_fps", default=3)
parser.add_argument("--time_slice", default="", help="time (in seconds) in the format t1,t2 within which the images should be generated from the video. eg: \"--time_slice '10,300'\" will generate images only from 10th second to 300th second of the video")
@@ -231,7 +232,9 @@ def closest_point_2_lines(oa, da, ob, db): # returns point closest to both rays

with open(os.path.join(TEXT_FOLDER, "images.txt"), "r") as f:
i = 0

bottom = np.array([0.0, 0.0, 0.0, 1.0]).reshape([1, 4])

out = {
"camera_angle_x": angle_x,
"camera_angle_y": angle_y,
@@ -252,26 +255,29 @@ def closest_point_2_lines(oa, da, ob, db): # returns point closest to both rays
up = np.zeros(3)
for line in f:
line = line.strip()

if line[0] == "#":
continue

i = i + 1
if i < SKIP_EARLY*2:
continue
if i % 2 == 1:

if i % 2 == 1:
elems = line.split(" ") # 1-4 is quat, 5-7 is trans, 9ff is filename (9, if filename contains no spaces)

name = '_'.join(elems[9:])
full_name = os.path.join(args.images, name)
rel_name = full_name[len(root_dir) + 1:]

b = sharpness(full_name)
print(name, "sharpness =",b)
# print(name, "sharpness =",b)

image_id = int(elems[0])
qvec = np.array(tuple(map(float, elems[1:5])))
tvec = np.array(tuple(map(float, elems[5:8])))
R = qvec2rotmat(-qvec)
t = tvec.reshape([3,1])
t = tvec.reshape([3, 1])
m = np.concatenate([np.concatenate([R, t], 1), bottom], 0)
c2w = np.linalg.inv(m)

@@ -282,12 +288,19 @@ def closest_point_2_lines(oa, da, ob, db): # returns point closest to both rays

up += c2w[0:3, 1]

frame = {"file_path" : rel_name, "sharpness" : b, "transform_matrix" : c2w}
frame = {
"file_path": rel_name,
"sharpness": b,
"transform_matrix": c2w
}

out["frames"].append(frame)

nframes = len(out["frames"])
N = len(out["frames"])
up = up / np.linalg.norm(up)
print("up vector was", up)

print("[INFO] up vector was", up)

R = rotmat(up, [0, 0, 1]) # rotate up vector to [0,0,1]
R = np.pad(R, [0, 1])
R[-1, -1] = 1
@@ -296,7 +309,7 @@ def closest_point_2_lines(oa, da, ob, db): # returns point closest to both rays
f["transform_matrix"] = np.matmul(R, f["transform_matrix"]) # rotate up to be the z axis

# find a central point they are all looking at
print("computing center of attention...")
print("[INFO] computing center of attention...")
totw = 0.0
totp = np.array([0.0, 0.0, 0.0])
for f in out["frames"]:
@@ -308,21 +321,27 @@ def closest_point_2_lines(oa, da, ob, db): # returns point closest to both rays
totp += p * w
totw += w
totp /= totw
print(totp) # the cameras are looking at totp
for f in out["frames"]:
f["transform_matrix"][0:3,3] -= totp

avglen = 0.
for f in out["frames"]:
avglen += np.linalg.norm(f["transform_matrix"][0:3,3])
avglen /= nframes
print("avg camera distance from origin", avglen)
avglen /= N
print("[INFO] avg camera distance from origin", avglen)
for f in out["frames"]:
f["transform_matrix"][0:3,3] *= 4.0 / avglen # scale to "nerf sized"

# sort frames by id
out["frames"].sort(key=lambda d: d['file_path'])

# add time if scene is dynamic
if args.dynamic:
for i, f in enumerate(out["frames"]):
f['time'] = i / N

for f in out["frames"]:
f["transform_matrix"] = f["transform_matrix"].tolist()
print(nframes,"frames")
print(f"writing {OUT_PATH}")

print(f"[INFO] writing {N} frames to {OUT_PATH}")
with open(OUT_PATH, "w") as outfile:
json.dump(out, outfile, indent=2)
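
With `--dynamic`, frames are sorted by `file_path` and then assigned `time = i / N`, so the first frame gets 0 and the last gets (N - 1) / N. A minimal illustration of that assignment, using made-up file names rather than real data:

```python
# illustrative only: mimic the --dynamic time assignment with toy frame entries
frames = [{"file_path": f"images/{i:04d}.jpg"} for i in range(4)]
frames.sort(key=lambda d: d["file_path"])
N = len(frames)
for i, f in enumerate(frames):
    f["time"] = i / N
print([f["time"] for f in frames])  # [0.0, 0.25, 0.5, 0.75]
```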