support hypernerf dataset
ashawkey committed Jun 29, 2022
1 parent 992c625 commit 34cb257
Showing 8 changed files with 149 additions and 68 deletions.
3 changes: 3 additions & 0 deletions assets/update_logs.md
@@ -1,6 +1,9 @@
## Update logs

* 6.29: add support for HyperNeRF's dataset.
  * we use a simplified pinhole camera model, which may introduce bias (see the sketch after this list).
* 6.26: add support for D-NeRF.
  * issue: to enable `--cuda_ray` in a dynamic scene, we have to record a different density grid for each time step. This leads to a much slower `update_extra_status` and a much larger `density_grid`, since there is an additional time dimension. Current workarounds: (1) only use 64 time intervals, (2) update it every 100 steps (compared to every 16 steps in static NeRF), (3) stop updating after 100 updates since the grid should be stable by then.
* 6.16: add support for CCNeRF.
* 6.15: fixed a bug in raymarching, improved PSNR. Density thresh is directly applied on sigmas now (removed the empirical scaling factor).
* 6.6: fix gridencoder to always use more accurate float32 inputs (coords), slightly improved performance (matched with tcnn).
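
The "simplified pinhole camera model" mentioned in the 6.29 entry keeps only the focal length and principal point from each HyperNeRF camera file and drops the distortion and skew terms, which is where the possible bias comes from. A minimal sketch of that idea, assuming a Nerfies/HyperNeRF-style `camera.json` layout (the field names and helper are assumptions for illustration, not the repository's actual `hyper2nerf.py`):

```python
import json
import numpy as np

def load_pinhole_intrinsics(camera_json_path, downscale=1):
    """Build a simplified pinhole intrinsics matrix from a Nerfies-style camera.json."""
    with open(camera_json_path, "r") as f:
        cam = json.load(f)
    fl = cam["focal_length"] / downscale                      # assumed field name
    cx, cy = np.array(cam["principal_point"]) / downscale     # assumed field name
    # radial/tangential distortion and skew are simply ignored in the simplified model
    return np.array([[fl, 0.0, cx],
                     [0.0, fl, cy],
                     [0.0, 0.0, 1.0]], dtype=np.float32)
```
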
18 changes: 15 additions & 3 deletions dnerf/provider.py
@@ -115,6 +115,8 @@ def __init__(self, opt, device, type='train', downscale=1, n_test=10):
self.mode = 'colmap' # manually split, use view-interpolation for test.
elif os.path.exists(os.path.join(self.root_path, 'transforms_train.json')):
self.mode = 'blender' # provided split
else:
raise NotImplementedError(f'[NeRFDataset] Cannot find transforms*.json under {self.root_path}')

# load nerf-compatible format data.
if self.mode == 'colmap':
@@ -166,8 +168,8 @@ def __init__(self, opt, device, type='train', downscale=1, n_test=10):
f0, f1 = np.random.choice(frames, 2, replace=False)
pose0 = nerf_matrix_to_ngp(np.array(f0['transform_matrix'], dtype=np.float32), scale=self.scale, offset=self.offset) # [4, 4]
pose1 = nerf_matrix_to_ngp(np.array(f1['transform_matrix'], dtype=np.float32), scale=self.scale, offset=self.offset) # [4, 4]
time0 = f0['time'] if 'time' in f0 else 0
time1 = f1['time'] if 'time' in f1 else 0
time0 = f0['time'] if 'time' in f0 else int(os.path.basename(f0['file_path'])[:-4]) # fall back to the frame index parsed from the file name
time1 = f1['time'] if 'time' in f1 else int(os.path.basename(f1['file_path'])[:-4])
rots = Rotation.from_matrix(np.stack([pose0[:3, :3], pose1[:3, :3]]))
slerp = Slerp([0, 1], rots)

@@ -182,6 +184,13 @@ def __init__(self, opt, device, type='train', downscale=1, n_test=10):
self.poses.append(pose)
time = (1 - ratio) * time0 + ratio * time1
self.times.append(time)

# manually find max time to normalize
if 'time' not in f0:
max_time = 0
for f in frames:
max_time = max(max_time, int(os.path.basename(f['file_path'])[:-4]))
self.times = [t / max_time for t in self.times]

else:
# for colmap, manually split a valid set (the first frame).
@@ -239,7 +248,10 @@ def __init__(self, opt, device, type='train', downscale=1, n_test=10):
if self.images is not None:
self.images = torch.from_numpy(np.stack(self.images, axis=0)) # [N, H, W, C]
self.times = torch.from_numpy(np.asarray(self.times, dtype=np.float32)).view(-1, 1) # [N, 1]
self.times = self.times / self.times.max() # normalize to [0, 1]

# manually normalize if times are not already in [0, 1]
if self.times.max() > 1:
self.times = self.times / (self.times.max() + 1e-8) # normalize to [0, 1]

# calculate mean radius of all camera poses
self.radius = self.poses[:, :3, 3].norm(dim=-1).mean(0).item()
2 changes: 1 addition & 1 deletion dnerf/renderer.py
@@ -282,7 +282,7 @@ def run_cuda(self, rays_o, rays_d, time, dt_gamma=0, bg_color=None, perturb=Fals
bg_color = 1

# determine the correct frame of density grid to use
t = torch.floor(time[0][0] * self.time_size).clamp(max=self.time_size - 1).long()
t = torch.floor(time[0][0] * self.time_size).clamp(min=0, max=self.time_size - 1).long()

results = {}
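
The one-line change above maps a normalized time in [0, 1] to one of `time_size` density-grid slices; clamping to `[0, time_size - 1]` keeps `time = 1.0` (and any slightly negative time produced by interpolation) inside the valid range. A tiny mapping check, illustrative only and not part of the repository code:

```python
import torch

time_size = 64  # the update log mentions 64 time intervals for the dynamic density grid
for time in (0.0, 0.5, 1.0, -0.01):
    t = torch.floor(torch.tensor(time) * time_size).clamp(min=0, max=time_size - 1).long()
    print(f"time={time:+.2f} -> grid slice {int(t)}")  # 0, 32, 63, 0
```
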

2 changes: 2 additions & 0 deletions nerf/provider.py
@@ -116,6 +116,8 @@ def __init__(self, opt, device, type='train', downscale=1, n_test=10):
self.mode = 'colmap' # manually split, use view-interpolation for test.
elif os.path.exists(os.path.join(self.root_path, 'transforms_train.json')):
self.mode = 'blender' # provided split
else:
raise NotImplementedError(f'[NeRFDataset] Cannot find transforms*.json under {self.root_path}')

# load nerf-compatible format data.
if self.mode == 'colmap':
4 changes: 4 additions & 0 deletions readme.md
@@ -125,6 +125,7 @@ python main_nerf.py data/TanksAndTemple/Family --workspace trial_nerf_family -O
# 3. call the preprocess code: (should install ffmpeg and colmap first! refer to the file for more options)
python scripts/colmap2nerf.py --video ./data/custom/video.mp4 --run_colmap # if use video
python scripts/colmap2nerf.py --images ./data/custom/images/ --run_colmap # if use images
python scripts/colmap2nerf.py --video ./data/custom/video.mp4 --run_colmap --dynamic # if the scene is dynamic (for the D-NeRF setting), also save a time value for each frame
# 4. it should create transforms.json; you can then train with: (you'll need to try different scale, bound, and dt_gamma values so the object is correctly located in the bounding box and renders smoothly.)
python main_nerf.py data/custom --workspace trial_nerf_custom -O --gui --scale 2.0 --bound 1.0 --dt_gamma 0.02

@@ -155,6 +156,9 @@ python main_CCNeRF.py data/nerf_synthetic/hotdog --workspace trial_cc_hotdog -O
# almost the same as Instant-ngp NeRF, just replace the main script.
python main_dnerf.py data/dnerf/jumpingjacks --workspace trial_dnerf_jumpingjacks -O --bound 1.0 --scale 0.8 --dt_gamma 0
python main_dnerf.py data/dnerf/jumpingjacks --workspace trial_dnerf_jumpingjacks -O --bound 1.0 --scale 0.8 --dt_gamma 0 --gui
# for the hypernerf dataset, first convert it into nerf-compatible format:
python scripts/hyper2nerf.py data/split-cookie --downscale 2 # will generate transforms*.json
python main_dnerf.py data/split-cookie/ --workspace trial_dnerf_cookies -O --bound 1 --scale 0.3 --dt_gamma 0
```
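
After the conversion, `dnerf/provider.py` (diffed above) reads the generated `transforms*.json` and expects each frame to carry a `transform_matrix` and, ideally, a `time` value; times outside [0, 1] are normalized by their maximum. A quick sanity check, assuming the usual `transforms_train.json` name and per-frame fields (a sketch, not part of the repository):

```python
import json

with open("data/split-cookie/transforms_train.json") as f:
    meta = json.load(f)

# 'time' may be missing for static or colmap-style data; the provider then falls back to frame indices
times = [frame.get("time", 0.0) for frame in meta["frames"]]
print(len(meta["frames"]), "frames, time range:", min(times), "-", max(times))
```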

check the `scripts` directory for more examples.
45 changes: 32 additions & 13 deletions scripts/colmap2nerf.py
@@ -26,6 +26,7 @@ def parse_args():
parser.add_argument("--video", default="", help="input path to the video")
parser.add_argument("--images", default="", help="input path to the images folder, ignored if --video is provided")
parser.add_argument("--run_colmap", action="store_true", help="run colmap first on the image folder")
parser.add_argument("--dynamic", action="store_true", help="for dynamic scene, extraly save time calculated from frame index.")

parser.add_argument("--video_fps", default=3)
parser.add_argument("--time_slice", default="", help="time (in seconds) in the format t1,t2 within which the images should be generated from the video. eg: \"--time_slice '10,300'\" will generate images only from 10th second to 300th second of the video")
@@ -231,7 +232,9 @@ def closest_point_2_lines(oa, da, ob, db): # returns point closest to both rays

with open(os.path.join(TEXT_FOLDER, "images.txt"), "r") as f:
i = 0

bottom = np.array([0.0, 0.0, 0.0, 1.0]).reshape([1, 4])

out = {
"camera_angle_x": angle_x,
"camera_angle_y": angle_y,
@@ -252,26 +255,29 @@ def closest_point_2_lines(oa, da, ob, db): # returns point closest to both rays
up = np.zeros(3)
for line in f:
line = line.strip()

if line[0] == "#":
continue

i = i + 1
if i < SKIP_EARLY*2:
continue
if i % 2 == 1:

if i % 2 == 1:
elems = line.split(" ") # 1-4 is quat, 5-7 is trans, 9ff is filename (9, if filename contains no spaces)

name = '_'.join(elems[9:])
full_name = os.path.join(args.images, name)
rel_name = full_name[len(root_dir) + 1:]

b = sharpness(full_name)
print(name, "sharpness =",b)
# print(name, "sharpness =",b)

image_id = int(elems[0])
qvec = np.array(tuple(map(float, elems[1:5])))
tvec = np.array(tuple(map(float, elems[5:8])))
R = qvec2rotmat(-qvec)
t = tvec.reshape([3,1])
t = tvec.reshape([3, 1])
m = np.concatenate([np.concatenate([R, t], 1), bottom], 0)
c2w = np.linalg.inv(m)

@@ -282,12 +288,19 @@ def closest_point_2_lines(oa, da, ob, db): # returns point closest to both rays

up += c2w[0:3, 1]

frame = {"file_path" : rel_name, "sharpness" : b, "transform_matrix" : c2w}
frame = {
"file_path": rel_name,
"sharpness": b,
"transform_matrix": c2w
}

out["frames"].append(frame)

nframes = len(out["frames"])
N = len(out["frames"])
up = up / np.linalg.norm(up)
print("up vector was", up)

print("[INFO] up vector was", up)

R = rotmat(up, [0, 0, 1]) # rotate up vector to [0,0,1]
R = np.pad(R, [0, 1])
R[-1, -1] = 1
@@ -296,7 +309,7 @@ def closest_point_2_lines(oa, da, ob, db): # returns point closest to both rays
f["transform_matrix"] = np.matmul(R, f["transform_matrix"]) # rotate up to be the z axis

# find a central point they are all looking at
print("computing center of attention...")
print("[INFO] computing center of attention...")
totw = 0.0
totp = np.array([0.0, 0.0, 0.0])
for f in out["frames"]:
@@ -308,21 +321,27 @@ def closest_point_2_lines(oa, da, ob, db): # returns point closest to both rays
totp += p * w
totw += w
totp /= totw
print(totp) # the cameras are looking at totp
for f in out["frames"]:
f["transform_matrix"][0:3,3] -= totp

avglen = 0.
for f in out["frames"]:
avglen += np.linalg.norm(f["transform_matrix"][0:3,3])
avglen /= nframes
print("avg camera distance from origin", avglen)
avglen /= N
print("[INFO] avg camera distance from origin", avglen)
for f in out["frames"]:
f["transform_matrix"][0:3,3] *= 4.0 / avglen # scale to "nerf sized"

# sort frames by id
out["frames"].sort(key=lambda d: d['file_path'])

# add time if scene is dynamic
if args.dynamic:
for i, f in enumerate(out["frames"]):
f['time'] = i / N

for f in out["frames"]:
f["transform_matrix"] = f["transform_matrix"].tolist()
print(nframes,"frames")
print(f"writing {OUT_PATH}")

print(f"[INFO] writing {N} frames to {OUT_PATH}")
with open(OUT_PATH, "w") as outfile:
json.dump(out, outfile, indent=2)
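
With `--dynamic`, frames are sorted by `file_path` and then assigned `time = i / N`, so the first frame gets 0 and the last gets (N - 1) / N. A minimal illustration of that assignment, using made-up file names rather than real data:

```python
# illustrative only: mimic the --dynamic time assignment with toy frame entries
frames = [{"file_path": f"images/{i:04d}.jpg"} for i in range(4)]
frames.sort(key=lambda d: d["file_path"])
N = len(frames)
for i, f in enumerate(frames):
    f["time"] = i / N
print([f["time"] for f in frames])  # [0.0, 0.25, 0.5, 0.75]
```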