Commit d02ea4f

add LLFF dataset support
ashawkey committed Apr 13, 2022
1 parent 213b0b7 commit d02ea4f
Showing 10 changed files with 246 additions and 68 deletions.
17 changes: 8 additions & 9 deletions colmap2nerf.py
@@ -271,24 +271,23 @@ def closest_point_2_lines(oa, da, ob, db): # returns point closest to both rays
        t = tvec.reshape([3,1])
        m = np.concatenate([np.concatenate([R, t], 1), bottom], 0)
        c2w = np.linalg.inv(m)
-        c2w[0:3,2] *= -1 # flip the y and z axis
-        c2w[0:3,1] *= -1
-        c2w = c2w[[1,0,2,3],:] # swap y and z
-        c2w[2,:] *= -1 # flip whole world upside down
+        c2w[0:3, 2] *= -1 # flip the y and z axis
+        c2w[0:3, 1] *= -1
+        c2w = c2w[[1, 0, 2, 3], :] # swap y and z
+        c2w[2, :] *= -1 # flip whole world upside down

-        up += c2w[0:3,1]
+        up += c2w[0:3, 1]

        frame = {"file_path" : rel_name, "sharpness" : b, "transform_matrix" : c2w}
        out["frames"].append(frame)

    nframes = len(out["frames"])
    up = up / np.linalg.norm(up)
    print("up vector was", up)
-    R = rotmat(up,[0,0,1]) # rotate up vector to [0,0,1]
-    R = np.pad(R,[0,1])
+    R = rotmat(up, [0, 0, 1]) # rotate up vector to [0,0,1]
+    R = np.pad(R, [0, 1])
    R[-1, -1] = 1


    for f in out["frames"]:
        f["transform_matrix"] = np.matmul(R, f["transform_matrix"]) # rotate up to be the z axis

@@ -302,7 +301,7 @@ def closest_point_2_lines(oa, da, ob, db): # returns point closest to both rays
            mg = g["transform_matrix"][0:3,:]
            p, w = closest_point_2_lines(mf[:,3], mf[:,2], mg[:,3], mg[:,2])
            if w > 0.01:
-                totp += p*w
+                totp += p * w
                totw += w
    totp /= totw
    print(totp) # the cameras are looking at totp
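The four in-place edits above amount to one fixed change of basis. A minimal sketch (numpy only; the starting matrix is illustrative) that checks the equivalence:

import numpy as np

c2w = np.block([[np.eye(3), np.array([[1.0], [2.0], [3.0]])],
                [np.zeros((1, 3)), np.ones((1, 1))]])  # hypothetical camera-to-world

a = c2w.copy()
a[0:3, 2] *= -1         # flip the z column
a[0:3, 1] *= -1         # flip the y column
a = a[[1, 0, 2, 3], :]  # swap the first two rows
a[2, :] *= -1           # flip the world upside down

P = np.array([[0, 1, 0, 0], [1, 0, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]], dtype=float)
S = np.diag([1.0, -1.0, -1.0, 1.0])
assert np.allclose(a, P @ c2w @ S)  # row ops = left-multiply, column ops = right-multiply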
128 changes: 128 additions & 0 deletions llff2nerf.py
@@ -0,0 +1,128 @@
import os
import glob
import numpy as np
import math
import json

import argparse

def closest_point_2_lines(oa, da, ob, db): # returns point closest to both rays of form o+t*d, and a weight factor that goes to 0 if the lines are parallel
    da = da / np.linalg.norm(da)
    db = db / np.linalg.norm(db)
    c = np.cross(da, db)
    denom = np.linalg.norm(c)**2
    t = ob - oa
    ta = np.linalg.det([t, db, c]) / (denom + 1e-10)
    tb = np.linalg.det([t, da, c]) / (denom + 1e-10)
    if ta > 0:
        ta = 0
    if tb > 0:
        tb = 0
    return (oa+ta*da+ob+tb*db) * 0.5, denom
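# Illustrative sanity check (not part of the converter): two cameras at
# (0, 0, 1) and (1, 0, 0) whose +z axes point away from the origin triangulate
# the origin with weight 1.0, since both clamped ray parameters stay negative
# and the (unit) directions are perpendicular:
#   p, w = closest_point_2_lines(np.array([0., 0., 1.]), np.array([0., 0., 1.]),
#                                np.array([1., 0., 0.]), np.array([1., 0., 0.]))
#   # p -> [0., 0., 0.], w -> 1.0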

def rotmat(a, b):
    a, b = a / np.linalg.norm(a), b / np.linalg.norm(b)
    v = np.cross(a, b)
    c = np.dot(a, b)
    s = np.linalg.norm(v)
    kmat = np.array([[0, -v[2], v[1]], [v[2], 0, -v[0]], [-v[1], v[0], 0]])
    return np.eye(3) + kmat + kmat.dot(kmat) * ((1 - c) / (s ** 2 + 1e-10))
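# Quick check of the Rodrigues construction above (illustrative): rotmat(a, b)
# returns the rotation taking direction a to direction b, e.g.
#   up = np.array([0.1, 0.2, 0.97]); up = up / np.linalg.norm(up)
#   R = rotmat(up, [0, 0, 1])
#   # R @ up -> approximately [0., 0., 1.]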

if __name__ == '__main__':

    parser = argparse.ArgumentParser()
    parser.add_argument('path', type=str, help="root directory to the LLFF dataset (contains images/ and poses_bounds.npy)")
    parser.add_argument('--images', type=str, default='images', help="images folder")
    parser.add_argument('--downscale', type=float, default=1, help="image size down scale")

    opt = parser.parse_args()
    print(f'[INFO] process {opt.path}')

    # load data
    images = [f[len(opt.path):] for f in sorted(glob.glob(os.path.join(opt.path, opt.images, "*"))) if f.lower().endswith('png') or f.lower().endswith('jpg') or f.lower().endswith('jpeg')]

    poses_bounds = np.load(os.path.join(opt.path, 'poses_bounds.npy'))
    N = poses_bounds.shape[0]

    print(f'[INFO] loaded {len(images)} images, {N} poses_bounds as {poses_bounds.shape}')

    assert N == len(images)

    poses = poses_bounds[:, :15].reshape(-1, 3, 5) # (N, 3, 5)
    bounds = poses_bounds[:, -2:] # (N, 2)

    H, W, fl = poses[0, :, -1]

    H = H // opt.downscale
    W = W // opt.downscale
    fl = fl / opt.downscale

    print(f'[INFO] H = {H}, W = {W}, fl = {fl} (downscale = {opt.downscale})')

    # inversion of this: https://github.com/Fyusion/LLFF/blob/c6e27b1ee59cb18f054ccb0f87a90214dbe70482/llff/poses/pose_utils.py#L51
    poses = np.concatenate([poses[..., 1:2], poses[..., 0:1], -poses[..., 2:3], poses[..., 3:4]], -1) # (N, 3, 4)

    # to homogeneous
    last_row = np.tile(np.array([0, 0, 0, 1]), (len(poses), 1, 1)) # (N, 1, 4)
    poses = np.concatenate([poses, last_row], axis=1) # (N, 4, 4)

    # the following steps are from colmap2nerf...
    poses[:, 0:3, 1] *= -1
    poses[:, 0:3, 2] *= -1
    poses = poses[:, [1, 0, 2, 3], :] # swap y and z
    poses[:, 2, :] *= -1 # flip whole world upside down

    up = poses[:, 0:3, 1].sum(0)
    up = up / np.linalg.norm(up)
    R = rotmat(up, [0, 0, 1]) # rotate up vector to [0,0,1]
    R = np.pad(R, [0, 1])
    R[-1, -1] = 1

    poses = R @ poses

    totw = 0.0
    totp = np.array([0.0, 0.0, 0.0])
    for i in range(N):
        mf = poses[i, :3, :]
        for j in range(N):
            mg = poses[j, :3, :]
            p, w = closest_point_2_lines(mf[:,3], mf[:,2], mg[:,3], mg[:,2])
            if w > 0.01:
                totp += p * w
                totw += w
    totp /= totw
    poses[:, :3, 3] -= totp

    avglen = np.linalg.norm(poses[:, :3, 3], axis=-1).mean()

    poses[:, :3, 3] *= 4.0 / avglen

    print(f'[INFO] average radius = {avglen}')

    # construct frames
    frames = []
    for i in range(N):
        frames.append({
            'file_path': images[i],
            'transform_matrix': poses[i].tolist(),
        })

    # construct a transforms.json
    transforms = {
        'w': W,
        'h': H,
        'fl_x': fl,
        'fl_y': fl,
        'cx': W // 2,
        'cy': H // 2,
        'aabb_scale': 2,
        'frames': frames,
    }

    # write
    output_path = os.path.join(opt.path, 'transforms.json')
    print(f'[INFO] write to {output_path}')
    with open(output_path, 'w') as f:
        json.dump(transforms, f, indent=2)
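Once the converter has run (for instance `python llff2nerf.py data/fern --images images_4 --downscale 4` on a standard LLFF scene layout; the path is illustrative), the output can be spot-checked with a few lines (a sketch, assuming numpy and the path above):

import json
import numpy as np

with open('data/fern/transforms.json') as f:
    t = json.load(f)

poses = np.array([frame['transform_matrix'] for frame in t['frames']])  # (N, 4, 4)
print(poses.shape, t['w'], t['h'], t['fl_x'])
assert np.allclose(poses[:, 3], [0, 0, 0, 1])  # homogeneous last row is preserved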

37 changes: 21 additions & 16 deletions main_nerf.py
@@ -15,26 +15,29 @@
parser.add_argument('--test', action='store_true', help="test mode")
parser.add_argument('--workspace', type=str, default='workspace')
parser.add_argument('--seed', type=int, default=0)

### training options
-parser.add_argument('--batch_size', type=int, default=1)
-parser.add_argument('--num_rays', type=int, default=4096)
+parser.add_argument('--num_rays', type=int, default=4096, help="num rays sampled per image for each training step")
parser.add_argument('--cuda_ray', action='store_true', help="use CUDA raymarching instead of pytorch")
-# (only valid when not using --cuda_ray)
-parser.add_argument('--num_steps', type=int, default=512)
-parser.add_argument('--upsample_steps', type=int, default=0)
-parser.add_argument('--max_ray_batch', type=int, default=4096)
-parser.add_argument('--error_map', action='store_true', help="use error map to sample rays")
+parser.add_argument('--num_steps', type=int, default=512, help="num steps sampled per ray (only valid when not using --cuda_ray)")
+parser.add_argument('--upsample_steps', type=int, default=0, help="num steps up-sampled per ray (only valid when not using --cuda_ray)")
+parser.add_argument('--max_ray_batch', type=int, default=4096, help="batch size of rays at inference to avoid OOM (only valid when not using --cuda_ray)")
+parser.add_argument('--error_map', action='store_true', help="[experimental] use error map to sample rays")

### network backbone options
parser.add_argument('--fp16', action='store_true', help="use amp mixed precision training")
parser.add_argument('--ff', action='store_true', help="use fully-fused MLP")
parser.add_argument('--tcnn', action='store_true', help="use TCNN backend")

### dataset options
parser.add_argument('--mode', type=str, default='colmap', help="dataset mode, supports (colmap, blender)")
parser.add_argument('--preload', action='store_true', help="preload all data into GPU, accelerate training but use more GPU memory")
-# (default is for the fox dataset)
+parser.add_argument('--rand_pose_interval', type=int, default=0, help="[experimental] sample one random pose every $ steps, for sparse view regularization. 0 disables this feature.")
+# (the default value is for the fox dataset)
parser.add_argument('--bound', type=float, default=2, help="assume the scene is bounded in box[-bound, bound]^3, if > 1, will invoke adaptive ray marching.")
parser.add_argument('--scale', type=float, default=0.33, help="scale camera location into box[-bound, bound]^3")
parser.add_argument('--dt_gamma', type=float, default=1/128, help="dt_gamma (>=0) for adaptive ray marching. set to 0 to disable, >0 to accelerate rendering (but usually with worse quality)")

### GUI options
parser.add_argument('--gui', action='store_true', help="start a GUI")
parser.add_argument('--W', type=int, default=1920, help="GUI width")
@@ -60,6 +63,7 @@
    from nerf.network import NeRFNetwork

print(opt)

seed_everything(opt.seed)

model = NeRFNetwork(
@@ -73,17 +77,18 @@

criterion = torch.nn.MSELoss(reduction='none')

### test mode
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

if opt.test:

-    trainer = Trainer('ngp', opt, model, workspace=opt.workspace, criterion=criterion, fp16=opt.fp16, metrics=[PSNRMeter()], use_checkpoint='latest')
+    trainer = Trainer('ngp', opt, model, device=device, workspace=opt.workspace, criterion=criterion, fp16=opt.fp16, metrics=[PSNRMeter()], use_checkpoint='latest')

    if opt.gui:
        gui = NeRFGUI(opt, trainer)
        gui.render()

    else:
-        test_loader = NeRFDataset(opt, type='test').dataloader()
+        test_loader = NeRFDataset(opt, device=device, type='test').dataloader()

        if opt.mode == 'blender':
            trainer.evaluate(test_loader) # blender has gt, so evaluate it.
@@ -102,23 +107,23 @@
    # need different milestones for GUI/CMD mode.
    scheduler = lambda optimizer: optim.lr_scheduler.MultiStepLR(optimizer, milestones=[1000, 1500, 2000] if opt.gui else [100, 200], gamma=0.33)

-    trainer = Trainer('ngp', opt, model, workspace=opt.workspace, optimizer=optimizer, criterion=criterion, ema_decay=0.95, fp16=opt.fp16, lr_scheduler=scheduler, metrics=[PSNRMeter()], use_checkpoint='latest', eval_interval=50)
+    trainer = Trainer('ngp', opt, model, device=device, workspace=opt.workspace, optimizer=optimizer, criterion=criterion, ema_decay=0.95, fp16=opt.fp16, lr_scheduler=scheduler, metrics=[PSNRMeter()], use_checkpoint='latest', eval_interval=50)

    if opt.gui:
-        train_loader = NeRFDataset(opt, type='all').dataloader()
+        train_loader = NeRFDataset(opt, device=device, type='all').dataloader()
        trainer.train_loader = train_loader # attach dataloader to trainer

        gui = NeRFGUI(opt, trainer)
        gui.render()

    else:
-        train_loader = NeRFDataset(opt, type='train').dataloader()
-        valid_loader = NeRFDataset(opt, type='val', downscale=2).dataloader()
+        train_loader = NeRFDataset(opt, device=device, type='train').dataloader()
+        valid_loader = NeRFDataset(opt, device=device, type='val', downscale=2).dataloader()

        trainer.train(train_loader, valid_loader, 300)

        # also test
-        test_loader = NeRFDataset(opt, type='test').dataloader()
+        test_loader = NeRFDataset(opt, device=device, type='test').dataloader()

        if opt.mode == 'blender':
            trainer.evaluate(test_loader) # blender has gt, so evaluate it.
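A note on the `criterion = torch.nn.MSELoss(reduction='none')` passed to the Trainer above: keeping unreduced, per-element losses lets the trainer aggregate them per ray before averaging, e.g. to feed the experimental `--error_map` sampling. A minimal sketch of that pattern (shapes illustrative, not the Trainer's actual code):

import torch

pred = torch.rand(4096, 3)  # one RGB prediction per sampled ray
gt = torch.rand(4096, 3)

loss = torch.nn.MSELoss(reduction='none')(pred, gt)  # (4096, 3), per element
per_ray = loss.mean(-1)  # (4096,), could be written back into an error map
total = per_ray.mean()   # scalar that is actually backpropagated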
38 changes: 21 additions & 17 deletions main_tensoRF.py
@@ -16,28 +16,30 @@
parser.add_argument('--workspace', type=str, default='workspace')
parser.add_argument('--seed', type=int, default=0)
### training options
-parser.add_argument('--batch_size', type=int, default=1)
-parser.add_argument('--num_rays', type=int, default=4096)
+parser.add_argument('--num_rays', type=int, default=4096, help="num rays sampled per image for each training step")
parser.add_argument('--cuda_ray', action='store_true', help="use CUDA raymarching instead of pytorch")
+parser.add_argument('--num_steps', type=int, default=512, help="num steps sampled per ray (only valid when not using --cuda_ray)")
+parser.add_argument('--upsample_steps', type=int, default=0, help="num steps up-sampled per ray (only valid when not using --cuda_ray)")
+parser.add_argument('--max_ray_batch', type=int, default=4096, help="batch size of rays at inference to avoid OOM (only valid when not using --cuda_ray)")
+parser.add_argument('--error_map', action='store_true', help="[experimental] use error map to sample rays")
parser.add_argument('--l1_reg_weight', type=float, default=4e-5)
-# (only valid when not using --cuda_ray)
-parser.add_argument('--num_steps', type=int, default=512)
-parser.add_argument('--upsample_steps', type=int, default=0)
-parser.add_argument('--max_ray_batch', type=int, default=4096)
-parser.add_argument('--error_map', action='store_true', help="use error map to sample rays")

### network backbone options
parser.add_argument('--fp16', action='store_true', help="use amp mixed precision training")
parser.add_argument('--cp', action='store_true', help="use TensorCP instead of TensorVMSplit")
parser.add_argument('--resolution0', type=int, default=128)
parser.add_argument('--resolution1', type=int, default=300)
parser.add_argument("--upsample_model_steps", type=int, action="append", default=[2000, 3000, 4000, 5500, 7000])

### dataset options
parser.add_argument('--mode', type=str, default='colmap', help="dataset mode, supports (colmap, blender)")
parser.add_argument('--preload', action='store_true', help="preload all data into GPU, accelerate training but use more GPU memory")
-# (default is for the fox dataset)
+parser.add_argument('--rand_pose_interval', type=int, default=0, help="[experimental] sample one random pose every $ steps, for sparse view regularization. 0 disables this feature.")
+# (the default value is for the fox dataset)
parser.add_argument('--bound', type=float, default=2, help="assume the scene is bounded in box[-bound, bound]^3, if > 1, will invoke adaptive ray marching.")
parser.add_argument('--scale', type=float, default=0.33, help="scale camera location into box[-bound, bound]^3")
parser.add_argument('--dt_gamma', type=float, default=1/128, help="dt_gamma (>=0) for adaptive ray marching. set to 0 to disable, >0 to accelerate rendering (but usually with worse quality)")

### GUI options
parser.add_argument('--gui', action='store_true', help="start a GUI")
parser.add_argument('--W', type=int, default=1920, help="GUI width")
@@ -52,6 +54,7 @@
opt.fp16 = True
opt.cuda_ray = True
opt.preload = True

print(opt)
seed_everything(opt.seed)

@@ -69,19 +72,20 @@

print(model)

-criterion = torch.nn.MSELoss()
+criterion = torch.nn.MSELoss(reduction='none')

+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

### test mode
if opt.test:

-    trainer = Trainer('ngp', opt, model, workspace=opt.workspace, criterion=criterion, fp16=opt.fp16, metrics=[PSNRMeter()], use_checkpoint='latest')
+    trainer = Trainer('ngp', opt, model, device=device, workspace=opt.workspace, criterion=criterion, fp16=opt.fp16, metrics=[PSNRMeter()], use_checkpoint='latest')

    if opt.gui:
        gui = NeRFGUI(opt, trainer)
        gui.render()

    else:
-        test_loader = NeRFDataset(opt, type='test').dataloader()
+        test_loader = NeRFDataset(opt, device=device, type='test').dataloader()

        if opt.mode == 'blender':
            trainer.evaluate(test_loader) # blender has gt, so evaluate it.
@@ -97,28 +101,28 @@
    # need different milestones for GUI/CMD mode.
    scheduler = lambda optimizer: optim.lr_scheduler.MultiStepLR(optimizer, milestones=[1000, 2000] if opt.gui else [100, 200], gamma=0.33)

-    trainer = Trainer('ngp', opt, model, workspace=opt.workspace, optimizer=optimizer, criterion=criterion, ema_decay=None, fp16=opt.fp16, lr_scheduler=scheduler, metrics=[PSNRMeter()], use_checkpoint='latest', eval_interval=50)
+    trainer = Trainer('ngp', opt, model, device=device, workspace=opt.workspace, optimizer=optimizer, criterion=criterion, ema_decay=None, fp16=opt.fp16, lr_scheduler=scheduler, metrics=[PSNRMeter()], use_checkpoint='latest', eval_interval=50)

    # calc upsample target resolutions
    upsample_resolutions = (np.round(np.exp(np.linspace(np.log(opt.resolution0), np.log(opt.resolution1), len(opt.upsample_model_steps) + 1)))).astype(np.int32).tolist()[1:]
    print('upsample_resolutions:', upsample_resolutions)
    trainer.upsample_resolutions = upsample_resolutions

    if opt.gui:
-        train_loader = NeRFDataset(opt, type='all').dataloader()
+        train_loader = NeRFDataset(opt, device=device, type='all').dataloader()
        trainer.train_loader = train_loader # attach dataloader to trainer

        gui = NeRFGUI(opt, trainer)
        gui.render()

    else:
-        train_loader = NeRFDataset(opt, type='train').dataloader()
-        valid_loader = NeRFDataset(opt, type='val', downscale=2).dataloader()
+        train_loader = NeRFDataset(opt, device=device, type='train').dataloader()
+        valid_loader = NeRFDataset(opt, device=device, type='val', downscale=2).dataloader()

        trainer.train(train_loader, valid_loader, 300)

        # also test
-        test_loader = NeRFDataset(opt, type='test').dataloader()
+        test_loader = NeRFDataset(opt, device=device, type='test').dataloader()

        if opt.mode == 'blender':
            trainer.evaluate(test_loader) # blender has gt, so evaluate it.
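The `upsample_resolutions` expression above spaces the grid resolutions geometrically between `--resolution0` and `--resolution1`, one target per upsample step. Worked out with the defaults (a standalone sketch of the same formula):

import numpy as np

steps = [2000, 3000, 4000, 5500, 7000]  # default --upsample_model_steps
res = np.round(np.exp(np.linspace(np.log(128), np.log(300), len(steps) + 1)))
print(res.astype(np.int32).tolist()[1:])  # [152, 180, 213, 253, 300]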
4 changes: 4 additions & 0 deletions nerf/gui.py
@@ -156,11 +156,15 @@ def register_dpg(self):

        ### register window

+        # the rendered image, as the primary window
        with dpg.window(tag="_primary_window", width=self.W, height=self.H):

+            # add the texture
            dpg.add_image("_texture")

        dpg.set_primary_window("_primary_window", True)

+        # control window
        with dpg.window(label="Control", tag="_control_window", width=400, height=300):

            # button theme
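For context, the skeleton this GUI builds on: dearpygui displays the render buffer as a raw texture inside a primary window that fills the viewport. A self-contained sketch (window size and buffer contents illustrative; the actual GUI adds camera controls on top):

import dearpygui.dearpygui as dpg
import numpy as np

W, H = 640, 360
render_buffer = np.zeros((H, W, 3), dtype=np.float32)  # RGB float image

dpg.create_context()

with dpg.texture_registry():
    dpg.add_raw_texture(W, H, render_buffer, format=dpg.mvFormat_Float_rgb, tag="_texture")

# the rendered image, as the primary window
with dpg.window(tag="_primary_window", width=W, height=H):
    dpg.add_image("_texture")
dpg.set_primary_window("_primary_window", True)

dpg.create_viewport(title='ngp', width=W, height=H)
dpg.setup_dearpygui()
dpg.show_viewport()
dpg.start_dearpygui()
dpg.destroy_context()

Because `add_raw_texture` keeps a reference to `render_buffer`, writing new pixel values into that array is typically enough to refresh the displayed image.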
