Skip to content

Commit

Permalink
fix raymarching thresh and grad in ashawkey#77, add offset to camera
Browse files Browse the repository at this point in the history
  • Loading branch information
ashawkey committed Jun 29, 2022
1 parent 39ed3bc commit aeaa996
Show file tree
Hide file tree
Showing 9 changed files with 95 additions and 62 deletions.
40 changes: 26 additions & 14 deletions dnerf/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,12 @@


# ref: https://github.com/NVlabs/instant-ngp/blob/b76004c8cf478880227401ae763be4c02f80b62f/include/neural-graphics-primitives/nerf_loader.h#L50
def nerf_matrix_to_ngp(pose, scale=0.33):
def nerf_matrix_to_ngp(pose, scale=0.33, offset=[0, 0, 0]):
# for the fox dataset, 0.33 scales camera radius to ~ 2
new_pose = np.array([
[pose[1, 0], -pose[1, 1], -pose[1, 2], pose[1, 3] * scale],
[pose[2, 0], -pose[2, 1], -pose[2, 2], pose[2, 3] * scale],
[pose[0, 0], -pose[0, 1], -pose[0, 2], pose[0, 3] * scale],
[pose[1, 0], -pose[1, 1], -pose[1, 2], pose[1, 3] * scale + offset[0]],
[pose[2, 0], -pose[2, 1], -pose[2, 2], pose[2, 3] * scale + offset[1]],
[pose[0, 0], -pose[0, 1], -pose[0, 2], pose[0, 3] * scale + offset[2]],
[0, 0, 0, 1],
], dtype=np.float32)
return new_pose
Expand All @@ -30,8 +30,9 @@ def visualize_poses(poses, size=0.1):
# poses: [B, 4, 4]

axes = trimesh.creation.axis(axis_length=4)
sphere = trimesh.creation.icosphere(radius=1)
objects = [axes, sphere]
box = trimesh.primitives.Box(extents=(2, 2, 2)).as_outline()
box.colors = np.array([[128, 128, 128]] * len(box.entities))
objects = [axes, box]

for pose in poses:
# a camera is visualized with 8 line segments for the frustum, plus 1 segment along its viewing direction.
Expand All @@ -41,7 +42,11 @@ def visualize_poses(poses, size=0.1):
c = pos - size * pose[:3, 0] - size * pose[:3, 1] + size * pose[:3, 2]
d = pos + size * pose[:3, 0] - size * pose[:3, 1] + size * pose[:3, 2]

segs = np.array([[pos, a], [pos, b], [pos, c], [pos, d], [a, b], [b, c], [c, d], [d, a]])
dir = (a + b + c + d) / 4 - pos
dir = dir / (np.linalg.norm(dir) + 1e-8)
o = pos + dir * 3

segs = np.array([[pos, a], [pos, b], [pos, c], [pos, d], [a, b], [b, c], [c, d], [d, a], [pos, o]])
segs = trimesh.load_path(segs)
objects.append(segs)

Expand Down Expand Up @@ -96,6 +101,7 @@ def __init__(self, opt, device, type='train', downscale=1, n_test=10):
self.root_path = opt.path
self.preload = opt.preload # preload data into GPU
self.scale = opt.scale # camera radius scale to make sure camera are inside the bounding box.
self.offset = opt.offset # camera offset
self.bound = opt.bound # bounding box half length, also used as the radius to random sample poses.
self.fp16 = opt.fp16 # if preload, load into fp16.

Expand Down Expand Up @@ -158,19 +164,24 @@ def __init__(self, opt, device, type='train', downscale=1, n_test=10):

# choose two random poses, and interpolate between.
f0, f1 = np.random.choice(frames, 2, replace=False)
pose0 = nerf_matrix_to_ngp(np.array(f0['transform_matrix'], dtype=np.float32), scale=self.scale) # [4, 4]
pose1 = nerf_matrix_to_ngp(np.array(f1['transform_matrix'], dtype=np.float32), scale=self.scale) # [4, 4]
pose0 = nerf_matrix_to_ngp(np.array(f0['transform_matrix'], dtype=np.float32), scale=self.scale, offset=self.offset) # [4, 4]
pose1 = nerf_matrix_to_ngp(np.array(f1['transform_matrix'], dtype=np.float32), scale=self.scale, offset=self.offset) # [4, 4]
time0 = f0['time'] if 'time' in f0 else 0
time1 = f1['time'] if 'time' in f1 else 0
rots = Rotation.from_matrix(np.stack([pose0[:3, :3], pose1[:3, :3]]))
slerp = Slerp([0, 1], rots)

self.poses = []
self.images = None
self.times = []
for i in range(n_test + 1):
ratio = np.sin(((i / n_test) - 0.5) * np.pi) * 0.5 + 0.5
pose = np.eye(4, dtype=np.float32)
pose[:3, :3] = slerp(ratio).as_matrix()
pose[:3, 3] = (1 - ratio) * pose0[:3, 3] + ratio * pose1[:3, 3]
self.poses.append(pose)
time = (1 - ratio) * time0 + ratio * time1
self.times.append(time)

else:
# for colmap, manually split a valid set (the first frame).
Expand All @@ -196,7 +207,7 @@ def __init__(self, opt, device, type='train', downscale=1, n_test=10):
continue

pose = np.array(f['transform_matrix'], dtype=np.float32) # [4, 4]
pose = nerf_matrix_to_ngp(pose, scale=self.scale)
pose = nerf_matrix_to_ngp(pose, scale=self.scale, offset=self.offset)

image = cv2.imread(f_path, cv2.IMREAD_UNCHANGED) # [H, W, 3] o [H, W, 4]
if self.H is None or self.W is None:
Expand All @@ -218,7 +229,7 @@ def __init__(self, opt, device, type='train', downscale=1, n_test=10):
if 'time' in f:
time = f['time']
else:
time = 0 # assume static scene
time = int(os.path.basename(f['file_path'])[:-4]) # assume frame index as time

self.poses.append(pose)
self.images.append(image)
Expand All @@ -228,6 +239,7 @@ def __init__(self, opt, device, type='train', downscale=1, n_test=10):
if self.images is not None:
self.images = torch.from_numpy(np.stack(self.images, axis=0)) # [N, H, W, C]
self.times = torch.from_numpy(np.asarray(self.times, dtype=np.float32)).view(-1, 1) # [N, 1]
self.times = self.times / self.times.max() # normalize to [0, 1]

# calculate mean radius of all camera poses
self.radius = self.poses[:, :3, 3].norm(dim=-1).mean(0).item()
Expand All @@ -240,7 +252,7 @@ def __init__(self, opt, device, type='train', downscale=1, n_test=10):
self.error_map = None

# [debug] uncomment to view all training poses.
visualize_poses(self.poses.numpy())
# visualize_poses(self.poses.numpy())

# [debug] uncomment to view examples of randomly generated poses.
# visualize_poses(rand_poses(100, self.device, radius=self.radius).cpu().numpy())
Expand Down Expand Up @@ -271,8 +283,8 @@ def __init__(self, opt, device, type='train', downscale=1, n_test=10):
else:
raise RuntimeError('Failed to load focal length, please check the transforms.json!')

cx = (transform['cx'] / downscale) if 'cx' in transform else (self.H / 2)
cy = (transform['cy'] / downscale) if 'cy' in transform else (self.W / 2)
cx = (transform['cx'] / downscale) if 'cx' in transform else (self.W / 2)
cy = (transform['cy'] / downscale) if 'cy' in transform else (self.H / 2)

self.intrinsics = np.array([fl_x, fl_y, cx, cy])

Expand Down
1 change: 1 addition & 0 deletions main_CCNeRF.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
parser.add_argument('--preload', action='store_true', help="preload all data into GPU, accelerate training but use more GPU memory")
parser.add_argument('--bound', type=float, default=1, help="assume the scene is bounded in box[-bound, bound]^3, if > 1, will invoke adaptive ray marching.")
parser.add_argument('--scale', type=float, default=0.33, help="scale camera location into box[-bound, bound]^3")
parser.add_argument('--offset', type=float, nargs='*', default=[0, 0, 0], help="offset of camera location")
parser.add_argument('--dt_gamma', type=float, default=0, help="dt_gamma (>=0) for adaptive ray marching. set to 0 to disable, >0 to accelerate rendering (but usually with worse quality)")
parser.add_argument('--min_near', type=float, default=0.2, help="minimum near distance for camera")
parser.add_argument('--density_thresh', type=float, default=10, help="threshold for density grid to be occupied")
Expand Down
3 changes: 2 additions & 1 deletion main_dnerf.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
parser.add_argument('--cuda_ray', action='store_true', help="use CUDA raymarching instead of pytorch")
parser.add_argument('--max_steps', type=int, default=1024, help="max num steps sampled per ray (only valid when using --cuda_ray)")
parser.add_argument('--update_extra_interval', type=int, default=100, help="iter interval to update extra status (only valid when using --cuda_ray)")
parser.add_argument('--num_steps', type=int, default=256, help="num steps sampled per ray (only valid when NOT using --cuda_ray)")
parser.add_argument('--num_steps', type=int, default=128, help="num steps sampled per ray (only valid when NOT using --cuda_ray)")
parser.add_argument('--upsample_steps', type=int, default=0, help="num steps up-sampled per ray (only valid when NOT using --cuda_ray)")
parser.add_argument('--max_ray_batch', type=int, default=4096, help="batch size of rays at inference to avoid OOM (only valid when NOT using --cuda_ray)")

Expand All @@ -43,6 +43,7 @@
# (the default value is for the fox dataset)
parser.add_argument('--bound', type=float, default=2, help="assume the scene is bounded in box[-bound, bound]^3, if > 1, will invoke adaptive ray marching.")
parser.add_argument('--scale', type=float, default=0.33, help="scale camera location into box[-bound, bound]^3")
parser.add_argument('--offset', type=float, nargs='*', default=[0, 0, 0], help="offset of camera location")
parser.add_argument('--dt_gamma', type=float, default=1/128, help="dt_gamma (>=0) for adaptive ray marching. set to 0 to disable, >0 to accelerate rendering (but usually with worse quality)")
parser.add_argument('--min_near', type=float, default=0.2, help="minimum near distance for camera")
parser.add_argument('--density_thresh', type=float, default=10, help="threshold for density grid to be occupied")
Expand Down
1 change: 1 addition & 0 deletions main_nerf.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
# (the default value is for the fox dataset)
parser.add_argument('--bound', type=float, default=2, help="assume the scene is bounded in box[-bound, bound]^3, if > 1, will invoke adaptive ray marching.")
parser.add_argument('--scale', type=float, default=0.33, help="scale camera location into box[-bound, bound]^3")
parser.add_argument('--offset', type=float, nargs='*', default=[0, 0, 0], help="offset of camera location")
parser.add_argument('--dt_gamma', type=float, default=1/128, help="dt_gamma (>=0) for adaptive ray marching. set to 0 to disable, >0 to accelerate rendering (but usually with worse quality)")
parser.add_argument('--min_near', type=float, default=0.2, help="minimum near distance for camera")
parser.add_argument('--density_thresh', type=float, default=10, help="threshold for density grid to be occupied")
Expand Down
1 change: 1 addition & 0 deletions main_tensoRF.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
# (the default value is for the fox dataset)
parser.add_argument('--bound', type=float, default=2, help="assume the scene is bounded in box[-bound, bound]^3, if > 1, will invoke adaptive ray marching.")
parser.add_argument('--scale', type=float, default=0.33, help="scale camera location into box[-bound, bound]^3")
parser.add_argument('--offset', type=float, nargs='*', default=[0, 0, 0], help="offset of camera location")
parser.add_argument('--dt_gamma', type=float, default=1/128, help="dt_gamma (>=0) for adaptive ray marching. set to 0 to disable, >0 to accelerate rendering (but usually with worse quality)")
parser.add_argument('--min_near', type=float, default=0.2, help="minimum near distance for camera")
parser.add_argument('--density_thresh', type=float, default=10, help="threshold for density grid to be occupied")
Expand Down
33 changes: 20 additions & 13 deletions nerf/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import cv2
import glob
import json
from cv2 import transform
import tqdm
import numpy as np
from scipy.spatial.transform import Slerp, Rotation
Expand All @@ -11,16 +12,16 @@
import torch
from torch.utils.data import DataLoader

from .utils import get_rays, srgb_to_linear
from .utils import get_rays, srgb_to_linear, torch_vis_2d


# ref: https://github.com/NVlabs/instant-ngp/blob/b76004c8cf478880227401ae763be4c02f80b62f/include/neural-graphics-primitives/nerf_loader.h#L50
def nerf_matrix_to_ngp(pose, scale=0.33):
def nerf_matrix_to_ngp(pose, scale=0.33, offset=[0, 0, 0]):
# for the fox dataset, 0.33 scales camera radius to ~ 2
new_pose = np.array([
[pose[1, 0], -pose[1, 1], -pose[1, 2], pose[1, 3] * scale],
[pose[2, 0], -pose[2, 1], -pose[2, 2], pose[2, 3] * scale],
[pose[0, 0], -pose[0, 1], -pose[0, 2], pose[0, 3] * scale],
[pose[1, 0], -pose[1, 1], -pose[1, 2], pose[1, 3] * scale + offset[0]],
[pose[2, 0], -pose[2, 1], -pose[2, 2], pose[2, 3] * scale + offset[1]],
[pose[0, 0], -pose[0, 1], -pose[0, 2], pose[0, 3] * scale + offset[2]],
[0, 0, 0, 1],
], dtype=np.float32)
return new_pose
Expand All @@ -30,8 +31,9 @@ def visualize_poses(poses, size=0.1):
# poses: [B, 4, 4]

axes = trimesh.creation.axis(axis_length=4)
sphere = trimesh.creation.icosphere(radius=1)
objects = [axes, sphere]
box = trimesh.primitives.Box(extents=(2, 2, 2)).as_outline()
box.colors = np.array([[128, 128, 128]] * len(box.entities))
objects = [axes, box]

for pose in poses:
# a camera is visualized with 8 line segments for the frustum, plus 1 segment along its viewing direction.
Expand All @@ -41,7 +43,11 @@ def visualize_poses(poses, size=0.1):
c = pos - size * pose[:3, 0] - size * pose[:3, 1] + size * pose[:3, 2]
d = pos + size * pose[:3, 0] - size * pose[:3, 1] + size * pose[:3, 2]

segs = np.array([[pos, a], [pos, b], [pos, c], [pos, d], [a, b], [b, c], [c, d], [d, a]])
dir = (a + b + c + d) / 4 - pos
dir = dir / (np.linalg.norm(dir) + 1e-8)
o = pos + dir * 3

segs = np.array([[pos, a], [pos, b], [pos, c], [pos, d], [a, b], [b, c], [c, d], [d, a], [pos, o]])
segs = trimesh.load_path(segs)
objects.append(segs)

Expand Down Expand Up @@ -96,6 +102,7 @@ def __init__(self, opt, device, type='train', downscale=1, n_test=10):
self.root_path = opt.path
self.preload = opt.preload # preload data into GPU
self.scale = opt.scale # camera radius scale to make sure camera are inside the bounding box.
self.offset = opt.offset # camera offset
self.bound = opt.bound # bounding box half length, also used as the radius to random sample poses.
self.fp16 = opt.fp16 # if preload, load into fp16.

Expand Down Expand Up @@ -158,8 +165,8 @@ def __init__(self, opt, device, type='train', downscale=1, n_test=10):

# choose two random poses, and interpolate between.
f0, f1 = np.random.choice(frames, 2, replace=False)
pose0 = nerf_matrix_to_ngp(np.array(f0['transform_matrix'], dtype=np.float32), scale=self.scale) # [4, 4]
pose1 = nerf_matrix_to_ngp(np.array(f1['transform_matrix'], dtype=np.float32), scale=self.scale) # [4, 4]
pose0 = nerf_matrix_to_ngp(np.array(f0['transform_matrix'], dtype=np.float32), scale=self.scale, offset=self.offset) # [4, 4]
pose1 = nerf_matrix_to_ngp(np.array(f1['transform_matrix'], dtype=np.float32), scale=self.scale, offset=self.offset) # [4, 4]
rots = Rotation.from_matrix(np.stack([pose0[:3, :3], pose1[:3, :3]]))
slerp = Slerp([0, 1], rots)

Expand Down Expand Up @@ -193,7 +200,7 @@ def __init__(self, opt, device, type='train', downscale=1, n_test=10):
continue

pose = np.array(f['transform_matrix'], dtype=np.float32) # [4, 4]
pose = nerf_matrix_to_ngp(pose, scale=self.scale)
pose = nerf_matrix_to_ngp(pose, scale=self.scale, offset=self.offset)

image = cv2.imread(f_path, cv2.IMREAD_UNCHANGED) # [H, W, 3] o [H, W, 4]
if self.H is None or self.W is None:
Expand Down Expand Up @@ -259,8 +266,8 @@ def __init__(self, opt, device, type='train', downscale=1, n_test=10):
else:
raise RuntimeError('Failed to load focal length, please check the transforms.json!')

cx = (transform['cx'] / downscale) if 'cx' in transform else (self.H / 2)
cy = (transform['cy'] / downscale) if 'cy' in transform else (self.W / 2)
cx = (transform['cx'] / downscale) if 'cx' in transform else (self.W / 2)
cy = (transform['cy'] / downscale) if 'cy' in transform else (self.H / 2)

self.intrinsics = np.array([fl_x, fl_y, cx, cy])

Expand Down
21 changes: 11 additions & 10 deletions raymarching/src/raymarching.cu
Original file line number Diff line number Diff line change
Expand Up @@ -546,21 +546,20 @@ __global__ void kernel_composite_rays_train_forward(
const scalar_t alpha = 1.0f - __expf(- sigmas[0] * deltas[0]);
const scalar_t weight = alpha * T;

// minimal remaining transmittance
// NOTE: uncommenting this won't affect instant-ngp, but totally breaks TensoRF...
//if (weight < 1e-4f) break;

r += weight * rgbs[0];
g += weight * rgbs[1];
b += weight * rgbs[2];

t += deltas[1]; // real delta
d += weight * t;

ws += weight;

T *= 1.0f - alpha;

// minimal remaining transmittance: stop marching once T is negligible
if (T < 1e-4f) break;

//printf("[n=%d] num_steps=%d, alpha=%f, w=%f, T=%f, sum_dt=%f, d=%f\n", n, step, alpha, weight, T, sum_delta, d);

// locate
Expand Down Expand Up @@ -650,15 +649,17 @@ __global__ void kernel_composite_rays_train_backward(
const scalar_t alpha = 1.0f - __expf(- sigmas[0] * deltas[0]);
const scalar_t weight = alpha * T;

//if (weight < 1e-4f) break;

r += weight * rgbs[0];
g += weight * rgbs[1];
b += weight * rgbs[2];
ws += weight;

T *= 1.0f - alpha;

// minimal remaining transmittance: stop marching once T is negligible
if (T < 1e-4f) break;

// check https://note.kiui.moe/others/nerf_gradient/ for the gradient calculation.
// write grad_rgbs
grad_rgbs[0] = grad_image[0] * weight;
grad_rgbs[1] = grad_image[1] * weight;
Expand All @@ -669,7 +670,7 @@ __global__ void kernel_composite_rays_train_backward(
grad_image[0] * (T * rgbs[0] - (r_final - r)) +
grad_image[1] * (T * rgbs[1] - (g_final - g)) +
grad_image[2] * (T * rgbs[2] - (b_final - b)) +
grad_weights_sum[0] * (T - (ws_final - ws))
grad_weights_sum[0] * (1 - ws_final)
);

//printf("[n=%d] num_steps=%d, T=%f, grad_sigmas=%f, r_final=%f, r=%f\n", n, step, T, grad_sigmas[0], r_final, r);
Expand Down
Loading

0 comments on commit aeaa996

Please sign in to comment.