misc fix, manually merge ashawkey#83, add basis based dnerf
ashawkey committed Jul 19, 2022
1 parent dd2add6 commit 5a7222a
Showing 9 changed files with 297 additions and 13 deletions.
1 change: 1 addition & 0 deletions assets/update_logs.md
@@ -1,5 +1,6 @@
 ## Update logs
 
+* 7.16: add temporal-basis dynamic NeRF (experimental). It trains much faster than the deformation-based dynamic NeRF, but quality is much worse for now...
 * 6.29: add support for HyperNeRF's dataset.
   * we use a simplified pinhole camera model, which may introduce bias.
 * 6.26: add support for D-NeRF.
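For context: instead of warping points with a deformation field, the temporal-basis model (in the spirit of Fourier PlenOctree and NeuVV) factorizes the dynamic field into static per-point coefficients mixed by a small set of time-dependent basis weights. In our own notation (not taken from the repo), the new `dnerf/network_basis.py` below roughly computes

$$\sigma(x,t) = \mathrm{trunc\_exp}\big(f_\sigma(x)^\top b_\sigma(t)\big), \qquad c(x,d,t) = \mathrm{sigmoid}\big(F_c(x,d)\, b_c(t)\big),$$

where $f_\sigma(x) \in \mathbb{R}^{SB}$ and $F_c(x,d) \in \mathbb{R}^{3 \times CB}$ come from the spatial sigma/color networks, and $b_\sigma(t) \in \mathbb{R}^{SB}$, $b_c(t) \in \mathbb{R}^{CB}$ come from a small MLP over frequency-encoded time.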
262 changes: 262 additions & 0 deletions dnerf/network_basis.py
@@ -0,0 +1,262 @@
import torch
import torch.nn as nn
import torch.nn.functional as F

from encoding import get_encoder
from activation import trunc_exp
from .renderer import NeRFRenderer


class NeRFNetwork(NeRFRenderer):
    def __init__(self,
                 encoding="tiledgrid",
                 encoding_dir="sphere_harmonics",
                 encoding_time="frequency",
                 encoding_bg="hashgrid",
                 num_layers=2,
                 hidden_dim=64,
                 geo_feat_dim=32,
                 num_layers_color=3,
                 hidden_dim_color=64,
                 num_layers_bg=2,
                 hidden_dim_bg=64,
                 sigma_basis_dim=32,
                 color_basis_dim=8,
                 num_layers_basis=5,
                 hidden_dim_basis=128,
                 bound=1,
                 **kwargs,
                 ):
        super().__init__(bound, **kwargs)

        # basis network: maps encoded time to SB + CB mixing coefficients
        self.num_layers_basis = num_layers_basis
        self.hidden_dim_basis = hidden_dim_basis
        self.sigma_basis_dim = sigma_basis_dim
        self.color_basis_dim = color_basis_dim
        self.encoder_time, self.in_dim_time = get_encoder(encoding_time, input_dim=1, multires=6)

        basis_net = []
        for l in range(num_layers_basis):
            if l == 0:
                in_dim = self.in_dim_time
            else:
                in_dim = hidden_dim_basis

            if l == num_layers_basis - 1:
                out_dim = self.sigma_basis_dim + self.color_basis_dim
            else:
                out_dim = hidden_dim_basis

            basis_net.append(nn.Linear(in_dim, out_dim, bias=False))

        self.basis_net = nn.ModuleList(basis_net)

        # sigma network
        self.num_layers = num_layers
        self.hidden_dim = hidden_dim
        self.geo_feat_dim = geo_feat_dim
        self.encoder, self.in_dim = get_encoder(encoding, desired_resolution=2048 * bound)

        sigma_net = []
        for l in range(num_layers):
            if l == 0:
                in_dim = self.in_dim
            else:
                in_dim = hidden_dim

            if l == num_layers - 1:
                out_dim = self.sigma_basis_dim + self.geo_feat_dim # SB sigma + features for color
            else:
                out_dim = hidden_dim

            sigma_net.append(nn.Linear(in_dim, out_dim, bias=False))

        self.sigma_net = nn.ModuleList(sigma_net)

        # color network
        self.num_layers_color = num_layers_color
        self.hidden_dim_color = hidden_dim_color
        self.encoder_dir, self.in_dim_dir = get_encoder(encoding_dir)

        color_net = []
        for l in range(num_layers_color):
            if l == 0:
                in_dim = self.in_dim_dir + self.geo_feat_dim
            else:
                in_dim = hidden_dim # NOTE: assumes hidden_dim == hidden_dim_color (true with the defaults)

            if l == num_layers_color - 1:
                out_dim = 3 * self.color_basis_dim # 3 * CB rgb
            else:
                out_dim = hidden_dim

            color_net.append(nn.Linear(in_dim, out_dim, bias=False))

        self.color_net = nn.ModuleList(color_net)

        # background network
        if self.bg_radius > 0:
            self.num_layers_bg = num_layers_bg
            self.hidden_dim_bg = hidden_dim_bg
            self.encoder_bg, self.in_dim_bg = get_encoder(encoding_bg, input_dim=2, num_levels=4, log2_hashmap_size=19, desired_resolution=2048) # much smaller hashgrid

            bg_net = []
            for l in range(num_layers_bg):
                if l == 0:
                    in_dim = self.in_dim_bg + self.in_dim_dir
                else:
                    in_dim = hidden_dim_bg

                if l == num_layers_bg - 1:
                    out_dim = 3 # 3 rgb
                else:
                    out_dim = hidden_dim_bg

                bg_net.append(nn.Linear(in_dim, out_dim, bias=False))

            self.bg_net = nn.ModuleList(bg_net)
        else:
            self.bg_net = None


    def forward(self, x, d, t):
        # x: [N, 3], in [-bound, bound]
        # d: [N, 3], normalized in [-1, 1]
        # t: [1, 1], in [0, 1]

        # time --> basis coefficients (shared by all points at this time step)
        enc_t = self.encoder_time(t) # [1, 1] --> [1, C']
        h = enc_t
        for l in range(self.num_layers_basis):
            h = self.basis_net[l](h)
            if l != self.num_layers_basis - 1:
                h = F.relu(h, inplace=True)

        sigma_basis = h[0, :self.sigma_basis_dim] # [SB]
        color_basis = h[0, self.sigma_basis_dim:] # [CB]

        # sigma
        x = self.encoder(x, bound=self.bound)
        h = x
        for l in range(self.num_layers):
            h = self.sigma_net[l](h)
            if l != self.num_layers - 1:
                h = F.relu(h, inplace=True)

        sigma = trunc_exp(h[..., :self.sigma_basis_dim] @ sigma_basis) # [N, SB] @ [SB] --> [N]
        geo_feat = h[..., self.sigma_basis_dim:]

        # color
        d = self.encoder_dir(d)
        h = torch.cat([d, geo_feat], dim=-1)
        for l in range(self.num_layers_color):
            h = self.color_net[l](h)
            if l != self.num_layers_color - 1:
                h = F.relu(h, inplace=True)

        # sigmoid activation for rgb; [N, 3, CB] @ [CB] --> [N, 3]
        rgbs = torch.sigmoid(h.view(-1, 3, self.color_basis_dim) @ color_basis)

        return sigma, rgbs, None

    def density(self, x, t):
        # x: [N, 3], in [-bound, bound]
        # t: [1, 1], in [0, 1]

        results = {}

        # time --> basis
        enc_t = self.encoder_time(t) # [1, 1] --> [1, C']
        h = enc_t
        for l in range(self.num_layers_basis):
            h = self.basis_net[l](h)
            if l != self.num_layers_basis - 1:
                h = F.relu(h, inplace=True)

        sigma_basis = h[0, :self.sigma_basis_dim]
        color_basis = h[0, self.sigma_basis_dim:]

        # sigma
        x = self.encoder(x, bound=self.bound)
        h = x
        for l in range(self.num_layers):
            h = self.sigma_net[l](h)
            if l != self.num_layers - 1:
                h = F.relu(h, inplace=True)

        sigma = trunc_exp(h[..., :self.sigma_basis_dim] @ sigma_basis)
        geo_feat = h[..., self.sigma_basis_dim:]

        results['sigma'] = sigma
        results['geo_feat'] = geo_feat
        # results['color_basis'] = color_basis # not returned yet, which is why the masked color() below is disabled

        return results

    def background(self, x, d):
        # x: [N, 2], in [-1, 1]

        h = self.encoder_bg(x) # [N, C]
        d = self.encoder_dir(d)

        h = torch.cat([d, h], dim=-1)
        for l in range(self.num_layers_bg):
            h = self.bg_net[l](h)
            if l != self.num_layers_bg - 1:
                h = F.relu(h, inplace=True)

        # sigmoid activation for rgb
        rgbs = torch.sigmoid(h)

        return rgbs

    # TODO: non-cuda-ray mode is broken for now... (how to pass color_basis to self.color()?)
    # # allow masked inference
    # def color(self, x, d, mask=None, geo_feat=None, **kwargs):
    #     # x: [N, 3] in [-bound, bound]
    #     # t: [1, 1], in [0, 1]
    #     # mask: [N,], bool, indicates where we actually need to compute rgb.

    #     if mask is not None:
    #         rgbs = torch.zeros(mask.shape[0], 3, dtype=x.dtype, device=x.device) # [N, 3]
    #         # in case of empty mask
    #         if not mask.any():
    #             return rgbs
    #         x = x[mask]
    #         d = d[mask]
    #         geo_feat = geo_feat[mask]

    #     d = self.encoder_dir(d)
    #     h = torch.cat([d, geo_feat], dim=-1)
    #     for l in range(self.num_layers_color):
    #         h = self.color_net[l](h)
    #         if l != self.num_layers_color - 1:
    #             h = F.relu(h, inplace=True)

    #     # sigmoid activation for rgb
    #     h = torch.sigmoid(h)

    #     if mask is not None:
    #         rgbs[mask] = h.to(rgbs.dtype) # fp16 --> fp32
    #     else:
    #         rgbs = h

    #     return rgbs

    # optimizer utils
    def get_params(self, lr, lr_net):

        params = [
            {'params': self.encoder.parameters(), 'lr': lr},
            {'params': self.sigma_net.parameters(), 'lr': lr_net},
            {'params': self.encoder_dir.parameters(), 'lr': lr},
            {'params': self.color_net.parameters(), 'lr': lr_net},
            {'params': self.encoder_time.parameters(), 'lr': lr},
            {'params': self.basis_net.parameters(), 'lr': lr_net},
        ]
        if self.bg_radius > 0:
            params.append({'params': self.encoder_bg.parameters(), 'lr': lr})
            params.append({'params': self.bg_net.parameters(), 'lr': lr_net})

        return params
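To sanity-check the shapes involved, here is a minimal, self-contained sketch of the basis mixing that `forward` performs; the tensors are random stand-ins for the real encoder/MLP outputs, and `N`, `SB`, `CB` correspond to the point count, `sigma_basis_dim`, and `color_basis_dim`:

```python
import torch

N, SB, CB = 4096, 32, 8                   # points, sigma basis dim, color basis dim

h_sigma = torch.randn(N, SB)              # per-point sigma coefficients (stand-in for sigma_net output)
sigma_basis = torch.randn(SB)             # time-dependent sigma basis (stand-in for basis_net output)
sigma = torch.exp(h_sigma @ sigma_basis)  # [N, SB] @ [SB] -> [N]; the real code uses trunc_exp

h_color = torch.randn(N, 3 * CB)          # per-point color coefficients (stand-in for color_net output)
color_basis = torch.randn(CB)             # time-dependent color basis
rgbs = torch.sigmoid(h_color.view(-1, 3, CB) @ color_basis)  # [N, 3, CB] @ [CB] -> [N, 3]

assert sigma.shape == (N,) and rgbs.shape == (N, 3)
```

Because the basis weights depend on time alone, a new frame only requires re-running the tiny basis MLP rather than a per-point deformation network, which is presumably why this variant trains faster than the deformation-based one.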
4 changes: 2 additions & 2 deletions dnerf/utils.py
@@ -115,8 +115,8 @@ def train_step(self, data):
         loss = loss.mean()
 
         # deform regularization
-        deform = outputs['deform']
-        loss = loss + 1e-3 * deform.abs().mean()
+        if 'deform' in outputs and outputs['deform'] is not None:
+            loss = loss + 1e-3 * outputs['deform'].abs().mean()
 
         return pred_rgb, gt_rgb, loss

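(The guard is needed because the new temporal-basis network has no deformation field, so its outputs never contain a `deform` entry.)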
7 changes: 6 additions & 1 deletion main_dnerf.py
@@ -34,6 +34,7 @@

 ### network backbone options
 parser.add_argument('--fp16', action='store_true', help="use amp mixed precision training")
+parser.add_argument('--basis', action='store_true', help="[experimental] use temporal basis instead of deformation to model dynamic scene (check Fourier PlenOctree and NeuVV)")
 # parser.add_argument('--ff', action='store_true', help="use fully-fused MLP")
 # parser.add_argument('--tcnn', action='store_true', help="use TCNN backend")

@@ -69,7 +70,11 @@
     opt.cuda_ray = True
     opt.preload = True
 
-from dnerf.network import NeRFNetwork
+if opt.basis:
+    assert opt.cuda_ray, "Non-cuda-ray mode is temporarily broken with temporal basis mode"
+    from dnerf.network_basis import NeRFNetwork
+else:
+    from dnerf.network import NeRFNetwork
 
 print(opt)

13 changes: 9 additions & 4 deletions nerf/renderer.py
@@ -249,6 +249,7 @@ def run(self, rays_o, rays_d, num_steps=128, upsample_steps=128, bg_color=None,
         return {
             'depth': depth,
             'image': image,
+            'weights_sum': weights_sum,
         }
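(`weights_sum` is the per-ray accumulated volume-rendering weight, i.e. the total opacity of the ray; returning it is what enables the opacity-entropy regularizer sketched, but left commented out, in `nerf/utils.py` below.)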


@@ -274,6 +275,8 @@ def run_cuda(self, rays_o, rays_d, dt_gamma=0, bg_color=None, perturb=False, for
         elif bg_color is None:
             bg_color = 1
 
+        results = {}
+
         if self.training:
             # setup counter
             counter = self.step_counter[self.local_step % 16]
@@ -314,6 +317,8 @@
             depth = torch.clamp(depth - nears, min=0) / (fars - nears)
             image = image.view(*prefix, 3)
             depth = depth.view(*prefix)
+
+            results['weights_sum'] = weights_sum
 
         else:
 
@@ -365,11 +370,11 @@ def run_cuda(self, rays_o, rays_d, dt_gamma=0, bg_color=None, perturb=False, for
             depth = torch.clamp(depth - nears, min=0) / (fars - nears)
             image = image.view(*prefix, 3)
             depth = depth.view(*prefix)
 
+        results['depth'] = depth
+        results['image'] = image
 
-        return {
-            'depth': depth,
-            'image': image,
-        }
+        return results
 
     @torch.no_grad()
     def mark_untrained_grid(self, poses, intrinsic, S=64):
9 changes: 7 additions & 2 deletions nerf/utils.py
@@ -292,7 +292,7 @@ def __init__(self,
         self.scaler = torch.cuda.amp.GradScaler(enabled=self.fp16)
 
         # variable init
-        self.epoch = 1
+        self.epoch = 0
         self.global_step = 0
         self.local_step = 0
         self.stats = {
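(Together with the loop change further below, `self.epoch` now apparently tracks the last *completed* epoch, so a run resumed from a checkpoint continues at the following epoch instead of repeating one.)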
@@ -442,6 +442,11 @@ def train_step(self, data):

         loss = loss.mean()
 
+        # extra loss
+        # pred_weights_sum = outputs['weights_sum'] + 1e-8
+        # loss_ws = - 1e-1 * pred_weights_sum * torch.log(pred_weights_sum) # entropy to encourage weights_sum to be 0 or 1.
+        # loss = loss + loss_ws.mean()
+
         return pred_rgb, gt_rgb, loss

def eval_step(self, data):
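(For reference: -p log p is zero at p = 0 and p = 1 and positive in between, so minimizing it would push each ray's accumulated weight toward fully transparent or fully opaque.)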
@@ -523,7 +528,7 @@ def train(self, train_loader, valid_loader, max_epochs):
         # get a ref to error_map
         self.error_map = train_loader._data.error_map
 
-        for epoch in range(self.epoch, max_epochs + 1):
+        for epoch in range(self.epoch + 1, max_epochs + 1):
             self.epoch = epoch
 
             self.train_one_epoch(train_loader)
4 changes: 4 additions & 0 deletions readme.md
@@ -187,8 +187,12 @@ python main_CCNeRF.py data/nerf_synthetic/hotdog --workspace trial_cc_hotdog -O

 ### D-NeRF
 # almost the same as Instant-ngp NeRF, just replace the main script.
+# use deformation to model dynamic scene
 python main_dnerf.py data/dnerf/jumpingjacks --workspace trial_dnerf_jumpingjacks -O --bound 1.0 --scale 0.8 --dt_gamma 0
 python main_dnerf.py data/dnerf/jumpingjacks --workspace trial_dnerf_jumpingjacks -O --bound 1.0 --scale 0.8 --dt_gamma 0 --gui
+# use temporal basis to model dynamic scene
+python main_dnerf.py data/dnerf/jumpingjacks --workspace trial_dnerf_basis_jumpingjacks -O --bound 1.0 --scale 0.8 --dt_gamma 0 --basis
+python main_dnerf.py data/dnerf/jumpingjacks --workspace trial_dnerf_basis_jumpingjacks -O --bound 1.0 --scale 0.8 --dt_gamma 0 --basis --gui
 # for the hypernerf dataset, first convert it into nerf-compatible format:
 python scripts/hyper2nerf.py data/split-cookie --downscale 2 # will generate transforms*.json
 python main_dnerf.py data/split-cookie/ --workspace trial_dnerf_cookies -O --bound 1 --scale 0.3 --dt_gamma 0