upsample H and W

richzhang · Jul 9, 2020 · 6abcdd1 · 6abcdd1
1 parent 5d2afe2
commit 6abcdd1
Show file tree

Hide file tree

Showing 3 changed files with 8 additions and 10 deletions.
diff --git a/compute_dists.py b/compute_dists.py
@@ -5,12 +5,13 @@
 parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
 parser.add_argument('-p0','--path0', type=str, default='./imgs/ex_ref.png')
 parser.add_argument('-p1','--path1', type=str, default='./imgs/ex_p0.png')
+parser.add_argument('-v','--version', type=str, default='0.1')
 parser.add_argument('--use_gpu', action='store_true', help='turn on flag to use GPU')
 
 opt = parser.parse_args()
 
 ## Initializing the model
-model = models.PerceptualLoss(model='net-lin',net='alex',use_gpu=opt.use_gpu)
+model = models.PerceptualLoss(model='net-lin',net='alex',use_gpu=opt.use_gpu,version=opt.version)
 
 # Load images
 img0 = util.im2tensor(util.load_image(opt.path0)) # RGB image from [-1,1]

diff --git a/models/dist_model.py b/models/dist_model.py
@@ -40,9 +40,6 @@ def initialize(self, model='net-lin', net='alex', colorspace='Lab', pnet_rand=Fa
  use_gpu - bool - whether or not to use a GPU
  printNet - bool - whether or not to print network architecture out
  spatial - bool - whether to output an array containing varying distances across spatial dimensions
- spatial_shape - if given, output spatial shape. if None then spatial shape is determined automatically via spatial_factor (see below).
- spatial_factor - if given, specifies upsampling factor relative to the largest spatial extent of a convolutional layer. if None then resized to size of input images.
- spatial_order - spline order of filter for upsampling in spatial mode, by default 1 (bilinear).
  is_train - bool - [True] for training mode
  lr - float - initial learning rate
  beta1 - float - initial momentum term for adam

diff --git a/models/networks_basic.py b/models/networks_basic.py
@@ -17,11 +17,11 @@
 def spatial_average(in_tens, keepdim=True):
  return in_tens.mean([2,3],keepdim=keepdim)
 
-def upsample(in_tens, out_H=64): # assumes scale factor is same for H and W
- in_H = in_tens.shape[2]
- scale_factor = 1.*out_H/in_H
+def upsample(in_tens, out_HW=(64,64)): # out_HW is (height, width); H and W are scaled independently
+ in_H, in_W = in_tens.shape[2], in_tens.shape[3]
+ scale_factor_H, scale_factor_W = 1.*out_HW[0]/in_H, 1.*out_HW[1]/in_W
 
- return nn.Upsample(scale_factor=scale_factor, mode='bilinear', align_corners=False)(in_tens)
+ return nn.Upsample(scale_factor=(scale_factor_H, scale_factor_W), mode='bilinear', align_corners=False)(in_tens)
 
 # Learned perceptual metric
 class PNetLin(nn.Module):
@@ -73,12 +73,12 @@ def forward(self, in0, in1, retPerLayer=False):
 
  if(self.lpips):
  if(self.spatial):
- res = [upsample(self.lins[kk].model(diffs[kk]), out_H=in0.shape[2]) for kk in range(self.L)]
+ res = [upsample(self.lins[kk].model(diffs[kk]), out_HW=in0.shape[2:]) for kk in range(self.L)]
  else:
  res = [spatial_average(self.lins[kk].model(diffs[kk]), keepdim=True) for kk in range(self.L)]
  else:
  if(self.spatial):
- res = [upsample(diffs[kk].sum(dim=1,keepdim=True), out_H=in0.shape[2]) for kk in range(self.L)]
+ res = [upsample(diffs[kk].sum(dim=1,keepdim=True), out_HW=in0.shape[2:]) for kk in range(self.L)]
  else:
  res = [spatial_average(diffs[kk].sum(dim=1,keepdim=True), keepdim=True) for kk in range(self.L)]