
Commit 9c81f1f
added conv layer to reduce feature size to 32
sophiajw committed Jul 22, 2019
1 parent 265f9ca commit 9c81f1f
Showing 2 changed files with 116 additions and 0 deletions.
7 changes: 7 additions & 0 deletions model.py
@@ -84,6 +84,9 @@ def __init__(self, num_classes, num_images, input_channels, intrinsic, image_dim
use_xyz = True
bn=True

# reduce feature size to 32
self.conv = nn.Conv2d(128, 32, 1)

# pooling across num_images point clouds
self.pooling = nn.MaxPool1d(kernel_size=self.num_images)

@@ -237,6 +240,10 @@ def forward(self, point_cloud, image_features, projection_indices_3d, projection
num_points = point_cloud.shape[1] # number of points in sample. do we need number of points in whole scene?
num_images = projection_indices_3d.shape[0] // batch_size

# reduce number of feature channels from 128 to 32
image_features = self.conv(image_features)

# project 2d to 3d
image_features = [Projection.apply(ft, ind3d, ind2d, num_points) for ft, ind3d, ind2d in zip(image_features, projection_indices_3d, projection_indices_2d)]
image_features = torch.stack(image_features, dim=2) # (input_channels, num_points_sample, batch_size*num_images)
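
For context, a minimal sketch (not part of the commit, shapes are hypothetical) of what the added 1x1 convolution does: it acts as a per-pixel linear map over channels, reducing 128-channel 2d feature maps to 32 channels while leaving the spatial dimensions unchanged.

import torch
import torch.nn as nn

conv = nn.Conv2d(128, 32, 1)             # same layer as added in __init__
features = torch.randn(4, 128, 32, 41)   # hypothetical (batch*num_images, channels, H, W)
reduced = conv(features)
print(reduced.shape)                     # torch.Size([4, 32, 32, 41])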
109 changes: 109 additions & 0 deletions test_projection_hd5f.py
@@ -0,0 +1,109 @@
import os
import argparse
# from scipy import misc
from scipy.ndimage import imread  # removed in scipy >= 1.2; imageio.imread is the usual replacement

import numpy as np
import torch
import h5py


from data_util import resize_crop_image
from projection_cpu import ProjectionHelper
import util

# params
parser = argparse.ArgumentParser()
# data paths
parser.add_argument('--data_path_2d',
default='/Users/sophia/Documents/Studium/Mathematik/Master/AdvancedDeepLearning4ComputerVision/data/2doutput',
help='path to 2d train data')
# scannet intrinsic params
parser.add_argument('--intrinsic_image_width', type=int, default=640, help='2d image width')
parser.add_argument('--intrinsic_image_height', type=int, default=480, help='2d image height')
parser.add_argument('--fx', type=float, default=577.870605, help='intrinsics')
parser.add_argument('--fy', type=float, default=577.870605, help='intrinsics')
parser.add_argument('--mx', type=float, default=319.5, help='intrinsics')
parser.add_argument('--my', type=float, default=239.5, help='intrinsics')
#2d/3d
parser.add_argument('--accuracy', type=float, default=0.1, help='voxel size (in meters)')
parser.add_argument('--depth_min', type=float, default=0.4, help='min depth (in meters)')
parser.add_argument('--depth_max', type=float, default=4.0, help='max depth (in meters)')

opt = parser.parse_args()
print(opt)

proj_image_dims = [240, 320]

def _load_data_file(name):
    f = h5py.File(name, "r")  # open read-only; newer h5py versions require an explicit mode
    data = f["points"][:]
    label = f["labels"][:]
    frames = f["corresponding_images"][:]
    return data, label, frames

# initialize projection class
# get intrinsic
intrinsic = util.make_intrinsic(opt.fx, opt.fy, opt.mx, opt.my)
intrinsic = util.adjust_intrinsic(intrinsic, [opt.intrinsic_image_width, opt.intrinsic_image_height], proj_image_dims)
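# adjust_intrinsic rescales the camera intrinsics from the original 640x480 capture
# resolution to proj_image_dims, so projections line up at the reduced resolution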

projection = ProjectionHelper(intrinsic, opt.depth_min, opt.depth_max, proj_image_dims, opt.accuracy)

### compute_projection

points, label, frames = _load_data_file('/Users/sophia/Documents/Studium/Mathematik/Master/AdvancedDeepLearning4ComputerVision/data/sampleBeachData/chunksWithCorrespondences/train0000_00_42.hdf5')
points = torch.from_numpy(points)
points = points.type(torch.DoubleTensor)
label = torch.from_numpy(label).double()
## get depth_image
batch_size = 1
num_images = 1
depth_images = torch.FloatTensor(batch_size * num_images, proj_image_dims[1], proj_image_dims[0])  # only used to derive depth_image_dims below
# depth_images = torch.cuda.FloatTensor(batch_size * num_images, proj_image_dims[1], proj_image_dims[0])
# load_frames_multi
scan_name = 'scene0000_00'
frame_id = frames[3]
depth_file = os.path.join(opt.data_path_2d, scan_name, 'depth', str(frame_id) + '.png')
depth_image_dims = [depth_images.shape[2], depth_images.shape[1]]
# load_depth_label_pose
depth_image = imread(depth_file)
# preprocess
depth_image = resize_crop_image(depth_image, depth_image_dims)  # resize to the projection image dims
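# ScanNet depth PNGs store depth in millimeters; dividing by 1000 converts to meters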
depth_image = depth_image.astype(np.float32) / 1000.0
depth_image = torch.from_numpy(depth_image)

# color image
color_images = torch.FloatTensor(batch_size * num_images, proj_image_dims[1], proj_image_dims[0])
color_file = os.path.join(opt.data_path_2d, scan_name, 'color', str(frame_id) + '.jpg')
color_image = imread(color_file)
print(color_image.shape)
color_image = torch.from_numpy(color_image)
color_image = color_image.type(torch.DoubleTensor)



## get camera_pose
pose_file = os.path.join(opt.data_path_2d, scan_name, 'pose', str(frame_id) + '.txt')
lines = open(pose_file).read().splitlines()
assert len(lines) == 4
lines = [[t[0], t[1], t[2], t[3]] for t in (line.split(" ") for line in lines)]
camera_pose = torch.from_numpy(np.asarray(lines).astype(np.float32))

# write the labeled point cloud to a text file for inspection
np.savetxt('scenechunk_0000_00_42_labels.txt', torch.cat((points, label.unsqueeze(1)), dim=1), delimiter=',')
num_points = points.shape[0]
#
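# compute_projection returns index tensors for the valid 3d/2d correspondences;
# from the indexing below, element 0 appears to hold the count of valid entries
# and elements 1..count hold the indices themselves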
three_dim, two_dim = projection.compute_projection(points, depth_image, camera_pose, num_points)
#
print(points[three_dim[1:(three_dim[0]+1)]].type())
print(color_image.view((76800, 3)).type())
# 76800 = 240 * 320 pixels, flattened so colors can be gathered by the 2d indices
points_proj = torch.cat((points[three_dim[1:(three_dim[0]+1)]], color_image.view(76800, 3)[two_dim[1:(two_dim[0]+1)]]), dim=1)
print(points_proj.shape)
np.savetxt('test_projection_chunk_0000_00_42.txt', points_proj, delimiter=',')
#
#corner_coords = projection.compute_frustum_corners(camera_pose)
#np.savetxt('corner_coords_840.txt', corner_coords, delimiter=',')
# normals = projection.compute_frustum_normals(corner_coords)
# new_pt = projection.point_in_frustum(corner_coords, normals, corner_coords[4][:3].view(-1))
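
A minimal invocation of this test script, for reference (the 2d data path is machine-specific and the placeholder below is hypothetical):

python test_projection_hd5f.py --data_path_2d /path/to/2doutput

Note that the hdf5 chunk path in the compute_projection section is also hardcoded and would need adjusting.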

