
Commit 9c81f1f
added conv layer to reduce feature size to 32
sophiajw committed Jul 22, 2019
1 parent 265f9ca commit 9c81f1f
Showing 2 changed files with 116 additions and 0 deletions.
7 changes: 7 additions & 0 deletions model.py
@@ -84,6 +84,9 @@ def __init__(self, num_classes, num_images, input_channels, intrinsic, image_dim
use_xyz = True
bn=True

# reduce feature size to 32
self.conv = nn.Conv2d(128, 32, 1)

# pooling across num_images point clouds
self.pooling = nn.MaxPool1d(kernel_size=self.num_images)

@@ -237,6 +240,10 @@ def forward(self, point_cloud, image_features, projection_indices_3d, projection
num_points = point_cloud.shape[1] # number of points in sample. do we need number of points in whole scene?
num_images = projection_indices_3d.shape[0] // batch_size

# reduce number of feature channels from 128 to 32
image_features = self.conv(image_features)

# project 2d to 3d
image_features = [Projection.apply(ft, ind3d, ind2d, num_points) for ft, ind3d, ind2d in zip(image_features, projection_indices_3d, projection_indices_2d)]
image_features = torch.stack(image_features, dim=2) # (input_channels, num_points_sample, batch_size*num_images)
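
For context, a minimal sketch (not part of the commit, shapes are hypothetical) of what the added 1x1 convolution does: it acts as a per-pixel linear map over channels, reducing 128-channel 2d feature maps to 32 channels while leaving the spatial dimensions unchanged.

import torch
import torch.nn as nn

conv = nn.Conv2d(128, 32, 1)             # same layer as added in __init__
features = torch.randn(4, 128, 32, 41)   # hypothetical (batch*num_images, channels, H, W)
reduced = conv(features)
print(reduced.shape)                     # torch.Size([4, 32, 32, 41])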
109 changes: 109 additions & 0 deletions test_projection_hd5f.py
@@ -0,0 +1,109 @@
import os
import argparse
# from scipy import misc
from scipy.ndimage import imread  # removed in scipy >= 1.2; imageio.imread is the usual replacement

import numpy as np
import torch
import h5py


from data_util import resize_crop_image
from projection_cpu import ProjectionHelper
import util

# params
parser = argparse.ArgumentParser()
# data paths
parser.add_argument('--data_path_2d',
default='/Users/sophia/Documents/Studium/Mathematik/Master/AdvancedDeepLearning4ComputerVision/data/2doutput',
help='path to 2d train data')
# scannet intrinsic params
parser.add_argument('--intrinsic_image_width', type=int, default=640, help='2d image width')
parser.add_argument('--intrinsic_image_height', type=int, default=480, help='2d image height')
parser.add_argument('--fx', type=float, default=577.870605, help='intrinsics')
parser.add_argument('--fy', type=float, default=577.870605, help='intrinsics')
parser.add_argument('--mx', type=float, default=319.5, help='intrinsics')
parser.add_argument('--my', type=float, default=239.5, help='intrinsics')
#2d/3d
parser.add_argument('--accuracy', type=float, default=0.1, help='voxel size (in meters)')
parser.add_argument('--depth_min', type=float, default=0.4, help='min depth (in meters)')
parser.add_argument('--depth_max', type=float, default=4.0, help='max depth (in meters)')

opt = parser.parse_args()
print(opt)

proj_image_dims = [240, 320]

def _load_data_file(name):
    f = h5py.File(name, "r")  # open read-only; newer h5py versions require an explicit mode
    data = f["points"][:]
    label = f["labels"][:]
    frames = f["corresponding_images"][:]
    return data, label, frames

# initialize projection class
# get intrinsic
intrinsic = util.make_intrinsic(opt.fx, opt.fy, opt.mx, opt.my)
intrinsic = util.adjust_intrinsic(intrinsic, [opt.intrinsic_image_width, opt.intrinsic_image_height], proj_image_dims)
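# adjust_intrinsic rescales the camera intrinsics from the original 640x480 capture
# resolution to proj_image_dims, so projections line up at the reduced resolution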

projection = ProjectionHelper(intrinsic, opt.depth_min, opt.depth_max, proj_image_dims, opt.accuracy)

### compute_projection

points, label, frames = _load_data_file('/Users/sophia/Documents/Studium/Mathematik/Master/AdvancedDeepLearning4ComputerVision/data/sampleBeachData/chunksWithCorrespondences/train0000_00_42.hdf5')
points = torch.from_numpy(points)
points = points.type(torch.DoubleTensor)
label = torch.from_numpy(label).double()
## get depth_image
batch_size = 1
num_images = 1
depth_images = torch.FloatTensor(batch_size * num_images, proj_image_dims[1], proj_image_dims[0])  # only used to derive depth_image_dims below
# depth_images = torch.cuda.FloatTensor(batch_size * num_images, proj_image_dims[1], proj_image_dims[0])
# load_frames_multi
scan_name = 'scene0000_00'
frame_id = frames[3]
depth_file = os.path.join(opt.data_path_2d, scan_name, 'depth', str(frame_id) + '.png')
depth_image_dims = [depth_images.shape[2], depth_images.shape[1]]
# load_depth_label_pose
depth_image = imread(depth_file)
# preprocess
depth_image = resize_crop_image(depth_image, depth_image_dims)  # resize to the projection image dims
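# ScanNet depth PNGs store depth in millimeters; dividing by 1000 converts to meters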
depth_image = depth_image.astype(np.float32) / 1000.0
depth_image = torch.from_numpy(depth_image)

# color image
color_images = torch.FloatTensor(batch_size * num_images, proj_image_dims[1], proj_image_dims[0])
color_file = os.path.join(opt.data_path_2d, scan_name, 'color', str(frame_id) + '.jpg')
color_image = imread(color_file)
print(color_image.shape)
color_image = torch.from_numpy(color_image)
color_image = color_image.type(torch.DoubleTensor)



## get camera_pose
pose_file = os.path.join(opt.data_path_2d, scan_name, 'pose', str(frame_id) + '.txt')
lines = open(pose_file).read().splitlines()
assert len(lines) == 4
lines = [[t[0], t[1], t[2], t[3]] for t in (line.split(" ") for line in lines)]
camera_pose = torch.from_numpy(np.asarray(lines).astype(np.float32))

# write the labeled point cloud to a text file for inspection
np.savetxt('scenechunk_0000_00_42_labels.txt', torch.cat((points, label.unsqueeze(1)), dim=1), delimiter=',')
num_points = points.shape[0]
#
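# compute_projection returns index tensors for the valid 3d/2d correspondences;
# from the indexing below, element 0 appears to hold the count of valid entries
# and elements 1..count hold the indices themselves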
three_dim, two_dim = projection.compute_projection(points, depth_image, camera_pose, num_points)
#
print(points[three_dim[1:(three_dim[0]+1)]].type())
print(color_image.view((76800, 3)).type())
# 76800 = 240 * 320 pixels, flattened so colors can be gathered by the 2d indices
points_proj = torch.cat((points[three_dim[1:(three_dim[0]+1)]], color_image.view(76800, 3)[two_dim[1:(two_dim[0]+1)]]), dim=1)
print(points_proj.shape)
np.savetxt('test_projection_chunk_0000_00_42.txt', points_proj, delimiter=',')
#
#corner_coords = projection.compute_frustum_corners(camera_pose)
#np.savetxt('corner_coords_840.txt', corner_coords, delimiter=',')
# normals = projection.compute_frustum_normals(corner_coords)
# new_pt = projection.point_in_frustum(corner_coords, normals, corner_coords[4][:3].view(-1))
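
A minimal invocation of this test script, for reference (the 2d data path is machine-specific and the placeholder below is hypothetical):

python test_projection_hd5f.py --data_path_2d /path/to/2doutput

Note that the hdf5 chunk path in the compute_projection section is also hardcoded and would need adjusting.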

