Skip to content

Commit

Permalink
cuda version working now
Browse files Browse the repository at this point in the history
  • Loading branch information
koide3 committed Jan 31, 2020
1 parent a3846ce commit 889d0ea
Show file tree
Hide file tree
Showing 11 changed files with 209 additions and 384 deletions.
15 changes: 5 additions & 10 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -86,21 +86,16 @@ if(CUDA_FOUND)
thirdparty/Eigen
thirdparty/nvbio
)

cuda_add_executable(cuda_test
src/fast_gicp/cuda/cuda_test.cu
)
target_include_directories(cuda_test PRIVATE
include
thirdparty/Eigen
)
target_link_libraries(cuda_test
fast_vgicp_cuda
target_link_libraries(fast_vgicp_cuda
cublas
)

target_sources(fast_gicp PRIVATE
src/fast_gicp/gicp/fast_vgicp_cuda.cpp
)
target_link_libraries(fast_gicp
fast_vgicp_cuda
)
add_dependencies(fast_gicp fast_vgicp_cuda)


Expand Down
2 changes: 1 addition & 1 deletion include/fast_gicp/cuda/compute_derivatives.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ struct compute_derivatives_kernel {
Eigen::Vector3f& loss = thrust::get<2>(tuple);
Eigen::Matrix<float, 3, 6, Eigen::RowMajor>& J = thrust::get<3>(tuple);

loss = num_points * (RCR_inv * d);
loss = (RCR_inv * d);
J.block<3, 3>(0, 0) = RCR_inv * skew_mean_A;
J.block<3, 3>(0, 3) = -RCR_inv;
}
Expand Down
25 changes: 17 additions & 8 deletions include/fast_gicp/cuda/fast_vgicp_cuda.cuh
Original file line number Diff line number Diff line change
@@ -1,18 +1,20 @@
#ifndef FAST_GICP_FAST_VGICP_CUDA_CUH
#define FAST_GICP_FAST_VGICP_CUDA_CUH
#ifndef FAST_GICP_FAST_VGICP_CUDA_CORE_CUH
#define FAST_GICP_FAST_VGICP_CUDA_CORE_CUH

#include <memory>
#include <vector>
#include <Eigen/Core>
#include <Eigen/Geometry>

struct cublasContext;

namespace thrust {
template<typename T>
class device_allocator;

template<typename T, typename Alloc>
class device_vector;
}
} // namespace thrust

namespace fast_gicp {

Expand All @@ -24,6 +26,7 @@ public:
using Indices = thrust::device_vector<int, thrust::device_allocator<int>>;
using Matrices = thrust::device_vector<Eigen::Matrix3f, thrust::device_allocator<Eigen::Matrix3f>>;

EIGEN_MAKE_ALIGNED_OPERATOR_NEW
FastVGICPCudaCore();
~FastVGICPCudaCore();

Expand All @@ -41,14 +44,21 @@ public:

void create_target_voxelmap();

void optimize();
void optimize(const Eigen::Isometry3f& initial_guess);
bool optimize(Eigen::Isometry3f& estimated);
bool optimize(const Eigen::Isometry3f& initial_guess, Eigen::Isometry3f& estimated);

void test_print();
private:
bool is_converged(const Eigen::Matrix<float, 6, 1>& delta) const;

private:
cublasContext* cublas_handle;

double resolution;

int max_iterations;
double rotation_epsilon;
double transformation_epsilon;

std::unique_ptr<Points> source_points;
std::unique_ptr<Points> target_points;

Expand All @@ -61,7 +71,6 @@ private:
std::unique_ptr<GaussianVoxelMap> voxelmap;
};

} // namespace fast_gicp

} // namespace fast_gicp

#endif
2 changes: 1 addition & 1 deletion include/fast_gicp/cuda/gaussian_voxelmap.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ namespace {

class GaussianVoxelMap {
public:
GaussianVoxelMap(float resolution, int init_num_buckets = 8192 * 2, int max_bucket_scan_count = 10) : init_num_buckets(init_num_buckets), max_bucket_scan_count(max_bucket_scan_count), voxel_resolution(resolution) {}
GaussianVoxelMap(float resolution, int init_num_buckets = 8192 * 4, int max_bucket_scan_count = 10) : init_num_buckets(init_num_buckets), max_bucket_scan_count(max_bucket_scan_count), voxel_resolution(resolution) {}

void create_voxelmap(const thrust::device_vector<Eigen::Vector3f>& points, const thrust::device_vector<Eigen::Matrix3f>& covariances) {
thrust::device_vector<Eigen::Vector3i> voxel_coords(points.size());
Expand Down
2 changes: 2 additions & 0 deletions include/fast_gicp/gicp/fast_vgicp.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ class FastVGICP : public pcl::Registration<PointSource, PointTarget, float> {
using PointCloudTargetPtr = typename PointCloudTarget::Ptr;
using PointCloudTargetConstPtr = typename PointCloudTarget::ConstPtr;

using Ptr = boost::shared_ptr<FastVGICP<PointSource, PointTarget>>;

using pcl::Registration<PointSource, PointTarget, Scalar>::reg_name_;
using pcl::Registration<PointSource, PointTarget, Scalar>::input_;
using pcl::Registration<PointSource, PointTarget, Scalar>::target_;
Expand Down
43 changes: 12 additions & 31 deletions include/fast_gicp/gicp/fast_vgicp_cuda.hpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
#ifndef FAST_GICP_FAST_VGICP_CUDA_HPP
#define FAST_GICP_FAST_VGICP_CUDA_HPP

#include <unordered_map>

#include <Eigen/Core>
#include <Eigen/Geometry>

Expand All @@ -13,12 +11,15 @@

#include <sophus/so3.hpp>
#include <fast_gicp/gicp/gicp_settings.hpp>
#include <fast_gicp/gicp/fast_vgicp_voxel.hpp>

namespace fast_gicp {

class FastVGICPCudaCore;

enum NearestNeighborMethod { CPU_PARALLEL_KDTREE, GPU_BRUTEFORCE };

/**
* @brief Fast Voxelized GICP algorithm boosted with OpenMP
* @brief Fast Voxelized GICP algorithm boosted with CUDA
*/
template<typename PointSource, typename PointTarget>
class FastVGICPCuda : public pcl::Registration<PointSource, PointTarget, float> {
Expand All @@ -34,6 +35,8 @@ class FastVGICPCuda : public pcl::Registration<PointSource, PointTarget, float>
using PointCloudTargetPtr = typename PointCloudTarget::Ptr;
using PointCloudTargetConstPtr = typename PointCloudTarget::ConstPtr;

using Ptr = boost::shared_ptr<FastVGICPCuda<PointSource, PointTarget>>;

using pcl::Registration<PointSource, PointTarget, Scalar>::reg_name_;
using pcl::Registration<PointSource, PointTarget, Scalar>::input_;
using pcl::Registration<PointSource, PointTarget, Scalar>::target_;
Expand All @@ -48,58 +51,36 @@ class FastVGICPCuda : public pcl::Registration<PointSource, PointTarget, float>
FastVGICPCuda();
virtual ~FastVGICPCuda() override;

void setNumThreads(int n);

void setResolution(double resolution);

void setCorrespondenceRandomness(int k);

void setRegularizationMethod(RegularizationMethod method);

void setNeighborSearchMethod(NeighborSearchMethod method);

void setVoxelAccumulationMode(VoxelAccumulationMode mode);

virtual void setInputSource(const PointCloudSourceConstPtr& cloud) override;

virtual void setInputTarget(const PointCloudTargetConstPtr& cloud) override;

protected:
virtual void computeTransformation(PointCloudSource& output, const Matrix4& guess) override;

private:
std::vector<Eigen::Vector3i, Eigen::aligned_allocator<Eigen::Vector3i>> neighbor_offsets() const;

Eigen::Vector3i voxel_coord(const Eigen::Vector4f& x) const;
Eigen::Vector4f voxel_origin(const Eigen::Vector3i& coord) const;
GaussianVoxel::Ptr lookup_voxel(const Eigen::Vector3i& x) const;

bool is_converged(const Eigen::Matrix<float, 6, 1>& delta) const;

Eigen::VectorXf loss_ls(const Eigen::Matrix<float, 6, 1>& x, Eigen::MatrixXf* J) const;

template<typename PointT>
bool calculate_covariances(const boost::shared_ptr<const pcl::PointCloud<PointT>>& cloud, pcl::search::KdTree<PointT>& kdtree, std::vector<Matrix4, Eigen::aligned_allocator<Matrix4>>& covariances);
std::vector<int> find_neighbors_parallel_kdtree(int k, const boost::shared_ptr<const pcl::PointCloud<PointT>>& cloud, pcl::search::KdTree<PointT>& kdtree) const;

private:
int num_threads_;
private:
int k_correspondences_;
double rotation_epsilon_;

pcl::search::KdTree<PointSource> source_kdtree;
pcl::search::KdTree<PointTarget> target_kdtree;

std::vector<Matrix4, Eigen::aligned_allocator<Matrix4>> source_covs;
std::vector<Matrix4, Eigen::aligned_allocator<Matrix4>> target_covs;

double voxel_resolution_;
NeighborSearchMethod search_method_;
RegularizationMethod regularization_method_;
VoxelAccumulationMode voxel_mode_;
NearestNeighborMethod neighbor_search_method_;

using VoxelMap = std::unordered_map<Eigen::Vector3i, GaussianVoxel::Ptr, Vector3iHash, std::equal_to<Eigen::Vector3i>, Eigen::aligned_allocator<std::pair<Eigen::Vector3i, GaussianVoxel::Ptr>>>;
VoxelMap voxels;
};
std::unique_ptr<FastVGICPCudaCore> vgicp_cuda;
};
} // namespace fast_gicp

#endif
Loading

0 comments on commit 889d0ea

Please sign in to comment.