cuda version working now

Oasis-Guo · Jan 31, 2020 · 889d0ea · 889d0ea
1 parent a3846ce
commit 889d0ea
Show file tree

Hide file tree

Showing 11 changed files with 209 additions and 384 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -86,21 +86,16 @@ if(CUDA_FOUND)
     thirdparty/Eigen
     thirdparty/nvbio
   )
-
-  cuda_add_executable(cuda_test
-    src/fast_gicp/cuda/cuda_test.cu
-  )
-  target_include_directories(cuda_test PRIVATE
-    include
-    thirdparty/Eigen
-  )
-  target_link_libraries(cuda_test
-    fast_vgicp_cuda
+  target_link_libraries(fast_vgicp_cuda
+    cublas
   )
 
   target_sources(fast_gicp PRIVATE
     src/fast_gicp/gicp/fast_vgicp_cuda.cpp
   )
+  target_link_libraries(fast_gicp
+    fast_vgicp_cuda
+  )
   add_dependencies(fast_gicp fast_vgicp_cuda)
 
 

diff --git a/include/fast_gicp/cuda/compute_derivatives.cuh b/include/fast_gicp/cuda/compute_derivatives.cuh
@@ -88,7 +88,7 @@ struct compute_derivatives_kernel {
     Eigen::Vector3f& loss = thrust::get<2>(tuple);
     Eigen::Matrix<float, 3, 6, Eigen::RowMajor>& J = thrust::get<3>(tuple);
 
-    loss = num_points * (RCR_inv * d);
+    loss = (RCR_inv * d);
     J.block<3, 3>(0, 0) = RCR_inv * skew_mean_A;
     J.block<3, 3>(0, 3) = -RCR_inv;
   }

diff --git a/include/fast_gicp/cuda/fast_vgicp_cuda.cuh b/include/fast_gicp/cuda/fast_vgicp_cuda.cuh
@@ -1,18 +1,20 @@
-#ifndef FAST_GICP_FAST_VGICP_CUDA_CUH
-#define FAST_GICP_FAST_VGICP_CUDA_CUH
+#ifndef FAST_GICP_FAST_VGICP_CUDA_CORE_CUH
+#define FAST_GICP_FAST_VGICP_CUDA_CORE_CUH
 
 #include <memory>
 #include <vector>
 #include <Eigen/Core>
 #include <Eigen/Geometry>
 
+struct cublasContext;
+
 namespace thrust {
 template<typename T>
 class device_allocator;
 
 template<typename T, typename Alloc>
 class device_vector;
-}
+}  // namespace thrust
 
 namespace fast_gicp {
 
@@ -24,6 +26,7 @@ public:
   using Indices = thrust::device_vector<int, thrust::device_allocator<int>>;
   using Matrices = thrust::device_vector<Eigen::Matrix3f, thrust::device_allocator<Eigen::Matrix3f>>;
 
+  EIGEN_MAKE_ALIGNED_OPERATOR_NEW
   FastVGICPCudaCore();
   ~FastVGICPCudaCore();
 
@@ -41,14 +44,21 @@ public:
 
   void create_target_voxelmap();
 
-  void optimize();
-  void optimize(const Eigen::Isometry3f& initial_guess);
+  bool optimize(Eigen::Isometry3f& estimated);
+  bool optimize(const Eigen::Isometry3f& initial_guess, Eigen::Isometry3f& estimated);
 
-  void test_print();
+private:
+  bool is_converged(const Eigen::Matrix<float, 6, 1>& delta) const;
 
 private:
+  cublasContext* cublas_handle;
+
   double resolution;
 
+  int max_iterations;
+  double rotation_epsilon;
+  double transformation_epsilon;
+
   std::unique_ptr<Points> source_points;
   std::unique_ptr<Points> target_points;
 
@@ -61,7 +71,6 @@ private:
   std::unique_ptr<GaussianVoxelMap> voxelmap;
 };
 
-} // namespace fast_gicp
-
+}  // namespace fast_gicp
 
 #endif
diff --git a/include/fast_gicp/cuda/gaussian_voxelmap.cuh b/include/fast_gicp/cuda/gaussian_voxelmap.cuh
@@ -93,7 +93,7 @@ namespace {
 
   class GaussianVoxelMap {
   public:
-    GaussianVoxelMap(float resolution, int init_num_buckets = 8192 * 2, int max_bucket_scan_count = 10) : init_num_buckets(init_num_buckets), max_bucket_scan_count(max_bucket_scan_count), voxel_resolution(resolution) {}
+    GaussianVoxelMap(float resolution, int init_num_buckets = 8192 * 4, int max_bucket_scan_count = 10) : init_num_buckets(init_num_buckets), max_bucket_scan_count(max_bucket_scan_count), voxel_resolution(resolution) {}
 
     void create_voxelmap(const thrust::device_vector<Eigen::Vector3f>& points, const thrust::device_vector<Eigen::Matrix3f>& covariances) {
       thrust::device_vector<Eigen::Vector3i> voxel_coords(points.size());

diff --git a/include/fast_gicp/gicp/fast_vgicp.hpp b/include/fast_gicp/gicp/fast_vgicp.hpp
@@ -34,6 +34,8 @@ class FastVGICP : public pcl::Registration<PointSource, PointTarget, float> {
   using PointCloudTargetPtr = typename PointCloudTarget::Ptr;
   using PointCloudTargetConstPtr = typename PointCloudTarget::ConstPtr;
 
+  using Ptr = boost::shared_ptr<FastVGICP<PointSource, PointTarget>>;
+
   using pcl::Registration<PointSource, PointTarget, Scalar>::reg_name_;
   using pcl::Registration<PointSource, PointTarget, Scalar>::input_;
   using pcl::Registration<PointSource, PointTarget, Scalar>::target_;

diff --git a/include/fast_gicp/gicp/fast_vgicp_cuda.hpp b/include/fast_gicp/gicp/fast_vgicp_cuda.hpp
@@ -1,8 +1,6 @@
 #ifndef FAST_GICP_FAST_VGICP_CUDA_HPP
 #define FAST_GICP_FAST_VGICP_CUDA_HPP
 
-#include <unordered_map>
-
 #include <Eigen/Core>
 #include <Eigen/Geometry>
 
@@ -13,12 +11,15 @@
 
 #include <sophus/so3.hpp>
 #include <fast_gicp/gicp/gicp_settings.hpp>
-#include <fast_gicp/gicp/fast_vgicp_voxel.hpp>
 
 namespace fast_gicp {
 
+class FastVGICPCudaCore;
+
+enum NearestNeighborMethod { CPU_PARALLEL_KDTREE, GPU_BRUTEFORCE };
+
 /**
- * @brief Fast Voxelized GICP algorithm boosted with OpenMP
+ * @brief Fast Voxelized GICP algorithm boosted with CUDA
  */
 template<typename PointSource, typename PointTarget>
 class FastVGICPCuda : public pcl::Registration<PointSource, PointTarget, float> {
@@ -34,6 +35,8 @@ class FastVGICPCuda : public pcl::Registration<PointSource, PointTarget, float>
   using PointCloudTargetPtr = typename PointCloudTarget::Ptr;
   using PointCloudTargetConstPtr = typename PointCloudTarget::ConstPtr;
 
+  using Ptr = boost::shared_ptr<FastVGICPCuda<PointSource, PointTarget>>;
+
   using pcl::Registration<PointSource, PointTarget, Scalar>::reg_name_;
   using pcl::Registration<PointSource, PointTarget, Scalar>::input_;
   using pcl::Registration<PointSource, PointTarget, Scalar>::target_;
@@ -48,58 +51,36 @@ class FastVGICPCuda : public pcl::Registration<PointSource, PointTarget, float>
   FastVGICPCuda();
   virtual ~FastVGICPCuda() override;
 
-  void setNumThreads(int n);
-
   void setResolution(double resolution);
 
   void setCorrespondenceRandomness(int k);
 
   void setRegularizationMethod(RegularizationMethod method);
 
-  void setNeighborSearchMethod(NeighborSearchMethod method);
-
-  void setVoxelAccumulationMode(VoxelAccumulationMode mode);
-
   virtual void setInputSource(const PointCloudSourceConstPtr& cloud) override;
 
   virtual void setInputTarget(const PointCloudTargetConstPtr& cloud) override;
 
 protected:
   virtual void computeTransformation(PointCloudSource& output, const Matrix4& guess) override;
 
-private:
-  std::vector<Eigen::Vector3i, Eigen::aligned_allocator<Eigen::Vector3i>> neighbor_offsets() const;
-
-  Eigen::Vector3i voxel_coord(const Eigen::Vector4f& x) const;
-  Eigen::Vector4f voxel_origin(const Eigen::Vector3i& coord) const;
-  GaussianVoxel::Ptr lookup_voxel(const Eigen::Vector3i& x) const;
-
-  bool is_converged(const Eigen::Matrix<float, 6, 1>& delta) const;
-
-  Eigen::VectorXf loss_ls(const Eigen::Matrix<float, 6, 1>& x, Eigen::MatrixXf* J) const;
-
   template<typename PointT>
-  bool calculate_covariances(const boost::shared_ptr<const pcl::PointCloud<PointT>>& cloud, pcl::search::KdTree<PointT>& kdtree, std::vector<Matrix4, Eigen::aligned_allocator<Matrix4>>& covariances);
+  std::vector<int> find_neighbors_parallel_kdtree(int k, const boost::shared_ptr<const pcl::PointCloud<PointT>>& cloud, pcl::search::KdTree<PointT>& kdtree) const;
 
 private:
-  int num_threads_;
+private:
   int k_correspondences_;
   double rotation_epsilon_;
 
   pcl::search::KdTree<PointSource> source_kdtree;
   pcl::search::KdTree<PointTarget> target_kdtree;
 
-  std::vector<Matrix4, Eigen::aligned_allocator<Matrix4>> source_covs;
-  std::vector<Matrix4, Eigen::aligned_allocator<Matrix4>> target_covs;
-
   double voxel_resolution_;
-  NeighborSearchMethod search_method_;
   RegularizationMethod regularization_method_;
-  VoxelAccumulationMode voxel_mode_;
+  NearestNeighborMethod neighbor_search_method_;
 
-  using VoxelMap = std::unordered_map<Eigen::Vector3i, GaussianVoxel::Ptr, Vector3iHash, std::equal_to<Eigen::Vector3i>, Eigen::aligned_allocator<std::pair<Eigen::Vector3i, GaussianVoxel::Ptr>>>;
-  VoxelMap voxels;
-};
+  std::unique_ptr<FastVGICPCudaCore> vgicp_cuda;
+  };
 }  // namespace fast_gicp
 
 #endif