Merge branch 'main' into ci-image
ysyecust authored Mar 2, 2024
2 parents 665e385 + 4193693 commit 5730c42
Showing 20 changed files with 348 additions and 4,373 deletions.
4 changes: 4 additions & 0 deletions unidock/README.md
@@ -248,6 +248,10 @@ DOI 10.1002/jcc.21334
The optimal application of Uni-Dock occurs in scenarios where one binding pocket interacts with numerous (on the order of 1,000) ligands. As the number of ligands within a single computational batch increases, the average processing speed improves. When only a few ligands are docked against one binding pocket, the fixed overhead accounts for a large share of the runtime, leading to slower computational performance.
4. Which GPU architectures does Uni-Dock support?
Starting from v1.1, Uni-Dock supports NVIDIA GPUs with [compute capability](https://en.wikipedia.org/wiki/CUDA#GPUs_supported) >= 7.0. Uni-Dock v1.0 supports both the CUDA platform for NVIDIA GPUs and the ROCm platform for AMD GPUs. Since the performance optimizations are targeted at CUDA, future updates will no longer support the ROCm platform.
### Addendum to "FAQ 3 - Uni-Dock computes slowly for few (<10) ligands."
The `paired_batch` mode accelerates simultaneous 1:1 docking in batches with Vina scoring, using CUDA streams. To run docking in this mode, invoke unidock with `--paired_batch_size` set to a value > 0 and pass the protein:ligand configurations as JSON via `--ligand_index`. The JSON file should follow the schema defined in paired_batching.schema.json.
48 changes: 26 additions & 22 deletions unidock/src/cuda/monte_carlo.cu
@@ -356,7 +356,6 @@ __global__ __launch_bounds__(MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP) void kern
}
/* Above based on kernel.cl */


__host__ void monte_carlo::mc_stream(
std::vector<model> &m_gpu, std::vector<output_container> &out_gpu,
std::vector<precalculate_byatom> &p_gpu, triangular_matrix_cuda_t *m_data_list_gpu,
@@ -366,7 +365,7 @@ __host__ void monte_carlo::mc_stream(
DEBUG_PRINTF("entering CUDA monte_carlo search\n"); // debug

cudaStream_t curr_stream = 0;
checkCUDA(cudaStreamCreate ( &curr_stream));
checkCUDA(cudaStreamCreate(&curr_stream));
DEBUG_PRINTF("Stream created [0x%p]\n", curr_stream);

vec authentic_v(1000, 1000,
@@ -518,7 +517,8 @@ __host__ void monte_carlo::mc_stream(
if (m.ligands.size() == 0) { // ligand parsing error
m_cuda->m_num_movable_atoms = -1;
DEBUG_PRINTF("copy m_cuda to gpu, size=%lu\n", sizeof(m_cuda_t));
checkCUDA(cudaMemcpyAsync(m_cuda_gpu + l, m_cuda, sizeof(m_cuda_t), cudaMemcpyHostToDevice, curr_stream));
checkCUDA(cudaMemcpyAsync(m_cuda_gpu + l, m_cuda, sizeof(m_cuda_t),
cudaMemcpyHostToDevice, curr_stream));
} else {
for (int i = 0; i < m.atoms.size(); i++) {
m_cuda->atoms[i].types[0]
@@ -607,7 +607,8 @@ __host__ void monte_carlo::mc_stream(
m_cuda->m_num_movable_atoms = m.num_movable_atoms();

DEBUG_PRINTF("copy m_cuda to gpu, size=%lu\n", sizeof(m_cuda_t));
checkCUDA(cudaMemcpyAsync(m_cuda_gpu + l, m_cuda, sizeof(m_cuda_t), cudaMemcpyHostToDevice, curr_stream));
checkCUDA(cudaMemcpyAsync(m_cuda_gpu + l, m_cuda, sizeof(m_cuda_t),
cudaMemcpyHostToDevice, curr_stream));

/* Prepare rand_molec_struc data */
int lig_torsion_size = tmp.c.ligands[0].torsions.size();
@@ -652,7 +653,8 @@ __host__ void monte_carlo::mc_stream(
= rand_molec_struc_gpu
+ (l * threads_per_ligand + i) * SIZE_OF_MOLEC_STRUC / sizeof(float);
checkCUDA(cudaMemcpyAsync(rand_molec_struc_gpu_tmp, rand_molec_struc_tmp,
SIZE_OF_MOLEC_STRUC, cudaMemcpyHostToDevice, curr_stream));
SIZE_OF_MOLEC_STRUC, cudaMemcpyHostToDevice,
curr_stream));
}

/* Preparing p related data */
@@ -663,10 +665,11 @@ __host__ void monte_carlo::mc_stream(
p_cuda->factor = p.m_factor;
p_cuda->n = p.m_n;
p_cuda->m_data_size = p.m_data.m_data.size();
checkCUDA(cudaMemcpyAsync(p_cuda_gpu + l, p_cuda, sizeof(p_cuda_t), cudaMemcpyHostToDevice, curr_stream));
checkCUDA(cudaMemcpyAsync(p_cuda_gpu + l, p_cuda, sizeof(p_cuda_t),
cudaMemcpyHostToDevice, curr_stream));
checkCUDA(cudaMemcpyAsync(&(p_cuda_gpu[l].m_data), &(m_data_list_gpu[l].p_data),
sizeof(p_m_data_cuda_t *),
cudaMemcpyHostToDevice, curr_stream)); // check if fl == float
sizeof(p_m_data_cuda_t *), cudaMemcpyHostToDevice,
curr_stream)); // check if fl == float
}
}

@@ -780,8 +783,8 @@ __host__ void monte_carlo::mc_stream(
}
}

checkCUDA(
cudaMemcpyAsync(ig_cuda_gpu + l, ig_cuda_ptr, ig_cuda_size, cudaMemcpyHostToDevice, curr_stream));
checkCUDA(cudaMemcpyAsync(ig_cuda_gpu + l, ig_cuda_ptr, ig_cuda_size,
cudaMemcpyHostToDevice, curr_stream));
}
std::cout << "set\n";
} else {
@@ -821,15 +824,17 @@ __host__ void monte_carlo::mc_stream(
}
DEBUG_PRINTF("memcpy ig_cuda, ig_cuda_size=%lu\n", ig_cuda_size);
checkCUDA(cudaMalloc(&ig_cuda_gpu, ig_cuda_size));
checkCUDA(cudaMemcpyAsync(ig_cuda_gpu, ig_cuda_ptr, ig_cuda_size, cudaMemcpyHostToDevice, curr_stream));
checkCUDA(cudaMemcpyAsync(ig_cuda_gpu, ig_cuda_ptr, ig_cuda_size, cudaMemcpyHostToDevice,
curr_stream));
}

float mutation_amplitude_float = static_cast<float>(mutation_amplitude);

checkCUDA(cudaMemcpyAsync(hunt_cap_gpu, hunt_cap_float, 3 * sizeof(float), cudaMemcpyHostToDevice, curr_stream));
checkCUDA(cudaMemcpyAsync(hunt_cap_gpu, hunt_cap_float, 3 * sizeof(float),
cudaMemcpyHostToDevice, curr_stream));

checkCUDA(cudaMemcpyAsync(authentic_v_gpu, authentic_v_float, sizeof(authentic_v_float),
cudaMemcpyHostToDevice, curr_stream));
cudaMemcpyHostToDevice, curr_stream));

/* Add timing */
cudaEvent_t start, stop;
@@ -843,20 +848,20 @@ __host__ void monte_carlo::mc_stream(

output_type_cuda_t *results_aux;
checkCUDA(cudaMalloc(&results_aux, 5 * thread * sizeof(output_type_cuda_t)));
checkCUDA(cudaMemsetAsync(results_aux, 0, 5 * thread * sizeof(output_type_cuda_t), curr_stream));
checkCUDA(
cudaMemsetAsync(results_aux, 0, 5 * thread * sizeof(output_type_cuda_t), curr_stream));
change_cuda_t *change_aux;
checkCUDA(cudaMalloc(&change_aux, 6 * thread * sizeof(change_cuda_t)));
checkCUDA(cudaMemsetAsync(change_aux, 0, 6 * thread * sizeof(change_cuda_t), curr_stream));
pot_cuda_t *pot_aux;
checkCUDA(cudaMalloc(&pot_aux, thread * sizeof(pot_cuda_t)));
checkCUDA(cudaMemsetAsync(pot_aux, 0, thread * sizeof(pot_cuda_t), curr_stream));

kernel<32><<<thread, 32, 0, curr_stream>>>(m_cuda_gpu, ig_cuda_gpu, p_cuda_gpu, rand_molec_struc_gpu,
quasi_newton_par_max_steps, mutation_amplitude_float, states, seed,
epsilon_fl_float, hunt_cap_gpu, authentic_v_gpu, results_gpu,
results_aux, change_aux, pot_aux, h_cuda_global, m_cuda_global,
global_steps, num_of_ligands, threads_per_ligand, multi_bias);

kernel<32><<<thread, 32, 0, curr_stream>>>(
m_cuda_gpu, ig_cuda_gpu, p_cuda_gpu, rand_molec_struc_gpu, quasi_newton_par_max_steps,
mutation_amplitude_float, states, seed, epsilon_fl_float, hunt_cap_gpu, authentic_v_gpu,
results_gpu, results_aux, change_aux, pot_aux, h_cuda_global, m_cuda_global, global_steps,
num_of_ligands, threads_per_ligand, multi_bias);

// Wait for stream operations to complete
checkCUDA(cudaStreamSynchronize(curr_stream));
@@ -950,8 +955,7 @@ __host__ void monte_carlo::mc_stream(
checkCUDA(cudaStreamDestroy(curr_stream));
curr_stream = 0;

DEBUG_PRINTF("exit monte_carlo\n");

DEBUG_PRINTF("exit monte_carlo\n");
}

/* Below based on monte-carlo.cpp */
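The mc_stream changes above all follow the same dedicated-stream pattern: create a CUDA stream, queue host-to-device copies with cudaMemcpyAsync on that stream, launch the kernel into the same stream, then synchronize and destroy it. The standalone sketch below illustrates that pattern in isolation. It is not Uni-Dock code: the kernel, buffer sizes, and the CHECK_CUDA macro (a stand-in for Uni-Dock's checkCUDA) are placeholders for illustration.

```cuda
#include <cstdio>
#include <cstdlib>
#include <cuda_runtime.h>

// Simple error-checking macro standing in for Uni-Dock's checkCUDA().
#define CHECK_CUDA(call)                                                    \
    do {                                                                    \
        cudaError_t err__ = (call);                                         \
        if (err__ != cudaSuccess) {                                         \
            fprintf(stderr, "CUDA error %s at %s:%d\n",                     \
                    cudaGetErrorString(err__), __FILE__, __LINE__);         \
            exit(EXIT_FAILURE);                                             \
        }                                                                   \
    } while (0)

__global__ void scale_kernel(float *data, int n, float factor) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) data[i] *= factor;
}

int main() {
    const int n = 1 << 20;
    float *h_data = (float *)malloc(n * sizeof(float));
    for (int i = 0; i < n; ++i) h_data[i] = 1.0f;

    float *d_data;
    CHECK_CUDA(cudaMalloc(&d_data, n * sizeof(float)));

    // 1. Create a dedicated stream (mc_stream does this once per call).
    cudaStream_t stream;
    CHECK_CUDA(cudaStreamCreate(&stream));

    // 2. Queue the host-to-device copy asynchronously on that stream.
    CHECK_CUDA(cudaMemcpyAsync(d_data, h_data, n * sizeof(float),
                               cudaMemcpyHostToDevice, stream));

    // 3. Launch the kernel into the same stream so it runs after the copy.
    scale_kernel<<<(n + 255) / 256, 256, 0, stream>>>(d_data, n, 2.0f);

    // 4. Copy results back on the stream, then wait for all queued work.
    CHECK_CUDA(cudaMemcpyAsync(h_data, d_data, n * sizeof(float),
                               cudaMemcpyDeviceToHost, stream));
    CHECK_CUDA(cudaStreamSynchronize(stream));

    // 5. Release the stream, as mc_stream does with cudaStreamDestroy.
    CHECK_CUDA(cudaStreamDestroy(stream));
    CHECK_CUDA(cudaFree(d_data));
    free(h_data);
    printf("h_data[0] = %f\n", h_data[0]);  // expect 2.0
    return 0;
}
```

Within a single stream, operations execute in the order they were queued, so the kernel only starts after the input copy completes; work queued on different streams may overlap, which is the concurrency that stream-based batching aims to exploit.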
8 changes: 4 additions & 4 deletions unidock/src/lib/monte_carlo.h
@@ -65,10 +65,10 @@ struct monte_carlo {
int verbosity, unsigned long long seed,
std::vector<std::vector<bias_element> >& bias_batch_list) const;
void mc_stream(std::vector<model>& m, std::vector<output_container>& out,
std::vector<precalculate_byatom>& p, triangular_matrix_cuda_t* m_data_list_gpu,
const igrid& ig, const vec& corner1, const vec& corner2, rng& generator,
int verbosity, unsigned long long seed,
std::vector<std::vector<bias_element> >& bias_batch_list) const;
std::vector<precalculate_byatom>& p, triangular_matrix_cuda_t* m_data_list_gpu,
const igrid& ig, const vec& corner1, const vec& corner2, rng& generator,
int verbosity, unsigned long long seed,
std::vector<std::vector<bias_element> >& bias_batch_list) const;

std::vector<output_type> cuda_to_vina(output_type_cuda_t* results_p, int thread) const;
};
2 changes: 1 addition & 1 deletion unidock/src/lib/potentials.h
@@ -214,7 +214,7 @@ class vina_non_dir_h_bond : public Potential {
if (r >= cutoff) return 0.0;
if ((a.xs >= XS_TYPE_SIZE) || (b.xs >= XS_TYPE_SIZE)) return 0.0;
if (xs_h_bond_possible(a.xs, b.xs)) {
if((a.xs >= 32 && a.xs <= 35) || (b.xs >= 32 && b.xs <= 35)) {
if ((a.xs >= 32 && a.xs <= 35) || (b.xs >= 32 && b.xs <= 35)) {
return 10.0 * slope_step(bad, good, r - optimal_distance(a.xs, b.xs));
} else
return slope_step(bad, good, r - optimal_distance(a.xs, b.xs));
13 changes: 4 additions & 9 deletions unidock/src/lib/scoring_function.h
@@ -93,24 +93,19 @@ class ScoringFunction {
m_num_conf_independents = m_conf_independents.size();
m_weights = weights;
};
void Destroy()
{
for (auto p : m_potentials)
{
void Destroy() {
for (auto p : m_potentials) {
delete p;
}
m_potentials.clear();
m_num_potentials = 0;
for (auto p : m_conf_independents)
{
for (auto p : m_conf_independents) {
delete p;
}
m_conf_independents.clear();
m_num_conf_independents = 0;
}
~ScoringFunction() {
Destroy();
}
~ScoringFunction() { Destroy(); }
fl eval(atom& a, atom& b, fl r) const { // intentionally not checking for cutoff
fl acc = 0;
VINA_FOR(i, m_num_potentials) { acc += m_weights[i] * m_potentials[i]->eval(a, b, r); }
18 changes: 8 additions & 10 deletions unidock/src/lib/vina.cpp
@@ -1654,8 +1654,8 @@ void Vina::global_search(const int exhaustiveness, const int n_poses, const doub

void Vina::global_search_gpu(const int exhaustiveness, const int n_poses, const double min_rmsd,
const int max_evals, const int max_step, int num_of_ligands,
unsigned long long seed, const int refine_step,
const bool local_only, const bool create_new_stream) {
unsigned long long seed, const int refine_step, const bool local_only,
const bool create_new_stream) {
// Vina search (Monte-carlo and local optimization)
// Check if ff, box and ligand were initialized
if (!m_ligand_initialized) {
@@ -1716,15 +1716,13 @@ void Vina::global_search_gpu(const int exhaustiveness, const int n_poses, const
doing(sstm.str(), m_verbosity, 0);
auto start = std::chrono::system_clock::now();
if (m_sf_choice == SF_VINA || m_sf_choice == SF_VINARDO) {
if (create_new_stream)
{
mc.mc_stream(m_model_gpu, poses_gpu, m_precalculated_byatom_gpu, m_data_list_gpu, m_grid,
m_grid.corner1(), m_grid.corner2(), generator, m_verbosity, seed, bias_batch_list);
}
else
{
if (create_new_stream) {
mc.mc_stream(m_model_gpu, poses_gpu, m_precalculated_byatom_gpu, m_data_list_gpu,
m_grid, m_grid.corner1(), m_grid.corner2(), generator, m_verbosity, seed,
bias_batch_list);
} else {
mc(m_model_gpu, poses_gpu, m_precalculated_byatom_gpu, m_data_list_gpu, m_grid,
m_grid.corner1(), m_grid.corner2(), generator, m_verbosity, seed, bias_batch_list);
m_grid.corner1(), m_grid.corner2(), generator, m_verbosity, seed, bias_batch_list);
}
} else {
mc(m_model_gpu, poses_gpu, m_precalculated_byatom_gpu, m_data_list_gpu, m_ad4grid,
5 changes: 2 additions & 3 deletions unidock/src/lib/vina.h
@@ -150,7 +150,7 @@ class Vina {
const double min_rmsd = 1.0, const int max_evals = 0,
const int max_step = 0, int num_of_ligands = 1,
unsigned long long seed = 181129, const int refine_step = 5,
const bool local_only = false,const bool create_new_stream = false);
const bool local_only = false, const bool create_new_stream = false);
template <typename Config>
void global_search_gpu(const int exhaustiveness = 8, const int n_poses = 20,
const double min_rmsd = 1.0, const int max_evals = 0,
@@ -355,8 +355,7 @@ class Vina {
end = std::chrono::system_clock::now();
std::cout << "poses saveing time: "
<< std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << std::endl;
}

}
std::string get_poses(int how_many = 9, double energy_range = 3.0);
std::string get_sdf_poses(int how_many = 9, double energy_range = 3.0);
std::string get_poses_gpu(int ligand_id, int how_many = 9, double energy_range = 3.0);
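Tying the vina.cpp dispatch to the declaration above: a caller opts into the mc_stream path by passing create_new_stream = true as the final argument of global_search_gpu. The sketch below is a hedged illustration only; it assumes a Vina instance v has already been configured (receptor, maps, and ligands loaded), elides that setup because it is not part of this diff, and uses made-up parameter values.

```cpp
#include "vina.h"  // assumed include path inside the Uni-Dock source tree

// Illustrative wrapper: parameter values are examples, not recommendations.
void run_gpu_search_on_private_stream(Vina &v, int num_of_ligands) {
    const int exhaustiveness = 512;
    const int n_poses = 20;
    const double min_rmsd = 1.0;
    const int max_evals = 0;
    const int max_step = 40;
    const unsigned long long seed = 181129;
    const int refine_step = 5;
    const bool local_only = false;
    const bool create_new_stream = true;  // route the Vina/Vinardo case through mc_stream()

    v.global_search_gpu(exhaustiveness, n_poses, min_rmsd, max_evals, max_step,
                        num_of_ligands, seed, refine_step, local_only,
                        create_new_stream);
}
```

With create_new_stream left at its default of false, the same call falls back to the original mc() path, as shown in the global_search_gpu diff above.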
85 changes: 44 additions & 41 deletions unidock/src/main/complex_property.h
@@ -37,75 +37,78 @@

// Holds properties of each ligand complex

struct complex_property
{
struct complex_property {
double center_x = 0;
double center_y = 0;
double center_z = 0;
double box_x = 0;
double box_y = 0;
double box_z = 0;
double box_z = 0;
std::string protein_name;
std::string ligand_name;
complex_property(double x, double y, double z,
double box_x, double box_y, double box_z,
std::string protein_name, std::string ligand_name):
center_x(x),
center_y(y),
center_z(z),
box_x(box_x),
box_y(box_y),
box_z(box_z),
protein_name(protein_name),
ligand_name(ligand_name){};
complex_property(double x, double y, double z, double box_x, double box_y, double box_z,
std::string protein_name, std::string ligand_name)
: center_x(x),
center_y(y),
center_z(z),
box_x(box_x),
box_y(box_y),
box_z(box_z),
protein_name(protein_name),
ligand_name(ligand_name){};
complex_property(){};
};

// Holds properties of all ligand complexs

struct complex_property_holder
{
struct complex_property_holder {
int max_count;
complex_property* m_properties;
complex_property_holder(int N):
max_count(N),
m_properties(nullptr)
{
m_properties = new complex_property[N];
}
~complex_property_holder()
{
delete [] m_properties;
m_properties = nullptr;
complex_property_holder(int N) : max_count(N), m_properties(nullptr) {
m_properties = new complex_property[N];
}
complex_property* get_end()
{
return &m_properties[max_count];
~complex_property_holder() {
delete[] m_properties;
m_properties = nullptr;
}
complex_property* get_end() { return &m_properties[max_count]; }

struct complex_property_iterator
{
struct complex_property_iterator {
using iterator_category = std::forward_iterator_tag;
using difference_type = std::ptrdiff_t;
using value_type = complex_property;
using pointer = complex_property*;
using reference = complex_property&;
using difference_type = std::ptrdiff_t;
using value_type = complex_property;
using pointer = complex_property*;
using reference = complex_property&;

complex_property_iterator(pointer ptr) : m_ptr(ptr) {}
reference operator*() const { return *m_ptr; }
pointer operator->() { return m_ptr; }

// Prefix increment
complex_property_iterator& operator++() { m_ptr++; return *this; }
complex_property_iterator& operator++() {
m_ptr++;
return *this;
}

// Postfix increment
complex_property_iterator operator++(int) { complex_property_iterator tmp = *this; ++(*this); return tmp; }
complex_property_iterator operator++(int) {
complex_property_iterator tmp = *this;
++(*this);
return tmp;
}

friend bool operator==(const complex_property_iterator& a,
const complex_property_iterator& b) {
return a.m_ptr == b.m_ptr;
};
friend bool operator!=(const complex_property_iterator& a,
const complex_property_iterator& b) {
return a.m_ptr != b.m_ptr;
};

friend bool operator== (const complex_property_iterator& a, const complex_property_iterator& b) { return a.m_ptr == b.m_ptr; };
friend bool operator!= (const complex_property_iterator& a, const complex_property_iterator& b) { return a.m_ptr != b.m_ptr; };
private:
pointer m_ptr;
pointer m_ptr;
};
complex_property_iterator begin() { return complex_property_iterator(&m_properties[0]); }
complex_property_iterator end() { return complex_property_iterator(get_end()); }
complex_property_iterator end() { return complex_property_iterator(get_end()); }
};
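The reformatted iterator gives complex_property_holder a begin()/end() pair, so it can be traversed with a range-based for loop. The sketch below shows that intended usage; it assumes unidock/src/main/complex_property.h is on the include path, and the names and center/box values are made up for illustration.

```cpp
#include <iostream>

#include "complex_property.h"  // assumed include path: unidock/src/main/

int main() {
    // Two docking jobs with made-up box centers and sizes (Angstroms).
    complex_property_holder holder(2);
    holder.m_properties[0] = complex_property(10.0, 12.5, -3.2, 20.0, 20.0, 20.0,
                                              "protein_A", "ligand_1");
    holder.m_properties[1] = complex_property(1.0, 2.0, 3.0, 18.0, 18.0, 18.0,
                                              "protein_B", "ligand_2");

    // Range-based for works because the holder exposes begin()/end()
    // returning the forward iterator defined in the header.
    for (complex_property &prop : holder) {
        std::cout << prop.protein_name << " / " << prop.ligand_name
                  << "  center=(" << prop.center_x << ", " << prop.center_y
                  << ", " << prop.center_z << ")  box=(" << prop.box_x << ", "
                  << prop.box_y << ", " << prop.box_z << ")\n";
    }
    return 0;
}
```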