Skip to content

Commit

Permalink
Doxygen documentation: cuda
Browse files Browse the repository at this point in the history
  • Loading branch information
mshabunin committed Dec 1, 2014
1 parent 472c210 commit ceb6e8b
Show file tree
Hide file tree
Showing 80 changed files with 2,917 additions and 398 deletions.
8 changes: 7 additions & 1 deletion doc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -159,12 +159,18 @@ if(BUILD_DOCS AND HAVE_DOXYGEN)
set(reflist) # modules reference
foreach(m ${candidates})
set(reflist "${reflist} \n- @subpage ${m}")
set(all_headers ${all_headers} "${OPENCV_MODULE_opencv_${m}_HEADERS}")

set(header_dir "${OPENCV_MODULE_opencv_${m}_LOCATION}/include")
if(EXISTS ${header_dir})
set(all_headers ${all_headers} ${header_dir})
endif()

set(docs_dir "${OPENCV_MODULE_opencv_${m}_LOCATION}/doc")
if(EXISTS ${docs_dir})
set(all_images ${all_images} ${docs_dir})
set(all_headers ${all_headers} ${docs_dir})
endif()

endforeach()

# additional config
Expand Down
2 changes: 1 addition & 1 deletion doc/Doxyfile.in
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ FILE_PATTERNS =
RECURSIVE = YES
EXCLUDE =
EXCLUDE_SYMLINKS = NO
EXCLUDE_PATTERNS =
EXCLUDE_PATTERNS = *.inl.hpp *.impl.hpp *_detail.hpp */cudev/**/detail/*.hpp
EXCLUDE_SYMBOLS = cv::DataType<*> int
EXAMPLE_PATH = @CMAKE_DOXYGEN_EXAMPLE_PATH@
EXAMPLE_PATTERNS = *
Expand Down
199 changes: 164 additions & 35 deletions modules/core/include/opencv2/core/cuda.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,12 @@
#include "opencv2/core/cuda_types.hpp"

/**
@defgroup cuda CUDA-accelerated Computer Vision
@{
@defgroup cuda_struct Data structures
@}
@addtogroup cuda
@{
@defgroup cuda_init Initialization and Information
@defgroup cuda_struct Data Structures
@defgroup cuda_calib3d Camera Calibration and 3D Reconstruction
@}
*/

namespace cv { namespace cuda {
Expand All @@ -65,8 +67,28 @@ namespace cv { namespace cuda {

//////////////////////////////// GpuMat ///////////////////////////////

//! Smart pointer for GPU memory with reference counting.
//! Its interface is mostly similar with cv::Mat.
/** @brief Base storage class for GPU memory with reference counting.
Its interface matches the Mat interface with the following limitations:
- no arbitrary dimensions support (only 2D)
- no functions that return references to their data (because references on GPU are not valid for
CPU)
- no expression templates technique support
Beware that the latter limitation may lead to overloaded matrix operators that cause memory
allocations. The GpuMat class is convertible to cuda::PtrStepSz and cuda::PtrStep so it can be
passed directly to the kernel.
@note In contrast with Mat, in most cases GpuMat::isContinuous() == false . This means that rows are
aligned to a size depending on the hardware. Single-row GpuMat is always a continuous matrix.
@note You are not recommended to leave static or global GpuMat variables allocated, that is, to rely
on its destructor. The destruction order of such variables and CUDA context is undefined. GPU memory
release function returns error if the CUDA context has been destroyed before.
@sa Mat
*/
class CV_EXPORTS GpuMat
{
public:
Expand Down Expand Up @@ -277,11 +299,28 @@ class CV_EXPORTS GpuMat
Allocator* allocator;
};

//! creates continuous matrix
/** @brief Creates a continuous matrix.
@param rows Row count.
@param cols Column count.
@param type Type of the matrix.
@param arr Destination matrix. This parameter changes only if it has a proper type and area (
\f$\texttt{rows} \times \texttt{cols}\f$ ).
Matrix is called continuous if its elements are stored continuously, that is, without gaps at the
end of each row.
*/
CV_EXPORTS void createContinuous(int rows, int cols, int type, OutputArray arr);

//! ensures that size of the given matrix is not less than (rows, cols) size
//! and matrix type is match specified one too
/** @brief Ensures that the size of a matrix is big enough and the matrix has a proper type.
@param rows Minimum desired number of rows.
@param cols Minimum desired number of columns.
@param type Desired matrix type.
@param arr Destination matrix.
The function does not reallocate memory if the matrix has proper attributes already.
*/
CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr);

CV_EXPORTS GpuMat allocMatFromBuf(int rows, int cols, int type, GpuMat& mat);
Expand All @@ -292,10 +331,21 @@ CV_EXPORTS void setBufferPoolConfig(int deviceId, size_t stackSize, int stackCou

//////////////////////////////// CudaMem ////////////////////////////////

//! CudaMem is a limited cv::Mat with page-locked memory allocation.
//! Page-locked memory is only needed for async and faster copying to GPU.
//! It is convertible to a cv::Mat header without reference counting
//! so you can use it with other OpenCV functions.
/** @brief Class with reference counting wrapping special memory type allocation functions from CUDA.
Its interface is also Mat-like but with additional memory type parameters.
- **PAGE\_LOCKED** sets a page locked memory type used commonly for fast and asynchronous
uploading/downloading data from/to GPU.
- **SHARED** specifies a zero copy memory allocation that enables mapping the host memory to GPU
address space, if supported.
- **WRITE\_COMBINED** sets the write combined buffer that is not cached by CPU. Such buffers are
used to supply GPU with data when GPU only reads it. The advantage is a better CPU cache
utilization.
@note Allocation size of such memory types is usually limited. For more details, see *CUDA 2.2
Pinned Memory APIs* document or *CUDA C Programming Guide*.
*/
class CV_EXPORTS CudaMem
{
public:
Expand Down Expand Up @@ -335,7 +385,13 @@ class CV_EXPORTS CudaMem
//! returns matrix header with disabled reference counting for CudaMem data.
Mat createMatHeader() const;

//! maps host memory into device address space and returns GpuMat header for it. Throws exception if not supported by hardware.
/** @brief Maps CPU memory to GPU address space and creates the cuda::GpuMat header without reference counting
for it.
This can be done only if memory was allocated with the SHARED flag and if it is supported by the
hardware. Laptops often share video and CPU memory, so address spaces can be mapped, which
eliminates an extra copy.
*/
GpuMat createGpuMatHeader() const;

// Please see cv::Mat for descriptions
Expand Down Expand Up @@ -363,17 +419,28 @@ class CV_EXPORTS CudaMem
AllocType alloc_type;
};

//! page-locks the matrix m memory and maps it for the device(s)
/** @brief Page-locks the memory of matrix and maps it for the device(s).
@param m Input matrix.
*/
CV_EXPORTS void registerPageLocked(Mat& m);

//! unmaps the memory of matrix m, and makes it pageable again
/** @brief Unmaps the memory of matrix and makes it pageable again.
@param m Input matrix.
*/
CV_EXPORTS void unregisterPageLocked(Mat& m);

///////////////////////////////// Stream //////////////////////////////////

//! Encapsulates a CUDA Stream. Provides an interface for async copying.
//! Passed to each function that supports async kernel execution.
//! Reference counting is enabled.
/** @brief This class encapsulates a queue of asynchronous calls.
@note Currently, you may face problems if an operation is enqueued twice with different data. Some
functions use the constant GPU memory, and next call may update the memory before the previous one
has been finished. But calling different operations asynchronously is safe because each operation
has its own constant buffer. Memory copy/upload/download/set operations to the buffers you hold are
also safe.
*/
class CV_EXPORTS Stream
{
typedef void (Stream::*bool_type)() const;
Expand All @@ -385,16 +452,26 @@ class CV_EXPORTS Stream
//! creates a new asynchronous stream
Stream();

//! queries an asynchronous stream for completion status
/** @brief Returns true if the current stream queue is finished. Otherwise, it returns false.
*/
bool queryIfComplete() const;

//! waits for stream tasks to complete
/** @brief Blocks the current CPU thread until all operations in the stream are complete.
*/
void waitForCompletion();

//! makes a compute stream wait on an event
/** @brief Makes a compute stream wait on an event.
*/
void waitEvent(const Event& event);

//! adds a callback to be called on the host after all currently enqueued items in the stream have completed
/** @brief Adds a callback to be called on the host after all currently enqueued items in the stream have
completed.
@note Callbacks must not make any CUDA API calls. Callbacks must not perform any synchronization
that may depend on outstanding device work or other callbacks that are not mandated to run earlier.
Callbacks without a mandated order (in independent streams) execute in undefined order and may be
serialized.
*/
void enqueueHostCallback(StreamCallback callback, void* userData);

//! return Stream object for default CUDA stream
Expand Down Expand Up @@ -446,21 +523,41 @@ class CV_EXPORTS Event
friend struct EventAccessor;
};

//! @} cuda_struct

//////////////////////////////// Initialization & Info ////////////////////////

//! this is the only function that do not throw exceptions if the library is compiled without CUDA
//! @addtogroup cuda_init
//! @{

/** @brief Returns the number of installed CUDA-enabled devices.
Use this function before any other CUDA functions calls. If OpenCV is compiled without CUDA support,
this function returns 0.
*/
CV_EXPORTS int getCudaEnabledDeviceCount();

//! set device to be used for GPU executions for the calling host thread
/** @brief Sets a device and initializes it for the current thread.
@param device System index of a CUDA device starting with 0.
If the call of this function is omitted, a default device is initialized at the first CUDA usage.
*/
CV_EXPORTS void setDevice(int device);

//! returns which device is currently being used for the calling host thread
/** @brief Returns the current device index set by cuda::setDevice or initialized by default.
*/
CV_EXPORTS int getDevice();

//! explicitly destroys and cleans up all resources associated with the current device in the current process
//! any subsequent API call to this device will reinitialize the device
/** @brief Explicitly destroys and cleans up all resources associated with the current device in the current
process.
Any subsequent API call to this device will reinitialize the device.
*/
CV_EXPORTS void resetDevice();

/** @brief Enumeration providing CUDA computing features.
*/
enum FeatureSet
{
FEATURE_SET_COMPUTE_10 = 10,
Expand All @@ -482,12 +579,27 @@ enum FeatureSet
//! checks whether current device supports the given feature
CV_EXPORTS bool deviceSupports(FeatureSet feature_set);

//! information about what GPU archs this OpenCV CUDA module was compiled for
/** @brief Class providing a set of static methods to check what NVIDIA card architecture the CUDA module was
built for.
According to the CUDA C Programming Guide Version 3.2: "PTX code produced for some specific compute
capability can always be compiled to binary code of greater or equal compute capability".
*/
class CV_EXPORTS TargetArchs
{
public:
/** @brief The following method checks whether the module was built with the support of the given feature:
@param feature\_set Features to be checked. See cv::cuda::FeatureSet.
*/
static bool builtWith(FeatureSet feature_set);

/** @brief There is a set of methods to check whether the module contains intermediate (PTX) or binary CUDA
code for the given architecture(s):
@param major Major compute capability version.
@param minor Minor compute capability version.
*/
static bool has(int major, int minor);
static bool hasPtx(int major, int minor);
static bool hasBin(int major, int minor);
Expand All @@ -498,17 +610,25 @@ class CV_EXPORTS TargetArchs
static bool hasEqualOrGreaterBin(int major, int minor);
};

//! information about the given GPU.
/** @brief Class providing functionality for querying the specified GPU properties.
*/
class CV_EXPORTS DeviceInfo
{
public:
//! creates DeviceInfo object for the current GPU
DeviceInfo();

//! creates DeviceInfo object for the given GPU
/** @brief The constructors.
@param device\_id System index of the CUDA device starting with 0.
Constructs the DeviceInfo object for the specified device. If device\_id parameter is missed, it
constructs an object for the current device.
*/
DeviceInfo(int device_id);

//! device number.
/** @brief Returns system index of the CUDA device starting with 0.
*/
int deviceID() const;

//! ASCII string identifying device
Expand Down Expand Up @@ -680,10 +800,19 @@ class CV_EXPORTS DeviceInfo
size_t freeMemory() const;
size_t totalMemory() const;

//! checks whether device supports the given feature
/** @brief Provides information on CUDA feature support.
@param feature\_set Features to be checked. See cuda::FeatureSet.
This function returns true if the device has the specified CUDA feature. Otherwise, it returns false.
*/
bool supports(FeatureSet feature_set) const;

//! checks whether the CUDA module can be run on the given device
/** @brief Checks the CUDA module and device compatibility.
This function returns true if the CUDA module can be run on the specified device. Otherwise, it
returns false.
*/
bool isCompatible() const;

private:
Expand All @@ -693,7 +822,7 @@ class CV_EXPORTS DeviceInfo
CV_EXPORTS void printCudaDeviceInfo(int device);
CV_EXPORTS void printShortCudaDeviceInfo(int device);

//! @}
//! @} cuda_init

}} // namespace cv { namespace cuda {

Expand Down
5 changes: 5 additions & 0 deletions modules/core/include/opencv2/core/cuda_stream_accessor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,11 @@ namespace cv
class Stream;
class Event;

/** @brief Class that enables getting cudaStream\_t from cuda::Stream.
This is the only public header that depends on the CUDA Runtime API. Including it
brings a dependency to your code.
*/
struct StreamAccessor
{
CV_EXPORTS static cudaStream_t getStream(const Stream& stream);
Expand Down
11 changes: 11 additions & 0 deletions modules/core/include/opencv2/core/cuda_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,11 @@ namespace cv
size_t size;
};

/** @brief Structure similar to cuda::PtrStepSz but containing only a pointer and row step.
Width and height fields are excluded due to performance reasons. The structure is intended
for internal use or for users who write device code.
*/
template <typename T> struct PtrStep : public DevPtr<T>
{
__CV_CUDA_HOST_DEVICE__ PtrStep() : step(0) {}
Expand All @@ -104,6 +109,12 @@ namespace cv
__CV_CUDA_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; }
};

/** @brief Lightweight class encapsulating pitched memory on a GPU and passed to nvcc-compiled code (CUDA
kernels).
Typically, it is used internally by OpenCV and by users who write device code. You can call
its members from both host and device code.
*/
template <typename T> struct PtrStepSz : public PtrStep<T>
{
__CV_CUDA_HOST_DEVICE__ PtrStepSz() : cols(0), rows(0) {}
Expand Down
Loading

0 comments on commit ceb6e8b

Please sign in to comment.