Skip to content

Commit

Permalink
Doxygen documentation: cuda
Browse files Browse the repository at this point in the history
  • Loading branch information
mshabunin committed Dec 1, 2014
1 parent 472c210 commit ceb6e8b
Show file tree
Hide file tree
Showing 80 changed files with 2,917 additions and 398 deletions.
8 changes: 7 additions & 1 deletion doc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -159,12 +159,18 @@ if(BUILD_DOCS AND HAVE_DOXYGEN)
set(reflist) # modules reference
foreach(m ${candidates})
set(reflist "${reflist} \n- @subpage ${m}")
set(all_headers ${all_headers} "${OPENCV_MODULE_opencv_${m}_HEADERS}")

set(header_dir "${OPENCV_MODULE_opencv_${m}_LOCATION}/include")
if(EXISTS ${header_dir})
set(all_headers ${all_headers} ${header_dir})
endif()

set(docs_dir "${OPENCV_MODULE_opencv_${m}_LOCATION}/doc")
if(EXISTS ${docs_dir})
set(all_images ${all_images} ${docs_dir})
set(all_headers ${all_headers} ${docs_dir})
endif()

endforeach()

# additional config
Expand Down
2 changes: 1 addition & 1 deletion doc/Doxyfile.in
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ FILE_PATTERNS =
RECURSIVE = YES
EXCLUDE =
EXCLUDE_SYMLINKS = NO
EXCLUDE_PATTERNS =
EXCLUDE_PATTERNS = *.inl.hpp *.impl.hpp *_detail.hpp */cudev/**/detail/*.hpp
EXCLUDE_SYMBOLS = cv::DataType<*> int
EXAMPLE_PATH = @CMAKE_DOXYGEN_EXAMPLE_PATH@
EXAMPLE_PATTERNS = *
Expand Down
199 changes: 164 additions & 35 deletions modules/core/include/opencv2/core/cuda.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,12 @@
#include "opencv2/core/cuda_types.hpp"

/**
@defgroup cuda CUDA-accelerated Computer Vision
@{
@defgroup cuda_struct Data structures
@}
@addtogroup cuda
@{
@defgroup cuda_init Initialization and Information
@defgroup cuda_struct Data Structures
@defgroup cuda_calib3d Camera Calibration and 3D Reconstruction
@}
*/

namespace cv { namespace cuda {
Expand All @@ -65,8 +67,28 @@ namespace cv { namespace cuda {

//////////////////////////////// GpuMat ///////////////////////////////

//! Smart pointer for GPU memory with reference counting.
//! Its interface is mostly similar with cv::Mat.
/** @brief Base storage class for GPU memory with reference counting.
Its interface matches the Mat interface with the following limitations:
- no arbitrary dimensions support (only 2D)
- no functions that return references to their data (because references on GPU are not valid for
CPU)
- no expression templates technique support
Beware that the latter limitation may lead to overloaded matrix operators that cause memory
allocations. The GpuMat class is convertible to cuda::PtrStepSz and cuda::PtrStep so it can be
passed directly to the kernel.
@note In contrast with Mat, in most cases GpuMat::isContinuous() == false . This means that rows are
aligned to a size depending on the hardware. Single-row GpuMat is always a continuous matrix.
@note You are not recommended to leave static or global GpuMat variables allocated, that is, to rely
on its destructor. The destruction order of such variables and CUDA context is undefined. GPU memory
release function returns error if the CUDA context has been destroyed before.
@sa Mat
*/
class CV_EXPORTS GpuMat
{
public:
Expand Down Expand Up @@ -277,11 +299,28 @@ class CV_EXPORTS GpuMat
Allocator* allocator;
};

//! creates continuous matrix
/** @brief Creates a continuous matrix.
@param rows Row count.
@param cols Column count.
@param type Type of the matrix.
@param arr Destination matrix. This parameter changes only if it has a proper type and area (
\f$\texttt{rows} \times \texttt{cols}\f$ ).
Matrix is called continuous if its elements are stored continuously, that is, without gaps at the
end of each row.
*/
CV_EXPORTS void createContinuous(int rows, int cols, int type, OutputArray arr);

//! ensures that size of the given matrix is not less than (rows, cols) size
//! and matrix type is match specified one too
/** @brief Ensures that the size of a matrix is big enough and the matrix has a proper type.
@param rows Minimum desired number of rows.
@param cols Minimum desired number of columns.
@param type Desired matrix type.
@param arr Destination matrix.
The function does not reallocate memory if the matrix has proper attributes already.
*/
CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr);

CV_EXPORTS GpuMat allocMatFromBuf(int rows, int cols, int type, GpuMat& mat);
Expand All @@ -292,10 +331,21 @@ CV_EXPORTS void setBufferPoolConfig(int deviceId, size_t stackSize, int stackCou

//////////////////////////////// CudaMem ////////////////////////////////

//! CudaMem is a limited cv::Mat with page-locked memory allocation.
//! Page-locked memory is only needed for async and faster copying to GPU.
//! It is convertible to a cv::Mat header without reference counting
//! so you can use it with other OpenCV functions.
/** @brief Class with reference counting wrapping special memory type allocation functions from CUDA.
Its interface is also Mat-like but with additional memory type parameters.
- **PAGE\_LOCKED** sets a page locked memory type used commonly for fast and asynchronous
uploading/downloading data from/to GPU.
- **SHARED** specifies a zero copy memory allocation that enables mapping the host memory to GPU
address space, if supported.
- **WRITE\_COMBINED** sets the write combined buffer that is not cached by CPU. Such buffers are
used to supply GPU with data when GPU only reads it. The advantage is a better CPU cache
utilization.
@note Allocation size of such memory types is usually limited. For more details, see *CUDA 2.2
Pinned Memory APIs* document or *CUDA C Programming Guide*.
*/
class CV_EXPORTS CudaMem
{
public:
Expand Down Expand Up @@ -335,7 +385,13 @@ class CV_EXPORTS CudaMem
//! returns matrix header with disabled reference counting for CudaMem data.
Mat createMatHeader() const;

//! maps host memory into device address space and returns GpuMat header for it. Throws exception if not supported by hardware.
/** @brief Maps CPU memory to GPU address space and creates the cuda::GpuMat header without reference counting
for it.
This can be done only if memory was allocated with the SHARED flag and if it is supported by the
hardware. Laptops often share video and CPU memory, so address spaces can be mapped, which
eliminates an extra copy.
*/
GpuMat createGpuMatHeader() const;

// Please see cv::Mat for descriptions
Expand Down Expand Up @@ -363,17 +419,28 @@ class CV_EXPORTS CudaMem
AllocType alloc_type;
};

//! page-locks the matrix m memory and maps it for the device(s)
/** @brief Page-locks the memory of matrix and maps it for the device(s).
@param m Input matrix.
*/
CV_EXPORTS void registerPageLocked(Mat& m);

//! unmaps the memory of matrix m, and makes it pageable again
/** @brief Unmaps the memory of matrix and makes it pageable again.
@param m Input matrix.
*/
CV_EXPORTS void unregisterPageLocked(Mat& m);

///////////////////////////////// Stream //////////////////////////////////

//! Encapsulates a CUDA Stream. Provides an interface for async copying.
//! Passed to each function that supports async kernel execution.
//! Reference counting is enabled.
/** @brief This class encapsulates a queue of asynchronous calls.
@note Currently, you may face problems if an operation is enqueued twice with different data. Some
functions use the constant GPU memory, and next call may update the memory before the previous one
has been finished. But calling different operations asynchronously is safe because each operation
has its own constant buffer. Memory copy/upload/download/set operations to the buffers you hold are
also safe.
*/
class CV_EXPORTS Stream
{
typedef void (Stream::*bool_type)() const;
Expand All @@ -385,16 +452,26 @@ class CV_EXPORTS Stream
//! creates a new asynchronous stream
Stream();

//! queries an asynchronous stream for completion status
/** @brief Returns true if the current stream queue is finished. Otherwise, it returns false.
*/
bool queryIfComplete() const;

//! waits for stream tasks to complete
/** @brief Blocks the current CPU thread until all operations in the stream are complete.
*/
void waitForCompletion();

//! makes a compute stream wait on an event
/** @brief Makes a compute stream wait on an event.
*/
void waitEvent(const Event& event);

//! adds a callback to be called on the host after all currently enqueued items in the stream have completed
/** @brief Adds a callback to be called on the host after all currently enqueued items in the stream have
completed.
@note Callbacks must not make any CUDA API calls. Callbacks must not perform any synchronization
that may depend on outstanding device work or other callbacks that are not mandated to run earlier.
Callbacks without a mandated order (in independent streams) execute in undefined order and may be
serialized.
*/
void enqueueHostCallback(StreamCallback callback, void* userData);

//! return Stream object for default CUDA stream
Expand Down Expand Up @@ -446,21 +523,41 @@ class CV_EXPORTS Event
friend struct EventAccessor;
};

//! @} cuda_struct

//////////////////////////////// Initialization & Info ////////////////////////

//! this is the only function that do not throw exceptions if the library is compiled without CUDA
//! @addtogroup cuda_init
//! @{

/** @brief Returns the number of installed CUDA-enabled devices.
Use this function before any other CUDA functions calls. If OpenCV is compiled without CUDA support,
this function returns 0.
*/
CV_EXPORTS int getCudaEnabledDeviceCount();

//! set device to be used for GPU executions for the calling host thread
/** @brief Sets a device and initializes it for the current thread.
@param device System index of a CUDA device starting with 0.
If the call of this function is omitted, a default device is initialized at the first CUDA usage.
*/
CV_EXPORTS void setDevice(int device);

//! returns which device is currently being used for the calling host thread
/** @brief Returns the current device index set by cuda::setDevice or initialized by default.
*/
CV_EXPORTS int getDevice();

//! explicitly destroys and cleans up all resources associated with the current device in the current process
//! any subsequent API call to this device will reinitialize the device
/** @brief Explicitly destroys and cleans up all resources associated with the current device in the current
process.
Any subsequent API call to this device will reinitialize the device.
*/
CV_EXPORTS void resetDevice();

/** @brief Enumeration providing CUDA computing features.
*/
enum FeatureSet
{
FEATURE_SET_COMPUTE_10 = 10,
Expand All @@ -482,12 +579,27 @@ enum FeatureSet
//! checks whether current device supports the given feature
CV_EXPORTS bool deviceSupports(FeatureSet feature_set);

//! information about what GPU archs this OpenCV CUDA module was compiled for
/** @brief Class providing a set of static methods to check what NVIDIA card architecture the CUDA module was
built for.
According to the CUDA C Programming Guide Version 3.2: "PTX code produced for some specific compute
capability can always be compiled to binary code of greater or equal compute capability".
*/
class CV_EXPORTS TargetArchs
{
public:
/** @brief The following method checks whether the module was built with the support of the given feature:
@param feature\_set Features to be checked. See cv::cuda::FeatureSet.
*/
static bool builtWith(FeatureSet feature_set);

/** @brief There is a set of methods to check whether the module contains intermediate (PTX) or binary CUDA
code for the given architecture(s):
@param major Major compute capability version.
@param minor Minor compute capability version.
*/
static bool has(int major, int minor);
static bool hasPtx(int major, int minor);
static bool hasBin(int major, int minor);
Expand All @@ -498,17 +610,25 @@ class CV_EXPORTS TargetArchs
static bool hasEqualOrGreaterBin(int major, int minor);
};

//! information about the given GPU.
/** @brief Class providing functionality for querying the specified GPU properties.
*/
class CV_EXPORTS DeviceInfo
{
public:
//! creates DeviceInfo object for the current GPU
DeviceInfo();

//! creates DeviceInfo object for the given GPU
/** @brief The constructors.
@param device\_id System index of the CUDA device starting with 0.
Constructs the DeviceInfo object for the specified device. If device\_id parameter is missed, it
constructs an object for the current device.
*/
DeviceInfo(int device_id);

//! device number.
/** @brief Returns system index of the CUDA device starting with 0.
*/
int deviceID() const;

//! ASCII string identifying device
Expand Down Expand Up @@ -680,10 +800,19 @@ class CV_EXPORTS DeviceInfo
size_t freeMemory() const;
size_t totalMemory() const;

//! checks whether device supports the given feature
/** @brief Provides information on CUDA feature support.
@param feature\_set Features to be checked. See cuda::FeatureSet.
This function returns true if the device has the specified CUDA feature. Otherwise, it returns false.
*/
bool supports(FeatureSet feature_set) const;

//! checks whether the CUDA module can be run on the given device
/** @brief Checks the CUDA module and device compatibility.
This function returns true if the CUDA module can be run on the specified device. Otherwise, it
returns false.
*/
bool isCompatible() const;

private:
Expand All @@ -693,7 +822,7 @@ class CV_EXPORTS DeviceInfo
CV_EXPORTS void printCudaDeviceInfo(int device);
CV_EXPORTS void printShortCudaDeviceInfo(int device);

//! @}
//! @} cuda_init

}} // namespace cv { namespace cuda {

Expand Down
5 changes: 5 additions & 0 deletions modules/core/include/opencv2/core/cuda_stream_accessor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,11 @@ namespace cv
class Stream;
class Event;

/** @brief Class that enables getting cudaStream\_t from cuda::Stream.
This is the only public header that depends on the CUDA Runtime API. Including it
brings a dependency to your code.
*/
struct StreamAccessor
{
CV_EXPORTS static cudaStream_t getStream(const Stream& stream);
Expand Down
11 changes: 11 additions & 0 deletions modules/core/include/opencv2/core/cuda_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,11 @@ namespace cv
size_t size;
};

/** @brief Structure similar to cuda::PtrStepSz but containing only a pointer and row step.
Width and height fields are excluded due to performance reasons. The structure is intended
for internal use or for users who write device code.
*/
template <typename T> struct PtrStep : public DevPtr<T>
{
__CV_CUDA_HOST_DEVICE__ PtrStep() : step(0) {}
Expand All @@ -104,6 +109,12 @@ namespace cv
__CV_CUDA_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; }
};

/** @brief Lightweight class encapsulating pitched memory on a GPU and passed to nvcc-compiled code (CUDA
kernels).
Typically, it is used internally by OpenCV and by users who write device code. You can call
its members from both host and device code.
*/
template <typename T> struct PtrStepSz : public PtrStep<T>
{
__CV_CUDA_HOST_DEVICE__ PtrStepSz() : cols(0), rows(0) {}
Expand Down
Loading

0 comments on commit ceb6e8b

Please sign in to comment.