Skip to content

Commit

Permalink
Add dft and gemm to ocl module, using AMD's clAmdFft and clAmdBlas li…
Browse files Browse the repository at this point in the history
…braries
  • Loading branch information
bitwangyaoyao committed Aug 7, 2012
1 parent 7741d58 commit c03ac12
Show file tree
Hide file tree
Showing 9 changed files with 710 additions and 3 deletions.
9 changes: 9 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,9 @@ OCV_OPTION(WITH_XIMEA "Include XIMEA cameras support" OFF
OCV_OPTION(WITH_XINE "Include Xine support (GPL)" OFF IF (UNIX AND NOT APPLE AND NOT ANDROID) )
OCV_OPTION(WITH_CLP "Include Clp support (EPL)" OFF)
OCV_OPTION(WITH_OPENCL "Include OpenCL Runtime support" OFF IF (NOT ANDROID AND NOT IOS) )
OCV_OPTION(WITH_OPENCLAMDFFT "Include AMD OpenCL FFT library support" OFF IF (NOT ANDROID AND NOT IOS) )
OCV_OPTION(WITH_OPENCLAMDBLAS "Include AMD OpenCL BLAS library support" OFF IF (NOT ANDROID AND NOT IOS) )


# OpenCV build components
# ===================================================
Expand Down Expand Up @@ -396,6 +399,12 @@ if(WITH_OPENCL)
if(OPENCL_FOUND)
set(HAVE_OPENCL 1)
endif()
if(WITH_OPENCLAMDFFT)
set(HAVE_CLAMDFFT 1)
endif()
if(WITH_OPENCLAMDBLAS)
set(HAVE_CLAMDBLAS 1)
endif()
endif()

# ----------------------------------------------------------------------------
Expand Down
15 changes: 13 additions & 2 deletions cmake/OpenCVDetectOpenCL.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,19 @@ if(APPLE)
set(OPENCL_FOUND YES)
set(OPENCL_LIBRARIES "-framework OpenCL")
else()
find_package(OpenCL QUIET)

#find_package(OpenCL QUIET)
if(WITH_OPENCLAMDFFT)
find_path(CLAMDFFT_INCLUDE_DIR
NAMES clAmdFft.h)
find_library(CLAMDFFT_LIBRARIES
NAMES clAmdFft.Runtime)
endif()
if(WITH_OPENCLAMDBLAS)
find_path(CLAMDBLAS_INCLUDE_DIR
NAMES clAmdBlas.h)
find_library(CLAMDBLAS_LIBRARIES
NAMES clAmdBlas)
endif()
# Try AMD/ATI Stream SDK
if (NOT OPENCL_FOUND)
set(ENV_AMDSTREAMSDKROOT $ENV{AMDAPPSDKROOT})
Expand Down
6 changes: 6 additions & 0 deletions cmake/templates/cvconfig.h.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,12 @@
/* OpenCL Support */
#cmakedefine HAVE_OPENCL

/* AMD's OpenCL Fast Fourier Transform Library*/
#cmakedefine HAVE_CLAMDFFT

/* AMD's Basic Linear Algebra Subprograms Library*/
#cmakedefine HAVE_CLAMDBLAS

/* NVidia Cuda Fast Fourier Transform (FFT) API*/
#cmakedefine HAVE_CUFFT

Expand Down
8 changes: 8 additions & 0 deletions modules/ocl/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,14 @@ if (HAVE_OPENCL)
if(OPENCL_INCLUDE_DIR)
ocv_include_directories(${OPENCL_INCLUDE_DIR})
endif()
if (HAVE_CLAMDFFT)
set(ocl_link_libs ${ocl_link_libs} ${CLAMDFFT_LIBRARIES})
ocv_include_directories(${CLAMDFFT_INCLUDE_DIR})
endif()
if (HAVE_CLAMDBLAS)
set(ocl_link_libs ${ocl_link_libs} ${CLAMDBLAS_LIBRARIES})
ocv_include_directories(${CLAMDBLAS_INCLUDE_DIR})
endif()
endif()

ocv_set_module_sources(
Expand Down
30 changes: 29 additions & 1 deletion modules/ocl/include/opencv2/ocl/ocl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -894,7 +894,35 @@ namespace cv
// Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
CV_EXPORTS void matchTemplate(const oclMat& image, const oclMat& templ, oclMat& result, int method, MatchTemplateBuf& buf);


#ifdef HAVE_CLAMDFFT
///////////////////////////////////////// clAmdFft related /////////////////////////////////////////
// the two functions must be called before/after run any fft library functions.
CV_EXPORTS void fft_setup(); // this will be implicitly invoked
CV_EXPORTS void fft_teardown(); // you need to teardown fft library manually

/////////////////////////////////////// DFT /////////////////////////////////////////////////////
//! Performs a forward or inverse discrete Fourier transform (1D or 2D) of floating point matrix.
//! Param dft_size is the size of DFT transform.
//!
//! For complex-to-real transform it is assumed that the source matrix is packed in CLFFT's format.
// support src type of CV32FC1, CV32FC2
// support flags: DFT_INVERSE, DFT_REAL_OUTPUT, DFT_COMPLEX_OUTPUT, DFT_ROWS
// dft_size is the size of original input, which is used for transformation from complex to real.
// dft_size must be powers of 2, 3 and 5
// real to complex dft requires at least v1.8 clAmdFft
// real to complex dft output is not the same with cpu version
// real to complex and complex to real does not support DFT_ROWS
CV_EXPORTS void dft(const oclMat& src, oclMat& dst, Size dft_size = Size(0, 0), int flags = 0);
#endif // HAVE_CLAMDFFT

#ifdef HAVE_CLAMDBLAS
//! implements generalized matrix product algorithm GEMM from BLAS
// The functionality requires clAmdBlas library
// only support type CV_32FC1
// flag GEMM_3_T is not supported
CV_EXPORTS void gemm(const oclMat& src1, const oclMat& src2, double alpha,
const oclMat& src3, double beta, oclMat& dst, int flags = 0);
#endif

}
}
Expand Down
Loading

0 comments on commit c03ac12

Please sign in to comment.