From d8c5dfc852363b1847ab781f178a1d51e0c3ac49 Mon Sep 17 00:00:00 2001 From: Raaj Date: Mon, 22 Jan 2018 21:56:09 -0500 Subject: [PATCH] Alpha CPU_ONLY version (#394) * Converted Caffe to submodule git * added support for display of .hpp files in qtcreator * added function to display as a string array size * added logging code * added a caffeutil function class to display blob size * wip resize cpu * implemented resizeAndMerge cpu * completion of cpu nms and resize functions * added warp affine function. but its very slow so kept commented * WIP. Intel Caffe compile * wip. cpu version * wip. intel mkl * Bug fix revert cpu gpu on resize * NMS Border Patrol * Bug fix for cmake to check if caffe pulled * hands extractor cpu * Face extractor cpu made to work * Removed caffeutil * changed array toString function * cleanup * added explanation for logic nms * update installation text for cpu prelim * Removed duplicated `if (BUILD_DOCS)` * Removed duplicated include(cmake/Utils.cmake) * Recovered deleted text from OP master branch * Removed duplicated `find_package(CuDNN)` * Update CMakeLists.txt * Update CMakeLists.txt * OpenCL not available yet (removed) * Updated spacing * Recovering original format * Fixed typo * Fixed messages bug * Fixed message bug * printSize working on Windows * updates to doc on cpu version * OpenMP only on CPU_ONLY. Cout override, std::memcpy instead of memcpy * additional fixes to profiler cmake. change to std::copy * Fixed pthread leftovers * Fixed minor typos and format * NMS remove if 18, resize convert to smart pointer * update of mkl lib for new mkl version * Fix to select cpu only if no cuda available * Some updates to the Doc for CPU Version * moved ostream to cpp. remove mkl from util * Add gines CMake * remove cpu doc (temp) * add try catch to print blocks * move cout overload outside class * Bug fix to cout overload. 
Not possible to use error function as its outside scope * More clean cout and fixed typo --- doc/installation_cmake.md | 11 +- examples/openpose/CMakeLists.txt | 4 +- examples/tests/CMakeLists.txt | 3 +- examples/tutorial_add_module/CMakeLists.txt | 16 +- examples/tutorial_pose/CMakeLists.txt | 4 +- examples/tutorial_thread/CMakeLists.txt | 4 +- examples/tutorial_wrapper/CMakeLists.txt | 4 +- include/openpose/core/array.hpp | 115 +++++++---- include/openpose/core/common.hpp | 5 +- include/openpose/core/datum.hpp | 6 + include/openpose/core/macros.hpp | 28 +-- include/openpose/core/point.hpp | 11 +- include/openpose/core/rectangle.hpp | 8 +- src/openpose/CMakeLists.txt | 8 +- src/openpose/core/CMakeLists.txt | 9 +- src/openpose/core/array.cpp | 24 ++- src/openpose/core/maximumBase.cpp | 80 ++++---- src/openpose/core/netCaffe.cpp | 12 +- src/openpose/core/nmsBase.cpp | 200 +++++++++++++++----- src/openpose/core/point.cpp | 2 +- src/openpose/core/rectangle.cpp | 1 - src/openpose/core/resizeAndMergeBase.cpp | 133 +++++++++---- src/openpose/face/CMakeLists.txt | 6 +- src/openpose/face/faceExtractorCaffe.cpp | 47 +++-- src/openpose/hand/CMakeLists.txt | 6 +- src/openpose/hand/handExtractorCaffe.cpp | 47 +++-- src/openpose/pose/CMakeLists.txt | 6 +- src/openpose/pose/poseExtractorCaffe.cpp | 10 +- src/openpose/utilities/CMakeLists.txt | 7 +- 29 files changed, 559 insertions(+), 258 deletions(-) diff --git a/doc/installation_cmake.md b/doc/installation_cmake.md index c03a1f27d..057c698f4 100644 --- a/doc/installation_cmake.md +++ b/doc/installation_cmake.md @@ -10,11 +10,11 @@ OpenPose - Installation using CMake 6. [Uninstallation](#uninstallation) 7. [Optional Settings](#optional-settings) 1. [MPI Model](#mpi-model) - 2. [Custom Caffe (Ubuntu Only)](#custom-caffe-ubuntu-only) - 3. [Custom OpenCV (Ubuntu Only)](#custom-opencv-ubuntu-only) - 4. [OpenPose 3D Reconstruction Demo (Windows Only)](#openpose-3d-reconstruction-demo-windows-only) - 5. 
[Doxygen Documentation Autogeneration (Ubuntu Only)](#doxygen-documentation-autogeneration-ubuntu-only) - 6. [CMake Command Line Configuration (Ubuntu Only)](#cmake-command-line-configuration-ubuntu-only) + 3. [Custom Caffe (Ubuntu Only)](#custom-caffe-ubuntu-only) + 4. [Custom OpenCV (Ubuntu Only)](#custom-opencv-ubuntu-only) + 5. [OpenPose 3D Reconstruction Demo (Windows Only)](#openpose-3d-reconstruction-demo-windows-only) + 6. [Doxygen Documentation Autogeneration (Ubuntu Only)](#doxygen-documentation-autogeneration-ubuntu-only) + 7. [CMake Command Line Configuration (Ubuntu Only)](#cmake-command-line-configuration-ubuntu-only) @@ -216,3 +216,4 @@ If Caffe is not already present but OpenCV is, then use the below command. ```bash cmake -DOpenCV_DIR=/home/"${USER}"/softwares/opencv/build ``` + diff --git a/examples/openpose/CMakeLists.txt b/examples/openpose/CMakeLists.txt index 2a4a7c134..905630f9c 100644 --- a/examples/openpose/CMakeLists.txt +++ b/examples/openpose/CMakeLists.txt @@ -13,7 +13,7 @@ foreach(EXAMPLE_FILE ${EXAMPLE_FILES}) message(STATUS "Adding Example ${EXE_NAME}") add_executable(${EXE_NAME} ${EXAMPLE_FILE}) - target_link_libraries( ${EXE_NAME} openpose ${GLOG_LIBRARY} ${GFLAGS_LIBRARY} ${Caffe_LIBS}) + target_link_libraries( ${EXE_NAME} openpose ${GLOG_LIBRARY} ${GFLAGS_LIBRARY} ${Caffe_LIBS} ${MKL_LIBS}) if (WIN32) set_property(TARGET ${EXE_NAME} PROPERTY FOLDER "Examples") @@ -21,4 +21,4 @@ foreach(EXAMPLE_FILE ${EXAMPLE_FILES}) ${CMAKE_CURRENT_BINARY_DIR}/${EXE_NAME}.vcxproj.user @ONLY) endif (WIN32) -endforeach() \ No newline at end of file +endforeach() diff --git a/examples/tests/CMakeLists.txt b/examples/tests/CMakeLists.txt index 7e9df8758..f1f67edda 100644 --- a/examples/tests/CMakeLists.txt +++ b/examples/tests/CMakeLists.txt @@ -13,7 +13,7 @@ foreach(EXAMPLE_FILE ${EXAMPLE_FILES}) message(STATUS "Adding Example ${EXE_NAME}") add_executable(${EXE_NAME} ${EXAMPLE_FILE}) - target_link_libraries( ${EXE_NAME} openpose ${GLOG_LIBRARY} 
${GFLAGS_LIBRARY} ${Caffe_LIBS}) + target_link_libraries( ${EXE_NAME} openpose ${GLOG_LIBRARY} ${GFLAGS_LIBRARY} ${Caffe_LIBS} ${MKL_LIBS}) if (WIN32) set_property(TARGET ${EXE_NAME} PROPERTY FOLDER "Examples/Tutorial/Tests") @@ -22,3 +22,4 @@ foreach(EXAMPLE_FILE ${EXAMPLE_FILES}) endif (WIN32) endforeach() + diff --git a/examples/tutorial_add_module/CMakeLists.txt b/examples/tutorial_add_module/CMakeLists.txt index bd6305ef1..3fb74a359 100644 --- a/examples/tutorial_add_module/CMakeLists.txt +++ b/examples/tutorial_add_module/CMakeLists.txt @@ -1,4 +1,4 @@ -set(EXAMPLE_FILES +set(EXAMPLE_FILES 1_custom_post_processing.cpp) include(${CMAKE_SOURCE_DIR}/cmake/Utils.cmake) @@ -6,7 +6,7 @@ include(${CMAKE_SOURCE_DIR}/cmake/Utils.cmake) foreach(EXAMPLE_FILE ${EXAMPLE_FILES}) get_filename_component(SOURCE_NAME ${EXAMPLE_FILE} NAME_WE) - + if (UNIX AND NOT APPLE) set(EXE_NAME "${SOURCE_NAME}.bin") elseif (WIN32) @@ -15,13 +15,13 @@ foreach(EXAMPLE_FILE ${EXAMPLE_FILES}) message(STATUS "Adding Example ${EXE_NAME}") add_executable(${EXE_NAME} ${EXAMPLE_FILE}) - target_link_libraries( ${EXE_NAME} openpose ${GLOG_LIBRARY} ${GFLAGS_LIBRARY} - ${Caffe_LIBS}) - + target_link_libraries( ${EXE_NAME} openpose ${GLOG_LIBRARY} ${GFLAGS_LIBRARY} + ${Caffe_LIBS} ${MKL_LIBS}) + if (WIN32) set_property(TARGET ${EXE_NAME} PROPERTY FOLDER "Examples/Tutorial/AddModule") - configure_file(${CMAKE_SOURCE_DIR}/cmake/OpenPose.vcxproj.user - ${CMAKE_CURRENT_BINARY_DIR}/${EXE_NAME}.vcxproj.user @ONLY) + configure_file(${CMAKE_SOURCE_DIR}/cmake/OpenPose.vcxproj.user + ${CMAKE_CURRENT_BINARY_DIR}/${EXE_NAME}.vcxproj.user @ONLY) endif (WIN32) -endforeach() \ No newline at end of file +endforeach() diff --git a/examples/tutorial_pose/CMakeLists.txt b/examples/tutorial_pose/CMakeLists.txt index 71a285d9b..f5cf0a66b 100644 --- a/examples/tutorial_pose/CMakeLists.txt +++ b/examples/tutorial_pose/CMakeLists.txt @@ -14,7 +14,7 @@ foreach(EXAMPLE_FILE ${EXAMPLE_FILES}) message(STATUS "Adding Example 
${EXE_NAME}") add_executable(${EXE_NAME} ${EXAMPLE_FILE}) - target_link_libraries(${EXE_NAME} openpose ${GLOG_LIBRARY} ${GFLAGS_LIBRARY} ${Caffe_LIBS}) + target_link_libraries(${EXE_NAME} openpose ${GLOG_LIBRARY} ${GFLAGS_LIBRARY} ${Caffe_LIBS} ${MKL_LIBS}) if (WIN32) set_property(TARGET ${EXE_NAME} PROPERTY FOLDER "Examples/Tutorial/Pose") @@ -22,4 +22,4 @@ foreach(EXAMPLE_FILE ${EXAMPLE_FILES}) ${CMAKE_CURRENT_BINARY_DIR}/${EXE_NAME}.vcxproj.user @ONLY) endif (WIN32) -endforeach() \ No newline at end of file +endforeach() diff --git a/examples/tutorial_thread/CMakeLists.txt b/examples/tutorial_thread/CMakeLists.txt index df4ac8c52..e6a620700 100644 --- a/examples/tutorial_thread/CMakeLists.txt +++ b/examples/tutorial_thread/CMakeLists.txt @@ -16,7 +16,7 @@ foreach(EXAMPLE_FILE ${EXAMPLE_FILES}) message(STATUS "Adding Example ${EXE_NAME}") add_executable(${EXE_NAME} ${EXAMPLE_FILE}) - target_link_libraries( ${EXE_NAME} openpose ${GLOG_LIBRARY} ${GFLAGS_LIBRARY} ${Caffe_LIBS}) + target_link_libraries( ${EXE_NAME} openpose ${GLOG_LIBRARY} ${GFLAGS_LIBRARY} ${Caffe_LIBS} ${MKL_LIBS}) if (WIN32) set_property(TARGET ${EXE_NAME} PROPERTY FOLDER "Examples/Tutorial/Thread") @@ -24,4 +24,4 @@ foreach(EXAMPLE_FILE ${EXAMPLE_FILES}) ${CMAKE_CURRENT_BINARY_DIR}/${EXE_NAME}.vcxproj.user @ONLY) endif (WIN32) -endforeach() \ No newline at end of file +endforeach() diff --git a/examples/tutorial_wrapper/CMakeLists.txt b/examples/tutorial_wrapper/CMakeLists.txt index 84feb1033..9c1e10411 100644 --- a/examples/tutorial_wrapper/CMakeLists.txt +++ b/examples/tutorial_wrapper/CMakeLists.txt @@ -17,7 +17,7 @@ foreach(EXAMPLE_FILE ${EXAMPLE_FILES}) message(STATUS "Adding Example ${EXE_NAME}") add_executable(${EXE_NAME} ${EXAMPLE_FILE}) - target_link_libraries( ${EXE_NAME} openpose ${GLOG_LIBRARY} ${GFLAGS_LIBRARY} ${Caffe_LIBS}) + target_link_libraries( ${EXE_NAME} openpose ${GLOG_LIBRARY} ${GFLAGS_LIBRARY} ${Caffe_LIBS} ${MKL_LIBS}) if (WIN32) set_property(TARGET ${EXE_NAME} PROPERTY 
FOLDER "Examples/Tutorial/Wrapper") @@ -25,4 +25,4 @@ foreach(EXAMPLE_FILE ${EXAMPLE_FILES}) ${CMAKE_CURRENT_BINARY_DIR}/${EXE_NAME}.vcxproj.user @ONLY) endif (WIN32) -endforeach() \ No newline at end of file +endforeach() diff --git a/include/openpose/core/array.hpp b/include/openpose/core/array.hpp index 2d478b7ca..62e65fea6 100644 --- a/include/openpose/core/array.hpp +++ b/include/openpose/core/array.hpp @@ -4,40 +4,44 @@ #include // std::shared_ptr #include #include // cv::Mat +#include namespace op { /** * Array: The OpenPose Basic Raw Data Container - * This template class implements a multidimensional data array. It is our basic data container, analogous to cv::Mat in OpenCV, Tensor in - * Torch/TensorFlow or Blob in Caffe. - * It wraps a cv::Mat and a std::shared_ptr, both of them pointing to the same raw data. I.e. they both share the same memory, so we can read - * and modify this data in both formats with no performance impact. + * This template class implements a multidimensional data array. It is our basic data container, analogous to + * cv::Mat in OpenCV, Tensor in Torch/TensorFlow or Blob in Caffe. + * It wraps a cv::Mat and a std::shared_ptr, both of them pointing to the same raw data. I.e. they both share the + * same memory, so we can read and modify this data in both formats with no performance impact. * Hence, it keeps high performance while adding high-level functions. */ template class Array { public: - // -------------------------------------------------- Constructors and Data Allocator Functions -------------------------------------------------- // + // ------------------------------ Constructors and Data Allocator Functions ------------------------------ // /** * Array constructor. * Equivalent to default constructor + reset(const int size). - * @param size Integer with the number of T element to be allocated. E.g. size = 5 is internally similar to: new T[5]. + * @param size Integer with the number of T element to be allocated. E.g. 
size = 5 is internally similar to + * `new T[5]`. */ explicit Array(const int size); /** * Array constructor. * Equivalent to default constructor + reset(const std::vector& size = {}). - * @param sizes Vector with the size of each dimension. E.g. size = {3, 5, 2} is internally similar to: new T[3*5*2]. + * @param sizes Vector with the size of each dimension. E.g. size = {3, 5, 2} is internally similar to + * `new T[3*5*2]`. */ explicit Array(const std::vector& sizes = {}); /** * Array constructor. * Equivalent to default constructor + reset(const int size, const T value). - * @param size Integer with the number of T element to be allocated. E.g. size = 5 is internally similar to: new T[5]. + * @param size Integer with the number of T element to be allocated. E.g. size = 5 is internally similar to + * `new T[5]`. * @param value Initial value for each component of the Array. */ Array(const int size, const T value); @@ -45,14 +49,16 @@ namespace op /** * Array constructor. * Equivalent to default constructor + reset(const std::vector& size, const T value). - * @param sizes Vector with the size of each dimension. E.g. size = {3, 5, 2} is internally similar to: new T[3*5*2]. + * @param sizes Vector with the size of each dimension. E.g. size = {3, 5, 2} is internally similar to: + * `new T[3*5*2]`. * @param value Initial value for each component of the Array. */ Array(const std::vector& sizes, const T value); /** * Copy constructor. - * It performs `fast copy`: For performance purpose, copying a Array or Datum or cv::Mat just copies the reference, it still shares the same internal data. + * It performs `fast copy`: For performance purpose, copying a Array or Datum or cv::Mat just copies the + * reference, it still shares the same internal data. * Modifying the copied element will modify the original one. * Use clone() for a slower but real copy, similarly to cv::Mat and Array. * @param array Array to be copied. @@ -85,7 +91,8 @@ namespace op /** * Clone function. 
* Similar to cv::Mat::clone and Datum::clone. - * It performs a real but slow copy of the data, i.e., even if the copied element is modified, the original one is not. + * It performs a real but slow copy of the data, i.e., even if the copied element is modified, the original + * one is not. * @return The resulting Array. */ Array clone() const; @@ -93,29 +100,35 @@ namespace op /** * Data allocation function. * It allocates the required space for the memory (it does not initialize that memory). - * @param size Integer with the number of T element to be allocated. E.g. size = 5 is internally similar to: new T[5]. + * @param size Integer with the number of T element to be allocated. E.g. size = 5 is internally similar to + * `new T[5]`. */ void reset(const int size); /** * Data allocation function. - * Similar to reset(const int size), but it allocates a multi-dimensional array of dimensions each of the values of the argument. - * @param sizes Vector with the size of each dimension. E.g. size = {3, 5, 2} is internally similar to: new T[3*5*2]. + * Similar to reset(const int size), but it allocates a multi-dimensional array of dimensions each of the + * values of the argument. + * @param sizes Vector with the size of each dimension. E.g. size = {3, 5, 2} is internally similar to + * `new T[3*5*2]`. */ void reset(const std::vector& sizes = {}); /** * Data allocation function. * Similar to reset(const int size), but initializing the data to the value specified by the second argument. - * @param size Integer with the number of T element to be allocated. E.g. size = 5 is internally similar to: new T[5]. + * @param size Integer with the number of T element to be allocated. E.g. size = 5 is internally similar to + * `new T[5]`. * @param value Initial value for each component of the Array. */ void reset(const int size, const T value); /** * Data allocation function. 
- * Similar to reset(const std::vector& size), but initializing the data to the value specified by the second argument. - * @param sizes Vector with the size of each dimension. E.g. size = {3, 5, 2} is internally similar to: new T[3*5*2]. + * Similar to reset(const std::vector& size), but initializing the data to the value specified by the + * second argument. + * @param sizes Vector with the size of each dimension. E.g. size = {3, 5, 2} is internally similar to + * `new T[3*5*2]`. * @param value Initial value for each component of the Array. */ void reset(const std::vector& sizes, const T value); @@ -136,7 +149,7 @@ namespace op - // -------------------------------------------------- Data Information Functions -------------------------------------------------- // + // ------------------------------ Data Information Functions ------------------------------ // /** * Check whether memory has been allocated. * @return True if no memory has been allocated, false otherwise. @@ -148,17 +161,26 @@ namespace op /** * Return a vector with the size of each dimension allocated. - * @return A std::vector with the size of each dimension. If no memory has been allocated, it will return an empty std::vector. + * @return A std::vector with the size of each dimension. If no memory has been allocated, it will return + * an empty std::vector. */ inline std::vector getSize() const { return mSize; } + /** + * Return a string with the size of each dimension allocated. + * @return A std::string with the size of each dimension. If no memory has been allocated, it will return an + * empty string. + */ + std::string printSize() const; + /** * Return a vector with the size of the desired dimension. * @param index Dimension to check its size. - * @return Size of the desired dimension. It will return 0 if the requested dimension is higher than the number of dimensions. + * @return Size of the desired dimension.
It will return 0 if the requested dimension is higher than the number + * of dimensions. */ int getSize(const int index) const; @@ -183,14 +205,16 @@ namespace op /** * Similar to getVolume(), but in this case it just returns the volume between the desired dimensions. - * E.g. for a Array of size = {2,5,3}, the volume or total number of elements for getVolume(1,2) is: 5x3 = 15. - * @return The total volume of the allocated data between the desired dimensions. If the index are out of bounds, it throws an error. + * E.g. for a Array of size = {2,5,3}, the volume or total number of elements for getVolume(1,2) is + * 5x3 = 15. + * @return The total volume of the allocated data between the desired dimensions. If the indexes are out of + * bounds, it throws an error. */ size_t getVolume(const int indexA, const int indexB) const; - // -------------------------------------------------- Data Access Functions And Operators -------------------------------------------------- // + // ------------------------------ Data Access Functions And Operators ------------------------------ // /** * Return a raw pointer to the data. Similar to: std::shared_ptr::get(). * Note: if you modify the pointer data, you will directly modify it in the Array instance too. @@ -213,8 +237,10 @@ namespace op /** * Return a cv::Mat wrapper to the data. It forbids the data to be modified. - * OpenCV only admits unsigned char, signed char, int, float & double. If the T class is not supported by OpenCV, it will throw an error. - * Note: Array does not return an editable cv::Mat because some OpenCV functions reallocate memory and it would not longer point to the Array instance. + * OpenCV only admits unsigned char, signed char, int, float & double. If the T class is not supported by + * OpenCV, it will throw an error. + * Note: Array does not return an editable cv::Mat because some OpenCV functions reallocate memory and it + * would no longer point to the Array instance.
* If you want to perform some OpenCV operation on the Array data, you can use: * editedCvMat = array.getConstCvMat().clone(); * // modify data @@ -226,7 +252,8 @@ namespace op /** * Analogous to getConstCvMat, but in this case it returns a editable cv::Mat. * Very important: Only allowed functions which do not provoke data reallocation. - * E.g. resizing functions will not work and they would provoke an undefined behaviour and/or execution crashes. + * E.g. resizing functions will not work and they would provoke an undefined behaviour and/or execution + * crashes. * @return A cv::Mat pointing to the data. */ cv::Mat& getCvMat(); @@ -234,7 +261,8 @@ namespace op /** * [] operator * Similar to the [] operator for raw pointer data. - * If debug mode is enabled, then it will check that the desired index is in the data range, and it will throw an exception otherwise (similar to the at operator). + * If debug mode is enabled, then it will check that the desired index is in the data range, and it will throw + * an exception otherwise (similar to the at operator). * @param index The desired memory location. * @return A editable reference to the data on the desired index location. */ @@ -249,7 +277,8 @@ namespace op /** * [] operator - * Same functionality as operator[](const int index), but it forbids modifying the value. Otherwise, const functions would not be able to call the [] operator. + * Same functionality as operator[](const int index), but it forbids modifying the value. Otherwise, const + * functions would not be able to call the [] operator. * @param index The desired memory location. * @return A non-editable reference to the data on the desired index location. */ @@ -264,7 +293,8 @@ namespace op /** * [] operator - * Same functionality as operator[](const int index), but it lets the user introduce the multi-dimensional index. + * Same functionality as operator[](const int index), but it lets the user introduce the multi-dimensional + * index. * E.g. 
given a (10 x 10 x 10) array, array[11] is equivalent to array[{1,1,0}] * @param indexes Vector with the desired memory location. * @return A editable reference to the data on the desired index location. @@ -276,7 +306,8 @@ namespace op /** * [] operator - * Same functionality as operator[](const std::vector& indexes), but it forbids modifying the value. Otherwise, const functions would not be able to call the [] operator. + * Same functionality as operator[](const std::vector& indexes), but it forbids modifying the value. + * Otherwise, const functions would not be able to call the [] operator. * @param indexes Vector with the desired memory location. * @return A non-editable reference to the data on the desired index location. */ @@ -287,7 +318,8 @@ namespace op /** * at() function - * Same functionality as operator[](const int index), but it always check whether the indexes are within the data bounds. Otherwise, it will throw an error. + * Same functionality as operator[](const int index), but it always check whether the indexes are within the + * data bounds. Otherwise, it will throw an error. * @param index The desired memory location. * @return A editable reference to the data on the desired index location. */ @@ -298,7 +330,8 @@ namespace op /** * at() function - * Same functionality as operator[](const int index) const, but it always check whether the indexes are within the data bounds. Otherwise, it will throw an error. + * Same functionality as operator[](const int index) const, but it always check whether the indexes are within + * the data bounds. Otherwise, it will throw an error. * @param index The desired memory location. * @return A non-editable reference to the data on the desired index location. */ @@ -309,7 +342,8 @@ namespace op /** * at() function - * Same functionality as operator[](const std::vector& indexes), but it always check whether the indexes are within the data bounds. Otherwise, it will throw an error. 
+ * Same functionality as operator[](const std::vector& indexes), but it always check whether the indexes + * are within the data bounds. Otherwise, it will throw an error. * @param indexes Vector with the desired memory location. * @return A editable reference to the data on the desired index location. */ @@ -320,7 +354,8 @@ namespace op /** * at() function - * Same functionality as operator[](const std::vector& indexes) const, but it always check whether the indexes are within the data bounds. Otherwise, it will throw an error. + * Same functionality as operator[](const std::vector& indexes) const, but it always check whether the + * indexes are within the data bounds. Otherwise, it will throw an error. * @param indexes Vector with the desired memory location. * @return A non-editable reference to the data on the desired index location. */ @@ -350,7 +385,8 @@ namespace op std::pair mCvMatData; /** - * Auxiliar function that both operator[](const std::vector& indexes) and operator[](const std::vector& indexes) const use. + * Auxiliar function that both operator[](const std::vector& indexes) and + * operator[](const std::vector& indexes) const use. * It turn the multi-dimensions indexes into the 1-dimension equivalent index. * @param indexes Vector with the desired memory location. * @return The equivalent 1-D index. @@ -358,7 +394,8 @@ namespace op int getIndex(const std::vector& indexes) const; /** - * Similar to getIndex(const std::vector& indexes) const, but used for at(const std::vector& indexes) and at(const std::vector& indexes) const. + * Similar to getIndex(const std::vector& indexes) const, but used for at(const std::vector& indexes) + * and at(const std::vector& indexes) const. * It also checks whether the index is within the allocated memory. * @param indexes Vector with the desired memory location. * @return The equivalent 1-D index. 
@@ -373,10 +410,14 @@ namespace op T& commonAt(const int index) const; /** - * Private auxiliar function that sets the cv::Mat wrapper and makes it point to the same data than std::shared_ptr points to. + * Private auxiliar function that sets the cv::Mat wrapper and makes it point to the same data than + * std::shared_ptr points to. */ void setCvMatFromSharedPtr(); }; + + // Static methods + OVERLOAD_C_OUT(Array) } #endif // OPENPOSE_CORE_ARRAY_HPP diff --git a/include/openpose/core/common.hpp b/include/openpose/core/common.hpp index 4cec181c2..db2703035 100644 --- a/include/openpose/core/common.hpp +++ b/include/openpose/core/common.hpp @@ -8,11 +8,12 @@ #include // OpenPose most used classes #include +#include #include #include #include #include -// Macros at the end, otherwise circular dependency with array, point & rectangle -#include +// Datum at the end, otherwise circular dependency with array, point & rectangle +#include #endif // OPENPOSE_CORE_COMMON_HPP diff --git a/include/openpose/core/datum.hpp b/include/openpose/core/datum.hpp index b184c0e33..072227206 100644 --- a/include/openpose/core/datum.hpp +++ b/include/openpose/core/datum.hpp @@ -304,6 +304,12 @@ namespace op return id != datum.id; } }; + + // Defines for Datum. 
Added here rather than in `macros.hpp` to avoid circular dependencies + #define DATUM_BASE_NO_PTR std::vector + #define DATUM_BASE std::shared_ptr + #define DEFINE_TEMPLATE_DATUM(templateName) template class OP_API templateName + #define COMPILE_TEMPLATE_DATUM(templateName) extern DEFINE_TEMPLATE_DATUM(templateName) } #endif // OPENPOSE_CORE_DATUM_HPP diff --git a/include/openpose/core/macros.hpp b/include/openpose/core/macros.hpp index 0e4ee35ab..1b21b162c 100644 --- a/include/openpose/core/macros.hpp +++ b/include/openpose/core/macros.hpp @@ -1,6 +1,10 @@ #ifndef OPENPOSE_CORE_MACROS_HPP #define OPENPOSE_CORE_MACROS_HPP +#include // std::shared_ptr +#include +#include + #ifndef _WIN32 #define OP_API #elif defined OP_EXPORTS @@ -15,11 +19,6 @@ #pragma warning( disable: 4275 ) // non dll-interface structXXX used as base #endif -#define DATUM_BASE_NO_PTR std::vector -#define DATUM_BASE std::shared_ptr -#define DEFINE_TEMPLATE_DATUM(templateName) template class OP_API templateName -#define COMPILE_TEMPLATE_DATUM(templateName) extern DEFINE_TEMPLATE_DATUM(templateName) - #define UNUSED(unusedVariable) (void)(unusedVariable) #define DELETE_COPY(className) \ @@ -45,6 +44,17 @@ template classType OP_API className; \ template classType OP_API className +/** + * cout operator overload calling toString() function + * @return std::ostream containing output from toString() + */ +#define OVERLOAD_C_OUT(className) \ + template std::ostream &operator<<(std::ostream& ostream, const op::className& obj) \ + { \ + ostream << obj.toString(); \ + return ostream; \ + } + // Instantiate a class with float and double specifications #define COMPILE_TEMPLATE_FLOATING_TYPES_CLASS(className) COMPILE_TEMPLATE_FLOATING_TYPES(className, class) #define COMPILE_TEMPLATE_FLOATING_TYPES_STRUCT(className) COMPILE_TEMPLATE_FLOATING_TYPES(className, struct) @@ -65,12 +75,4 @@ namespace boost template class shared_ptr; // E.g., boost::shared_ptr> } -// Includes at the end, since this macros class does 
not need them, but the files that call this -// file. However, keeping the files at the beginning might create a circular include linking problem. -#include // std::shared_ptr -#include -#include -#include -#include - #endif // OPENPOSE_CORE_MACROS_HPP diff --git a/include/openpose/core/point.hpp b/include/openpose/core/point.hpp index 55f62000e..f5486dade 100644 --- a/include/openpose/core/point.hpp +++ b/include/openpose/core/point.hpp @@ -2,6 +2,7 @@ #define OPENPOSE_CORE_POINT_HPP #include +#include namespace op { @@ -15,7 +16,8 @@ namespace op /** * Copy constructor. - * It performs `fast copy`: For performance purpose, copying a Point or Point or cv::Mat just copies the reference, it still shares the same internal data. + * It performs `fast copy`: For performance purpose, copying a Point or Point or cv::Mat just copies the + * reference, it still shares the same internal data. * Modifying the copied element will modify the original one. * Use clone() for a slower but real copy, similarly to cv::Mat and Point. * @param point Point to be copied. @@ -61,7 +63,7 @@ namespace op - // -------------------------------------------------- Comparison operators -------------------------------------------------- // + // ------------------------------ Comparison operators ------------------------------ // /** * Less comparison operator. * @param point Point to be compared. 
@@ -126,7 +128,7 @@ namespace op - // -------------------------------------------------- Basic Operators -------------------------------------------------- // + // ------------------------------ Basic Operators ------------------------------ // Point& operator+=(const Point& point); Point operator+(const Point& point) const; @@ -151,6 +153,9 @@ namespace op Point operator/(const T value) const; }; + + // Static methods + OVERLOAD_C_OUT(Point) } #endif // OPENPOSE_CORE_POINT_HPP diff --git a/include/openpose/core/rectangle.hpp b/include/openpose/core/rectangle.hpp index 9887f847e..b5ffe0a5e 100644 --- a/include/openpose/core/rectangle.hpp +++ b/include/openpose/core/rectangle.hpp @@ -2,6 +2,7 @@ #define OPENPOSE_CORE_RECTANGLE_HPP #include +#include #include namespace op @@ -18,7 +19,8 @@ namespace op /** * Copy constructor. - * It performs `fast copy`: For performance purpose, copying a Rectangle or Datum or cv::Mat just copies the reference, it still shares the same internal data. + * It performs `fast copy`: For performance purpose, copying a Rectangle or Datum or cv::Mat just copies + * the reference, it still shares the same internal data. * Modifying the copied element will modify the original one. * Use clone() for a slower but real copy, similarly to cv::Mat and Rectangle. * @param rectangle Rectangle to be copied. 
@@ -71,7 +73,7 @@ namespace op */ std::string toString() const; - // -------------------------------------------------- Basic Operators -------------------------------------------------- // + // ------------------------------ Basic Operators ------------------------------ // Rectangle& operator*=(const T value); Rectangle operator*(const T value) const; @@ -84,6 +86,8 @@ namespace op // Static methods template Rectangle recenter(const Rectangle& rectangle, const T newWidth, const T newHeight); + + OVERLOAD_C_OUT(Rectangle) } #endif // OPENPOSE_CORE_RECTANGLE_HPP diff --git a/src/openpose/CMakeLists.txt b/src/openpose/CMakeLists.txt index a32c516c6..576e83317 100644 --- a/src/openpose/CMakeLists.txt +++ b/src/openpose/CMakeLists.txt @@ -16,12 +16,16 @@ foreach (FILE ${OP_HEADERS_UNFILTERED}) endif () endforeach(FILE ${OP_HEADERS_UNFILTERED}) -cuda_add_library(openpose ${SOURCES_OPENPOSE} ${OP_HEADERS}) +if (${GPU_MODE} MATCHES "CUDA") + cuda_add_library(openpose ${SOURCES_OPENPOSE} ${OP_HEADERS}) +else() + add_library(openpose ${SOURCES_OPENPOSE} ${OP_HEADERS}) +endif () # Ubuntu if(UNIX AND NOT APPLE) target_link_libraries(openpose ${OpenCV_LIBS} ${Caffe_LIBS} - ${GFLAGS_LIBRARY} ${GLOG_LIBRARY}) + ${GFLAGS_LIBRARY} ${GLOG_LIBRARY} ${MKL_LIBS}) if (CMAKE_COMPILER_IS_GNUCXX) foreach (SUB_DIR ${SUB_DIRS}) set_target_properties(openpose_${SUB_DIR} PROPERTIES COMPILE_FLAGS ${OP_CXX_FLAGS}) diff --git a/src/openpose/core/CMakeLists.txt b/src/openpose/core/CMakeLists.txt index 28fe32ff8..505f353a1 100644 --- a/src/openpose/core/CMakeLists.txt +++ b/src/openpose/core/CMakeLists.txt @@ -28,10 +28,15 @@ set(SOURCES_OP_CORE_WITH_CP ${SOURCES_OP_CORE_WITH_CP} PARENT_SCOPE) set(SOURCES_OPENPOSE ${SOURCES_OPENPOSE} ${SOURCES_OP_CORE_WITH_CP} PARENT_SCOPE) if (UNIX AND NOT APPLE) - cuda_add_library(openpose_core ${SOURCES_OP_CORE}) + if (${GPU_MODE} MATCHES "CUDA") + cuda_add_library(openpose_core ${SOURCES_OP_CORE}) + else() + add_library(openpose_core ${SOURCES_OP_CORE}) + 
endif () + add_library(caffe SHARED IMPORTED) set_property(TARGET caffe PROPERTY IMPORTED_LOCATION ${Caffe_LIBS}) - target_link_libraries(openpose_core caffe) + target_link_libraries(openpose_core caffe ${MKL_LIBS}) if (BUILD_CAFFE) add_dependencies(openpose_core openpose_caffe) diff --git a/src/openpose/core/array.cpp b/src/openpose/core/array.cpp index e40619edf..97492a74f 100644 --- a/src/openpose/core/array.cpp +++ b/src/openpose/core/array.cpp @@ -1,6 +1,5 @@ #include // typeid #include // std::accumulate -#include #include #include @@ -396,6 +395,29 @@ namespace op } } + template + std::string Array::printSize() const + { + try + { + auto counter = 0u; + std::string sizeString = "[ "; + for (const auto& i : mSize) + { + sizeString += std::to_string(i); + if (++counter < mSize.size()) + sizeString += " x "; + } + sizeString += " ]"; + return sizeString; + } + catch (const std::exception& e) + { + error(e.what(), __LINE__, __FUNCTION__, __FILE__); + return ""; + } + } + template int Array::getIndex(const std::vector& indexes) const { diff --git a/src/openpose/core/maximumBase.cpp b/src/openpose/core/maximumBase.cpp index eded94480..38908ac16 100644 --- a/src/openpose/core/maximumBase.cpp +++ b/src/openpose/core/maximumBase.cpp @@ -9,51 +9,45 @@ namespace op { try { - UNUSED(targetPtr); - UNUSED(sourcePtr); - UNUSED(targetSize); - UNUSED(sourceSize); - error("CPU version not completely implemented.", __LINE__, __FUNCTION__, __FILE__); - // // TODO: ideally done, try, debug & compare to *.cu - // TODO: (maybe): remove thrust dependencies for computers without CUDA? 
- // const auto height = sourceSize[2]; - // const auto width = sourceSize[3]; - // const auto imageOffset = height * width; - // const auto num = targetSize[0]; - // const auto channels = targetSize[1]; - // const auto numberParts = targetSize[2]; - // const auto numberSubparts = targetSize[3]; + const auto height = sourceSize[2]; + const auto width = sourceSize[3]; + const auto imageOffset = height * width; + const auto num = targetSize[0]; + const auto channels = targetSize[1]; + const auto numberParts = targetSize[2]; + const auto numberSubparts = targetSize[3]; + + // log("sourceSize[0]: " + std::to_string(sourceSize[0])); // = 1 + // log("sourceSize[1]: " + std::to_string(sourceSize[1])); // = #body_parts+bck=22(hands) or 71(face) + // log("sourceSize[2]: " + std::to_string(sourceSize[2])); // = 368 = height + // log("sourceSize[3]: " + std::to_string(sourceSize[3])); // = 368 = width + // log("targetSize[0]: " + std::to_string(targetSize[0])); // = 1 + // log("targetSize[1]: " + std::to_string(targetSize[1])); // = 1 + // log("targetSize[2]: " + std::to_string(targetSize[2])); // = 21(hands) or 70 (face) + // log("targetSize[3]: " + std::to_string(targetSize[3])); // = 3 = [x, y, score] + // log(" "); - // // log("sourceSize[0]: " + std::to_string(sourceSize[0])); // = 1 - // // log("sourceSize[1]: " + std::to_string(sourceSize[1])); // = #body_parts+bck=22(hands) or 71(face) - // // log("sourceSize[2]: " + std::to_string(sourceSize[2])); // = 368 = height - // // log("sourceSize[3]: " + std::to_string(sourceSize[3])); // = 368 = width - // // log("targetSize[0]: " + std::to_string(targetSize[0])); // = 1 - // // log("targetSize[1]: " + std::to_string(targetSize[1])); // = 1 - // // log("targetSize[2]: " + std::to_string(targetSize[2])); // = 21(hands) or 70 (face) - // // log("targetSize[3]: " + std::to_string(targetSize[3])); // = 3 = [x, y, score] - // // log(" "); - // for (auto n = 0; n < num; n++) - // { - // for (auto c = 0; c < channels; c++) - // { 
- // // // Parameters - // const auto offsetChannel = (n * channels + c); - // for (auto part = 0; part < numberParts; part++) - // { - // auto* targetPtrOffsetted = targetPtr + (offsetChannel + part) * numberSubparts; - // const auto* const sourcePtrOffsetted = sourcePtr + (offsetChannel + part) * imageOffset; - // // Option a - 6.3 fps - // const auto sourceIndexIterator = thrust::max_element(thrust::host, sourcePtrOffsetted, - // sourcePtrOffsetted + imageOffset); - // const auto sourceIndex = (int)(sourceIndexIterator - sourcePtrOffsetted); - // targetPtrOffsetted[0] = sourceIndex % width; - // targetPtrOffsetted[1] = sourceIndex / width; - // targetPtrOffsetted[2] = sourcePtrOffsetted[sourceIndex]; - // } - // } - // } + for (auto n = 0; n < num; n++) + { + for (auto c = 0; c < channels; c++) + { + // Parameters + const auto offsetChannel = (n * channels + c); + for (auto part = 0; part < numberParts; part++) + { + auto* targetPtrOffsetted = targetPtr + (offsetChannel + part) * numberSubparts; + const auto* const sourcePtrOffsetted = sourcePtr + (offsetChannel + part) * imageOffset; + cv::Mat source(cv::Size(width, height), CV_32FC1, const_cast(sourcePtrOffsetted)); + double minVal, maxVal; + cv::Point minLoc, maxLoc; + cv::minMaxLoc(source, &minVal, &maxVal, &minLoc, &maxLoc); + targetPtrOffsetted[0] = maxLoc.x; + targetPtrOffsetted[1] = maxLoc.y; + targetPtrOffsetted[2] = maxVal; + } + } + } } catch (const std::exception& e) { diff --git a/src/openpose/core/netCaffe.cpp b/src/openpose/core/netCaffe.cpp index 00fd4c30a..1452c41f2 100644 --- a/src/openpose/core/netCaffe.cpp +++ b/src/openpose/core/netCaffe.cpp @@ -64,7 +64,9 @@ namespace op { caffeNet->blobs()[0]->Reshape(dimensions); caffeNet->Reshape(); - cudaCheck(__LINE__, __FUNCTION__, __FILE__); + #ifdef USE_CUDA + cudaCheck(__LINE__, __FUNCTION__, __FILE__); + #endif } catch (const std::exception& e) { @@ -116,13 +118,17 @@ namespace op #endif upImpl->upCaffeNet.reset(new caffe::Net{upImpl->mCaffeProto, 
caffe::TEST}); upImpl->upCaffeNet->CopyTrainedLayersFrom(upImpl->mCaffeTrainedModel); - cudaCheck(__LINE__, __FUNCTION__, __FILE__); + #ifdef USE_CUDA + cudaCheck(__LINE__, __FUNCTION__, __FILE__); + #endif // Set spOutputBlob upImpl->spOutputBlob = upImpl->upCaffeNet->blob_by_name(upImpl->mLastBlobName); if (upImpl->spOutputBlob == nullptr) error("The output blob is a nullptr. Did you use the same name than the prototxt? (Used: " + upImpl->mLastBlobName + ").", __LINE__, __FUNCTION__, __FILE__); - cudaCheck(__LINE__, __FUNCTION__, __FILE__); + #ifdef USE_CUDA + cudaCheck(__LINE__, __FUNCTION__, __FILE__); + #endif #endif } catch (const std::exception& e) diff --git a/src/openpose/core/nmsBase.cpp b/src/openpose/core/nmsBase.cpp index db9995a32..74b4a3b0f 100644 --- a/src/openpose/core/nmsBase.cpp +++ b/src/openpose/core/nmsBase.cpp @@ -1,58 +1,164 @@ #include +#include namespace op { template - void nmsCpu(T* targetPtr, int* kernelPtr, const T* const sourcePtr, const T threshold, const std::array& targetSize, const std::array& sourceSize) + void nmsRegisterKernelCPU(int* kernelPtr, const T* const sourcePtr, const int w, const int h, + const T& threshold, const int x, const int y) + { + // We have three scenarios for NMS, one for the border, 1 for the 1st inner border, and + // 1 for the rest. cv::resize adds artifacts around the 1st inner border, causing two + // maximas to occur side by side. Eg. [1 1 0.8 0.8 0.5 ..]. The CUDA kernel gives + // [0.8 1 0.8 0.8 0.5 ..] Hence for this special case in the 1st inner border, we look at the + // visible regions. 
+ + const auto index = y*w + x; + if (1 < x && x < (w-2) && 1 < y && y < (h-2)) + { + const auto value = sourcePtr[index]; + if (value > threshold) + { + const auto topLeft = sourcePtr[(y-1)*w + x-1]; + const auto top = sourcePtr[(y-1)*w + x]; + const auto topRight = sourcePtr[(y-1)*w + x+1]; + const auto left = sourcePtr[ y*w + x-1]; + const auto right = sourcePtr[ y*w + x+1]; + const auto bottomLeft = sourcePtr[(y+1)*w + x-1]; + const auto bottom = sourcePtr[(y+1)*w + x]; + const auto bottomRight = sourcePtr[(y+1)*w + x+1]; + + if (value > topLeft && value > top && value > topRight + && value > left && value > right + && value > bottomLeft && value > bottom && value > bottomRight) + kernelPtr[index] = 1; + else + kernelPtr[index] = 0; + } + else + kernelPtr[index] = 0; + } + else if (x == 1 || x == (w-2) || y == 1 || y == (h-2)) + { + //kernelPtr[index] = 0; + const auto value = sourcePtr[index]; + if (value > threshold) + { + const auto topLeft = ((0 < x && 0 < y) ? sourcePtr[(y-1)*w + x-1] : threshold); + const auto top = (0 < y ? sourcePtr[(y-1)*w + x] : threshold); + const auto topRight = ((0 < y && x < (w-1)) ? sourcePtr[(y-1)*w + x+1] : threshold); + const auto left = (0 < x ? sourcePtr[ y*w + x-1] : threshold); + const auto right = (x < (w-1) ? sourcePtr[y*w + x+1] : threshold); + const auto bottomLeft = ((y < (h-1) && 0 < x) ? sourcePtr[(y+1)*w + x-1] : threshold); + const auto bottom = (y < (h-1) ? sourcePtr[(y+1)*w + x] : threshold); + const auto bottomRight = ((x < (w-1) && y < (h-1)) ? 
sourcePtr[(y+1)*w + x+1] : threshold); + + if (value >= topLeft && value >= top && value >= topRight + && value >= left && value >= right + && value >= bottomLeft && value >= bottom && value >= bottomRight) + kernelPtr[index] = 1; + else + kernelPtr[index] = 0; + } + else + kernelPtr[index] = 0; + } + else + kernelPtr[index] = 0; + } + + template + void nmsAccuratePeakPosition(const T* const sourcePtr, const int& peakLocX, const int& peakLocY, + const int& width, const int& height, T* output) + { + T xAcc = 0.f; + T yAcc = 0.f; + T scoreAcc = 0.f; + const auto dWidth = 3; + const auto dHeight = 3; + for (auto dy = -dHeight ; dy <= dHeight ; dy++) + { + const auto y = peakLocY + dy; + if (0 <= y && y < height) // Default height = 368 + { + for (auto dx = -dWidth ; dx <= dWidth ; dx++) + { + const auto x = peakLocX + dx; + if (0 <= x && x < width) // Default width = 656 + { + const auto score = sourcePtr[y * width + x]; + if (score > 0) + { + xAcc += x*score; + yAcc += y*score; + scoreAcc += score; + } + } + } + } + } + + output[0] = xAcc / scoreAcc; + output[1] = yAcc / scoreAcc; + output[2] = sourcePtr[peakLocY*width + peakLocX]; + } + + template + void nmsCpu(T* targetPtr, int* kernelPtr, const T* const sourcePtr, const T threshold, + const std::array& targetSize, const std::array& sourceSize) { try { - UNUSED(targetPtr); - UNUSED(kernelPtr); - UNUSED(sourcePtr); - UNUSED(threshold); - UNUSED(targetSize); - UNUSED(sourceSize); - error("CPU version not completely implemented.", __LINE__, __FUNCTION__, __FILE__); + // Security checks + if (sourceSize.empty()) + error("sourceSize cannot be empty.", __LINE__, __FUNCTION__, __FILE__); + if (targetSize.empty()) + error("targetSize cannot be empty.", __LINE__, __FUNCTION__, __FILE__); + if (threshold < 0 || threshold > 1.0) + error("threshold value invalid.", __LINE__, __FUNCTION__, __FILE__); + + // Params + const auto channels = targetSize[1]; // 57 + const auto sourceHeight = sourceSize[2]; // 368 + const auto 
sourceWidth = sourceSize[3]; // 496 + const auto targetPeaks = targetSize[2]; // 97 + const auto targetPeakVec = targetSize[3]; // 3 + const auto sourceChannelOffset = sourceWidth * sourceHeight; + const auto targetChannelOffset = targetPeaks * targetPeakVec; + + // Per channel operation + for (auto c = 0 ; c < channels ; c++) + { + auto* currKernelPtr = &kernelPtr[c*sourceChannelOffset]; + const T* currSourcePtr = &sourcePtr[c*sourceChannelOffset]; - // TODO: THIS CODE IS WORKING, BUT IT DOES NOT CONSIDER THE MAX NUMBER OF PEAKS - // const int num = bottom->shape(0); - // //const int channel = bottom->shape(1); - // const int oriSpatialHeight = bottom->shape(2); - // const int oriSpatialWidth = bottom->shape(3); + for (auto y = 0; y < sourceHeight; y++) + for (auto x = 0; x < sourceWidth; x++) + nmsRegisterKernelCPU(currKernelPtr, currSourcePtr, sourceWidth, sourceHeight, threshold, x, y); - // T* dst_pointer = top->mutable_cpu_data(); - // const T* const src_pointer = bottom->cpu_data(); - // const int offset2 = oriSpatialHeight * oriSpatialWidth; - // const int offset2_dst = (mMaxPeaks+1)*2; + auto currentPeakCount = 1; + auto* currTargetPtr = &targetPtr[c*targetChannelOffset]; + for (auto y = 0; y < sourceHeight; y++) + { + for (auto x = 0; x < sourceWidth; x++) + { + const auto index = y*sourceWidth + x; + // Find high intensity points + if (currentPeakCount < targetPeaks) + { + if (currKernelPtr[index] == 1) + { + // Accurate Peak Position + nmsAccuratePeakPosition(currSourcePtr, x, y, sourceWidth, sourceHeight, + &currTargetPtr[currentPeakCount*3]); + currentPeakCount++; + } + } - //stupid method - // for (int n = 0; n < num; n++) - // { - // //assume only one channel - // int peakCount = 0; - // for (int y = 0; y < oriSpatialHeight; y++) - // { - // for (int x = 0; x < oriSpatialWidth; x++) - // { - // const T value = src_pointer[n * offset2 + y*oriSpatialWidth + x]; - // if (value >= mThreshold) - // { - // const T top = (y == 0) ? 
0 : src_pointer[n * offset2 + (y-1)*oriSpatialWidth + x]; - // const T bottom = (y == oriSpatialHeight - 1) ? 0 : src_pointer[n * offset2 + (y+1)*oriSpatialWidth + x]; - // const T left = (x == 0) ? 0 : src_pointer[n * offset2 + y*oriSpatialWidth + (x-1)]; - // const T right = (x == oriSpatialWidth - 1) ? 0 : src_pointer[n * offset2 + y*oriSpatialWidth + (x+1)]; - // if (value > top && value > bottom && value > left && value > right) - // { - // dst_pointer[n*offset2_dst + (peakCount + 1) * 2] = x; - // dst_pointer[n*offset2_dst + (peakCount + 1) * 2 + 1] = y; - // peakCount++; - // } - // } - // } - // } - // dst_pointer[n*offset2_dst] = peakCount; - // } + } + } + currTargetPtr[0] = currentPeakCount-1; + } } catch (const std::exception& e) { @@ -60,6 +166,8 @@ namespace op } } - template void nmsCpu(float* targetPtr, int* kernelPtr, const float* const sourcePtr, const float threshold, const std::array& targetSize, const std::array& sourceSize); - template void nmsCpu(double* targetPtr, int* kernelPtr, const double* const sourcePtr, const double threshold, const std::array& targetSize, const std::array& sourceSize); + template void nmsCpu(float* targetPtr, int* kernelPtr, const float* const sourcePtr, const float threshold, + const std::array& targetSize, const std::array& sourceSize); + template void nmsCpu(double* targetPtr, int* kernelPtr, const double* const sourcePtr, const double threshold, + const std::array& targetSize, const std::array& sourceSize); } diff --git a/src/openpose/core/point.cpp b/src/openpose/core/point.cpp index dcf42dbd2..a2398b638 100644 --- a/src/openpose/core/point.cpp +++ b/src/openpose/core/point.cpp @@ -1,6 +1,6 @@ -#include #include #include +#include namespace op { diff --git a/src/openpose/core/rectangle.cpp b/src/openpose/core/rectangle.cpp index 48938fc63..29670f9cc 100644 --- a/src/openpose/core/rectangle.cpp +++ b/src/openpose/core/rectangle.cpp @@ -1,4 +1,3 @@ -#include #include #include diff --git 
a/src/openpose/core/resizeAndMergeBase.cpp b/src/openpose/core/resizeAndMergeBase.cpp index 18da77fbb..bdbfa08ec 100644 --- a/src/openpose/core/resizeAndMergeBase.cpp +++ b/src/openpose/core/resizeAndMergeBase.cpp @@ -1,4 +1,7 @@ -// #include +#include +#include +#include +#include #include namespace op @@ -11,50 +14,98 @@ namespace op { try { - UNUSED(targetPtr); - UNUSED(sourcePtrs); - UNUSED(scaleInputToNetInputs); - UNUSED(targetSize); - UNUSED(sourceSizes); - error("CPU version not completely implemented.", __LINE__, __FUNCTION__, __FILE__); + // Security checks + if (sourceSizes.empty()) + error("sourceSizes cannot be empty.", __LINE__, __FUNCTION__, __FILE__); + if (sourcePtrs.size() != sourceSizes.size() || sourceSizes.size() != scaleInputToNetInputs.size()) + error("Size(sourcePtrs) must match size(sourceSizes) and size(scaleInputToNetInputs). Currently: " + + std::to_string(sourcePtrs.size()) + " vs. " + std::to_string(sourceSizes.size()) + " vs. " + + std::to_string(scaleInputToNetInputs.size()) + ".", __LINE__, __FUNCTION__, __FILE__); - // TODO: THIS CODE IS WORKING, BUT IT DOES NOT CONSIDER THE SCALES (I.E. 
SCALE NUMBER, START AND GAP) - // const int num = bottom->shape(0); - // const int channel = bottom->shape(1); - // const int sourceHeight = bottom->shape(2); - // const int sourceWidth = bottom->shape(3); - // const int targetHeight = top->shape(2); - // const int targetWidth = top->shape(3); + // Params + const auto nums = (signed)sourceSizes.size(); + const auto channels = targetSize[1]; // 57 + const auto targetHeight = targetSize[2]; // 368 + const auto targetWidth = targetSize[3]; // 496 + const auto targetChannelOffset = targetWidth * targetHeight; - // //stupid method - // for (int n = 0; n < num; n++) - // { - // for (int c = 0; c < channel; c++) - // { - // //fill source - // cv::Mat source(sourceWidth, sourceHeight, CV_32FC1); - // const auto sourceOffsetChannel = sourceHeight * sourceWidth; - // const auto sourceOffsetNum = sourceOffsetChannel * channel; - // const auto sourceOffset = n*sourceOffsetNum + c*sourceOffsetChannel; - // const T* const sourcePtrs = bottom->cpu_data(); - // for (int y = 0; y < sourceHeight; y++) - // for (int x = 0; x < sourceWidth; x++) - // source.at(x,y) = sourcePtrs[sourceOffset + y*sourceWidth + x]; + // No multi-scale merging or no merging required + if (sourceSizes.size() == 1) + { + // Params + const auto& sourceSize = sourceSizes[0]; + const auto sourceHeight = sourceSize[2]; // 368/8 .. + const auto sourceWidth = sourceSize[3]; // 496/8 .. + const auto sourceChannelOffset = sourceHeight * sourceWidth; + if (sourceSize[0] != 1) + error("It should never reache this point. 
Notify us otherwise.", + __LINE__, __FUNCTION__, __FILE__); - // // spatial resize - // cv::Mat target; - // cv::resize(source, target, {targetWidth, targetHeight}, 0, 0, CV_INTER_CUBIC); + // Per channel resize + const T* sourcePtr = sourcePtrs[0]; + for (auto c = 0 ; c < channels ; c++) + { + cv::Mat source(cv::Size(sourceWidth, sourceHeight), CV_32FC1, + const_cast(&sourcePtr[c*sourceChannelOffset])); + cv::Mat target(cv::Size(targetWidth, targetHeight), CV_32FC1, + (&targetPtr[c*targetChannelOffset])); + cv::resize(source, target, {targetWidth, targetHeight}, 0, 0, CV_INTER_CUBIC); + } + } + // Multi-scale merging + else + { + // Construct temp targets. We resuse targetPtr to store first scale + std::vector> tempTargetPtrs; + for (auto n = 1; n < nums; n++){ + tempTargetPtrs.emplace_back(std::unique_ptr(new T[targetChannelOffset * channels]())); + } - // //fill top - // const auto targetOffsetChannel = targetHeight * targetWidth; - // const auto targetOffsetNum = targetOffsetChannel * channel; - // const auto targetOffset = n*targetOffsetNum + c*targetOffsetChannel; - // T* targetPtr = top->mutable_cpu_data(); - // for (int y = 0; y < targetHeight; y++) - // for (int x = 0; x < targetWidth; x++) - // targetPtr[targetOffset + y*targetWidth + x] = target.at(x,y); - // } - // } + // Resize and sum + for (auto n = 0; n < nums; n++){ + + // Params + const auto& sourceSize = sourceSizes[n]; + const auto sourceHeight = sourceSize[2]; // 368/6 .. + const auto sourceWidth = sourceSize[3]; // 496/8 .. 
+ const auto sourceChannelOffset = sourceHeight * sourceWidth; + + // Access pointers + const T* sourcePtr = sourcePtrs[n]; + T* tempTargetPtr; + if(n != 0) + tempTargetPtr = tempTargetPtrs[n-1].get(); + else + tempTargetPtr = targetPtr; + + T* firstTempTargetPtr = targetPtr; + for (auto c = 0 ; c < channels ; c++) + { + // Resize + cv::Mat source(cv::Size(sourceWidth, sourceHeight), CV_32FC1, + const_cast(&sourcePtr[c*sourceChannelOffset])); + cv::Mat target(cv::Size(targetWidth, targetHeight), CV_32FC1, + (&tempTargetPtr[c*targetChannelOffset])); + cv::resize(source, target, {targetWidth, targetHeight}, 0, 0, CV_INTER_CUBIC); + + // Add + if (n != 0) + { + cv::Mat addTarget(cv::Size(targetWidth, targetHeight), CV_32FC1, + (&firstTempTargetPtr[c*targetChannelOffset])); + cv::add(target, addTarget, addTarget); + } + } + } + + // Average + for (auto c = 0 ; c < channels ; c++) + { + cv::Mat target(cv::Size(targetWidth, targetHeight), CV_32FC1, (&targetPtr[c*targetChannelOffset])); + target /= (float)nums; + } + + } } catch (const std::exception& e) { diff --git a/src/openpose/face/CMakeLists.txt b/src/openpose/face/CMakeLists.txt index 421e59df7..d8a3625fb 100644 --- a/src/openpose/face/CMakeLists.txt +++ b/src/openpose/face/CMakeLists.txt @@ -15,7 +15,11 @@ set(SOURCES_OP_FACE_WITH_CP ${SOURCES_OP_FACE_WITH_CP} PARENT_SCOPE) set(SOURCES_OPENPOSE ${SOURCES_OPENPOSE} ${SOURCES_OP_FACE_WITH_CP} PARENT_SCOPE) if (UNIX AND NOT APPLE) - cuda_add_library(openpose_face ${SOURCES_OP_FACE}) + if (${GPU_MODE} MATCHES "CUDA") + cuda_add_library(openpose_face ${SOURCES_OP_FACE}) + else() + add_library(openpose_face ${SOURCES_OP_FACE}) + endif () if (BUILD_CAFFE) add_dependencies(openpose_face openpose_caffe) diff --git a/src/openpose/face/faceExtractorCaffe.cpp b/src/openpose/face/faceExtractorCaffe.cpp index 372ff4e1b..236a8e501 100644 --- a/src/openpose/face/faceExtractorCaffe.cpp +++ b/src/openpose/face/faceExtractorCaffe.cpp @@ -1,4 +1,4 @@ -#if defined USE_CAFFE && defined 
USE_CUDA +#if defined USE_CAFFE #include #endif #include // CV_WARP_INVERSE_MAP, CV_INTER_LINEAR @@ -15,7 +15,7 @@ namespace op { struct FaceExtractorCaffe::ImplFaceExtractorCaffe { - #if defined USE_CAFFE && defined USE_CUDA + #if defined USE_CAFFE bool netInitialized; std::shared_ptr spNetCaffe; std::shared_ptr> spResizeAndMergeCaffe; @@ -36,7 +36,7 @@ namespace op #endif }; - #if defined USE_CAFFE && defined USE_CUDA + #if defined USE_CAFFE void updateFaceHeatMapsForPerson(Array& heatMaps, const int person, const ScaleMode heatMapScaleMode, const float* heatMapsGpuPtr) { @@ -47,8 +47,13 @@ namespace op const auto volumeBodyParts = FACE_NUMBER_PARTS * channelOffset; auto totalOffset = 0u; auto* heatMapsPtr = &heatMaps.getPtr()[person*volumeBodyParts]; - // Copy face parts - cudaMemcpy(heatMapsPtr, heatMapsGpuPtr, volumeBodyParts * sizeof(float), cudaMemcpyDeviceToHost); + // Copy face parts + #ifdef USE_CUDA + cudaMemcpy(heatMapsPtr, heatMapsGpuPtr, volumeBodyParts * sizeof(float), cudaMemcpyDeviceToHost); + #else + //std::memcpy(heatMapsPtr, heatMapsGpuPtr, volumeBodyParts * sizeof(float)); + std::copy(heatMapsGpuPtr, heatMapsGpuPtr + volumeBodyParts, heatMapsPtr); + #endif // Change from [0,1] to [-1,1] if (heatMapScaleMode == ScaleMode::PlusMinusOne) for (auto i = 0u ; i < volumeBodyParts ; i++) @@ -84,7 +89,9 @@ namespace op // Pose extractor blob and layer maximumCaffe->Reshape({heatMapsBlob.get()}, {peaksBlob.get()}); // Cuda check - cudaCheck(__LINE__, __FUNCTION__, __FILE__); + #ifdef USE_CUDA + cudaCheck(__LINE__, __FUNCTION__, __FILE__); + #endif } catch (const std::exception& e) { @@ -98,13 +105,13 @@ namespace op const std::vector& heatMapTypes, const ScaleMode heatMapScale, const bool enableGoogleLogging) : FaceExtractor{netInputSize, netOutputSize, heatMapTypes, heatMapScale} - #if defined USE_CAFFE && defined USE_CUDA + #if defined USE_CAFFE , upImpl{new ImplFaceExtractorCaffe{modelFolder, gpuId, enableGoogleLogging}} #endif { try { - #if !defined 
USE_CAFFE || !defined USE_CUDA + #if !defined USE_CAFFE UNUSED(netInputSize); UNUSED(netOutputSize); UNUSED(modelFolder); @@ -129,17 +136,21 @@ namespace op { try { - #if defined USE_CAFFE && defined USE_CUDA + #if defined USE_CAFFE // Logging log("Starting initialization on thread.", Priority::Low, __LINE__, __FUNCTION__, __FILE__); // Initialize Caffe net upImpl->spNetCaffe->initializationOnThread(); - cudaCheck(__LINE__, __FUNCTION__, __FILE__); + #ifdef USE_CUDA + cudaCheck(__LINE__, __FUNCTION__, __FILE__); + #endif // Initialize blobs upImpl->spCaffeNetOutputBlob = upImpl->spNetCaffe->getOutputBlob(); upImpl->spHeatMapsBlob = {std::make_shared>(1,1,1,1)}; upImpl->spPeaksBlob = {std::make_shared>(1,1,1,1)}; - cudaCheck(__LINE__, __FUNCTION__, __FILE__); + #ifdef USE_CUDA + cudaCheck(__LINE__, __FUNCTION__, __FILE__); + #endif // Logging log("Finished initialization on thread.", Priority::Low, __LINE__, __FUNCTION__, __FILE__); #endif @@ -156,7 +167,7 @@ namespace op { try { - #if defined USE_CAFFE && defined USE_CUDA + #if defined USE_CAFFE if (!faceRectangles.empty()) { // Security checks @@ -273,9 +284,15 @@ namespace op mFaceKeypoints[baseIndex+2] = score; } // HeatMaps: storing - if (!mHeatMapTypes.empty()) - updateFaceHeatMapsForPerson(mHeatMaps, person, mHeatMapScaleMode, - upImpl->spHeatMapsBlob->gpu_data()); + if (!mHeatMapTypes.empty()){ + #ifdef USE_CUDA + updateFaceHeatMapsForPerson(mHeatMaps, person, mHeatMapScaleMode, + upImpl->spHeatMapsBlob->gpu_data()); + #else + updateFaceHeatMapsForPerson(mHeatMaps, person, mHeatMapScaleMode, + upImpl->spHeatMapsBlob->cpu_data()); + #endif + } } } // // Debugging diff --git a/src/openpose/hand/CMakeLists.txt b/src/openpose/hand/CMakeLists.txt index 52b95c2aa..59d7dd4d1 100644 --- a/src/openpose/hand/CMakeLists.txt +++ b/src/openpose/hand/CMakeLists.txt @@ -15,7 +15,11 @@ set(SOURCES_OP_HAND_WITH_CP ${SOURCES_OP_HAND_WITH_CP} PARENT_SCOPE) set(SOURCES_OPENPOSE ${SOURCES_OPENPOSE} ${SOURCES_OP_HAND_WITH_CP} 
PARENT_SCOPE) if (UNIX AND NOT APPLE) - cuda_add_library(openpose_hand ${SOURCES_OP_HAND}) + if (${GPU_MODE} MATCHES "CUDA") + cuda_add_library(openpose_hand ${SOURCES_OP_HAND}) + else() + add_library(openpose_hand ${SOURCES_OP_HAND}) + endif () if (BUILD_CAFFE) add_dependencies(openpose_hand openpose_caffe) diff --git a/src/openpose/hand/handExtractorCaffe.cpp b/src/openpose/hand/handExtractorCaffe.cpp index 26ff909a6..c0d2a466b 100644 --- a/src/openpose/hand/handExtractorCaffe.cpp +++ b/src/openpose/hand/handExtractorCaffe.cpp @@ -1,4 +1,4 @@ -#if defined USE_CAFFE && defined USE_CUDA +#if defined USE_CAFFE #include #endif #include // CV_WARP_INVERSE_MAP, CV_INTER_LINEAR @@ -16,7 +16,7 @@ namespace op { struct HandExtractorCaffe::ImplHandExtractorCaffe { - #if defined USE_CAFFE && defined USE_CUDA + #if defined USE_CAFFE bool netInitialized; std::shared_ptr spNetCaffe; std::shared_ptr> spResizeAndMergeCaffe; @@ -38,7 +38,7 @@ namespace op #endif }; - #if defined USE_CAFFE && defined USE_CUDA + #if defined USE_CAFFE void cropFrame(Array& handImageCrop, cv::Mat& affineMatrix, const cv::Mat& cvInputData, const Rectangle& handRectangle, const int netInputSide, const Point& netOutputSize, const bool mirrorImage) @@ -134,7 +134,12 @@ namespace op auto totalOffset = 0u; auto* heatMapsPtr = &heatMaps.getPtr()[person*volumeBodyParts]; // Copy hand parts - cudaMemcpy(heatMapsPtr, heatMapsGpuPtr, volumeBodyParts * sizeof(float), cudaMemcpyDeviceToHost); + #ifdef USE_CUDA + cudaMemcpy(heatMapsPtr, heatMapsGpuPtr, volumeBodyParts * sizeof(float), cudaMemcpyDeviceToHost); + #else + //std::memcpy(heatMapsPtr, heatMapsGpuPtr, volumeBodyParts * sizeof(float)); + std::copy(heatMapsGpuPtr, heatMapsGpuPtr + volumeBodyParts, heatMapsPtr); + #endif // Change from [0,1] to [-1,1] if (heatMapScaleMode == ScaleMode::PlusMinusOne) for (auto i = 0u ; i < volumeBodyParts ; i++) @@ -170,7 +175,9 @@ namespace op // Pose extractor blob and layer maximumCaffe->Reshape({heatMapsBlob.get()}, 
{peaksBlob.get()}); // Cuda check - cudaCheck(__LINE__, __FUNCTION__, __FILE__); + #ifdef USE_CUDA + cudaCheck(__LINE__, __FUNCTION__, __FILE__); + #endif } catch (const std::exception& e) { @@ -186,13 +193,13 @@ namespace op const ScaleMode heatMapScale, const bool enableGoogleLogging) : HandExtractor{netInputSize, netOutputSize, numberScales, rangeScales, heatMapTypes, heatMapScale} - #if defined USE_CAFFE && defined USE_CUDA + #if defined USE_CAFFE , upImpl{new ImplHandExtractorCaffe{modelFolder, gpuId, enableGoogleLogging}} #endif { try { - #if !defined USE_CAFFE || !defined USE_CUDA + #if !defined USE_CAFFE UNUSED(netInputSize); UNUSED(netOutputSize); UNUSED(modelFolder); @@ -219,17 +226,21 @@ namespace op { try { - #if defined USE_CAFFE && defined USE_CUDA + #if defined USE_CAFFE // Logging log("Starting initialization on thread.", Priority::Low, __LINE__, __FUNCTION__, __FILE__); // Initialize Caffe net upImpl->spNetCaffe->initializationOnThread(); - cudaCheck(__LINE__, __FUNCTION__, __FILE__); + #if defined USE_CUDA + cudaCheck(__LINE__, __FUNCTION__, __FILE__); + #endif // Initialize blobs upImpl->spCaffeNetOutputBlob = upImpl->spNetCaffe->getOutputBlob(); upImpl->spHeatMapsBlob = {std::make_shared>(1,1,1,1)}; upImpl->spPeaksBlob = {std::make_shared>(1,1,1,1)}; - cudaCheck(__LINE__, __FUNCTION__, __FILE__); + #if defined USE_CUDA + cudaCheck(__LINE__, __FUNCTION__, __FILE__); + #endif // Logging log("Finished initialization on thread.", Priority::Low, __LINE__, __FUNCTION__, __FILE__); #endif @@ -246,7 +257,7 @@ namespace op { try { - #if defined USE_CAFFE && defined USE_CUDA + #if defined USE_CAFFE if (!handRectangles.empty()) { // Security checks @@ -353,9 +364,15 @@ namespace op } } // HeatMaps: storing - if (!mHeatMapTypes.empty()) - updateHandHeatMapsForPerson(mHeatMaps[hand], person, mHeatMapScaleMode, - upImpl->spHeatMapsBlob->gpu_data()); + if (!mHeatMapTypes.empty()){ + #ifdef USE_CUDA + updateHandHeatMapsForPerson(mHeatMaps[hand], person, 
mHeatMapScaleMode, + upImpl->spHeatMapsBlob->gpu_data()); + #else + updateHandHeatMapsForPerson(mHeatMaps[hand], person, mHeatMapScaleMode, + upImpl->spHeatMapsBlob->cpu_data()); + #endif + } } } } @@ -384,7 +401,7 @@ namespace op { try { - #if defined USE_CAFFE && defined USE_CUDA + #if defined USE_CAFFE // 1. Deep net upImpl->spNetCaffe->forwardPass(mHandImageCrop); diff --git a/src/openpose/pose/CMakeLists.txt b/src/openpose/pose/CMakeLists.txt index 684ecfddd..df4fcf60e 100644 --- a/src/openpose/pose/CMakeLists.txt +++ b/src/openpose/pose/CMakeLists.txt @@ -19,7 +19,11 @@ set(SOURCES_OP_POSE_WITH_CP ${SOURCES_OP_POSE_WITH_CP} PARENT_SCOPE) set(SOURCES_OPENPOSE ${SOURCES_OPENPOSE} ${SOURCES_OP_POSE_WITH_CP} PARENT_SCOPE) if (UNIX AND NOT APPLE) - cuda_add_library(openpose_pose ${SOURCES_OP_POSE}) + if (${GPU_MODE} MATCHES "CUDA") + cuda_add_library(openpose_pose ${SOURCES_OP_POSE}) + else() + add_library(openpose_pose ${SOURCES_OP_POSE}) + endif () if (BUILD_CAFFE) add_dependencies(openpose_pose openpose_caffe) diff --git a/src/openpose/pose/poseExtractorCaffe.cpp b/src/openpose/pose/poseExtractorCaffe.cpp index db9e2ad64..f03c4c2df 100644 --- a/src/openpose/pose/poseExtractorCaffe.cpp +++ b/src/openpose/pose/poseExtractorCaffe.cpp @@ -80,6 +80,7 @@ namespace op try { // HeatMaps extractor blob and layer + // Caffe modifies bottom - Heatmap gets resized const auto caffeNetOutputBlobs = caffeNetSharedToPtr(caffeNetOutputBlob); resizeAndMergeCaffe->Reshape(caffeNetOutputBlobs, {heatMapsBlob.get()}, getPoseNetDecreaseFactor(poseModel), 1.f/scaleInputToNetInput); @@ -227,6 +228,7 @@ namespace op // Reshape blobs if required // Note: In order to resize to input size to have same results as Matlab, uncomment the commented // lines + // Note: For dynamic sizes (e.g. 
a folder with images of different aspect ratio) if (!vectorsAreEqual(upImpl->mNetInput4DSizes.at(i), inputNetData[i].getSize())) // || !vectorsAreEqual(upImpl->mScaleInputToNetInputs, scaleInputToNetInputs)) { @@ -247,11 +249,9 @@ namespace op const std::vector floatScaleRatios(scaleInputToNetInputs.begin(), scaleInputToNetInputs.end()); upImpl->spResizeAndMergeCaffe->setScaleRatios(floatScaleRatios); #ifdef USE_CUDA - upImpl->spResizeAndMergeCaffe->Forward_gpu(caffeNetOutputBlobs, // ~5ms - {upImpl->spHeatMapsBlob.get()}); - cudaCheck(__LINE__, __FUNCTION__, __FILE__); + upImpl->spResizeAndMergeCaffe->Forward_gpu(caffeNetOutputBlobs, {upImpl->spHeatMapsBlob.get()}); // ~5ms #else - error("ResizeAndMergeCaffe CPU version not implemented yet.", __LINE__, __FUNCTION__, __FILE__); + upImpl->spResizeAndMergeCaffe->Forward_cpu(caffeNetOutputBlobs, {upImpl->spHeatMapsBlob.get()}); // ~20ms #endif // 3. Get peaks by Non-Maximum Suppression @@ -260,7 +260,7 @@ namespace op upImpl->spNmsCaffe->Forward_gpu({upImpl->spHeatMapsBlob.get()}, {upImpl->spPeaksBlob.get()});// ~2ms cudaCheck(__LINE__, __FUNCTION__, __FILE__); #else - error("NmsCaffe CPU version not implemented yet.", __LINE__, __FUNCTION__, __FILE__); + upImpl->spNmsCaffe->Forward_cpu({upImpl->spHeatMapsBlob.get()}, {upImpl->spPeaksBlob.get()}); // ~ 7ms #endif // Get scale net to output (i.e. 
image input) diff --git a/src/openpose/utilities/CMakeLists.txt b/src/openpose/utilities/CMakeLists.txt index 20256741c..08e8e7cc0 100644 --- a/src/openpose/utilities/CMakeLists.txt +++ b/src/openpose/utilities/CMakeLists.txt @@ -14,7 +14,12 @@ set(SOURCES_OP_UTILITIES_WITH_CP ${SOURCES_OP_UTILITIES_WITH_CP} PARENT_SCOPE) set(SOURCES_OPENPOSE ${SOURCES_OPENPOSE} ${SOURCES_OP_UTILITIES_WITH_CP} PARENT_SCOPE) if (UNIX AND NOT APPLE) - cuda_add_library(openpose_utilities ${SOURCES_OP_UTILITIES}) + if (${GPU_MODE} MATCHES "CUDA") + cuda_add_library(openpose_utilities ${SOURCES_OP_UTILITIES}) + else() + add_library(openpose_utilities ${SOURCES_OP_UTILITIES}) + endif () + target_link_libraries(openpose_utilities openpose_producer openpose_filestream) install(TARGETS openpose_utilities