Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/3.4' into merge-3.4
Browse files Browse the repository at this point in the history
  • Loading branch information
alalek committed Jan 28, 2020
2 parents f856c96 + 223790e commit 560f85f
Show file tree
Hide file tree
Showing 40 changed files with 3,101 additions and 708 deletions.
16 changes: 0 additions & 16 deletions 3rdparty/carotene/hal/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -60,22 +60,6 @@ function(compile_carotene)
endif()

add_subdirectory("${CAROTENE_DIR}" "${CMAKE_CURRENT_BINARY_DIR}/carotene")

if(ARM OR AARCH64)
if(CMAKE_BUILD_TYPE)
set(CMAKE_TRY_COMPILE_CONFIGURATION ${CMAKE_BUILD_TYPE})
endif()
check_cxx_compiler_flag("-mfpu=neon" CXX_HAS_MFPU_NEON)
check_c_compiler_flag("-mfpu=neon" C_HAS_MFPU_NEON)
if(${CXX_HAS_MFPU_NEON} AND ${C_HAS_MFPU_NEON} AND NOT "${CMAKE_CXX_FLAGS} " MATCHES "-mfpu=neon[^ ]*")
get_target_property(old_flags "carotene_objs" COMPILE_FLAGS)
if(old_flags)
set_target_properties("carotene_objs" PROPERTIES COMPILE_FLAGS "${old_flags} -mfpu=neon")
else()
set_target_properties("carotene_objs" PROPERTIES COMPILE_FLAGS "-mfpu=neon")
endif()
endif()
endif()
endfunction()

compile_carotene()
Expand Down
10 changes: 7 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -792,9 +792,13 @@ endif()

foreach(hal ${OpenCV_HAL})
if(hal STREQUAL "carotene")
add_subdirectory(3rdparty/carotene/hal)
ocv_hal_register(CAROTENE_HAL_LIBRARIES CAROTENE_HAL_HEADERS CAROTENE_HAL_INCLUDE_DIRS)
list(APPEND OpenCV_USED_HAL "carotene (ver ${CAROTENE_HAL_VERSION})")
if(";${CPU_BASELINE_FINAL};" MATCHES ";NEON;")
add_subdirectory(3rdparty/carotene/hal)
ocv_hal_register(CAROTENE_HAL_LIBRARIES CAROTENE_HAL_HEADERS CAROTENE_HAL_INCLUDE_DIRS)
list(APPEND OpenCV_USED_HAL "carotene (ver ${CAROTENE_HAL_VERSION})")
else()
message(STATUS "Carotene: NEON is not available, disabling carotene...")
endif()
elseif(hal STREQUAL "openvx")
add_subdirectory(3rdparty/openvx)
ocv_hal_register(OPENVX_HAL_LIBRARIES OPENVX_HAL_HEADERS OPENVX_HAL_INCLUDE_DIRS)
Expand Down
20 changes: 20 additions & 0 deletions modules/calib3d/misc/python/test/test_solvepnp.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,26 @@ def test_regression_16040_2(self):
obj_points, img_points, cameraMatrix, distCoeffs, reprojectionError=r
)

def test_regression_16049(self):
    """Regression test for opencv/opencv#16049.

    solvePnPGeneric must accept an omitted reprojectionError output
    (noArray()); when an error array IS produced for float32 inputs, it
    must come back as a 1x1 CV_32FC1.
    """
    # A unit square in the Z=0 plane and its projected image corners.
    square_3d = np.array([[0, 0, 0], [0, 1, 0], [1, 1, 0], [1, 0, 0]], dtype=np.float32)
    square_2d = np.array(
        [[[700, 400], [700, 600], [900, 600], [900, 400]]], dtype=np.float32
    )

    intrinsics = np.array(
        [[712.0634, 0, 800], [0, 712.540, 500], [0, 0, 1]], dtype=np.float32
    )
    distortion = np.array([[0, 0, 0, 0]], dtype=np.float32)

    _sols, _rvecs, _tvecs, reproj_err = cv.solvePnPGeneric(
        square_3d, square_2d, intrinsics, distortion
    )

    if reproj_err is not None:
        err_dump = cv.utils.dumpInputArray(reproj_err)
        self.assertEqual(err_dump, "InputArray: empty()=false kind=0x00010000 flags=0x01010000 total(-1)=1 dims(-1)=2 size(-1)=1x1 type(-1)=CV_32FC1")
    # else: noArray() is supported, see https://github.com/opencv/opencv/issues/16049


# Standard entry point for OpenCV Python test modules: discover and run
# the test cases defined above via the shared test bootstrap helper.
if __name__ == '__main__':
    NewOpenCVTests.bootstrap()
5 changes: 4 additions & 1 deletion modules/calib3d/src/solvepnp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1009,7 +1009,10 @@ int solvePnPGeneric( InputArray _opoints, InputArray _ipoints,

if (reprojectionError.needed())
{
int type = reprojectionError.type();
int type = (reprojectionError.fixedType() || !reprojectionError.empty())
? reprojectionError.type()
: (max(_ipoints.depth(), _opoints.depth()) == CV_64F ? CV_64F : CV_32F);

reprojectionError.create(solutions, 1, type);
CV_CheckType(reprojectionError.type(), type == CV_32FC1 || type == CV_64FC1,
"Type of reprojectionError must be CV_32FC1 or CV_64FC1!");
Expand Down
151 changes: 115 additions & 36 deletions modules/core/src/matmul.simd.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2078,6 +2078,10 @@ MulTransposedR(const Mat& srcmat, const Mat& dstmat, const Mat& deltamat, double
deltastep = deltastep ? 4 : 0;
}

#if CV_SIMD_64F
v_float64x2 v_scale = v_setall_f64(scale);
#endif

if( !delta )
for( i = 0; i < size.width; i++, tdst += dststep )
{
Expand All @@ -2086,22 +2090,41 @@ MulTransposedR(const Mat& srcmat, const Mat& dstmat, const Mat& deltamat, double

for( j = i; j <= size.width - 4; j += 4 )
{
double s0 = 0, s1 = 0, s2 = 0, s3 = 0;
const sT *tsrc = src + j;
#if CV_SIMD_64F
if (DataType<sT>::depth == CV_64F && DataType<dT>::depth == CV_64F)
{
v_float64x2 s0 = v_setzero_f64(), s1 = v_setzero_f64();
const double *tsrc = (double*)(src + j);

for( k = 0; k < size.height; k++, tsrc += srcstep )
for( k = 0; k < size.height; k++, tsrc += srcstep )
{
v_float64x2 a = v_setall_f64((double)col_buf[k]);
s0 += a * v_load(tsrc+0);
s1 += a * v_load(tsrc+2);
}

v_store((double*)(tdst+j), s0*v_scale);
v_store((double*)(tdst+j+2), s1*v_scale);
} else
#endif
{
double a = col_buf[k];
s0 += a * tsrc[0];
s1 += a * tsrc[1];
s2 += a * tsrc[2];
s3 += a * tsrc[3];
}
double s0 = 0, s1 = 0, s2 = 0, s3 = 0;
const sT *tsrc = src + j;

tdst[j] = (dT)(s0*scale);
tdst[j+1] = (dT)(s1*scale);
tdst[j+2] = (dT)(s2*scale);
tdst[j+3] = (dT)(s3*scale);
for( k = 0; k < size.height; k++, tsrc += srcstep )
{
double a = col_buf[k];
s0 += a * tsrc[0];
s1 += a * tsrc[1];
s2 += a * tsrc[2];
s3 += a * tsrc[3];
}

tdst[j] = (dT)(s0*scale);
tdst[j+1] = (dT)(s1*scale);
tdst[j+2] = (dT)(s2*scale);
tdst[j+3] = (dT)(s3*scale);
}
}

for( ; j < size.width; j++ )
Expand All @@ -2127,23 +2150,45 @@ MulTransposedR(const Mat& srcmat, const Mat& dstmat, const Mat& deltamat, double

for( j = i; j <= size.width - 4; j += 4 )
{
double s0 = 0, s1 = 0, s2 = 0, s3 = 0;
const sT *tsrc = src + j;
const dT *d = delta_buf ? delta_buf : delta + j;

for( k = 0; k < size.height; k++, tsrc+=srcstep, d+=deltastep )
#if CV_SIMD_64F
if (DataType<sT>::depth == CV_64F && DataType<dT>::depth == CV_64F)
{
double a = col_buf[k];
s0 += a * (tsrc[0] - d[0]);
s1 += a * (tsrc[1] - d[1]);
s2 += a * (tsrc[2] - d[2]);
s3 += a * (tsrc[3] - d[3]);
v_float64x2 s0 = v_setzero_f64(), s1 = v_setzero_f64();
const double *tsrc = (double*)(src + j);
const double *d = (double*)(delta_buf ? delta_buf : delta + j);

for( k = 0; k < size.height; k++, tsrc+=srcstep, d+=deltastep )
{
v_float64x2 a = v_setall_f64((double)col_buf[k]);
s0 += a * (v_load(tsrc+0) - v_load(d+0));
s1 += a * (v_load(tsrc+2) - v_load(d+2));
}

v_store((double*)(tdst+j), s0*v_scale);
v_store((double*)(tdst+j+2), s1*v_scale);
}
else
#endif

tdst[j] = (dT)(s0*scale);
tdst[j+1] = (dT)(s1*scale);
tdst[j+2] = (dT)(s2*scale);
tdst[j+3] = (dT)(s3*scale);
{
double s0 = 0, s1 = 0, s2 = 0, s3 = 0;
const sT *tsrc = src + j;
const dT *d = delta_buf ? delta_buf : delta + j;

for( k = 0; k < size.height; k++, tsrc+=srcstep, d+=deltastep )
{
double a = col_buf[k];
s0 += a * (tsrc[0] - d[0]);
s1 += a * (tsrc[1] - d[1]);
s2 += a * (tsrc[2] - d[2]);
s3 += a * (tsrc[3] - d[3]);
}

tdst[j] = (dT)(s0*scale);
tdst[j+1] = (dT)(s1*scale);
tdst[j+2] = (dT)(s2*scale);
tdst[j+3] = (dT)(s3*scale);
}
}

for( ; j < size.width; j++ )
Expand Down Expand Up @@ -2182,10 +2227,25 @@ MulTransposedL(const Mat& srcmat, const Mat& dstmat, const Mat& deltamat, double
double s = 0;
const sT *tsrc1 = src + i*srcstep;
const sT *tsrc2 = src + j*srcstep;

for( k = 0; k <= size.width - 4; k += 4 )
s += (double)tsrc1[k]*tsrc2[k] + (double)tsrc1[k+1]*tsrc2[k+1] +
(double)tsrc1[k+2]*tsrc2[k+2] + (double)tsrc1[k+3]*tsrc2[k+3];
#if CV_SIMD_64F
if (DataType<sT>::depth == CV_64F && DataType<dT>::depth == CV_64F)
{
const double *v_tsrc1 = (double *)(tsrc1);
const double *v_tsrc2 = (double *)(tsrc2);
v_float64x2 v_s = v_setzero_f64();

for( k = 0; k <= size.width - 4; k += 4 )
v_s += (v_load(v_tsrc1+k) * v_load(v_tsrc2+k)) +
(v_load(v_tsrc1+k+2) * v_load(v_tsrc2+k+2));
s += v_reduce_sum(v_s);
}
else
#endif
{
for( k = 0; k <= size.width - 4; k += 4 )
s += (double)tsrc1[k]*tsrc2[k] + (double)tsrc1[k+1]*tsrc2[k+1] +
(double)tsrc1[k+2]*tsrc2[k+2] + (double)tsrc1[k+3]*tsrc2[k+3];
}
for( ; k < size.width; k++ )
s += (double)tsrc1[k] * tsrc2[k];
tdst[j] = (dT)(s*scale);
Expand Down Expand Up @@ -2220,11 +2280,30 @@ MulTransposedL(const Mat& srcmat, const Mat& dstmat, const Mat& deltamat, double
delta_buf[2] = delta_buf[3] = tdelta2[0];
tdelta2 = delta_buf;
}
for( k = 0; k <= size.width-4; k += 4, tdelta2 += delta_shift )
s += (double)row_buf[k]*(tsrc2[k] - tdelta2[0]) +
(double)row_buf[k+1]*(tsrc2[k+1] - tdelta2[1]) +
(double)row_buf[k+2]*(tsrc2[k+2] - tdelta2[2]) +
(double)row_buf[k+3]*(tsrc2[k+3] - tdelta2[3]);
#if CV_SIMD_64F
if (DataType<sT>::depth == CV_64F && DataType<dT>::depth == CV_64F)
{
const double *v_tsrc2 = (double *)(tsrc2);
const double *v_tdelta2 = (double *)(tdelta2);
const double *v_row_buf = (double *)(row_buf);
v_float64x2 v_s = v_setzero_f64();

for( k = 0; k <= size.width - 4; k += 4, v_tdelta2 += delta_shift )
v_s += ((v_load(v_tsrc2+k) - v_load(v_tdelta2)) * v_load(v_row_buf+k)) +
((v_load(v_tsrc2+k+2) - v_load(v_tdelta2+2)) * v_load(v_row_buf+k+2));
s += v_reduce_sum(v_s);

tdelta2 = (const dT *)(v_tdelta2);
}
else
#endif
{
for( k = 0; k <= size.width-4; k += 4, tdelta2 += delta_shift )
s += (double)row_buf[k]*(tsrc2[k] - tdelta2[0]) +
(double)row_buf[k+1]*(tsrc2[k+1] - tdelta2[1]) +
(double)row_buf[k+2]*(tsrc2[k+2] - tdelta2[2]) +
(double)row_buf[k+3]*(tsrc2[k+3] - tdelta2[3]);
}
for( ; k < size.width; k++, tdelta2++ )
s += (double)row_buf[k]*(tsrc2[k] - tdelta2[0]);
tdst[j] = (dT)(s*scale);
Expand Down
2 changes: 1 addition & 1 deletion modules/core/src/parallel_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ DECLARE_CV_PAUSE
#endif
#ifndef CV_PAUSE
# if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
# if !defined(__SSE__)
# if !defined(__SSE2__)
static inline void cv_non_sse_mm_pause() { __asm__ __volatile__ ("rep; nop"); }
# define _mm_pause cv_non_sse_mm_pause
# endif
Expand Down
3 changes: 2 additions & 1 deletion modules/dnn/include/opencv2/dnn/all_layers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,8 @@ CV__DNN_INLINE_NS_BEGIN
std::vector<size_t> pads_begin, pads_end;
CV_DEPRECATED_EXTERNAL Size kernel, stride, pad;
CV_DEPRECATED_EXTERNAL int pad_l, pad_t, pad_r, pad_b;
bool globalPooling;
bool globalPooling; //!< Flag is true if at least one of the axes is global pooled.
std::vector<bool> isGlobalPooling;
bool computeMaxIdx;
String padMode;
bool ceilMode;
Expand Down
2 changes: 1 addition & 1 deletion modules/dnn/include/opencv2/dnn/version.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#define OPENCV_DNN_VERSION_HPP

/// Use with major OpenCV version only.
#define OPENCV_DNN_API_VERSION 20191202
#define OPENCV_DNN_API_VERSION 20200128

#if !defined CV_DOXYGEN && !defined CV_STATIC_ANALYSIS && !defined CV_DNN_DONT_ADD_INLINE_NS
#define CV__DNN_INLINE_NS __CV_CAT(dnn4_v, OPENCV_DNN_API_VERSION)
Expand Down
28 changes: 7 additions & 21 deletions modules/dnn/src/layers/convolution_layer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1555,19 +1555,6 @@ class DeConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl
const int group = numOutput / outGroupCn;

if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) {
if (padMode.empty()) {
for (int i = 0; i < adjust_pads.size(); i++) {
if (pads_end[i] < adjust_pads[i])
return false;
}
} else if (padMode == "SAME") {
for (int i = 0; i < adjust_pads.size(); i++) {
if (kernel_size[i] < pads_begin[i] + 1 + adjust_pads[i])
return false;
}
} else if (padMode == "VALID")
return false;

return group == 1;
}

Expand Down Expand Up @@ -2334,28 +2321,27 @@ class DeConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl
ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, kernel_shape, newWeights.data);
}
std::vector<size_t> paddings_end;
if (padMode.empty())
{
for (int i = 0; i < pads_end.size(); i++) {
paddings_end.push_back(pads_end[i] - adjust_pads[i]);
}
}
else if (padMode == "SAME")
if (padMode == "SAME")
{
for (int i = 0; i < pads_begin.size(); i++) {
paddings_end.push_back(kernel_size[i] - pads_begin[i] - 1 - adjust_pads[i]);
}
adjust_pads = std::vector<size_t>(pads_begin.size(), 0);
} else {
paddings_end = pads_end;
}
ngraph::op::PadType pad_type = padMode == "VALID" ? ngraph::op::PadType::VALID : ngraph::op::PadType::EXPLICIT;

auto deconv = std::make_shared<ngraph::op::v1::ConvolutionBackpropData>(
ieInpNode,
ieWeights,
ngraph::Strides(strides),
ngraph::CoordinateDiff(std::vector<std::ptrdiff_t>(pads_begin.begin(), pads_begin.end())),
ngraph::CoordinateDiff(std::vector<std::ptrdiff_t>(paddings_end.begin(), paddings_end.end())),
ngraph::Strides(dilations));
ngraph::Strides(dilations),
pad_type,
ngraph::CoordinateDiff(std::vector<std::ptrdiff_t>(adjust_pads.begin(), adjust_pads.end())));

if (hasBias() || fusedBias)
{
std::vector<size_t> shape(deconv->get_shape().size(), 1);
Expand Down
Loading

0 comments on commit 560f85f

Please sign in to comment.