diff --git a/modules/gapi/src/backends/fluid/gfluidimgproc.cpp b/modules/gapi/src/backends/fluid/gfluidimgproc.cpp
index df44528278ef..de8beb1650a4 100644
--- a/modules/gapi/src/backends/fluid/gfluidimgproc.cpp
+++ b/modules/gapi/src/backends/fluid/gfluidimgproc.cpp
@@ -1442,7 +1442,9 @@ static void run_medianblur(      Buffer& dst,
                            const View  & src,
                                  int     ksize)
 {
-    static const int kmax = 9;
+    static_assert(std::is_same<DST, SRC>::value, "unsupported combination of types");
+
+    constexpr int kmax = 9;
     GAPI_Assert(ksize <= kmax);
 
     const SRC *in[ kmax ];
@@ -1460,24 +1462,33 @@ static void run_medianblur(      Buffer& dst,
     int width = dst.length();
     int chan  = dst.meta().chan;
 
-    for (int w=0; w < width; w++)
+    // optimized: if 3x3
+
+    if (3 == ksize)
     {
-        // TODO: make this cycle innermost
-        for (int c=0; c < chan; c++)
-        {
-            SRC neighbours[kmax * kmax];
+        run_medblur3x3_impl(out, in, width, chan);
+        return;
+    }
 
-            for (int i=0; i < ksize; i++)
-            for (int j=0; j < ksize; j++)
-            {
-                neighbours[i*ksize + j] = in[i][(w + j - border)*chan + c];
-            }
+    // reference: any ksize
+
+    int length = width * chan;
+    int klength = ksize * ksize;
+    int klenhalf = klength / 2;
 
-            int length = ksize * ksize;
-            std::nth_element(neighbours, neighbours + length/2, neighbours + length);
+    for (int l=0; l < length; l++)
+    {
+        SRC neighbours[kmax * kmax];
 
-            out[w*chan + c] = saturate<DST>(neighbours[length/2], rintf);
+        for (int i=0; i < ksize; i++)
+        for (int j=0; j < ksize; j++)
+        {
+            neighbours[i*ksize + j] = in[i][l + (j - border)*chan];
         }
+
+        std::nth_element(neighbours, neighbours + klenhalf, neighbours + klength);
+
+        out[l] = saturate<DST>(neighbours[klenhalf], rintf);
     }
 }
 
diff --git a/modules/gapi/src/backends/fluid/gfluidimgproc_func.dispatch.cpp b/modules/gapi/src/backends/fluid/gfluidimgproc_func.dispatch.cpp
index ccebc3fe06e6..835fb8203ce9 100644
--- a/modules/gapi/src/backends/fluid/gfluidimgproc_func.dispatch.cpp
+++ b/modules/gapi/src/backends/fluid/gfluidimgproc_func.dispatch.cpp
@@ -134,6 +134,26 @@ RUN_MORPHOLOGY3X3_IMPL( float)
 
 #undef RUN_MORPHOLOGY3X3_IMPL
 
+//---------------------------
+//
+// Fluid kernels: Median blur
+//
+//---------------------------
+
+#define RUN_MEDBLUR3X3_IMPL(T)                                        \
+void run_medblur3x3_impl(T out[], const T *in[], int width, int chan) \
+{                                                                     \
+    CV_CPU_DISPATCH(run_medblur3x3_impl, (out, in, width, chan),      \
+        CV_CPU_DISPATCH_MODES_ALL);                                   \
+}
+
+RUN_MEDBLUR3X3_IMPL(uchar )
+RUN_MEDBLUR3X3_IMPL(ushort)
+RUN_MEDBLUR3X3_IMPL( short)
+RUN_MEDBLUR3X3_IMPL( float)
+
+#undef RUN_MEDBLUR3X3_IMPL
+
 } // namespace fliud
 } // namespace gapi
 } // namespace cv
diff --git a/modules/gapi/src/backends/fluid/gfluidimgproc_func.hpp b/modules/gapi/src/backends/fluid/gfluidimgproc_func.hpp
index 0fd8b65f0ca5..191ac0834449 100644
--- a/modules/gapi/src/backends/fluid/gfluidimgproc_func.hpp
+++ b/modules/gapi/src/backends/fluid/gfluidimgproc_func.hpp
@@ -99,6 +99,22 @@ RUN_MORPHOLOGY3X3_IMPL( float)
 
 #undef RUN_MORPHOLOGY3X3_IMPL
 
+//---------------------------
+//
+// Fluid kernels: Median blur
+//
+//---------------------------
+
+#define RUN_MEDBLUR3X3_IMPL(T) \
+void run_medblur3x3_impl(T out[], const T *in[], int width, int chan);
+
+RUN_MEDBLUR3X3_IMPL(uchar )
+RUN_MEDBLUR3X3_IMPL(ushort)
+RUN_MEDBLUR3X3_IMPL( short)
+RUN_MEDBLUR3X3_IMPL( float)
+
+#undef RUN_MEDBLUR3X3_IMPL
+
 } // namespace fluid
 } // namespace gapi
 } // namespace cv
diff --git a/modules/gapi/src/backends/fluid/gfluidimgproc_func.simd.hpp b/modules/gapi/src/backends/fluid/gfluidimgproc_func.simd.hpp
index 79b474ee88e5..397d3b0df035 100644
--- a/modules/gapi/src/backends/fluid/gfluidimgproc_func.simd.hpp
+++ b/modules/gapi/src/backends/fluid/gfluidimgproc_func.simd.hpp
@@ -117,6 +117,22 @@ RUN_MORPHOLOGY3X3_IMPL( float)
 
 #undef RUN_MORPHOLOGY3X3_IMPL
 
+//---------------------------
+//
+// Fluid kernels: Median blur
+//
+//---------------------------
+
+#define RUN_MEDBLUR3X3_IMPL(T) \
+void run_medblur3x3_impl(T out[], const T *in[], int width, int chan);
+
+RUN_MEDBLUR3X3_IMPL(uchar )
+RUN_MEDBLUR3X3_IMPL(ushort)
+RUN_MEDBLUR3X3_IMPL( short)
+RUN_MEDBLUR3X3_IMPL( float)
+
+#undef RUN_MEDBLUR3X3_IMPL
+
 //----------------------------------------------------------------------
 
 #ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
@@ -1580,6 +1596,190 @@ RUN_MORPHOLOGY3X3_IMPL( float)
 
 #undef RUN_MORPHOLOGY3X3_IMPL
 
+//---------------------------
+//
+// Fluid kernels: Median blur
+//
+//---------------------------
+
+// Scalar 3x3 median: for each of the width*chan output elements, takes the
+// 3x3 neighbourhood from rows in[0..2] at offsets -chan, 0, +chan and writes
+// its median to out[]. Reads chan elements before and after each input row.
+template<typename T>
+static void run_medblur3x3_reference(T out[], const T *in[], int width, int chan)
+{
+    constexpr int ksize = 3;
+    constexpr int border = (ksize - 1) / 2;
+
+    const int length = width * chan;
+    const int shift = border * chan;
+
+    for (int l=0; l < length; l++)
+    {
+        T t[3][3];
+
+        // neighbourhood 3x3
+        t[0][0] = in[0][l - shift]; t[0][1] = in[0][l]; t[0][2] = in[0][l + shift];
+        t[1][0] = in[1][l - shift]; t[1][1] = in[1][l]; t[1][2] = in[1][l + shift];
+        t[2][0] = in[2][l - shift]; t[2][1] = in[2][l]; t[2][2] = in[2][l + shift];
+
+        // sort 2 values
+        auto sort = [](T& a, T& b)
+        {
+            T u=a, v=b;
+            a = (std::min)(u, v);
+            b = (std::max)(u, v);
+        };
+
+        // horizontal: 3-elements bubble-sort per each row
+        sort(t[0][0], t[0][1]); sort(t[0][1], t[0][2]); sort(t[0][0], t[0][1]);
+        sort(t[1][0], t[1][1]); sort(t[1][1], t[1][2]); sort(t[1][0], t[1][1]);
+        sort(t[2][0], t[2][1]); sort(t[2][1], t[2][2]); sort(t[2][0], t[2][1]);
+
+        // vertical: columns bubble-sort (although partial)
+        // only t[2][0] (max of row minima), t[1][1] (median of row medians)
+        // and t[0][2] (min of row maxima) are needed, so two exchanges are skipped
+        sort(t[0][0], t[1][0]); sort(t[0][1], t[1][1]); /*sort(t[0][2], t[1][2]);*/
+        sort(t[1][0], t[2][0]); sort(t[1][1], t[2][1]);   sort(t[1][2], t[2][2]);
+        /*sort(t[0][0], t[1][0]);*/ sort(t[0][1], t[1][1]); sort(t[0][2], t[1][2]);
+
+        // diagonal: bubble-sort (in opposite order!)
+        sort(t[1][1], t[0][2]); sort(t[2][0], t[1][1]); sort(t[1][1], t[0][2]);
+
+        out[l] = t[1][1];
+    }
+}
+
+#if CV_SIMD
+// Vectorized 3x3 median using the same min/max exchange sequence as the scalar
+// version, VT::nlanes elements at a time. Requires width*chan >= VT::nlanes:
+// the tail is handled by re-processing the last nlanes elements of the row.
+template<typename VT, typename T>
+static void run_medblur3x3_simd(T out[], const T *in[], int width, int chan)
+{
+    constexpr int ksize = 3;
+    constexpr int border = (ksize - 1) / 2;
+
+    const int length = width * chan;
+    const int shift = border * chan;
+
+    for (int l=0; l < length;)
+    {
+        constexpr int nlanes = VT::nlanes;
+
+        // main part of output row
+        for (; l <= length - nlanes; l += nlanes)
+        {
+            VT t00, t01, t02, t10, t11, t12, t20, t21, t22;
+
+            // neighbourhood 3x3
+
+            t00 = vx_load(&in[0][l - shift]);
+            t01 = vx_load(&in[0][l        ]);
+            t02 = vx_load(&in[0][l + shift]);
+
+            t10 = vx_load(&in[1][l - shift]);
+            t11 = vx_load(&in[1][l        ]);
+            t12 = vx_load(&in[1][l + shift]);
+
+            t20 = vx_load(&in[2][l - shift]);
+            t21 = vx_load(&in[2][l        ]);
+            t22 = vx_load(&in[2][l + shift]);
+
+            // sort 2 values
+            auto sort = [](VT& a, VT& b)
+            {
+                VT u=a, v=b;
+                a = v_min(u, v);
+                b = v_max(u, v);
+            };
+
+            // horizontal: 3-elements bubble-sort per each row
+            sort(t00, t01); sort(t01, t02); sort(t00, t01);
+            sort(t10, t11); sort(t11, t12); sort(t10, t11);
+            sort(t20, t21); sort(t21, t22); sort(t20, t21);
+
+            // vertical: columns bubble-sort (although partial)
+            sort(t00, t10); sort(t01, t11); /*sort(t02, t12);*/
+            sort(t10, t20); sort(t11, t21);   sort(t12, t22);
+            /*sort(t00, t10);*/ sort(t01, t11); sort(t02, t12);
+
+            // diagonal: bubble-sort (in opposite order!)
+            sort(t11, t02); sort(t20, t11); sort(t11, t02);
+
+            v_store(&out[l], t11);
+        }
+
+        // tail (if any)
+        if (l < length)
+        {
+            GAPI_DbgAssert(length >= nlanes);
+            l = length - nlanes;
+        }
+    }
+}
+#endif
+
+// Selects the SIMD variant matching T when the row is at least one vector
+// long, otherwise falls back to the scalar version.
+template<typename T>
+static void run_medblur3x3_code(T out[], const T *in[], int width, int chan)
+{
+#if CV_SIMD
+    int length = width * chan;
+
+    // length variable may be unused if types do not match at 'if' statements below
+    (void) length;
+
+    if (std::is_same<T, float>::value && length >= v_float32::nlanes)
+    {
+        run_medblur3x3_simd<v_float32>(reinterpret_cast<float*>(out),
+                                       reinterpret_cast<const float**>(in),
+                                       width, chan);
+        return;
+    }
+
+    if (std::is_same<T, short>::value && length >= v_int16::nlanes)
+    {
+        run_medblur3x3_simd<v_int16>(reinterpret_cast<short*>(out),
+                                     reinterpret_cast<const short**>(in),
+                                     width, chan);
+        return;
+    }
+
+    if (std::is_same<T, ushort>::value && length >= v_uint16::nlanes)
+    {
+        run_medblur3x3_simd<v_uint16>(reinterpret_cast<ushort*>(out),
+                                      reinterpret_cast<const ushort**>(in),
+                                      width, chan);
+        return;
+    }
+
+    if (std::is_same<T, uchar>::value && length >= v_uint8::nlanes)
+    {
+        run_medblur3x3_simd<v_uint8>(reinterpret_cast<uchar*>(out),
+                                     reinterpret_cast<const uchar**>(in),
+                                     width, chan);
+        return;
+    }
+#endif
+
+    run_medblur3x3_reference(out, in, width, chan);
+}
+
+#define RUN_MEDBLUR3X3_IMPL(T)                                        \
+void run_medblur3x3_impl(T out[], const T *in[], int width, int chan) \
+{                                                                     \
+    run_medblur3x3_code(out, in, width, chan);                        \
+}
+
+RUN_MEDBLUR3X3_IMPL(uchar )
+RUN_MEDBLUR3X3_IMPL(ushort)
+RUN_MEDBLUR3X3_IMPL( short)
+RUN_MEDBLUR3X3_IMPL( float)
+
+#undef RUN_MEDBLUR3X3_IMPL
+
 //------------------------------------------------------------------------------
 
 #endif // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY