Merge pull request opencv#16223 from l-bat:lip_jppnet
alalek committed Jan 27, 2020 · 2 parents 02f8a94 + 24166ac · commit 5429b1f
Showing 9 changed files with 364 additions and 40 deletions.
3 changes: 2 additions & 1 deletion modules/dnn/include/opencv2/dnn/all_layers.hpp

@@ -250,7 +250,8 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
         std::vector<size_t> pads_begin, pads_end;
         CV_DEPRECATED_EXTERNAL Size kernel, stride, pad;
         CV_DEPRECATED_EXTERNAL int pad_l, pad_t, pad_r, pad_b;
-        bool globalPooling;
+        bool globalPooling; //!< Flag is true if at least one of the axes is global pooled.
+        std::vector<bool> isGlobalPooling;
         bool computeMaxIdx;
         String padMode;
         bool ceilMode;
4 changes: 2 additions & 2 deletions modules/dnn/include/opencv2/dnn/dnn.hpp

@@ -47,9 +47,9 @@
 #include "opencv2/core/async.hpp"

 #if !defined CV_DOXYGEN && !defined CV_STATIC_ANALYSIS && !defined CV_DNN_DONT_ADD_EXPERIMENTAL_NS
-#define CV__DNN_EXPERIMENTAL_NS_BEGIN namespace experimental_dnn_34_v15 {
+#define CV__DNN_EXPERIMENTAL_NS_BEGIN namespace experimental_dnn_34_v16 {
 #define CV__DNN_EXPERIMENTAL_NS_END }
-namespace cv { namespace dnn { namespace experimental_dnn_34_v15 { } using namespace experimental_dnn_34_v15; }}
+namespace cv { namespace dnn { namespace experimental_dnn_34_v16 { } using namespace experimental_dnn_34_v16; }}
 #else
 #define CV__DNN_EXPERIMENTAL_NS_BEGIN
 #define CV__DNN_EXPERIMENTAL_NS_END
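Note: the v15 -> v16 rename is the whole point of this hunk. cv::dnn versions its experimental ABI by namespace, so adding the isGlobalPooling field to PoolingLayer (above) requires a bump. A minimal sketch of the pattern, with a hypothetical answer() symbol standing in for the real API:

// Minimal sketch of the versioned-namespace pattern, assuming a hypothetical
// symbol `answer`; the real cv::dnn symbols are declared this way through the
// CV__DNN_EXPERIMENTAL_NS_BEGIN/END macros.
#include <cstdio>

namespace cv { namespace dnn {
namespace experimental_dnn_34_v16 {
    inline int answer() { return 42; }  // hypothetical API symbol
}
using namespace experimental_dnn_34_v16;  // re-export under cv::dnn
}}

int main()
{
    // Callers keep writing cv::dnn::..., but the mangled name carries the
    // version, so binaries built against v15 and v16 fail to link together
    // instead of misbehaving at run time.
    std::printf("%d\n", cv::dnn::answer());
    return 0;
}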
31 changes: 21 additions & 10 deletions modules/dnn/src/layers/layers_common.cpp

@@ -144,26 +144,37 @@ void getStrideAndPadding(const LayerParams &params, std::vector<size_t>& pads_be
     }
 }

-void getPoolingKernelParams(const LayerParams &params, std::vector<size_t>& kernel, bool &globalPooling,
+void getPoolingKernelParams(const LayerParams &params, std::vector<size_t>& kernel, std::vector<bool>& globalPooling,
                             std::vector<size_t>& pads_begin, std::vector<size_t>& pads_end,
                             std::vector<size_t>& strides, cv::String &padMode)
 {
-    globalPooling = params.has("global_pooling") &&
-                    params.get<bool>("global_pooling");
+    bool is_global = params.get<bool>("global_pooling", false);
+    globalPooling.resize(3);
+    globalPooling[0] = params.get<bool>("global_pooling_d", is_global);
+    globalPooling[1] = params.get<bool>("global_pooling_h", is_global);
+    globalPooling[2] = params.get<bool>("global_pooling_w", is_global);

-    if (globalPooling)
+    if (globalPooling[0] || globalPooling[1] || globalPooling[2])
     {
         util::getStrideAndPadding(params, pads_begin, pads_end, strides, padMode);
-        if(params.has("kernel_h") || params.has("kernel_w") || params.has("kernel_size"))
-        {
+        if ((globalPooling[0] && params.has("kernel_d")) ||
+            (globalPooling[1] && params.has("kernel_h")) ||
+            (globalPooling[2] && params.has("kernel_w")) ||
+            params.has("kernel_size")) {
             CV_Error(cv::Error::StsBadArg, "In global_pooling mode, kernel_size (or kernel_h and kernel_w) cannot be specified");
         }
-        for (int i = 0; i < pads_begin.size(); i++) {
-            if (pads_begin[i] != 0 || pads_end[i] != 0)
+
+        kernel.resize(3);
+        kernel[0] = params.get<int>("kernel_d", 1);
+        kernel[1] = params.get<int>("kernel_h", 1);
+        kernel[2] = params.get<int>("kernel_w", 1);
+
+        for (int i = 0, j = globalPooling.size() - pads_begin.size(); i < pads_begin.size(); i++, j++) {
+            if ((pads_begin[i] != 0 || pads_end[i] != 0) && globalPooling[j])
                 CV_Error(cv::Error::StsBadArg, "In global_pooling mode, pads must be = 0");
         }
-        for (int i = 0; i < strides.size(); i++) {
-            if (strides[i] != 1)
+        for (int i = 0, j = globalPooling.size() - strides.size(); i < strides.size(); i++, j++) {
+            if (strides[i] != 1 && globalPooling[j])
                 CV_Error(cv::Error::StsBadArg, "In global_pooling mode, strides must be = 1");
         }
     }
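A standalone sketch of the fallback rule above, with a std::map standing in for cv::dnn::LayerParams: each per-axis flag (global_pooling_d/h/w) defaults to the blanket global_pooling value, so existing models are unaffected while an importer can now pool a single axis.

#include <iostream>
#include <map>
#include <string>
#include <vector>

// Stand-in for LayerParams::get<bool>(key, defaultValue).
static bool getBool(const std::map<std::string, bool>& p,
                    const std::string& key, bool dflt)
{
    std::map<std::string, bool>::const_iterator it = p.find(key);
    return it == p.end() ? dflt : it->second;
}

int main()
{
    std::map<std::string, bool> params;
    params["global_pooling_h"] = true;  // what the TF importer sets for Mean over axis 0

    bool is_global = getBool(params, "global_pooling", false);
    std::vector<bool> globalPooling(3);
    globalPooling[0] = getBool(params, "global_pooling_d", is_global);
    globalPooling[1] = getBool(params, "global_pooling_h", is_global);
    globalPooling[2] = getBool(params, "global_pooling_w", is_global);

    // Prints 0 1 0: only the H axis is globally pooled.
    std::cout << globalPooling[0] << " " << globalPooling[1] << " "
              << globalPooling[2] << std::endl;
    return 0;
}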
2 changes: 1 addition & 1 deletion modules/dnn/src/layers/layers_common.hpp

@@ -63,7 +63,7 @@ void getConvolutionKernelParams(const LayerParams &params, std::vector<size_t>&
                                 std::vector<size_t>& pads_end, std::vector<size_t>& strides, std::vector<size_t>& dilations,
                                 cv::String &padMode, std::vector<size_t>& adjust_pads);

-void getPoolingKernelParams(const LayerParams &params, std::vector<size_t>& kernel, bool &globalPooling,
+void getPoolingKernelParams(const LayerParams &params, std::vector<size_t>& kernel, std::vector<bool>& globalPooling,
                             std::vector<size_t>& pads_begin, std::vector<size_t>& pads_end, std::vector<size_t>& strides, cv::String &padMode);

 void getConvPoolOutParams(const std::vector<int>& inp, const std::vector<size_t>& kernel,
36 changes: 24 additions & 12 deletions modules/dnn/src/layers/pooling_layer.cpp

@@ -79,6 +79,7 @@ class PoolingLayerImpl CV_FINAL : public PoolingLayer
     {
         computeMaxIdx = true;
         globalPooling = false;
+        isGlobalPooling = std::vector<bool>(3, false);
         stride = Size(1, 1);
         pad_t = pad_l = pad_b = pad_r = 0;

@@ -95,7 +96,8 @@ class PoolingLayerImpl CV_FINAL : public PoolingLayer
         else
             CV_Error(Error::StsBadArg, "Unknown pooling type \"" + pool + "\"");

-        getPoolingKernelParams(params, kernel_size, globalPooling, pads_begin, pads_end, strides, padMode);
+        getPoolingKernelParams(params, kernel_size, isGlobalPooling, pads_begin, pads_end, strides, padMode);
+        globalPooling = isGlobalPooling[0] || isGlobalPooling[1] || isGlobalPooling[2];
         if (kernel_size.size() == 2) {
             kernel = Size(kernel_size[1], kernel_size[0]);
             stride = Size(strides[1], strides[0]);

@@ -147,9 +149,14 @@ class PoolingLayerImpl CV_FINAL : public PoolingLayer
             out.push_back(outputs[0].size[i]);
         }
         if (globalPooling) {
-            kernel = Size(inp[1], inp[0]);
-            kernel_size = std::vector<size_t>(inp.begin(), inp.end());
-        }
+            std::vector<size_t> finalKernel;
+            for (int i = 0; i < inp.size(); i++) {
+                int idx = isGlobalPooling.size() - inp.size() + i;
+                finalKernel.push_back(isGlobalPooling[idx] ? inp[i] : kernel_size[idx]);
+            }
+            kernel_size = finalKernel;
+            kernel = Size(kernel_size[1], kernel_size[0]);
+        }

         getConvPoolPaddings(inp, kernel_size, strides, padMode, pads_begin, pads_end);
         if (pads_begin.size() == 2) {
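A worked example of the kernel fix-up above, with illustrative sizes: the idx arithmetic right-aligns the spatial shape (2 entries in the 2D case) against the 3-entry {d, h, w} flag vector, so a globally pooled axis takes the input extent while the rest keep their configured kernel.

#include <cstdio>
#include <vector>

int main()
{
    // Flags as produced by getPoolingKernelParams for global_pooling_h=true,
    // kernel_w=1 (illustrative 2D case).
    bool flags[3] = { false, true, false };            // {d, h, w}
    std::vector<bool> isGlobalPooling(flags, flags + 3);
    std::vector<size_t> kernel_size(3, 1);             // kernel_d = kernel_h = kernel_w = 1
    std::vector<size_t> inp;                           // spatial input dims: H=7, W=9
    inp.push_back(7);
    inp.push_back(9);

    std::vector<size_t> finalKernel;
    for (size_t i = 0; i < inp.size(); i++)
    {
        size_t idx = isGlobalPooling.size() - inp.size() + i;  // align trailing axes
        finalKernel.push_back(isGlobalPooling[idx] ? inp[i] : kernel_size[idx]);
    }
    std::printf("%dx%d\n", (int)finalKernel[0], (int)finalKernel[1]);  // 7x1
    return 0;
}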
@@ -995,20 +1002,25 @@ virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inp
         std::vector<int> inpShape(inputs[0].begin() + 2, inputs[0].end());
         std::vector<int> outShape(inputs[0].begin(), inputs[0].begin() + 2);

-        if (globalPooling)
-        {
-            outShape.push_back(1);
-            outShape.push_back(1);
+        std::vector<size_t> local_kernel;
+        if (globalPooling) {
+            for (int i = 0; i < inpShape.size(); i++) {
+                int idx = isGlobalPooling.size() - inpShape.size() + i;
+                local_kernel.push_back(isGlobalPooling[idx] ? inpShape[i] : kernel_size[idx]);
+            }
+        } else {
+            local_kernel = kernel_size;
         }
-        else if (type == ROI || type == PSROI)
+
+        if (type == ROI || type == PSROI)
         {
             outShape.push_back(pooledSize.height);
             outShape.push_back(pooledSize.width);
         }
         else if (padMode.empty())
         {
-            for (int i = 0; i < kernel_size.size(); i++) {
-                float dst = (float)(inpShape[i] + pads_begin[i] + pads_end[i] - kernel_size[i]) / strides[i];
+            for (int i = 0; i < local_kernel.size(); i++) {
+                float dst = (float)(inpShape[i] + pads_begin[i] + pads_end[i] - local_kernel[i]) / strides[i];
                 outShape.push_back(1 + (ceilMode ? ceil(dst) : floor(dst)));
             }

@@ -1023,7 +1035,7 @@ virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inp
         }
         else
         {
-            getConvPoolOutParams(inpShape, kernel_size, strides, padMode, std::vector<size_t>(kernel_size.size(), 1), outShape);
+            getConvPoolOutParams(inpShape, local_kernel, strides, padMode, std::vector<size_t>(local_kernel.size(), 1), outShape);
         }
         if (type == ROI)
         {
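The padMode-empty branch reduces to one formula per axis, out = 1 + floor((in + pad_begin + pad_end - kernel) / stride), with ceil instead of floor when ceilMode is set. A quick check with illustrative numbers, including the global case that replaces the old hard-coded outShape.push_back(1):

#include <cmath>
#include <cstdio>

int main()
{
    // Ordinary pooling: 5-wide input, 3-wide kernel, stride 2, no padding.
    int inp = 5, pad_begin = 0, pad_end = 0, kernel = 3, stride = 2;
    float dst = (float)(inp + pad_begin + pad_end - kernel) / stride;  // 1.0
    std::printf("out = %d\n", 1 + (int)std::floor(dst));               // out = 2

    // Globally pooled axis: local_kernel == inp, so dst == 0 and out == 1,
    // which is why the hard-coded 1x1 output shape is no longer needed.
    kernel = inp;
    dst = (float)(inp + pad_begin + pad_end - kernel) / stride;
    std::printf("out = %d\n", 1 + (int)std::floor(dst));               // out = 1
    return 0;
}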
3 changes: 2 additions & 1 deletion modules/dnn/src/layers/slice_layer.cpp

@@ -114,7 +114,8 @@ class SliceLayerImpl : public SliceLayer
     virtual bool supportBackend(int backendId) CV_OVERRIDE
     {
         return backendId == DNN_BACKEND_OPENCV ||
-               ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) &&
+               (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && sliceRanges.size() == 1) ||
+               (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 &&
 #ifdef HAVE_INF_ENGINE
                INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1) &&
 #endif
140 changes: 127 additions & 13 deletions modules/dnn/src/tensorflow/tf_importer.cpp

@@ -1936,37 +1936,151 @@ void TFImporter::populateNet(Net dstNet)
         }
         else if (type == "Mean")
         {
+            // Computes the mean of elements across dimensions of a tensor.
+            // If keepdims is false (default) reduces input_tensor along the dimensions given in axis,
+            // else the reduced dimensions are retained with length 1.
+            // if indices = [1, 2] in NHWC layout we use global pooling: NxCxHxW --Pooling--> NxCx1x1
+            // if keepdims is false we use Flatten after Pooling: out_shape = NxC
+            // if indices = [0] we use a global pooling by indices.
+            // To return correct shape, we use Reshape after Pooling. To determine input shape use Slice for input,
+            // if keepdims is false we use Flatten after Slice.
+            // Example: input_shape = NxCxHxW
+            // determine out shape: NxCxHxW --Slice--> 1xCxHxW
+            //                      out_shape = 1xCxHxW if keepDims else (1xCxHxW --Flatten--> CxHxW)
+            // global pool: NxCxHxW --Flatten--> Nx(C*H*W) --Reshape--> 1x1xNx(C*H*W) --Pooling--> 1x1x1x(C*H*W) --Reshape--> out_shape
+
             Mat indices = getTensorContent(getConstBlob(layer, value_id, 1));
             CV_Assert(indices.type() == CV_32SC1);

-            if (indices.total() != 2 || indices.at<int>(0) != 1 || indices.at<int>(1) != 2)
-                CV_Error(Error::StsNotImplemented, "Unsupported mode of reduce_mean operation.");
-
-            layerParams.set("pool", "ave");
-            layerParams.set("global_pooling", true);
-
-            int id = dstNet.addLayer(name, "Pooling", layerParams);
-            layer_id[name] = id;
-
-            connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
-
             // There are two attributes, "keepdims" and a deprecated "keep_dims".
             bool keepDims = false;
             if (hasLayerAttr(layer, "keepdims"))
                 keepDims = getLayerAttr(layer, "keepdims").b();
             else if (hasLayerAttr(layer, "keep_dims"))
                 keepDims = getLayerAttr(layer, "keep_dims").b();

-            if (!keepDims)
+            if (indices.total() == 1 && indices.at<int>(0) == 0)
             {
                 LayerParams flattenLp;
                 std::string flattenName = name + "/flatten";
                 CV_Assert(layer_id.find(flattenName) == layer_id.end());
                 int flattenId = dstNet.addLayer(flattenName, "Flatten", flattenLp);
                 layer_id[flattenName] = flattenId;
-                connect(layer_id, dstNet, Pin(name), flattenId, 0);
+                connect(layer_id, dstNet, parsePin(layer.input(0)), flattenId, 0);
+
+                LayerParams reshapeLp;
+                std::string reshapeName = name + "/reshape";
+                CV_Assert(layer_id.find(reshapeName) == layer_id.end());
+                reshapeLp.set("axis", 0);
+                reshapeLp.set("num_axes", 1);
+                int newShape[] = {1, 1, -1};
+                reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], 3));
+
+                int reshapeId = dstNet.addLayer(reshapeName, "Reshape", reshapeLp);
+                layer_id[reshapeName] = reshapeId;
+                connect(layer_id, dstNet, Pin(flattenName), reshapeId, 0);
+
+                LayerParams avgLp;
+                std::string avgName = name + "/avg";
+                CV_Assert(layer_id.find(avgName) == layer_id.end());
+                avgLp.set("pool", "ave");
+                // pooling kernel H x 1
+                avgLp.set("global_pooling_h", true);
+                avgLp.set("kernel_w", 1);
+                int avgId = dstNet.addLayer(avgName, "Pooling", avgLp);
+                layer_id[avgName] = avgId;
+                connect(layer_id, dstNet, Pin(reshapeName), avgId, 0);
+
+                LayerParams sliceLp;
+                std::string layerShapeName = name + "/slice";
+                CV_Assert(layer_id.find(layerShapeName) == layer_id.end());
+                sliceLp.set("axis", 0);
+                int begin[] = {0};
+                int size[] = {1};
+                sliceLp.set("begin", DictValue::arrayInt(&begin[0], 1));
+                sliceLp.set("size", DictValue::arrayInt(&size[0], 1));
+                int sliceId = dstNet.addLayer(layerShapeName, "Slice", sliceLp);
+                layer_id[layerShapeName] = sliceId;
+                connect(layer_id, dstNet, Pin(layer.input(0)), sliceId, 0);
+
+                if (!keepDims)
+                {
+                    LayerParams squeezeLp;
+                    std::string squeezeName = name + "/squeeze";
+                    CV_Assert(layer_id.find(squeezeName) == layer_id.end());
+                    squeezeLp.set("axis", 0);
+                    squeezeLp.set("end_axis", 1);
+                    int squeezeId = dstNet.addLayer(squeezeName, "Flatten", squeezeLp);
+                    layer_id[squeezeName] = squeezeId;
+                    connect(layer_id, dstNet, Pin(layerShapeName), squeezeId, 0);
+                    layerShapeName = squeezeName;
+                }
+
+                int id = dstNet.addLayer(name, "Reshape", layerParams);
+                layer_id[name] = id;
+                connect(layer_id, dstNet, Pin(avgName), id, 0);
+                connect(layer_id, dstNet, Pin(layerShapeName), id, 1);
+            } else {
+                if (indices.total() != 2 || indices.at<int>(0) != 1 || indices.at<int>(1) != 2)
+                    CV_Error(Error::StsNotImplemented, "Unsupported mode of reduce_mean operation.");
+
+                layerParams.set("pool", "ave");
+                layerParams.set("global_pooling", true);
+                int id = dstNet.addLayer(name, "Pooling", layerParams);
+                layer_id[name] = id;
+                connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
+
+                if (!keepDims)
+                {
+                    LayerParams flattenLp;
+                    std::string flattenName = name + "/flatten";
+                    CV_Assert(layer_id.find(flattenName) == layer_id.end());
+                    int flattenId = dstNet.addLayer(flattenName, "Flatten", flattenLp);
+                    layer_id[flattenName] = flattenId;
+                    connect(layer_id, dstNet, Pin(name), flattenId, 0);
+                }
             }
         }
+        else if (type == "Pack")
+        {
+            // op: tf.stack(list of tensors, axis=0)
+            // Join a list of inputs along a new axis.
+            // The "axis" specifies the index of the new axis in the dimensions of the output.
+            // Example: given a list with "N" tensors of shape (C, H, W):
+            //   if axis == 0 then the output tensor will have the shape (N, C, H, W),
+            //   if axis == 1 then the output tensor will have the shape (C, N, H, W).
+            CV_Assert(hasLayerAttr(layer, "axis"));
+            int dim = (int)getLayerAttr(layer, "axis").i();
+            if (dim != 0)
+                CV_Error(Error::StsNotImplemented, "Unsupported mode of pack operation.");
+
+            CV_Assert(hasLayerAttr(layer, "N"));
+            int num = (int)getLayerAttr(layer, "N").i();
+            CV_Assert(layer.input_size() == num);
+            std::string base_name = name + "/reshape_";
+            std::vector<int> reshape_ids;
+            for (int i = 0; i < num; i++) {
+                std::ostringstream ss;
+                ss << i;
+                std::string reshape_name = base_name + ss.str();
+                LayerParams reshapeLP;
+                reshapeLP.set("axis", dim);
+                reshapeLP.set("num_axes", 1);
+                int outShape[] = {1, -1};
+                reshapeLP.set("dim", DictValue::arrayInt(&outShape[0], 2));
+                int id = dstNet.addLayer(reshape_name, "Reshape", reshapeLP);
+                layer_id[reshape_name] = id;
+                reshape_ids.push_back(id);
+                connect(layer_id, dstNet, parsePin(layer.input(i)), id, 0);
+            }
+
+            layerParams.set("axis", dim);
+            int id = dstNet.addLayer(name, "Concat", layerParams);
+            layer_id[name] = id;
+
+            for (int li = 0; li < num; li++)
+                dstNet.connect(reshape_ids[li], 0, id, li);
+        }
         else if (type == "ClipByValue")
         {
             // op: "ClipByValue"
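The comment at the top of the Mean handler is dense, so here is a hedged restatement in plain C++ (arrays instead of cv::dnn layers, illustrative 2x3 input): reducing over axis 0 with keepdims=false is a per-column mean across the leading axis, which is what the Flatten -> Reshape(1,1,-1) -> ave-Pooling(global_pooling_h, kernel_w=1) chain computes, while the Slice branch only recovers the output shape for the final two-input Reshape.

// Hedged reference for what the Mean-over-axis-0 subgraph computes, using
// plain arrays instead of cv::dnn layers. Input shape 2x3, keepdims == false,
// so the output shape is 3.
#include <cstdio>

int main()
{
    const int N = 2, C = 3;
    float in[N][C] = { {1, 2, 3}, {4, 5, 6} };

    // Flatten + Reshape only rearrange the N*C values into a 1x1xNxC blob;
    // average pooling with a global H axis (H == N) and kernel_w == 1 then
    // averages over the leading axis:
    float out[C];
    for (int c = 0; c < C; c++)
    {
        out[c] = 0.f;
        for (int n = 0; n < N; n++)
            out[c] += in[n][c];
        out[c] /= N;
    }
    std::printf("%g %g %g\n", out[0], out[1], out[2]);  // 2.5 3.5 4.5
    return 0;
}

Similarly for Pack: each input gets a leading unit axis via Reshape(dim = {1, -1}) and the results are concatenated along axis 0, matching tf.stack. A small shape check in the same stand-in style:

// Hedged sketch of the Pack (tf.stack, axis == 0) lowering above; nested
// vectors stand in for blobs, values are illustrative.
#include <cstdio>
#include <vector>

int main()
{
    std::vector<float> a, b;  // two inputs, each of shape {3}
    for (int i = 0; i < 3; i++) { a.push_back((float)i); b.push_back(10.f + i); }

    // Reshape {1, -1}: shape {3} -> {1, 3}; Concat on axis 0: -> {2, 3}.
    std::vector<std::vector<float> > stacked;
    stacked.push_back(a);
    stacked.push_back(b);

    std::printf("shape {%d, %d}, stacked[1][2] = %g\n",
                (int)stacked.size(), (int)stacked[0].size(), stacked[1][2]);  // {2, 3}, 12
    return 0;
}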
7 changes: 7 additions & 0 deletions modules/dnn/test/test_tf_importer.cpp

@@ -121,6 +121,13 @@ class Test_TensorFlow_layers : public DNNTestLayer
     }
 };

+TEST_P(Test_TensorFlow_layers, reduce_mean)
+{
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+    runTensorFlowNet("global_pool_by_axis");
+}
+
 TEST_P(Test_TensorFlow_layers, conv)
 {
     runTensorFlowNet("single_conv");