[GNA] Fix handling of 0-3-2-1 transpose in GNA plugin (openvinotoolkit#12587)

* [GNA] Fix handling of 0-3-2-1 transpose in GNA plugin

* Remove unnecessary checks

* Review comments

* Make GNATransposeFusable string
nkogteva authored Oct 3, 2022
1 parent 45052f1 commit 782615f
Showing 11 changed files with 186 additions and 20 deletions.
8 changes: 4 additions & 4 deletions src/plugins/intel_gna/gna_graph_patterns.hpp
@@ -9,6 +9,7 @@
#include "gna_plugin_log.hpp"
#include "gna_upstream_iterator.hpp"
#include "layers/gna_layer_info.hpp"
#include "ops/util/util.hpp"

namespace GNAPluginNS {

@@ -218,8 +219,7 @@ inline InferenceEngine::CNNLayerPtr FindPermutationAfterConvolutionInKaldiModel(
}

// Check if the found layer is NCHW to NWHC permute
if (!LayerInfo(next).isPermute() || next->input()->getLayout() != InferenceEngine::Layout::NCHW ||
next->GetParamAsInts("order") != std::vector<int>{0, 3, 2, 1}) {
if (!LayerInfo(next).isPermuteFusable() || next->input()->getLayout() != InferenceEngine::Layout::NCHW) {
return nullptr;
}

@@ -240,8 +240,8 @@ inline bool MustBeConvertedFromNCHWToNHWC(const std::vector<InferenceEngine::CNN
// If a convolution has only 1-dimension input and output we should skip it
auto in_dims = l->insData.begin()->lock()->getDims();
auto out_dims = l->outData.front()->getDims();
if (std::count_if(std::begin(in_dims), std::end(in_dims), [](size_t dim) { return dim != 1; }) <= 1 &&
std::count_if(std::begin(out_dims), std::end(out_dims), [](size_t dim) { return dim != 1; }) <= 1) {

if (ov::intel_gna::ngraph_util::is_one_dim_shapes(in_dims, out_dims)) {
continue;
}

2 changes: 2 additions & 0 deletions src/plugins/intel_gna/gna_plugin.cpp
@@ -92,6 +92,7 @@
#include "transformations/unfuse_reshape_and_transpose.hpp"
#include "transformations/insert_copy_layer.hpp"
#include "transformations/split_eltwise.hpp"
#include "transformations/markup_fusable_transpose.hpp"

#include <ngraph/opsets/opset7.hpp>

@@ -723,6 +724,7 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
manager.register_pass<ngraph::pass::ConvertOpSet3ToOpSet2>();
manager.register_pass<ngraph::pass::ConvertOpSet2ToOpSet1>();
manager.register_pass<ngraph::pass::ConvertOpSet1ToLegacy>();
manager.register_pass<ov::intel_gna::pass::MarkupFusableTranspose>();
manager.register_pass<ov::intel_gna::pass::RemoveExtraReshapes>();
/*
Put BroadcastAddMultiplyConst here after ConvertOpSet..() transformations since there are conficts with them.
11 changes: 7 additions & 4 deletions src/plugins/intel_gna/layers/gna_layer_info.hpp
@@ -19,6 +19,7 @@
#include "ops/pwl.hpp"
#include "layers/gna_crop_layer.hpp"
#include "backend/gna_limitations.hpp"
#include "transformations/rt_info/gna_transpose_fusable.hpp"

namespace GNAPluginNS {

@@ -285,6 +286,9 @@ class LayerInfo {
bool isPermute() const noexcept {
return isOfType("permute");
}
bool isPermuteFusable() const noexcept {
return isPermute() && (layer->params.count(ov::intel_gna::rt_info::GNATransposeFusable::get_type_info_static()) > 0);
}
bool isPermuteViaReshape() const {
if (!isOfType("reshape")) return false;

@@ -303,15 +307,14 @@
}
return true;
}

// @brief this is not only mathematically trivial, it also has some WA for the Kaldi case
bool isTrivialPermute() const {
if (!isPermute()) return false;

auto layerOrder = layer->GetParamAsInts("order");
if (isPermuteFusable()) return true;

if (layerOrder == std::vector<int>({ 0, 3, 2, 1 })) {
return true; // supported case
}
auto layerOrder = layer->GetParamAsInts("order");
if (layer->insData.empty()) {
return false; // unsupported case
}
28 changes: 28 additions & 0 deletions src/plugins/intel_gna/ops/util/util.hpp
@@ -9,6 +9,7 @@
#include "backend/gna_limitations.hpp"
#include "layers/gna_permute.hpp"
#include <transformations/utils/utils.hpp>
#include <transformations/rt_info/gna_transpose_fusable.hpp>
#include <ngraph/opsets/opset8.hpp>
#include <vector>
#include <memory>
@@ -91,6 +92,9 @@ static bool is_trivial_transpose(std::shared_ptr<ngraph::Node> node) {
if (transpose->get_input_size() == 0)
return false; // unsupported case

if (ov::intel_gna::rt_info::is_transpose_fusable(transpose))
return true;

auto transpose_const = std::dynamic_pointer_cast<ngraph::op::Constant>(transpose->input_value(1).get_node_shared_ptr());
if (!transpose_const) return false;

@@ -111,6 +115,30 @@ inline std::shared_ptr<ov::Node> get_prev_node_skipping_certain(const std::share
return current_node;
}

inline std::shared_ptr<ov::Node> get_next_node_skipping_certain(const std::shared_ptr<ngraph::Node>& node,
const std::function<bool(std::shared_ptr<ngraph::Node>)>& skip) {
auto current_node = node;
while (skip(current_node)) {
current_node = current_node->output(0).get_target_inputs().begin()->get_node()->shared_from_this();
}
return current_node;
}

inline bool is_gna_non_functional_node(const std::shared_ptr<ngraph::Node>& node) {
return std::dynamic_pointer_cast<ngraph::opset8::Reshape>(node) ||
std::dynamic_pointer_cast<ngraph::opset8::Squeeze>(node) ||
std::dynamic_pointer_cast<ngraph::opset8::Unsqueeze>(node) ||
is_trivial_transpose(node);
}

inline bool is_one_dim_shape(const ov::Shape& dims) {
return std::count_if(std::begin(dims), std::end(dims), [](size_t dim) { return dim != 1; }) <= 1;
}

inline bool is_one_dim_shapes(const ov::Shape& in_dims, const ov::Shape& out_dims) {
return is_one_dim_shape(in_dims) && is_one_dim_shape(out_dims);
}

} // namespace ngraph_util
} // namespace intel_gna
} // namespace ov
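
As a rough illustration of the new helpers above, the sketch below shows the intended call pattern: stepping forward from a node while it is a GNA non-functional layer, the same combination MarkupFusableTranspose relies on further down. The wrapper name next_functional_node is hypothetical.

#include <memory>

#include "ops/util/util.hpp"

// Sketch only: starting from `start`, follow the first consumer of output 0
// while the current node is a GNA non-functional layer (Reshape, Squeeze,
// Unsqueeze, or a trivial Transpose) and return the first node that is not.
inline std::shared_ptr<ov::Node> next_functional_node(const std::shared_ptr<ngraph::Node>& start) {
    return ov::intel_gna::ngraph_util::get_next_node_skipping_certain(
        start, ov::intel_gna::ngraph_util::is_gna_non_functional_node);
}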
7 changes: 0 additions & 7 deletions src/plugins/intel_gna/transformations/insert_copy_layer.cpp
@@ -44,13 +44,6 @@ namespace {

output_op->input(index).replace_source_output(copy_op);
}

bool is_gna_non_functional_node(std::shared_ptr<ngraph::Node> node) {
return std::dynamic_pointer_cast<ngraph::opset8::Reshape>(node) ||
std::dynamic_pointer_cast<ngraph::opset8::Squeeze>(node) ||
std::dynamic_pointer_cast<ngraph::opset8::Unsqueeze>(node) ||
is_trivial_transpose(node);
}
}// namespace

InsertCopyBeforeAssignLayer::InsertCopyBeforeAssignLayer() {
55 changes: 55 additions & 0 deletions src/plugins/intel_gna/transformations/markup_fusable_transpose.cpp
@@ -0,0 +1,55 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <ngraph/opsets/opset9.hpp>
#include <legacy/ngraph_ops/convolution_ie.hpp>
#include <legacy/ngraph_ops/fully_connected.hpp>
#include <legacy/ngraph_ops/scaleshift.hpp>
#include <openvino/cc/ngraph/itt.hpp>
#include <ngraph/rt_info.hpp>
#include <ops/util/util.hpp>
#include <transformations/utils/transformation_helper.hpp>
#include <transformations/rt_info/gna_transpose_fusable.hpp>

#include "markup_fusable_transpose.hpp"

using namespace ov::intel_gna::pass;
using namespace ov::intel_gna::pass::helper;
using namespace ov::intel_gna::ngraph_util;
using namespace ov::intel_gna::rt_info;

namespace {
bool is_skip_operation(const std::shared_ptr<ngraph::Node>& node) {
return (!std::dynamic_pointer_cast<ngraph::opset9::Transpose>(node) &&
!std::dynamic_pointer_cast<ngraph::op::FullyConnected>(node) &&
!std::dynamic_pointer_cast<ngraph::op::ScaleShiftIE>(node) &&
!std::dynamic_pointer_cast<ngraph::opset9::Result>(node) &&
(!is_gna_non_functional_node(node) ||
node->output(0).get_shape().size() == node->input(0).get_shape().size()));
}
} // namespace

bool MarkupFusableTranspose::run_on_model(const std::shared_ptr<ngraph::Function>& f) {
RUN_ON_FUNCTION_SCOPE(MarkupFusableTranspose);

for (auto& node : f->get_ordered_ops()) {
if (!std::dynamic_pointer_cast<ngraph::opset9::Convolution>(node) &&
!std::dynamic_pointer_cast<ngraph::op::ConvolutionIE>(node)) {
continue;
}
auto in_dims = node->input(0).get_shape();
auto out_dims = node->output(0).get_shape();

if (is_one_dim_shapes(in_dims, out_dims)) {
continue;
}

auto current_node = get_next_node_skipping_certain(node, is_skip_operation);
if (!TransposeOrderMatches(std::dynamic_pointer_cast<ngraph::opset9::Transpose>(current_node), {0, 3, 2, 1})) {
continue;
}
add_transpose_fusable(current_node);
}

return false;
}
27 changes: 27 additions & 0 deletions src/plugins/intel_gna/transformations/markup_fusable_transpose.hpp
@@ -0,0 +1,27 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <ngraph/pass/graph_rewrite.hpp>

namespace ov {
namespace intel_gna {
namespace pass {

/**
* @brief Markup fusable transpose
* This transformation supports old IRs for Kaldi models with a specific
* 0-3-2-1 transpose after Convolution and marks it up for special handling
* in the compiler, for backward compatibility purposes
*/
class MarkupFusableTranspose : public ngraph::pass::FunctionPass {
public:
OPENVINO_RTTI("MarkupFusableTranspose", "0");
bool run_on_model(const std::shared_ptr<ngraph::Function>& f) override;
};

} // namespace pass
} // namespace intel_gna
} // namespace ov
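
For orientation, here is a minimal, self-contained sketch of running this pass on an ngraph function, mirroring the register_pass call added to gna_plugin.cpp above; the free function markup_fusable_transposes is hypothetical.

#include <memory>

#include <ngraph/function.hpp>
#include <ngraph/pass/manager.hpp>

#include "transformations/markup_fusable_transpose.hpp"

// Sketch only: register and run the markup pass on an already-built function.
// In the plugin this happens inside GNAPlugin::LoadNetwork (see gna_plugin.cpp above).
void markup_fusable_transposes(const std::shared_ptr<ngraph::Function>& model) {
    ngraph::pass::Manager manager;
    manager.register_pass<ov::intel_gna::pass::MarkupFusableTranspose>();
    manager.run_passes(model);  // marks 0-3-2-1 transposes that follow a convolution
}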
@@ -0,0 +1,20 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "gna_transpose_fusable.hpp"

void ov::intel_gna::rt_info::add_transpose_fusable(const std::shared_ptr<Node>& node) {
auto& rt_info = node->get_rt_info();
rt_info[GNATransposeFusable::get_type_info_static()] = std::string();
}

void ov::intel_gna::rt_info::remove_transpose_fusable(const std::shared_ptr<Node>& node) {
auto& rt_info = node->get_rt_info();
rt_info.erase(GNATransposeFusable::get_type_info_static());
}

bool ov::intel_gna::rt_info::is_transpose_fusable(const std::shared_ptr<Node>& node) {
const auto& rt_info = node->get_rt_info();
return rt_info.count(GNATransposeFusable::get_type_info_static());
}
@@ -0,0 +1,38 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "openvino/core/node.hpp"
#include "openvino/core/runtime_attribute.hpp"
#include "transformations_visibility.hpp"

namespace ov {
namespace intel_gna {
namespace rt_info {

void add_transpose_fusable(const std::shared_ptr<Node>& node);

void remove_transpose_fusable(const std::shared_ptr<Node>& node);

bool is_transpose_fusable(const std::shared_ptr<Node>& node);

/**
* @ingroup ie_runtime_attr_api
* @brief GNATransposeFusable class represents runtime info attribute that marks operation
* as fusable with functional layer
*/
class GNATransposeFusable : public RuntimeAttribute {
public:
OPENVINO_RTTI("gna_transpose_fusable", "0");

GNATransposeFusable() = default;

bool is_copyable() const override {
return false;
}
};
} // namespace rt_info
} // namespace intel_gna
} // namespace ov
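
A small usage sketch for the attribute helpers declared above: marking a node, querying the flag, and clearing it again. The node argument and the surrounding function are hypothetical; in the plugin the marking is performed by MarkupFusableTranspose.

#include <memory>

#include "openvino/core/node.hpp"
#include "transformations/rt_info/gna_transpose_fusable.hpp"

// Sketch only: attach, test, and remove the GNATransposeFusable marker on a node.
void toggle_transpose_fusable(const std::shared_ptr<ov::Node>& example_node) {
    ov::intel_gna::rt_info::add_transpose_fusable(example_node);        // stores the attribute in the node's rt_info
    if (ov::intel_gna::rt_info::is_transpose_fusable(example_node)) {   // true right after marking
        ov::intel_gna::rt_info::remove_transpose_fusable(example_node); // erases the attribute again
    }
}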
@@ -18,10 +18,10 @@ namespace {
};

std::vector<std::vector<size_t>> inputSizes = {
{ 4, 2, 64, 6 },
{ 4, 16, 4, 128},
{ 2, 10, 16, 64},
{ 2, 32, 64, 2},
{ 1, 128, 1, 8 },
{ 1, 4, 1, 128 },
{ 1, 16, 1, 128 },
{ 1, 128, 1, 2 },
};

std::vector<size_t> split_axes = { 1 }; // only channels split is currently supported by gna for 4d inputs
@@ -42,7 +42,7 @@ namespace SubgraphTestsDefinitions {
auto permute_0 = std::make_shared<ngraph::opset1::Transpose>(split->output(0), permute_in_params);
auto permute_1 = std::make_shared<ngraph::opset1::Transpose>(split->output(1), permute_in_params);

auto concat = std::make_shared<ngraph::opset1::Concat>(ngraph::OutputVector{ split->output(0), split->output(1) }, concatAxis);
auto concat = std::make_shared<ngraph::opset1::Concat>(ngraph::OutputVector{ permute_0, permute_1 }, concatAxis);
auto act = ngraph::builder::makeActivation(concat, ngPrc, ngraph::helpers::ActivationTypes::Relu);
function = std::make_shared<ngraph::Function>(act, input, "split_trivial_permute_concat");
}
