[GNA] Fix handling of 0-3-2-1 transpose in GNA plugin (openvinotoolkit#12587)

* [GNA] Fix handling of 0-3-2-1 transpose in GNA plugin

* Remove unnecessary checks

* Review comments

* Make GNATransposeFusable string
nkogteva authored Oct 3, 2022
1 parent 45052f1 commit 782615f
Showing 11 changed files with 186 additions and 20 deletions.
8 changes: 4 additions & 4 deletions src/plugins/intel_gna/gna_graph_patterns.hpp
@@ -9,6 +9,7 @@
#include "gna_plugin_log.hpp"
#include "gna_upstream_iterator.hpp"
#include "layers/gna_layer_info.hpp"
#include "ops/util/util.hpp"

namespace GNAPluginNS {

@@ -218,8 +219,7 @@ inline InferenceEngine::CNNLayerPtr FindPermutationAfterConvolutionInKaldiModel(
}

// Check if the found layer is NCHW to NWHC permute
if (!LayerInfo(next).isPermute() || next->input()->getLayout() != InferenceEngine::Layout::NCHW ||
next->GetParamAsInts("order") != std::vector<int>{0, 3, 2, 1}) {
if (!LayerInfo(next).isPermuteFusable() || next->input()->getLayout() != InferenceEngine::Layout::NCHW) {
return nullptr;
}

@@ -240,8 +240,8 @@ inline bool MustBeConvertedFromNCHWToNHWC(const std::vector<InferenceEngine::CNN
// If a convolution has only 1-dimension input and output we should skip it
auto in_dims = l->insData.begin()->lock()->getDims();
auto out_dims = l->outData.front()->getDims();
if (std::count_if(std::begin(in_dims), std::end(in_dims), [](size_t dim) { return dim != 1; }) <= 1 &&
std::count_if(std::begin(out_dims), std::end(out_dims), [](size_t dim) { return dim != 1; }) <= 1) {

if (ov::intel_gna::ngraph_util::is_one_dim_shapes(in_dims, out_dims)) {
continue;
}

2 changes: 2 additions & 0 deletions src/plugins/intel_gna/gna_plugin.cpp
@@ -92,6 +92,7 @@
#include "transformations/unfuse_reshape_and_transpose.hpp"
#include "transformations/insert_copy_layer.hpp"
#include "transformations/split_eltwise.hpp"
#include "transformations/markup_fusable_transpose.hpp"

#include <ngraph/opsets/opset7.hpp>

@@ -723,6 +724,7 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
manager.register_pass<ngraph::pass::ConvertOpSet3ToOpSet2>();
manager.register_pass<ngraph::pass::ConvertOpSet2ToOpSet1>();
manager.register_pass<ngraph::pass::ConvertOpSet1ToLegacy>();
manager.register_pass<ov::intel_gna::pass::MarkupFusableTranspose>();
manager.register_pass<ov::intel_gna::pass::RemoveExtraReshapes>();
/*
Put BroadcastAddMultiplyConst here after ConvertOpSet..() transformations since there are conficts with them.
11 changes: 7 additions & 4 deletions src/plugins/intel_gna/layers/gna_layer_info.hpp
@@ -19,6 +19,7 @@
#include "ops/pwl.hpp"
#include "layers/gna_crop_layer.hpp"
#include "backend/gna_limitations.hpp"
#include "transformations/rt_info/gna_transpose_fusable.hpp"

namespace GNAPluginNS {

@@ -285,6 +286,9 @@ class LayerInfo {
bool isPermute() const noexcept {
return isOfType("permute");
}
bool isPermuteFusable() const noexcept {
return isPermute() && (layer->params.count(ov::intel_gna::rt_info::GNATransposeFusable::get_type_info_static()) > 0);
}
bool isPermuteViaReshape() const {
if (!isOfType("reshape")) return false;

@@ -303,15 +307,14 @@
}
return true;
}

// @brief this is not only mathematically trivial, it also has some WA for the Kaldi case
bool isTrivialPermute() const {
if (!isPermute()) return false;

auto layerOrder = layer->GetParamAsInts("order");
if (isPermuteFusable()) return true;

if (layerOrder == std::vector<int>({ 0, 3, 2, 1 })) {
return true; // supported case
}
auto layerOrder = layer->GetParamAsInts("order");
if (layer->insData.empty()) {
return false; // unsupported case
}
28 changes: 28 additions & 0 deletions src/plugins/intel_gna/ops/util/util.hpp
@@ -9,6 +9,7 @@
#include "backend/gna_limitations.hpp"
#include "layers/gna_permute.hpp"
#include <transformations/utils/utils.hpp>
#include <transformations/rt_info/gna_transpose_fusable.hpp>
#include <ngraph/opsets/opset8.hpp>
#include <vector>
#include <memory>
@@ -91,6 +92,9 @@ static bool is_trivial_transpose(std::shared_ptr<ngraph::Node> node) {
if (transpose->get_input_size() == 0)
return false; // unsupported case

if (ov::intel_gna::rt_info::is_transpose_fusable(transpose))
return true;

auto transpose_const = std::dynamic_pointer_cast<ngraph::op::Constant>(transpose->input_value(1).get_node_shared_ptr());
if (!transpose_const) return false;

@@ -111,6 +115,30 @@ inline std::shared_ptr<ov::Node> get_prev_node_skipping_certain(const std::share
return current_node;
}

inline std::shared_ptr<ov::Node> get_next_node_skipping_certain(const std::shared_ptr<ngraph::Node>& node,
const std::function<bool(std::shared_ptr<ngraph::Node>)>& skip) {
auto current_node = node;
while (skip(current_node)) {
current_node = current_node->output(0).get_target_inputs().begin()->get_node()->shared_from_this();
}
return current_node;
}

inline bool is_gna_non_functional_node(const std::shared_ptr<ngraph::Node>& node) {
return std::dynamic_pointer_cast<ngraph::opset8::Reshape>(node) ||
std::dynamic_pointer_cast<ngraph::opset8::Squeeze>(node) ||
std::dynamic_pointer_cast<ngraph::opset8::Unsqueeze>(node) ||
is_trivial_transpose(node);
}

inline bool is_one_dim_shape(const ov::Shape& dims) {
return std::count_if(std::begin(dims), std::end(dims), [](size_t dim) { return dim != 1; }) <= 1;
}

inline bool is_one_dim_shapes(const ov::Shape& in_dims, const ov::Shape& out_dims) {
return is_one_dim_shape(in_dims) && is_one_dim_shape(out_dims);
}

} // namespace ngraph_util
} // namespace intel_gna
} // namespace ov
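
As a rough illustration of the new helpers above, the sketch below shows the intended call pattern: stepping forward from a node while it is a GNA non-functional layer, the same combination MarkupFusableTranspose relies on further down. The wrapper name next_functional_node is hypothetical.

#include <memory>

#include "ops/util/util.hpp"

// Sketch only: starting from `start`, follow the first consumer of output 0
// while the current node is a GNA non-functional layer (Reshape, Squeeze,
// Unsqueeze, or a trivial Transpose) and return the first node that is not.
inline std::shared_ptr<ov::Node> next_functional_node(const std::shared_ptr<ngraph::Node>& start) {
    return ov::intel_gna::ngraph_util::get_next_node_skipping_certain(
        start, ov::intel_gna::ngraph_util::is_gna_non_functional_node);
}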
7 changes: 0 additions & 7 deletions src/plugins/intel_gna/transformations/insert_copy_layer.cpp
@@ -44,13 +44,6 @@ namespace {

output_op->input(index).replace_source_output(copy_op);
}

bool is_gna_non_functional_node(std::shared_ptr<ngraph::Node> node) {
return std::dynamic_pointer_cast<ngraph::opset8::Reshape>(node) ||
std::dynamic_pointer_cast<ngraph::opset8::Squeeze>(node) ||
std::dynamic_pointer_cast<ngraph::opset8::Unsqueeze>(node) ||
is_trivial_transpose(node);
}
}// namespace

InsertCopyBeforeAssignLayer::InsertCopyBeforeAssignLayer() {
55 changes: 55 additions & 0 deletions src/plugins/intel_gna/transformations/markup_fusable_transpose.cpp
@@ -0,0 +1,55 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <ngraph/opsets/opset9.hpp>
#include <legacy/ngraph_ops/convolution_ie.hpp>
#include <legacy/ngraph_ops/fully_connected.hpp>
#include <legacy/ngraph_ops/scaleshift.hpp>
#include <openvino/cc/ngraph/itt.hpp>
#include <ngraph/rt_info.hpp>
#include <ops/util/util.hpp>
#include <transformations/utils/transformation_helper.hpp>
#include <transformations/rt_info/gna_transpose_fusable.hpp>

#include "markup_fusable_transpose.hpp"

using namespace ov::intel_gna::pass;
using namespace ov::intel_gna::pass::helper;
using namespace ov::intel_gna::ngraph_util;
using namespace ov::intel_gna::rt_info;

namespace {
bool is_skip_operation(const std::shared_ptr<ngraph::Node>& node) {
return (!std::dynamic_pointer_cast<ngraph::opset9::Transpose>(node) &&
!std::dynamic_pointer_cast<ngraph::op::FullyConnected>(node) &&
!std::dynamic_pointer_cast<ngraph::op::ScaleShiftIE>(node) &&
!std::dynamic_pointer_cast<ngraph::opset9::Result>(node) &&
(!is_gna_non_functional_node(node) ||
node->output(0).get_shape().size() == node->input(0).get_shape().size()));
}
} // namespace

bool MarkupFusableTranspose::run_on_model(const std::shared_ptr<ngraph::Function>& f) {
RUN_ON_FUNCTION_SCOPE(MarkupFusableTranspose);

for (auto& node : f->get_ordered_ops()) {
if (!std::dynamic_pointer_cast<ngraph::opset9::Convolution>(node) &&
!std::dynamic_pointer_cast<ngraph::op::ConvolutionIE>(node)) {
continue;
}
auto in_dims = node->input(0).get_shape();
auto out_dims = node->output(0).get_shape();

if (is_one_dim_shapes(in_dims, out_dims)) {
continue;
}

auto current_node = get_next_node_skipping_certain(node, is_skip_operation);
if (!TransposeOrderMatches(std::dynamic_pointer_cast<ngraph::opset9::Transpose>(current_node), {0, 3, 2, 1})) {
continue;
}
add_transpose_fusable(current_node);
}

return false;
}
27 changes: 27 additions & 0 deletions src/plugins/intel_gna/transformations/markup_fusable_transpose.hpp
@@ -0,0 +1,27 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <ngraph/pass/graph_rewrite.hpp>

namespace ov {
namespace intel_gna {
namespace pass {

/**
* @brief Markup fusable transpose
* This transformation supports old IRs for Kaldi models with a specific
* 0-3-2-1 transpose after Convolution and marks it up for special handling
* in the compiler, for backward compatibility purposes
*/
class MarkupFusableTranspose : public ngraph::pass::FunctionPass {
public:
OPENVINO_RTTI("MarkupFusableTranspose", "0");
bool run_on_model(const std::shared_ptr<ngraph::Function>& f) override;
};

} // namespace pass
} // namespace intel_gna
} // namespace ov
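
For orientation, here is a minimal, self-contained sketch of running this pass on an ngraph function, mirroring the register_pass call added to gna_plugin.cpp above; the free function markup_fusable_transposes is hypothetical.

#include <memory>

#include <ngraph/function.hpp>
#include <ngraph/pass/manager.hpp>

#include "transformations/markup_fusable_transpose.hpp"

// Sketch only: register and run the markup pass on an already-built function.
// In the plugin this happens inside GNAPlugin::LoadNetwork (see gna_plugin.cpp above).
void markup_fusable_transposes(const std::shared_ptr<ngraph::Function>& model) {
    ngraph::pass::Manager manager;
    manager.register_pass<ov::intel_gna::pass::MarkupFusableTranspose>();
    manager.run_passes(model);  // marks 0-3-2-1 transposes that follow a convolution
}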
@@ -0,0 +1,20 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "gna_transpose_fusable.hpp"

void ov::intel_gna::rt_info::add_transpose_fusable(const std::shared_ptr<Node>& node) {
auto& rt_info = node->get_rt_info();
rt_info[GNATransposeFusable::get_type_info_static()] = std::string();
}

void ov::intel_gna::rt_info::remove_transpose_fusable(const std::shared_ptr<Node>& node) {
auto& rt_info = node->get_rt_info();
rt_info.erase(GNATransposeFusable::get_type_info_static());
}

bool ov::intel_gna::rt_info::is_transpose_fusable(const std::shared_ptr<Node>& node) {
const auto& rt_info = node->get_rt_info();
return rt_info.count(GNATransposeFusable::get_type_info_static());
}
@@ -0,0 +1,38 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "openvino/core/node.hpp"
#include "openvino/core/runtime_attribute.hpp"
#include "transformations_visibility.hpp"

namespace ov {
namespace intel_gna {
namespace rt_info {

void add_transpose_fusable(const std::shared_ptr<Node>& node);

void remove_transpose_fusable(const std::shared_ptr<Node>& node);

bool is_transpose_fusable(const std::shared_ptr<Node>& node);

/**
* @ingroup ie_runtime_attr_api
* @brief GNATransposeFusable class represents runtime info attribute that marks operation
* as fusable with functional layer
*/
class GNATransposeFusable : public RuntimeAttribute {
public:
OPENVINO_RTTI("gna_transpose_fusable", "0");

GNATransposeFusable() = default;

bool is_copyable() const override {
return false;
}
};
} // namespace rt_info
} // namespace intel_gna
} // namespace ov
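
A small usage sketch for the attribute helpers declared above: marking a node, querying the flag, and clearing it again. The node argument and the surrounding function are hypothetical; in the plugin the marking is performed by MarkupFusableTranspose.

#include <memory>

#include "openvino/core/node.hpp"
#include "transformations/rt_info/gna_transpose_fusable.hpp"

// Sketch only: attach, test, and remove the GNATransposeFusable marker on a node.
void toggle_transpose_fusable(const std::shared_ptr<ov::Node>& example_node) {
    ov::intel_gna::rt_info::add_transpose_fusable(example_node);        // stores the attribute in the node's rt_info
    if (ov::intel_gna::rt_info::is_transpose_fusable(example_node)) {   // true right after marking
        ov::intel_gna::rt_info::remove_transpose_fusable(example_node); // erases the attribute again
    }
}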
@@ -18,10 +18,10 @@ namespace {
};

std::vector<std::vector<size_t>> inputSizes = {
{ 4, 2, 64, 6 },
{ 4, 16, 4, 128},
{ 2, 10, 16, 64},
{ 2, 32, 64, 2},
{ 1, 128, 1, 8 },
{ 1, 4, 1, 128 },
{ 1, 16, 1, 128 },
{ 1, 128, 1, 2 },
};

std::vector<size_t> split_axes = { 1 }; // only channels split is currently supported by gna for 4d inputs
@@ -42,7 +42,7 @@ namespace SubgraphTestsDefinitions {
auto permute_0 = std::make_shared<ngraph::opset1::Transpose>(split->output(0), permute_in_params);
auto permute_1 = std::make_shared<ngraph::opset1::Transpose>(split->output(1), permute_in_params);

auto concat = std::make_shared<ngraph::opset1::Concat>(ngraph::OutputVector{ split->output(0), split->output(1) }, concatAxis);
auto concat = std::make_shared<ngraph::opset1::Concat>(ngraph::OutputVector{ permute_0, permute_1 }, concatAxis);
auto act = ngraph::builder::makeActivation(concat, ngPrc, ngraph::helpers::ActivationTypes::Relu);
function = std::make_shared<ngraph::Function>(act, input, "split_trivial_permute_concat");
}
