[GNA] Support new Kaldi IRs (openvinotoolkit#9474)
* Support new Kaldi IRs (generated in NHWC layout)

* Update tests with activation and fq

* Cleanup

* Fix reordering of FQ and MaxPool and an overflow problem

* Fix win

* Update src/plugins/intel_gna/transformations/unfuse_reshape_and_transpose.hpp

Co-authored-by: Elizaveta Lobanova <[email protected]>

* Update src/plugins/intel_gna/transformations/unfuse_reshape_and_transpose.cpp

Co-authored-by: Elizaveta Lobanova <[email protected]>

* Update inference-engine/tests/unit/gna/ngraph/transformations/gna_unfuse_reshape_and_transpose.cpp

Co-authored-by: Elizaveta Lobanova <[email protected]>

* Code review

Co-authored-by: Elizaveta Lobanova <[email protected]>
nkogteva and elilobanova authored Jan 17, 2022
1 parent 3c75891 commit 56581db
Showing 16 changed files with 697 additions and 79 deletions.
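The core of the change is a pair of new transformations, Unfuse2dto4dReshapeAndTranspose and Unfuse4dto2dReshapeAndTranspose, exercised by the unit test below. They split the single Reshape used around convolutions in the new Kaldi IRs into an explicit Reshape plus Transpose, so the NHWC data order becomes visible to the GNA plugin. A minimal sketch of the unfused input side, reconstructed from the test graphs in this commit (the shape and function name are illustrative, not the plugin's own code):

#include <memory>
#include <ngraph/function.hpp>
#include <ngraph/opsets/opset8.hpp>

// Illustrative only: the unfused form the passes are expected to produce for a
// {1, 1, 1, 168} NCHW convolution input (compare createFunction in the test below).
std::shared_ptr<ngraph::Function> make_unfused_input_example() {
    auto input = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, ngraph::Shape{1, 168});
    // Reshape the flat 2D tensor to 4D in NHWC order: {N, H, W, C} = {1, 1, 168, 1}
    auto reshape_const = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {1, 1, 168, 1});
    auto reshape = std::make_shared<ngraph::opset8::Reshape>(input, reshape_const, false);
    // Explicit NHWC -> NCHW transpose that was previously folded into the single Reshape
    auto order = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 3, 1, 2});
    auto transpose = std::make_shared<ngraph::opset8::Transpose>(reshape, order);
    auto result = std::make_shared<ngraph::opset8::Result>(transpose);
    return std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input});
}

The output side is symmetric: a Transpose with order {0, 2, 3, 1} (NCHW -> NHWC) followed by a Reshape back to the flat {1, total_out} tensor, as built by the test's createFunction.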
225 changes: 225 additions & 0 deletions inference-engine/tests/unit/gna/ngraph/transformations/gna_unfuse_reshape_and_transpose.cpp
@@ -0,0 +1,225 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <gtest/gtest.h>

#include "transformations/unfuse_reshape_and_transpose.hpp"

#include "common_test_utils/ngraph_test_utils.hpp"
#include <ngraph/function.hpp>
#include <ngraph/opsets/opset8.hpp>
#include <ngraph/opsets/opset7.hpp>
#include <ngraph/pass/manager.hpp>
#include <transformations/init_node_info.hpp>

namespace testing {
namespace {

class IActivationFactory {
public:
virtual ~IActivationFactory() = default;
virtual std::shared_ptr<ngraph::Node> createNode(const ngraph::Output<ngraph::Node>& in) = 0;
};

template <typename T>
class ActivationFactory : public IActivationFactory {
public:
ActivationFactory() = default;
std::shared_ptr<ngraph::Node> createNode(const ngraph::Output<ngraph::Node>& operation_before) override {
return std::make_shared<T>(operation_before);
}
private:
ActivationFactory(const ActivationFactory&) = delete;
ActivationFactory& operator=(const ActivationFactory& ) = delete;
};

template <>
class ActivationFactory <ngraph::opset8::Clamp> : public IActivationFactory {
public:
ActivationFactory(const double min, const double max) : min_(min), max_(max) {}
std::shared_ptr<ngraph::Node> createNode(const ngraph::Output<ngraph::Node>& operation_before) override {
return std::make_shared<ngraph::opset8::Clamp>(operation_before, min_, max_);
}
private:
ActivationFactory(const ActivationFactory&) = delete;
ActivationFactory& operator=(const ActivationFactory& ) = delete;
private:
const double min_;
const double max_;
};

using ActivationFactoryPtr = std::shared_ptr<IActivationFactory>;

template <typename T, typename ... Args>
ActivationFactoryPtr createActivationFactory(Args&& ... args) {
return std::make_shared<ActivationFactory<T>>(std::forward<Args>(args) ...);
}
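
// Builds the test graph:
//   Parameter {1, total_in} -> Reshape to 4D (optionally followed by a Transpose)
//   -> [FakeQuantize (also on the Conv weights)] -> Convolution -> [Add bias] -> [FakeQuantize]
//   -> [MaxPool] -> [FakeQuantize -> activation -> FakeQuantize]
//   -> (optional Transpose) -> Reshape {1, total_out} -> Result
// single_reshape_before/single_reshape_after select the fused form (a lone Reshape)
// or the unfused form (Reshape in NHWC order paired with an explicit Transpose).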

static std::shared_ptr<ngraph::Function> createFunction(const ngraph::Shape& conv_input_shape,
const ngraph::Shape& conv_filter_shape,
bool with_bias,
bool with_pool,
ActivationFactoryPtr activation_factory,
bool with_fq,
bool single_reshape_before,
bool single_reshape_after) {
size_t total_in = std::accumulate(std::begin(conv_input_shape), std::end(conv_input_shape), 1, std::multiplies<int>());
auto input = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, ngraph::Shape{1, total_in});
std::shared_ptr<ngraph::Node> last_node, last_const;
auto add_fake_quantize = [&](const std::shared_ptr<ngraph::Node>& node) {
auto input_low = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {1});
auto input_high = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {5});
auto output_low = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {0});
auto output_high = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {10});
return std::make_shared<ngraph::opset8::FakeQuantize>(node, input_low, input_high, output_low, output_high, 11);
};
if (single_reshape_before) {
auto reshape_in_const = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{4}, conv_input_shape);
auto reshape_in = std::make_shared<ngraph::opset8::Reshape>(input, reshape_in_const, false);
last_node = reshape_in;
} else {
auto reshape_in_const = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{4},
ngraph::Shape{conv_input_shape[0], conv_input_shape[2], conv_input_shape[3], conv_input_shape[1]});
auto reshape_in = std::make_shared<ngraph::opset8::Reshape>(input, reshape_in_const, false);
auto transpose_in_const = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{4}, ngraph::Shape{0, 3, 1, 2});
auto transpose_in = std::make_shared<ngraph::opset8::Transpose>(reshape_in, transpose_in_const);
last_node = transpose_in;
}
auto conv_weights = ngraph::opset8::Constant::create(ngraph::element::f32, conv_filter_shape, {1});
last_const = conv_weights;
if (with_fq) {
auto conv_input_fq = add_fake_quantize(last_node);
last_node = conv_input_fq;
auto conv_weights_fq = add_fake_quantize(conv_weights);
last_const = conv_weights_fq;
}
auto conv = std::make_shared<ngraph::opset8::Convolution>(last_node,
last_const,
ngraph::Strides{1, 1},
ngraph::CoordinateDiff{0, 0},
ngraph::CoordinateDiff{0, 0},
ngraph::Strides{1, 1});
last_node = conv;
auto conv_output_shape = conv->get_output_shape(0);
size_t total_out = std::accumulate(std::begin(conv_output_shape), std::end(conv_output_shape), 1, std::multiplies<int>());
if (with_bias) {
auto add_const = ngraph::opset8::Constant::create(ngraph::element::f32, ngraph::Shape{1, conv_output_shape.at(1), 1, 1}, {1});
auto add = std::make_shared<ngraph::opset8::Add>(conv, add_const);
last_node = add;
}
if (with_fq) {
auto conv_bias_fq = add_fake_quantize(last_node);
last_node = conv_bias_fq;
}
if (with_pool) {
auto pool = std::make_shared<ngraph::opset7::MaxPool>(last_node,
ngraph::Strides{1, 1}, ngraph::Shape{0, 0}, ngraph::Shape{0, 0}, ngraph::Shape{1, 1});
last_node = pool;
}
if (activation_factory) {
if (with_fq) {
auto act_fq_in = add_fake_quantize(last_node);
last_node = act_fq_in;
}
auto act = activation_factory->createNode(last_node);
last_node = act;
if (with_fq) {
auto act_fq_out = add_fake_quantize(last_node);
last_node = act_fq_out;
}
}
auto reshape_out_const = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{2}, ngraph::Shape{1, total_out});
if (!single_reshape_after) {
auto transpose_out_const = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{4}, ngraph::Shape{0, 2, 3, 1});
auto transpose_out = std::make_shared<ngraph::opset8::Transpose>(last_node, transpose_out_const);
last_node = transpose_out;
}
auto reshape_out = std::make_shared<ngraph::opset8::Reshape>(last_node, reshape_out_const, false);

auto result = std::make_shared<ngraph::opset8::Result>(reshape_out);
auto func = std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input});

return func;
}

typedef std::tuple<
std::tuple<ngraph::Shape, ngraph::Shape, bool, bool>,
bool, // with bias
bool, // with pooling
ActivationFactoryPtr, // with activation
bool // with fq
> UnfuseReshapeAndTransposeParams;

class UnfuseReshapeAndTransposeTestSuiteFixture: public CommonTestUtils::TestsCommon,
public ::testing::WithParamInterface<UnfuseReshapeAndTransposeParams> {
public:
void SetUp() override;
public:
std::shared_ptr<ngraph::Function> function, reference_function;
};

void UnfuseReshapeAndTransposeTestSuiteFixture::SetUp() {
std::tuple<ngraph::Shape, ngraph::Shape, bool, bool> conv_data;
bool with_bias;
bool with_pool;
bool with_fq;
ActivationFactoryPtr af;
std::tie(conv_data, with_bias, with_pool, af, with_fq) = this->GetParam();
ngraph::Shape conv_input_shape;
ngraph::Shape conv_filter_shape;
bool replace_before;
bool replace_after;
std::tie(conv_input_shape, conv_filter_shape, replace_before, replace_after) = conv_data;
function = createFunction(conv_input_shape, conv_filter_shape, with_bias, with_pool, af, with_fq, true, true);
reference_function = createFunction(conv_input_shape, conv_filter_shape, with_bias, with_pool, af, with_fq, !replace_before, !replace_after);
}

void execute_test(std::shared_ptr<ngraph::Function> function,
std::shared_ptr<ngraph::Function> reference_function) {
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::InitNodeInfo>();
manager.register_pass<GNAPluginNS::Unfuse2dto4dReshapeAndTranspose>();
manager.register_pass<GNAPluginNS::Unfuse4dto2dReshapeAndTranspose>();
manager.run_passes(function);
const FunctionsComparator func_comparator = FunctionsComparator::with_default().enable(FunctionsComparator::ATTRIBUTES);
const FunctionsComparator::Result result = func_comparator(function, reference_function);
ASSERT_TRUE(result.valid) << result.message;
}

TEST_P(UnfuseReshapeAndTransposeTestSuiteFixture, CompareFunctions) {
execute_test(function, reference_function);
}

const std::vector<ActivationFactoryPtr> activationFactories = {
nullptr,
createActivationFactory<ngraph::opset8::Relu>(),
createActivationFactory<ngraph::opset8::Sigmoid>(),
createActivationFactory<ngraph::opset8::Tanh>(),
createActivationFactory<ngraph::opset8::Abs>(),
createActivationFactory<ngraph::opset8::Log>(),
createActivationFactory<ngraph::opset8::Exp>(),
createActivationFactory<ngraph::opset8::Sign>(),
createActivationFactory<ngraph::opset8::Clamp>(0.1, 0.2)
};

INSTANTIATE_TEST_SUITE_P(UnfuseReshapeAndTransposeTestSuite, UnfuseReshapeAndTransposeTestSuiteFixture,
::testing::Combine(
::testing::ValuesIn(
std::vector<std::tuple<ngraph::Shape, ngraph::Shape, bool, bool>>{
{ngraph::Shape{1, 1, 1, 168}, ngraph::Shape{12, 1, 1, 8}, true, false},
{ngraph::Shape{1, 1, 1, 640}, ngraph::Shape{256, 1, 1, 512}, true, false},
{ngraph::Shape{1, 1, 1, 1024}, ngraph::Shape{256, 1, 1, 512}, true, false},
{ngraph::Shape{1, 1, 33, 32}, ngraph::Shape{128, 1, 33, 9}, true, false},
{ngraph::Shape{1, 1, 11, 13}, ngraph::Shape{128, 1, 11, 9}, true, false},
{ngraph::Shape{1, 1, 33, 23}, ngraph::Shape{128, 1, 11, 5}, true, false},
{ngraph::Shape{1, 1, 33, 32}, ngraph::Shape{1, 1, 33, 9}, true, true},
{ngraph::Shape{1, 1, 1, 1024}, ngraph::Shape{256, 1, 1, 1024}, true, true},
{ngraph::Shape{1, 1, 33, 32}, ngraph::Shape{1, 1, 33, 9}, true, true}}),
::testing::ValuesIn(std::vector<bool>{true, false}), // with bias
::testing::ValuesIn(std::vector<bool>{true, false}), // with max pool
::testing::ValuesIn(activationFactories), // with activation
::testing::ValuesIn(std::vector<bool>{true, false}))); // with fq

} // namespace
} // namespace testing
34 changes: 25 additions & 9 deletions src/plugins/intel_gna/gna_graph_compiler.cpp
@@ -266,13 +266,21 @@ void GNAGraphCompiler::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer)
 std::swap(convolution._dilation_x, convolution._dilation_y);
 }
 
+auto in_kernel_w = convolution._kernel_x;
+auto in_kernel_h = convolution._kernel_y;
+bool transpose_h_w = false;
+
 // Map 2d convolution to 1d if it's possible
-if (GNAConvolutionLayer::isMappableFrom2DTo1D(in_height, in_width, convolution._kernel_x, convolution._stride_x)) {
+if (GNAConvolutionLayer::isMappableFrom2DTo1D(in_height, in_width, in_channels,
+convolution._kernel_y, convolution._kernel_x,
+convolution._stride_y, convolution._stride_x)) {
+transpose_h_w = (in_height == convolution._kernel_y);
 in_width *= in_height;
 in_height = 1;
 out_width *= out_height;
 out_height = 1;
-convolution._stride_x *= (convolution._stride_y * convolution._kernel_x);
+convolution._stride_x *= transpose_h_w ? (convolution._stride_y * convolution._kernel_y) :
+(convolution._stride_y * convolution._kernel_x);
 convolution._kernel_x *= convolution._kernel_y;
 convolution._kernel_y = 1;
 }
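
Not part of the commit, but a worked example of the remapping above may help. Taking the 1x1x33x32 input with a 128x1x33x9 kernel from the new unit test, and assuming unit strides, the kernel height equals the input height, so the convolution is mapped to 1D along H:

// Hypothetical values mirroring the code above; names and numbers are illustrative.
uint32_t in_height = 33, in_width = 32;
uint32_t kernel_x = 9, kernel_y = 33;            // kernel W and H
uint32_t stride_x = 1, stride_y = 1;             // assumed unit strides
bool transpose_h_w = (in_height == kernel_y);    // true: map 2D -> 1D along H
in_width *= in_height;                           // 33 * 32 = 1056
in_height = 1;
stride_x *= transpose_h_w ? (stride_y * kernel_y)    // 1 * 33 = 33
                          : (stride_y * kernel_x);
kernel_x *= kernel_y;                            // 9 * 33 = 297
kernel_y = 1;
// The 2D convolution is now a 1D convolution over a 1 x 1056 row with a
// 297-element kernel and a stride of 33, which yields the same 24 output positions.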
@@ -304,19 +312,20 @@ void GNAGraphCompiler::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer)
 in_height != 1) {
 // TensorFlow default layout is NHWC
 // OpenVino Default layout is NCHW
-// GNA Convolution input is NHCW
+// GNA Convolution input is NHCW (old) or NHWC (new)
 // When layer layout is in NHWC it means that it was created by PassManager
-return finalizeConvolution2DPrimitive(layer, in_batch, in_channels, in_height, in_width,
-out_batch, out_channels, out_height, out_width);
+THROW_GNA_LAYER_EXCEPTION(layer) << "Convolution 2D is not supported on GNA 1.0 library";
 }
 finalizeConvolution1DPrimitive(layer, in_batch, in_channels, in_width,
-out_batch, out_channels, out_width);
+out_batch, out_channels, out_width, in_kernel_w, in_kernel_h, transpose_h_w);
 }
 
 void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerPtr layer,
 uint32_t in_batch, uint32_t in_channels, uint32_t in_width,
-uint32_t out_batch, uint32_t out_channels, uint32_t out_width) {
+uint32_t out_batch, uint32_t out_channels, uint32_t out_width,
+uint32_t in_kernel_w, uint32_t in_kernel_h, bool transpose_h_w) {
 auto& convolution = dynamic_cast<ConvolutionLayer&>(*layer.get());
 printConvolutionLayer(convolution);

@@ -429,7 +438,10 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerPtr layer,
 ptr_weights,
 ptr_biases);
 
-if (inputs->getLayout() == Layout::NHWC) {
+// Keep both variants of kaldi models working:
+// Old one has layout which is different from NHWC
+// New one has layout NHWC, but it is mapped from 2d by H
+if (inputs->getLayout() == Layout::NHWC && !transpose_h_w) {
 currentComponent.orientation_in = kDnnInterleavedOrientation;
 currentComponent.orientation_out = kDnnInterleavedOrientation;
 }
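
For reference, the layout comments and orientation handling above come down to standard layout arithmetic. A small helper (general layout math, not code from this commit) shows how the same element lands at different flat offsets in the two orders:

#include <cstddef>

// Flat offset of element (n, c, h, w) in a tensor with dimensions N, C, H, W.
inline size_t offset_nchw(size_t n, size_t c, size_t h, size_t w, size_t C, size_t H, size_t W) {
    return ((n * C + c) * H + h) * W + w;    // OpenVINO default layout
}
inline size_t offset_nhwc(size_t n, size_t c, size_t h, size_t w, size_t C, size_t H, size_t W) {
    return ((n * H + h) * W + w) * C + c;    // layout of the new NHWC Kaldi IRs
}

The condition above keeps the interleaved orientation only for the NHWC case that does not need the H/W remapping; other cases fall through to the input-rotation logic in the next hunk.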
@@ -447,7 +459,7 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerPtr layer,

 // TODO: convolution might be not the first layer in sorted order but connected via split for example - don't know how kaldi will handle that
 if (!dnn->do_rotate_input) {
-if (inputs->getLayout() != Layout::NHWC && LayerInfo(connectedInputLayer).isInput()) {
+if ((inputs->getLayout() != Layout::NHWC || transpose_h_w) && LayerInfo(connectedInputLayer).isInput()) {
 // Kaldi features are opposite orientation
 dnn->do_rotate_input = true;
 dnn->num_rotate_rows = effectiveStride;
@@ -459,12 +471,16 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerPtr layer,

 connectOutput(layer, ptr_outputs, num_data_bytes_out);
 
+// Transpose H with W or C with HW
+auto A = transpose_h_w ? in_kernel_h : in_channels;
+auto B = transpose_h_w ? in_kernel_w : convolution._kernel[X_AXIS];
+
 std::vector<uint8_t> transposedWeights;
 for (uint32_t k = 0; k < convolution._out_depth; k++) {
 uint8_t * ptr_filt_current
 = convolution._weights->cbuffer().as<uint8_t*>() +
-k * in_channels * convolution._kernel[X_AXIS] * convolution.precision.size();
-auto transposedPart = transposeMatrix(ptr_filt_current, convolution.precision.size(), in_channels, convolution._kernel[X_AXIS]);
+k * A * B * convolution.precision.size();
+auto transposedPart = transposeMatrix(ptr_filt_current, convolution.precision.size(), A, B);
 transposedWeights.insert(transposedWeights.end(), transposedPart.begin(), transposedPart.end());
 }
 if (transposedWeights.size() != convolution._weights->byteSize()) {
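The weight repacking above relies on transposeMatrix. A minimal sketch of what such a helper is assumed to do (an element-wise rows x cols transpose of a raw buffer, element_size bytes per cell); the plugin's actual implementation may differ:

#include <algorithm>
#include <cstdint>
#include <vector>

// Assumed behaviour only: treat the buffer as a rows x cols matrix of element_size-byte
// cells and return the cols x rows transposed copy.
std::vector<uint8_t> transpose_matrix_sketch(const uint8_t* ptr, size_t element_size,
                                             uint32_t rows, uint32_t cols) {
    std::vector<uint8_t> out(static_cast<size_t>(rows) * cols * element_size);
    for (uint32_t r = 0; r < rows; ++r) {
        for (uint32_t c = 0; c < cols; ++c) {
            const uint8_t* src = ptr + (static_cast<size_t>(r) * cols + c) * element_size;
            uint8_t* dst = out.data() + (static_cast<size_t>(c) * rows + r) * element_size;
            std::copy(src, src + element_size, dst);
        }
    }
    return out;
}

With transpose_h_w set, each filter is repacked as an in_kernel_h x in_kernel_w matrix (H swapped with W); otherwise as in_channels x kernel_x (C swapped with HW), which is exactly the A/B selection above.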
4 changes: 2 additions & 2 deletions src/plugins/intel_gna/gna_graph_compiler.hpp
@@ -128,8 +128,8 @@ class GNAGraphCompiler {

 void finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerPtr,
 uint32_t in_batch, uint32_t in_channels, uint32_t in_width,
-uint32_t out_batch, uint32_t out_channels, uint32_t out_width);
-
+uint32_t out_batch, uint32_t out_channels, uint32_t out_width,
+uint32_t in_kernel_x, uint32_t in_kernel_y, bool transpose);
 void finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerPtr,
 uint32_t in_batch, uint32_t in_channels, uint32_t in_height, uint32_t in_width,
 uint32_t out_batch, uint32_t out_channels, uint32_t out_height, uint32_t out_width);