Commit deb008a

publish master branch snapshot, revision 8d31237e2c3f673cbb0f0ba110fc10f5cce1d2bb

Alexey Suhov committed May 21, 2020 · 1 parent eab7ef4

Showing 423 changed files with 11,008 additions and 5,497 deletions.
15 changes: 14 additions & 1 deletion .gitignore
@@ -16,6 +16,7 @@ build/
.gdb_history
.vimspector.json
doc/
!ngraph/doc
docs/build_documentation/work_dir/
inference-engine/plugins/
inference-engine/temp
@@ -55,4 +56,16 @@ __pycache__
/model-optimizer/!CMakeLists.txt
/model-optimizer/*.mapping
/model-optimizer/*.dat
/model-optimizer/*.svg
/model-optimizer/*.svg

# ngraph
ngraph/src/CPackConfig.cmake
ngraph/src/CPackSourceConfig.cmake
ngraph/src/VERSION
ngraph/src/gtest/
ngraph/src/json/
ngraph/src/ngraphConfig.cmake
ngraph/src/ngraphConfigVersion.cmake
ngraph/src/protobuf/
ngraph/src/src/
ngraph/src/test/
3 changes: 1 addition & 2 deletions cmake/developer_package.cmake
@@ -64,12 +64,11 @@ endmacro()

macro(ie_cpack)
set(CPACK_GENERATOR "TGZ")
string(REPLACE "/" "_" CPACK_PACKAGE_VERSION "${CI_BUILD_NUMBER}")
if(WIN32)
set(CPACK_PACKAGE_NAME inference-engine_${CMAKE_BUILD_TYPE})
string(REPLACE "\\" "_" CPACK_PACKAGE_VERSION "${CI_BUILD_NUMBER}")
else()
set(CPACK_PACKAGE_NAME inference-engine)
string(REPLACE "/" "_" CPACK_PACKAGE_VERSION "${CI_BUILD_NUMBER}")
endif()
set(CPACK_INCLUDE_TOPLEVEL_DIRECTORY OFF)
set(CPACK_ARCHIVE_COMPONENT_INSTALL ON)
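
The ie_cpack change keeps the separator-to-underscore substitution for CPACK_PACKAGE_VERSION but makes it platform-specific: backslashes are replaced on Windows, forward slashes elsewhere, presumably because CI_BUILD_NUMBER can carry path-style components whose separator depends on the host. A small C++ sketch of the same substitution (illustration only; the CI_BUILD_NUMBER values shown are hypothetical):

    #include <algorithm>
    #include <iostream>
    #include <string>

    int main() {
        // Hypothetical CI build numbers containing path-style separators.
    #ifdef _WIN32
        std::string ci_build_number = "releases\\2020\\2_12345";
        std::replace(ci_build_number.begin(), ci_build_number.end(), '\\', '_');
    #else
        std::string ci_build_number = "releases/2020/2_12345";
        std::replace(ci_build_number.begin(), ci_build_number.end(), '/', '_');
    #endif
        std::cout << ci_build_number << "\n";  // releases_2020_2_12345
        return 0;
    }
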
11 changes: 11 additions & 0 deletions inference-engine/CMakeLists.txt
@@ -159,6 +159,17 @@ if(ENABLE_PYTHON)
COMPONENT python_samples)
endif()

# install speech demo files

if(SPEECH_LIBS_AND_DEMOS)
ie_cpack_add_component(speech_demo_files REQUIRED)

install(DIRECTORY ${TEMP}/deployment_tools
${TEMP}/data_processing
DESTINATION .
COMPONENT speech_demo_files)
endif()

#
# Developer package
#
2 changes: 1 addition & 1 deletion inference-engine/ie_bridges/python/CMakeLists.txt
@@ -57,7 +57,7 @@ add_subdirectory (src/openvino/inference_engine)

# Check Cython version
if("${CYTHON_VERSION}" VERSION_LESS "0.29")
message(FATAL_ERROR "OpenVINO Python API needs at least Cython version 0.29, found verson ${CYTHON_VERSION}")
message(FATAL_ERROR "OpenVINO Python API needs at least Cython version 0.29, found version ${CYTHON_VERSION}")
else()
message(STATUS "Found Cython version ${CYTHON_VERSION}")
endif()
2 changes: 1 addition & 1 deletion inference-engine/ie_bridges/python/cmake/FindCython.cmake
@@ -58,6 +58,6 @@ FIND_PACKAGE_HANDLE_STANDARD_ARGS( Cython REQUIRED_VARS CYTHON_EXECUTABLE )

# Find Cython version
execute_process(COMMAND ${CYTHON_EXECUTABLE} -V ERROR_VARIABLE CYTHON_OUTPUT OUTPUT_QUIET)
string(REGEX REPLACE "^Cython version ([0-9]+\\.[0-9]+\\.[0-9]+).*" "\\1" CYTHON_VERSION "${CYTHON_OUTPUT}")
string(REGEX REPLACE "^Cython version ([0-9]+\\.[0-9]+(\\.[0-9]+)?).*" "\\1" CYTHON_VERSION "${CYTHON_OUTPUT}")

mark_as_advanced( CYTHON_EXECUTABLE CYTHON_VERSION )
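
The FindCython.cmake change relaxes the version-parsing pattern so the patch component is optional, presumably because 'cython -V' can report a two-component version such as "Cython version 0.29", which the old three-component pattern failed to capture. A minimal C++ sketch of the same capture logic (illustration only, not part of the commit; this simple pattern behaves the same under std::regex as under CMake's regex engine):

    #include <iostream>
    #include <regex>
    #include <string>

    int main() {
        // Same capture group as the updated CMake pattern: the patch component is optional.
        const std::regex version_re("^Cython version ([0-9]+\\.[0-9]+(\\.[0-9]+)?).*");
        for (const std::string output : {"Cython version 0.29", "Cython version 0.29.14"}) {
            std::smatch match;
            if (std::regex_match(output, match, version_re)) {
                std::cout << output << " -> " << match[1] << "\n";  // prints 0.29 and 0.29.14
            }
        }
        return 0;
    }
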
@@ -23,6 +23,7 @@ foreach(PYX_FILE ${OTHER_SOURCES})
get_filename_component(PYX_NAME "${PYX_FILE}" NAME_WE)
set_source_files_properties(${PYX_FILE} PROPERTIES CYTHON_IS_CXX ON)
cython_add_module(${PYX_NAME} ${PYX_FILE})
add_dependencies(${TARGET_NAME} ${PYX_NAME})
target_include_directories(${PYX_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}")
target_link_libraries(${PYX_NAME} PRIVATE ${InferenceEngine_LIBRARIES})
endforeach()
19 changes: 17 additions & 2 deletions inference-engine/ie_bridges/python/tests/test_InferRequest.py
@@ -3,6 +3,7 @@
import pytest
import warnings
import threading
from datetime import datetime

from openvino.inference_engine import ie_api as ie
from conftest import model_path, image_path
@@ -195,11 +196,25 @@ def test_async_infer_wait_finish(device):
def test_async_infer_wait_time(device):
ie_core = ie.IECore()
net = ie_core.read_network(test_net_xml, test_net_bin)
exec_net = ie_core.load_network(net, device, num_requests=1)
exec_net = ie_core.load_network(net, device, num_requests=2)
img = read_image()
request = exec_net.requests[0]
request.async_infer({'data': img})
request.wait(100)
start_time = datetime.utcnow()
status = request.wait(ie.WaitMode.RESULT_READY)
assert status == ie.StatusCode.OK
time_delta = datetime.utcnow() - start_time
latency_ms = (time_delta.microseconds / 1000) + (time_delta.seconds * 1000)
timeout = max(100, latency_ms)
request = exec_net.requests[1]
request.async_infer({'data': img})
max_repeat = 10
status = ie.StatusCode.REQUEST_BUSY
i = 0
while i < max_repeat and status != ie.StatusCode.OK:
status = request.wait(timeout)
i += 1
assert status == ie.StatusCode.OK
res = request.output_blobs['fc_out'].buffer
assert np.argmax(res) == 2
del exec_net
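
The reworked test_async_infer_wait_time no longer relies on a single fixed 100 ms wait: it first times one complete request to estimate latency, then polls a second request with wait(timeout), where timeout = max(100 ms, measured latency), giving up after at most 10 attempts. A self-contained C++ sketch of that measure-then-poll pattern (illustration only; a stub stands in for the Inference Engine request):

    #include <algorithm>
    #include <chrono>
    #include <iostream>
    #include <thread>

    enum class Status { OK, BUSY };

    // Stub standing in for request.wait(); here it always completes within roughly 50 ms.
    Status wait_for_result(long long timeout_ms) {
        (void)timeout_ms;
        std::this_thread::sleep_for(std::chrono::milliseconds(50));
        return Status::OK;
    }

    int main() {
        using clock = std::chrono::steady_clock;

        // 1) Time one complete request to get a realistic latency estimate.
        const auto start = clock::now();
        wait_for_result(-1);  // -1: block until the result is ready
        const long long latency_ms =
            std::chrono::duration_cast<std::chrono::milliseconds>(clock::now() - start).count();

        // 2) Poll a second request with max(100 ms, measured latency) per attempt, at most 10 times.
        const long long timeout = std::max<long long>(100, latency_ms);
        Status status = Status::BUSY;
        for (int attempt = 0; attempt < 10 && status != Status::OK; ++attempt) {
            status = wait_for_result(timeout);
        }
        std::cout << (status == Status::OK ? "inference completed" : "timed out") << "\n";
        return 0;
    }
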
7 changes: 7 additions & 0 deletions inference-engine/samples/benchmark_app/benchmark_app.hpp
@@ -100,6 +100,9 @@ static const char dump_config_message[] = "Optional. Path to XML/YAML/JSON file
static const char shape_message[] = "Optional. Set shape for input. For example, \"input1[1,3,224,224],input2[1,4]\" or \"[1,3,224,224]\""
" in case of one input size.";

// @brief message for quantization bits
static const char gna_qb_message[] = "Optional. Weight bits for quantization: 8 or 16 (default)";

/// @brief Define flag for showing help message <br>
DEFINE_bool(h, false, help_message);

@@ -184,6 +187,9 @@ DEFINE_string(dump_config, "", dump_config_message);
/// @brief Define flag for input shape <br>
DEFINE_string(shape, "", shape_message);

/// @brief Define flag for quantization bits (default 16)
DEFINE_int32(qb, 16, gna_qb_message);

/**
* @brief This function show a help message
*/
@@ -221,4 +227,5 @@ static void showUsage() {
std::cout << " -dump_config " << dump_config_message << std::endl;
std::cout << " -load_config " << load_config_message << std::endl;
#endif
std::cout << " -qb " << gna_qb_message << std::endl;
}
9 changes: 9 additions & 0 deletions inference-engine/samples/benchmark_app/main.cpp
@@ -13,6 +13,7 @@
#include <inference_engine.hpp>
#include <vpu/vpu_plugin_config.hpp>
#include <cldnn/cldnn_config.hpp>
#include <gna/gna_config.hpp>
#include <samples/common.hpp>
#include <samples/slog.hpp>
#include <samples/args_helper.hpp>
@@ -274,6 +275,14 @@ int main(int argc, char *argv[]) {
}
} else if (device == "MYRIAD") {
device_config[CONFIG_KEY(LOG_LEVEL)] = CONFIG_VALUE(LOG_WARNING);
} else if (device == "GNA") {
if (FLAGS_qb == 8)
device_config[GNA_CONFIG_KEY(PRECISION)] = "I8";
else
device_config[GNA_CONFIG_KEY(PRECISION)] = "I16";

if (isFlagSetInCommandLine("nthreads"))
device_config[GNA_CONFIG_KEY(LIB_N_THREADS)] = std::to_string(FLAGS_nthreads);
}
}

4 changes: 3 additions & 1 deletion inference-engine/src/cldnn_engine/cldnn_engine.cpp
@@ -24,6 +24,7 @@
#include "details/caseless.hpp"
#include <details/ie_cnn_network_tools.h>
#include <ngraph/opsets/opset2.hpp>
#include <ngraph/opsets/opset3.hpp>
#include <ngraph/op/fused/gelu.hpp>
#include <generic_ie.hpp>
#include <transformations/common_optimizations/common_optimizations.hpp>
@@ -73,7 +74,8 @@ InferenceEngine::ICNNNetwork::Ptr clDNNEngine::CloneNetwork(const InferenceEngin
std::shared_ptr<ICNNNetwork> clonedNetwork(nullptr);
if (network.getFunction()) {
const auto transformations_callback = [](const std::shared_ptr<const ::ngraph::Node> &node) -> bool {
return std::dynamic_pointer_cast<const ::ngraph::opset2::Gelu>(node) != nullptr;
return std::dynamic_pointer_cast<const ::ngraph::opset2::Gelu>(node) ||
std::dynamic_pointer_cast<const ::ngraph::opset3::ShuffleChannels>(node);
};
CNNNetwork net(network.getFunction());
auto nGraphFunc = net.getFunction();
56 changes: 56 additions & 0 deletions inference-engine/src/gna_plugin/frontend/layer_quantizer.hpp
@@ -17,6 +17,8 @@
#include "blob_factory.hpp"
#include "precision_ex.hpp"
#include "layers/gna_layer_info.hpp"
#include "weights_converter.hpp"
#include "layer_transform.hpp"

namespace GNAPluginNS {
namespace frontend {
@@ -137,6 +139,48 @@ class Quant<QuantI8> {
}
};

template <typename T>
inline InferenceEngine::Blob::Ptr fp32_to_precision_blob(InferenceEngine::Blob::Ptr fp32_blob, InferenceEngine::Precision precision, float scale_factor) {
auto prec_blob = InferenceEngine::make_shared_blob<T>({ precision,
fp32_blob->getTensorDesc().getDims(), fp32_blob->getTensorDesc().getLayout() });
prec_blob->allocate();

int i = 0;
for (auto& precValue : *prec_blob) {
auto f32Value = fp32_blob->buffer().template as<InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type*>()[i++] * scale_factor;
if (f32Value > std::numeric_limits<T>::max()) {
precValue = std::numeric_limits<T>::max();
} else if (f32Value < std::numeric_limits<T>::min()) {
precValue = std::numeric_limits<T>::min();
} else {
precValue = static_cast<T>(f32Value);
}
}

return static_cast<InferenceEngine::Blob::Ptr>(prec_blob);
}

inline InferenceEngine::Blob::Ptr fp32_to_precision_blob(InferenceEngine::Blob::Ptr fp32_blob, InferenceEngine::Precision precision, float scale_factor) {
InferenceEngine::Blob::Ptr result_ptr = nullptr;
switch (precision) {
case InferenceEngine::Precision::FP32:
result_ptr = fp32_to_precision_blob<float>(fp32_blob, precision, scale_factor);
break;
case InferenceEngine::Precision::I32:
result_ptr = fp32_to_precision_blob<int32_t>(fp32_blob, precision, scale_factor);
break;
case InferenceEngine::Precision::I16:
result_ptr = fp32_to_precision_blob<int16_t>(fp32_blob, precision, scale_factor);
break;
case InferenceEngine::Precision::I8:
result_ptr = fp32_to_precision_blob<int8_t>(fp32_blob, precision, scale_factor);
break;
default:
THROW_GNA_EXCEPTION << "FP32 to " << precision << " not supported";
}
return result_ptr;
}

template<class QuantDesc, class QuantFunc>
inline void quantizeWeightsBiases(const QuantDesc & quantDesc,
InferenceEngine::WeightableLayer *wl,
@@ -389,6 +433,18 @@ class DataQuantizer<Desc, InferenceEngine::CNNLayer *> : public DataQuantizerBas
}
cnnLayer->precision = Desc::mandatory().getInputPrecision();

if (cnnLayer->type == "Const") {
if (cnnLayer->blobs["custom"]->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP16) {
cnnLayer->blobs["custom"] = make_fp32_blob(cnnLayer->blobs["custom"]);
}
auto const_scale_factor = InferenceEngine::getInjectedData<QuantizedLayerParams>(*cnnLayer)->_dst_quant.scale;
auto new_const_blob = InferenceEngine::Blob::CreateFromData(cnnLayer->outData[0]);
auto const_blob = cnnLayer->blobs["custom"];
if (const_blob->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32) {
cnnLayer->blobs["custom"] = fp32_to_precision_blob(const_blob, cnnLayer->outData[0]->getPrecision(), const_scale_factor);
}
}

return true;
}
};
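
The new fp32_to_precision_blob helpers scale each FP32 value by the layer's scale factor and saturate the result to the limits of the target type before casting, and the Const-layer branch uses them to requantize constant blobs to the output precision. A self-contained sketch of the clamp-and-cast step for an I16 target (illustration only; the real helper operates on Inference Engine blobs and also covers FP32, I32 and I8 targets):

    #include <cstdint>
    #include <iostream>
    #include <limits>
    #include <vector>

    // Scale an FP32 value and saturate it to the int16_t range, as the I16 branch does.
    int16_t quantize_to_i16(float value, float scale_factor) {
        const float scaled = value * scale_factor;
        if (scaled > std::numeric_limits<int16_t>::max()) return std::numeric_limits<int16_t>::max();
        if (scaled < std::numeric_limits<int16_t>::min()) return std::numeric_limits<int16_t>::min();
        return static_cast<int16_t>(scaled);
    }

    int main() {
        const std::vector<float> values = {0.5f, -1.25f, 3.0f};
        const float scale_factor = 16384.0f;  // hypothetical scale factor for this example
        for (float v : values) {
            std::cout << v << " -> " << quantize_to_i16(v, scale_factor) << "\n";
        }
        // 0.5 -> 8192, -1.25 -> -20480, and 3.0 saturates to 32767.
        return 0;
    }
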
33 changes: 33 additions & 0 deletions inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp
@@ -197,6 +197,36 @@ class ScaleFactorPerLayer<InferenceEngine::CNNLayer *> {
return true;
}

if (cnnLayer->type == "Const") {
auto blob = cnnLayer->blobs["custom"];
if (blob->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP16) {
blob = make_fp32_blob(blob);
}
auto max_val = std::numeric_limits<float>::min();
auto min_val = std::numeric_limits<float>::max();

auto flt_buf = blob->buffer().as<float*>();
auto size = blob->size();

for (int i=0; i < size; i++) {
auto val = flt_buf[i];
if (val > max_val) max_val = val;
if (val < min_val) min_val = val;
}

auto abs_val = std::max(std::abs(max_val), std::abs(min_val));
auto scale_val = static_cast<float>(std::numeric_limits<int16_t>::max()) / abs_val;

// TODO: Investigate what should be the scale in such cases (31910)
if (std::isinf(scale_val)) {
quant->_dst_quant.scale = quant->_src_quant.scale;
} else {
quant->_dst_quant.scale = scale_val;
}

return ScaleFactorUpdateResult();
}

if (!CNNNetHasPrevLayer(cnnLayer)) {
quant->_dst_quant.scale = quant->_src_quant.scale;
return ScaleFactorUpdateResult();
@@ -231,6 +261,7 @@ class ScaleFactorPerLayer<InferenceEngine::EltwiseLayer*> {

auto quantParams0 = InferenceEngine::getInjectedData<QuantizedLayerParams>(in0);
auto quantParams1 = InferenceEngine::getInjectedData<QuantizedLayerParams>(in1);

auto quantData = InferenceEngine::getInjectedData<QuantizedLayerParams>(*eltwiseLayer);

switch (eltwiseLayer->_operation) {
@@ -239,6 +270,7 @@ class ScaleFactorPerLayer<InferenceEngine::EltwiseLayer*> {
quantData->_dst_quant.scale = quantParams0->_dst_quant.scale * quantParams1->_dst_quant.scale;
break;
}
case InferenceEngine::EltwiseLayer::Sub:
case InferenceEngine::EltwiseLayer::Sum: {
// detect which input will be used as biases
if (LayerInfo(in0).has32BOutput()) {
@@ -247,6 +279,7 @@ class ScaleFactorPerLayer<InferenceEngine::EltwiseLayer*> {
}

// this path might result in significant data loss
quantData->_bias_quant.scale = quantParams1->_dst_quant.scale / quantParams0->_dst_quant.scale;
quantData->_weights_quant.scale = quantParams1->_dst_quant.scale / quantParams0->_dst_quant.scale;
quantData->_dst_quant.scale = quantParams1->_dst_quant.scale;

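
The Const-layer branch added to scale_factor_calc.hpp derives the scale factor from the largest absolute value in the constant blob, scale = INT16_MAX / max(|min|, |max|), and falls back to the source scale when that division produces infinity, the case flagged by the TODO. A minimal sketch of that computation (illustration only; names are hypothetical, not the plugin code):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <iostream>
    #include <limits>
    #include <vector>

    // Derive a scale factor for a constant FP32 buffer: INT16_MAX / max(|min|, |max|),
    // keeping the incoming scale when the result is not finite.
    float const_scale_factor(const std::vector<float>& values, float fallback_scale) {
        float max_val = std::numeric_limits<float>::min();
        float min_val = std::numeric_limits<float>::max();
        for (float v : values) {
            max_val = std::max(max_val, v);
            min_val = std::min(min_val, v);
        }
        const float abs_val = std::max(std::abs(max_val), std::abs(min_val));
        const float scale = static_cast<float>(std::numeric_limits<int16_t>::max()) / abs_val;
        return std::isinf(scale) ? fallback_scale : scale;
    }

    int main() {
        // A constant whose largest magnitude is 2.0 gets scale 32767 / 2.0 = 16383.5.
        std::cout << const_scale_factor({0.5f, -2.0f, 1.0f}, /*fallback_scale=*/1.0f) << "\n";
        // An all-zero constant makes the division overflow to infinity, so the fallback is returned.
        std::cout << const_scale_factor({0.0f, 0.0f}, /*fallback_scale=*/1.0f) << "\n";
        return 0;
    }
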
38 changes: 28 additions & 10 deletions inference-engine/src/gna_plugin/frontend/weights_converter.hpp
@@ -7,22 +7,28 @@
#include "quantized_layer_params.hpp"
#include "precision_utils.h"

inline InferenceEngine::Blob::Ptr make_fp32_blob(InferenceEngine::Blob::Ptr fp16_blob) {
auto fp32_blob = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32,
fp16_blob->getTensorDesc().getDims(), fp16_blob->getTensorDesc().getLayout() });
fp32_blob->allocate();

int i = 0;
for (auto& f32Value : *fp32_blob) {
auto f16Value = fp16_blob->buffer().template as<InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type*>()[i++];
f32Value = InferenceEngine::PrecisionUtils::f16tof32(f16Value);
}

return static_cast<InferenceEngine::Blob::Ptr>(fp32_blob);
}

inline void fp16_to_fp32(InferenceEngine::WeightableLayer *lp) {
InferenceEngine::BlobMap newBlobs;
for (auto& blob : lp->blobs) {
if (blob.second->getTensorDesc().getPrecision() != InferenceEngine::Precision::FP16) {
THROW_GNA_EXCEPTION << "Unsupported precision. Layer: " << lp->name << " , Blob: " << blob.first;
}
auto tmp =
InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32,
blob.second->getTensorDesc().getDims(), InferenceEngine::Layout::C });
tmp->allocate();
int i = 0;
for (auto& f32Value : *tmp) {
auto f16Value = blob.second->buffer().template as<InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type*>()[i++];
f32Value = InferenceEngine::PrecisionUtils::f16tof32(f16Value);
}
newBlobs[blob.first] = tmp;
auto fp32_blob = make_fp32_blob(blob.second);
newBlobs[blob.first] = fp32_blob;
}
lp->_biases = newBlobs["biases"];
lp->_weights = newBlobs["weights"];
@@ -44,6 +50,18 @@ inline bool convertWeights(InferenceEngine::CNNLayer* lp) {
for (auto& dataItem : lp->outData) {
dataItem->setPrecision(InferenceEngine::Precision::FP32);
}
InferenceEngine::BlobMap newBlobs;
for (auto& blob_pair : lp->blobs) {
auto blob_name = blob_pair.first;
auto blob_ptr = blob_pair.second;
if (blob_ptr->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP16) {
auto new_blob = make_fp32_blob(blob_ptr);
newBlobs[blob_name] = new_blob;
} else {
newBlobs[blob_name] = blob_ptr;
}
}

return true;
}
