[CPU] Fixed TensorIterator/Loop dynamism leftovers (openvinotoolkit#9722)
a-sidorova authored Feb 1, 2022
1 parent cc19ff7 commit 44362c9
Showing 4 changed files with 86 additions and 49 deletions.
97 changes: 61 additions & 36 deletions src/plugins/intel_cpu/src/nodes/mkldnn_tensoriterator_node.cpp
@@ -52,9 +52,9 @@ static NodeConfig make_plain_config(const std::shared_ptr<ov::Node>& op) {
return config;
}

static void redefineToMemories(const std::vector<MKLDNNMemoryPtr>& to_mems, const std::shared_ptr<MemoryDesc> new_desc) {
static void redefineToMemories(const std::vector<MKLDNNMemoryPtr>& to_mems, const MemoryDesc& new_desc) {
const auto &currDesc = to_mems.front()->getDesc();
if (currDesc.getShape().isDynamic() || currDesc.getShape().getStaticDims() != new_desc->getShape().getStaticDims()) {
if (currDesc.getShape().isDynamic() || currDesc.getShape().getStaticDims() != new_desc.getShape().getStaticDims()) {
// WA [DS] : need to rewrite it. Updated copypaste is from MKLDNNNode::redefineOutputMemory
// this path is necessary if there are several edges per one port
// in this case edge memory share same physical memory
@@ -77,6 +77,20 @@ static void redefineToMemories(const std::vector<MKLDNNMemoryPtr>& to_mems, cons
}
}

// this method gets all memory ptrs of the children of one port so that their descs can be redefined
static std::vector<MKLDNNMemoryPtr> getToMemories(const MKLDNNNode* node, const size_t port) {
std::vector<MKLDNNMemoryPtr> memories;
for (auto& edge : node->getChildEdgesAtPort(port))
memories.push_back(edge->getMemoryPtr());
return memories;
}

static void nullifyUndefinedDims(VectorDims& dims) {
std::transform(dims.begin(), dims.end(), dims.begin(), [](const size_t& dim) {
return dim == Shape::UNDEFINED_DIM ? 0 : dim;
});
}
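
// Illustration: a minimal, standalone sketch of the new helper's behavior.
// Assumptions (not taken from this diff): VectorDims is std::vector<size_t>
// and Shape::UNDEFINED_DIM is a max-value sentinel.
#include <algorithm>
#include <cstddef>
#include <limits>
#include <vector>

namespace sketch {
using VectorDims = std::vector<std::size_t>;
constexpr std::size_t UNDEFINED_DIM = std::numeric_limits<std::size_t>::max();

inline void nullifyUndefinedDims(VectorDims& dims) {
    // Replace every sentinel dimension with 0 so the shape becomes a valid,
    // fully static shape that describes an empty tensor.
    std::transform(dims.begin(), dims.end(), dims.begin(),
                   [](std::size_t dim) { return dim == UNDEFINED_DIM ? 0 : dim; });
}
}  // namespace sketch

// Example: {2, UNDEFINED_DIM, 8} becomes {2, 0, 8}.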

class PortIteratorHelper : public PortMapHelper {
public:
PortIteratorHelper(const MKLDNNMemoryPtr &from, const MKLDNNMemoryPtr &to, bool sliced_src,
@@ -223,8 +237,8 @@ class staticValueCheck : public PortChecker {
int value;
};

DynamicBuffer::DynamicBuffer(const MKLDNNMemoryPtr &from, const std::vector<MKLDNNMemoryPtr> &to,
const PortMap &map_rule) : from(from), to(to), map_rule(map_rule) {
DynamicBuffer::DynamicBuffer(const MKLDNNMemoryPtr &from_, const std::vector<MKLDNNMemoryPtr> &to_,
const PortMap &map_rule_) : from(from_), to(to_), map_rule(map_rule_) {
elem_size = MKLDNNExtensionUtils::sizeOfDataType(from->GetDataType());
}

@@ -305,11 +319,21 @@ void DynamicBuffer::move_data() {
}

void DynamicBuffer::transfer(const MKLDNNNode* node) {
const auto desc = node->getBaseMemDescAtOutputPort(map_rule.from)->cloneWithNewDims(
MKLDNNExtensionUtils::convertToVectorDims(mem_holder_buffer->get_desc().dims()));
redefineToMemories(to, desc);
if (mem_holder_buffer) {
const auto desc = node->getBaseMemDescAtOutputPort(map_rule.from)->cloneWithNewDims(
MKLDNNExtensionUtils::convertToVectorDims(mem_holder_buffer->get_desc().dims()));
redefineToMemories(to, *desc);

copy(get_ptr(*mem_holder_buffer.get()), reinterpret_cast<uint8_t*>(to.front()->GetPtr()), 0, 0, 1, to.front()->GetSize());
copy(get_ptr(*mem_holder_buffer.get()), reinterpret_cast<uint8_t*>(to.front()->GetPtr()), 0, 0, 1, to.front()->GetSize());
} else {
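// The dynamic buffer holds no data (e.g. the body never executed), so zero out the
// still-undefined output dims below to give the outputs a valid, empty static shape.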
VectorDims newDims = to.front()->GetShape().getDims();
nullifyUndefinedDims(newDims);

const auto desc = node->getBaseMemDescAtOutputPort(map_rule.from)->cloneWithNewDims(newDims);
redefineToMemories(to, *desc);
}

mem_holder_buffer.reset();
}

void DynamicBuffer::copy(const uint8_t* src, uint8_t* dst, const size_t src_stride, const size_t dst_stride, const size_t count, const size_t len) {
@@ -455,8 +479,10 @@ void MKLDNNTensorIteratorNode::initSupportedPrimitiveDescriptors() {
void MKLDNNTensorIteratorNode::createPrimitive() {
if (loopBodyConditionOutputIdx == -1)
continue_cond_check.reset(new staticValueCheck(true)); // always true
if (loopExecutionConditionIdx == -1)
if (loopExecutionConditionIdx == -1) {
initial_cond_check.reset(new staticValueCheck(true));
lastUsedCond = initial_cond_check->getStatus();
}

if (isDynamicNode())
prepareDynamicBuffers();
@@ -476,22 +502,24 @@ bool MKLDNNTensorIteratorNode::needPrepareParams() const {
}

void MKLDNNTensorIteratorNode::prepareParams() {
reshapeSubgraphInput();
prepareTripCount();
prepareInitialCond();

first_mappers.clear();
before_mappers.clear();
back_mappers.clear();

prepareInputPorts();
prepareInitialCond();
prepareContinueCond();
prepareTripCount();
// special purpose ports
prepareLoopBodyCurrentIteration();
if ((lastUsedCond && lastUsedTripCount != 0) || !isDynamicNode()) {
reshapeSubgraphInput();

prepareInputPorts();
prepareContinueCond();
prepareLoopBodyCurrentIteration();

if (!isDynamicNode()) {
prepareOutputPorts();
prepareBackEdges();
if (!isDynamicNode()) {
prepareOutputPorts();
prepareBackEdges();
}
}
}

@@ -534,9 +562,6 @@ void MKLDNNTensorIteratorNode::executeDynamicImpl(mkldnn::stream strm) {
for (auto &mapper : first_mappers)
mapper->execute(strm);

if (!continue_cond || max_num_iter == 0)
THROW_ERROR << "has incorrect iteration count for dynamic execution";

// use "i != max_num_iter" only to allow "-1" works like infinite loop
for (int i = 0; i != max_num_iter && continue_cond; i++) {
// copy data to subgraph iteration
@@ -566,7 +591,7 @@ void MKLDNNTensorIteratorNode::prepareInputPorts() {
const auto &eng = getEngine();
for (auto map_rule : inputPortMap) {
auto &from_mem = getParentEdgesAtPort(map_rule.from)[0]->getMemoryPtr();
auto &to_mem = input_mems[map_rule.to].front(); // first memory is enough to get common memory ptr
auto &to_mem = input_mems[map_rule.to].front(); // first memory is enough to access the shared underlying physical memory

if (map_rule.axis == -1)
first_mappers.emplace_back(std::make_shared<BackEdgePortHelper>(from_mem, to_mem, eng));
@@ -607,7 +632,7 @@ void MKLDNNTensorIteratorNode::prepareDynamicBackEdges() {
auto to_mems = input_mems[map_rule.to];

const auto& desc = from_mem->getDesc();
redefineToMemories(to_mems, desc.clone());
redefineToMemories(to_mems, desc);

// first memory is enough to get common memory ptr
back_mappers.emplace_back(std::make_shared<BackEdgePortHelper>(from_mem, to_mems.front(), eng));
@@ -668,7 +693,7 @@ void MKLDNNTensorIteratorNode::reshapeSubgraphInput() {
new_dims[map_rule.axis] = abs(map_rule.stride);

const auto desc = std::make_shared<CpuBlockedMemoryDesc>(to_mems.front()->getDesc().getPrecision(), Shape(new_dims));
redefineToMemories(to_mems, desc);
redefineToMemories(to_mems, *desc);
}
}

Expand All @@ -679,11 +704,18 @@ void MKLDNNTensorIteratorNode::reshapeAndFillOutput(mkldnn::stream strm) {
auto to_mems = getToMemories(this, map_rule.from);
auto &from_mem = output_mem[map_rule.to];

const auto desc = getBaseMemDescAtOutputPort(map_rule.from)->cloneWithNewDims(from_mem->getStaticDims());
redefineToMemories(to_mems, desc);
// if the Loop or TI isn't executed, we should fill the dynamic dims with zeros
auto newShape = from_mem->GetShape();
auto newDims = newShape.getDims();
nullifyUndefinedDims(newDims);

BackEdgePortHelper mapper(from_mem, to_mems.front(), eng);
mapper.execute(strm);
const auto desc = getBaseMemDescAtOutputPort(map_rule.from)->cloneWithNewDims(newDims);
redefineToMemories(to_mems, *desc);

if (!newShape.isDynamic()) {
BackEdgePortHelper mapper(from_mem, to_mems.front(), eng);
mapper.execute(strm);
}
}
}

@@ -777,13 +809,6 @@ int MKLDNNTensorIteratorNode::getNumIteration(const std::vector<PortMap>& inputP
return numIterations;
}

std::vector<MKLDNNMemoryPtr> MKLDNNTensorIteratorNode::getToMemories(const MKLDNNNode* node, const size_t port) const {
std::vector<MKLDNNMemoryPtr> memories;
for (auto edge : node->getChildEdgesAtPort(port))
memories.push_back(edge->getMemoryPtr());
return memories;
}

bool MKLDNNTensorIteratorNode::created() const {
return getType() == TensorIterator;
}
5 changes: 1 addition & 4 deletions src/plugins/intel_cpu/src/nodes/mkldnn_tensoriterator_node.h
@@ -62,7 +62,7 @@ class PortChecker {
*/
class DynamicBuffer {
public:
DynamicBuffer(const MKLDNNMemoryPtr &from, const std::vector<MKLDNNMemoryPtr> &to, const PortMap &map_rule);
DynamicBuffer(const MKLDNNMemoryPtr &from_, const std::vector<MKLDNNMemoryPtr> &to_, const PortMap &map_rule_);
~DynamicBuffer() = default;

void execute(const mkldnn::engine& eng, const int iter);
@@ -131,9 +131,6 @@ class MKLDNNTensorIteratorNode : public MKLDNNNode {
void reshapeAndFillOutput(mkldnn::stream strm);
int getNumIteration(const std::vector<PortMap>& inputPortMap, const std::vector<PortMap>& outputPortMap) const;

// this method get all memory ptrs of childs of one port to redefine descs for them
std::vector<MKLDNNMemoryPtr> getToMemories(const MKLDNNNode* node, const size_t port) const;

MKLDNNExtensionManager::Ptr ext_mng;
MKLDNNGraph sub_graph;
std::vector<std::vector<MKLDNNMemoryPtr>> input_mems;
@@ -173,6 +173,11 @@ std::vector<std::string> disabledTestPatterns() {
// Issue: 75022
R"(.*OVExecutableNetworkBaseTest.*LoadNetworkToDefaultDeviceNoThrow.*)",
R"(.*IEClassBasicTest.*LoadNetworkToDefaultDeviceNoThrow.*)",
// Issue: 77390
R"(.*LoopLayerCPUTest.*exec_cond=0.*)",
R"(.*LoopLayerCPUTest.*trip_count=0.*)",
R"(.*LoopForDiffShapesLayerCPUTest.*exec_cond=0.*)",
R"(.*LoopForDiffShapesLayerCPUTest.*trip_count=0.*)",
};

#define FIX_62820 0
28 changes: 19 additions & 9 deletions src/tests/functional/plugin/cpu/single_layer_tests/loop.cpp
@@ -22,6 +22,7 @@ enum LOOP_IN_TYPE {
using LoopParams = typename std::tuple<
InputLayerType, // TripCount is a constant?
int64_t, // TripCount, -1 means infinity
bool, // Execution condition
std::vector<InputShape>, // InputShapes
std::vector<LOOP_IN_TYPE>, // Type
ElementType>; // Input element type
@@ -33,10 +34,11 @@ class LoopLayerCPUTest : public testing::WithParamInterface<LoopParams>,
static std::string getTestCaseName(testing::TestParamInfo<LoopParams> obj) {
InputLayerType trip_count_type;
int64_t trip_count;
bool exec_cond;
std::vector<InputShape> shapes;
std::vector<LOOP_IN_TYPE> types;
ElementType netType;
std::tie(trip_count_type, trip_count, shapes, types, netType) = obj.param;
std::tie(trip_count_type, trip_count, exec_cond, shapes, types, netType) = obj.param;

std::ostringstream result;
for (size_t i = 0; i < shapes.size(); i++) {
@@ -52,6 +54,7 @@ class LoopLayerCPUTest : public testing::WithParamInterface<LoopParams>,
result << type << "_";
result << "trip_count_type=" << trip_count_type << "_";
result << "trip_count=" << trip_count << "_";
result << "exec_cond=" << exec_cond << "_";
result << "netType=" << netType;
return result.str();
}
@@ -83,10 +86,11 @@ class LoopLayerCPUTest : public testing::WithParamInterface<LoopParams>,
void SetUp() override {
InputLayerType trip_count_type;
int64_t trip_count;
bool exec_cond;
std::vector<InputShape> shapes;
std::vector<LOOP_IN_TYPE> types;
ElementType netType;
std::tie(trip_count_type, trip_count, shapes, types, netType) = this->GetParam();
std::tie(trip_count_type, trip_count, exec_cond, shapes, types, netType) = this->GetParam();

targetDevice = CommonTestUtils::DEVICE_CPU;
init_input_shapes(shapes);
@@ -102,7 +106,7 @@ class LoopLayerCPUTest : public testing::WithParamInterface<LoopParams>,
}

auto body_condition_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::boolean, ngraph::Shape{1}, true);
auto exec_condition = std::make_shared<ngraph::opset5::Constant>(ngraph::element::boolean, ngraph::Shape{1}, true);
auto exec_condition = std::make_shared<ngraph::opset5::Constant>(ngraph::element::boolean, ngraph::Shape{1}, exec_cond);
std::shared_ptr<ngraph::Node> trip_count_input;
int shift = 0;
if (trip_count_type == InputLayerType::PARAMETER) {
@@ -163,9 +167,10 @@ class LoopWhileLayerCPUTest : public LoopLayerCPUTest {
void SetUp() override {
InputLayerType trip_count_type;
int64_t trip_count;
bool exec_cond;
std::vector<InputShape> shapes;
std::vector<LOOP_IN_TYPE> types;
std::tie(trip_count_type, trip_count, shapes, types, inType) = this->GetParam();
std::tie(trip_count_type, trip_count, exec_cond, shapes, types, inType) = this->GetParam();

targetDevice = CommonTestUtils::DEVICE_CPU;
init_input_shapes(shapes);
@@ -181,7 +186,7 @@ class LoopWhileLayerCPUTest : public LoopLayerCPUTest {
body_params.emplace_back(std::make_shared<ngraph::opset1::Parameter>(inType, pshape));
}

auto exec_condition = std::make_shared<ngraph::opset5::Constant>(ngraph::element::boolean, ngraph::Shape{}, true);
auto exec_condition = std::make_shared<ngraph::opset5::Constant>(ngraph::element::boolean, ngraph::Shape{}, exec_cond);
auto trip_count_input = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::i64, ngraph::Shape{});
trip_count_input->set_friendly_name("trip_count");
params.insert(params.begin(), trip_count_input);
@@ -233,9 +238,10 @@ class LoopForDiffShapesLayerCPUTest : public LoopLayerCPUTest {
void SetUp() override {
InputLayerType trip_count_type;
int64_t trip_count;
bool exec_cond;
std::vector<InputShape> shapes;
std::vector<LOOP_IN_TYPE> types;
std::tie(trip_count_type, trip_count, shapes, types, inType) = this->GetParam();
std::tie(trip_count_type, trip_count, exec_cond, shapes, types, inType) = this->GetParam();

targetDevice = CommonTestUtils::DEVICE_CPU;
init_input_shapes(shapes);
@@ -251,7 +257,7 @@ class LoopForDiffShapesLayerCPUTest : public LoopLayerCPUTest {
}

auto body_condition_const = std::make_shared<ngraph::opset5::Constant>(ngraph::element::boolean, ngraph::Shape{1}, true);
auto exec_condition = std::make_shared<ngraph::opset5::Constant>(ngraph::element::boolean, ngraph::Shape{1}, true);
auto exec_condition = std::make_shared<ngraph::opset5::Constant>(ngraph::element::boolean, ngraph::Shape{1}, exec_cond);
std::shared_ptr<ngraph::Node> trip_count_input;
int shift = 0;
if (trip_count_type == InputLayerType::PARAMETER) {
@@ -320,10 +326,11 @@ const std::vector<ElementType> inputPrecisions = {
};

std::vector<InputLayerType> trip_count_type { InputLayerType::CONSTANT, InputLayerType::PARAMETER };
std::vector<int64_t> trip_count { 1, 5 }; // works only if trip_count_type is constant
std::vector<int64_t> trip_count { 0, 1, 5 };
std::vector<bool> exec_cond { true, false };
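// Note: via ::testing::Combine below, trip_count = 0 and exec_cond = false generate Loop cases whose
// body never executes; those combinations are currently skipped through disabledTestPatterns (Issue 77390).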

// dim[axis] = 1 because loop supports concatenation only with stride = part_size = 1
// first loop suit test is with output concatenation
// the first loop test suite is with output concatenation
std::vector<std::vector<InputShape>> inputs = {
{ //first test suit
{ //dynamic shape for first input
@@ -393,6 +400,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_LoopForCommon, LoopLayerCPUTest,
::testing::Combine(
::testing::ValuesIn(trip_count_type),
::testing::ValuesIn(trip_count),
::testing::ValuesIn(exec_cond),
::testing::ValuesIn(inputs),
::testing::Values(types),
::testing::ValuesIn(inputPrecisions)),
@@ -428,6 +436,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_LoopWhileCommon, LoopWhileLayerCPUTest,
::testing::Combine(
::testing::Values(trip_count_type[0]),
::testing::Values(-1),
::testing::Values(true),
::testing::ValuesIn(inputs_2),
::testing::Values(std::vector<LOOP_IN_TYPE>{}),
::testing::ValuesIn(inputPrecisions)),
@@ -462,6 +471,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_LoopForDiffShapesConcat, LoopForDiffShapesLayerCPUTest,
::testing::Combine(
::testing::ValuesIn(trip_count_type),
::testing::ValuesIn(trip_count),
::testing::ValuesIn(exec_cond),
::testing::ValuesIn(inputs_3),
::testing::Values(std::vector<LOOP_IN_TYPE>{}),
::testing::ValuesIn(inputPrecisions)),