Skip to content

Commit

Permalink
[IE CLDNN] Fix regression for supporting eltwise fsv broadcasting (op…
Browse files Browse the repository at this point in the history
  • Loading branch information
hyunback authored Apr 15, 2021
1 parent 5ba5e9b commit 887c8c4
Show file tree
Hide file tree
Showing 2 changed files with 141 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,14 @@

namespace kernel_selector {

// Returns true when 'input' can be broadcast against 'output' inside the
// fsv16 eltwise kernel without per-element index clamping: either the input
// is a single scalar value, or it is a per-feature vector (every dimension
// other than feature is 1) whose feature count matches the output's.
static inline bool IsBroadcastingPossibleInput(const DataTensor& input, const DataTensor& output) {
    return input.LogicalSize() == 1 ||
           (input.LogicalSize() == output.Feature().v && input.Feature().v == output.Feature().v);
}

ParamsKey EltwiseKernel_b_fs_yx_fsv16::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::F16);
Expand All @@ -34,7 +42,7 @@ ParamsKey EltwiseKernel_b_fs_yx_fsv16::GetSupportedKey() const {
static inline size_t GetBlockSize(const eltwise_params& params) {
// Set blocksize 1 when broadcasting X dim
for (size_t i = 0; i < params.inputs.size(); i++) {
if (params.inputs[i].X().v == 1 && params.inputs[i].LogicalSize() != 1) {
if ((params.inputs[i].X().v == 1) && !IsBroadcastingPossibleInput(params.inputs[i], params.output)) {
return 1;
}
}
Expand All @@ -56,9 +64,9 @@ static inline bool OpHasFeatureBroadcast(const eltwise_params& params, const siz
for (size_t input_idx = 0; input_idx < ew.inputs.size(); input_idx++) {
const auto &input = ew.inputs[input_idx];
if (input.mode == EltwiseInputMode::INPUT_BUFFER) {
if (params.inputs[input_idx].LogicalSize() != 1
&& params.inputs[input_idx].Feature().v == 1
&& params.output.Feature().v != 1) {
if (params.inputs[input_idx].LogicalSize() != 1 &&
params.inputs[input_idx].Feature().v == 1 &&
params.output.Feature().v != 1) {
return true;
}
}
Expand Down Expand Up @@ -193,31 +201,45 @@ JitConstants EltwiseKernel_b_fs_yx_fsv16::GetJitConstants(const eltwise_params&
jit.Merge(MakeFusedOpsJitConstants(params, {conf}));
}

jit.AddConstant(MakeJitConstant("ELTWISE_BROADCAST", params.broadcast));
if (params.broadcast) {
bool need_idx_safe = true;
for (size_t i = 0; i < params.inputs.size(); i++) {
if (IsBroadcastingPossibleInput(params.inputs[i], params.output)) {
need_idx_safe = false;
break;
}
}
if (need_idx_safe)
jit.AddConstant(MakeJitConstant("ELTWISE_BROADCAST", params.broadcast));
}

return jit;
}

bool EltwiseKernel_b_fs_yx_fsv16::Validate(const Params& params, const optional_params& o) const {
if (!EltwiseKernelBase::Validate(params, o)) {
bool EltwiseKernel_b_fs_yx_fsv16::Validate(const Params& p, const optional_params& o) const {
if (!EltwiseKernelBase::Validate(p, o)) {
return false;
}

const auto& ewParams = static_cast<const eltwise_params&>(params);
const auto& params = static_cast<const eltwise_params&>(p);

const auto& output = ewParams.output;
const auto count = params.output.PhysicalSize();

for (size_t i = 0; i < ewParams.inputs.size(); i++) {
if (ewParams.inputs[i].GetLayout() != DataLayout::b_fs_yx_fsv16 && GetBlockSize(ewParams) != 1) {
if (count % 8 != 0)
return false;

for (size_t i = 0; i < params.inputs.size(); i++) {
if ((params.inputs[i].GetLayout() != DataLayout::b_fs_yx_fsv16) &&
!IsBroadcastingPossibleInput(params.inputs[i], params.output)) {
return false;
}
}

auto input0 = ewParams.inputs[0];
auto input0 = params.inputs[0];

// Check that padding before features doesn't miss-align the blocks
auto feature_block_size = 16;
if (input0.Feature().pad.before % feature_block_size != 0 || output.Feature().pad.before % feature_block_size != 0) {
if (input0.Feature().pad.before % feature_block_size != 0 || params.output.Feature().pad.before % feature_block_size != 0) {
return false;
}

Expand All @@ -240,10 +262,10 @@ bool EltwiseKernel_b_fs_yx_fsv16::Validate(const Params& params, const optional_
return same;
};

for (size_t i = 1; i < ewParams.inputs.size(); i++) {
if (ewParams.inputs[i].LogicalSize() == input0.LogicalSize() && !(compareTensors(ewParams.inputs[i], input0)))
for (size_t i = 1; i < params.inputs.size(); i++) {
if (params.inputs[i].LogicalSize() == input0.LogicalSize() && !(compareTensors(params.inputs[i], input0)))
return false;
if (ewParams.inputs[i].Feature().pad.before % feature_block_size != 0) {
if (params.inputs[i].Feature().pad.before % feature_block_size != 0) {
return false;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3198,9 +3198,6 @@ using eltwise_test_params = std::tuple<eltwise_mode, data_types, std::vector<std

template<typename T>
class BaseEltwiseTest : public ::testing::TestWithParam<T> {
};

class eltwise_test : public BaseEltwiseTest<eltwise_test_params> {
public:
template<typename T1, typename T2>
VF<float> eltwise_ref(VVVVVVF<T1> input0, VVVVVVF<T2> input1, tensor input0_size, tensor input1_size, eltwise_mode mode) {
Expand Down Expand Up @@ -3253,6 +3250,9 @@ class eltwise_test : public BaseEltwiseTest<eltwise_test_params> {
}
};

// Fixture for the fsv16 eltwise tests; the reference computation
// (eltwise_ref) is provided by BaseEltwiseTest so other fixtures can reuse it.
class eltwise_test : public BaseEltwiseTest<eltwise_test_params> {
};

TEST_P(eltwise_test, fsv16) {
auto p = GetParam();

Expand Down Expand Up @@ -3322,6 +3322,7 @@ TEST_P(eltwise_test, fsv16) {
}
}


// Eltwise operations and data types the parameterized test matrix is
// instantiated over.
static std::vector<eltwise_mode> modes = {eltwise_mode::sum, eltwise_mode::prod};
static std::vector<data_types> types = {data_types::f32, data_types::f16};
static std::vector<std::vector<std::vector<int32_t>>> inputs = {
Expand Down Expand Up @@ -3520,3 +3521,102 @@ INSTANTIATE_TEST_CASE_P(eltwise, eltwise_test_mixed_precision,
::testing::ValuesIn(mixed_types),
::testing::ValuesIn(inputs)
), );


// Parameters for the mixed-layout eltwise tests: the operation, the two
// input shapes, the layout each input is reordered into, and the kernel
// the implementation is expected to select for that combination.
struct eltwise_layout_test_params {
    eltwise_mode mode;
    std::vector<int32_t> input0_size;   // {b, f, y, x}
    std::vector<int32_t> input1_size;   // {b, f, y, x}
    format input0_format;
    format input1_format;
    std::string selected_kernel_name;   // substring expected in get_primitive_info("eltwise")
};

// Each case packs eltwise_layout_test_params fields in declaration order:
// mode, input0 size {b,f,y,x}, input1 size {b,f,y,x}, input0 format,
// input1 format, expected kernel-name substring. The fsv16 kernel is
// expected only for the cases whose non-fsv16 input is broadcastable
// (scalar, or a per-feature vector matching the output feature count);
// the rest must fall back to the generic reference kernel.
#define CASE_ELTWISE_TEST1 eltwise_mode::sum, {1, 2, 1, 1}, {4, 2, 4, 4}, format::b_fs_yx_fsv16, format::bfyx, "generic_eltwise_ref"
#define CASE_ELTWISE_TEST2 eltwise_mode::sum, {4, 1, 4, 4}, {1, 5, 1, 1}, format::b_fs_yx_fsv16, format::bfyx, "eltwise_b_fs_yx_fsv16"
#define CASE_ELTWISE_TEST3 eltwise_mode::sum, {4, 5, 4, 1}, {4, 1, 4, 1}, format::b_fs_yx_fsv16, format::bfyx, "generic_eltwise_ref"
#define CASE_ELTWISE_TEST4 eltwise_mode::sum, {4, 2, 4, 4}, {1, 1, 1, 1}, format::b_fs_yx_fsv16, format::bfyx, "eltwise_b_fs_yx_fsv16"
#define CASE_ELTWISE_TEST5 eltwise_mode::sum, {1, 2, 1, 1}, {4, 2, 4, 4}, format::bfyx, format::b_fs_yx_fsv16, "generic_eltwise_ref"
#define CASE_ELTWISE_TEST6 eltwise_mode::sum, {4, 1, 4, 4}, {1, 5, 1, 1}, format::bfyx, format::b_fs_yx_fsv16, "generic_eltwise_ref"
#define CASE_ELTWISE_TEST7 eltwise_mode::sum, {4, 5, 4, 1}, {4, 1, 4, 1}, format::bfyx, format::b_fs_yx_fsv16, "generic_eltwise_ref"
#define CASE_ELTWISE_TEST8 eltwise_mode::sum, {4, 2, 4, 4}, {1, 1, 1, 1}, format::bfyx, format::b_fs_yx_fsv16, "generic_eltwise_ref"

// Fixture parameterized over eltwise_layout_test_params (mixed input
// layouts); inherits the eltwise_ref() reference helper from BaseEltwiseTest.
class eltwise_layout_test : public BaseEltwiseTest<eltwise_layout_test_params> {
};

// Runs an eltwise primitive whose two inputs are reordered into (possibly
// different) layouts, verifies the implementation picks the expected kernel,
// and compares the result against the scalar reference eltwise_ref().
class eltwise_test_mixed_layout : public eltwise_layout_test {};
TEST_P(eltwise_test_mixed_layout, mixed_layout) {
auto p = GetParam();

auto mode = p.mode;
auto input0_size = p.input0_size;
auto input1_size = p.input1_size;
auto format0 = p.input0_format;
auto format1 = p.input1_format;
auto selected_kernel = p.selected_kernel_name;

// Unpack {b, f, y, x} extents of each input for random data generation.
int b0 = input0_size[0];
int f0 = input0_size[1];
int y0 = input0_size[2];
int x0 = input0_size[3];

int b1 = input1_size[0];
int f1 = input1_size[1];
int y1 = input1_size[2];
int x1 = input1_size[3];

// NOTE(review): the numbering is offset by one here — input1_rnd holds the
// data for input0_size and input2_rnd for input1_size.
int min_random = -2, max_random = 2;
VVVVVVF<float> input1_rnd = generate_random_6d<float>(b0, f0, 1, 1, y0, x0, min_random, max_random);
VVVVVVF<float> input2_rnd = generate_random_6d<float>(b1, f1, 1, 1, y1, x1, min_random, max_random);
VF<float> input1_rnd_vec = flatten_6d<float>(format::bfwzyx, input1_rnd);
VF<float> input2_rnd_vec = flatten_6d<float>(format::bfwzyx, input2_rnd);

const auto& engine = get_test_engine();
auto in0_size = tensor(format::bfyx, input0_size);
auto in1_size = tensor(format::bfyx, input1_size);

auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, in0_size });
auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, in1_size });
set_values(input1, input1_rnd_vec);
set_values(input2, input2_rnd_vec);

// Reorder each input into the layout under test before the eltwise, then
// reorder the result back to bfyx so it can be read linearly.
topology topology;
topology.add(input_layout("input1", input1.get_layout()));
topology.add(input_layout("input2", input2.get_layout()));
topology.add(reorder("reorder1", "input1", format0, data_types::f32));
topology.add(reorder("reorder2", "input2", format1, data_types::f32));
topology.add(eltwise("eltwise", {"reorder1", "reorder2"}, mode));
topology.add(reorder("out", "eltwise", format::bfyx, data_types::f32));
primitive_id out_id = "out";

network network(engine, topology);

network.set_input_data("input1", input1);
network.set_input_data("input2", input2);
auto outputs = network.execute();
EXPECT_EQ(outputs.size(), size_t(1));
EXPECT_EQ(outputs.begin()->first, out_id);

// The chosen kernel name must appear in the primitive info — this is the
// layout-selection assertion this suite exists for.
EXPECT_TRUE(network.get_primitive_info("eltwise").find(selected_kernel) != std::string::npos);

auto output_memory = outputs.at(out_id).get_memory();
auto output_ptr = output_memory.pointer<float>();

// Element-wise compare against the CPU reference; the NaN check guards
// against both sides silently agreeing on NaN.
VF<float> output_cpu_vec = eltwise_ref(input1_rnd, input2_rnd, in0_size, in1_size, mode);
for (size_t i = 0; i < output_cpu_vec.size(); ++i) {
EXPECT_TRUE(!(std::isnan((float)output_cpu_vec[i]) && std::isnan((float)output_ptr[i])));
ASSERT_FLOAT_EQ(output_cpu_vec[i], output_ptr[i]);
}
}

// Instantiates the mixed-layout suite over the CASE_ELTWISE_TEST* shape /
// layout / expected-kernel combinations.
INSTANTIATE_TEST_CASE_P(eltwise, eltwise_test_mixed_layout,
::testing::ValuesIn(std::vector<eltwise_layout_test_params>{
eltwise_layout_test_params{CASE_ELTWISE_TEST1},
eltwise_layout_test_params{CASE_ELTWISE_TEST2},
eltwise_layout_test_params{CASE_ELTWISE_TEST3},
eltwise_layout_test_params{CASE_ELTWISE_TEST4},
eltwise_layout_test_params{CASE_ELTWISE_TEST5},
eltwise_layout_test_params{CASE_ELTWISE_TEST6},
eltwise_layout_test_params{CASE_ELTWISE_TEST7},
eltwise_layout_test_params{CASE_ELTWISE_TEST8},
}), );

0 comments on commit 887c8c4

Please sign in to comment.