Skip to content

Commit

Permalink
Horizontally fuse contiguous operators (ROCm#1232)
Browse files Browse the repository at this point in the history
This reorders the transposes across slice to improve horizontal fusion for contiguous. This also improves eliminate_contiguous to remove contiguous better across splits.
  • Loading branch information
pfultz2 authored Jul 5, 2022
1 parent ca8a54f commit 27e980c
Show file tree
Hide file tree
Showing 4 changed files with 222 additions and 18 deletions.
54 changes: 36 additions & 18 deletions src/eliminate_contiguous.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,53 +93,71 @@ static bool try_compute_shape(instruction_ref ins,
return try_compute_shape(ins, inputs, mods);
}

void eliminate_contiguous::apply(module& m) const
template <class F>
static void remove_contiguous(const std::string& op_name, module& m, F f)
{
std::vector<instruction_ref> const_instruction;
auto last = std::prev(m.end());
std::vector<instruction_ref> const_instructions;

for(auto ins : iterator_for(m))
{
// return instruction should have inputs with standard shape
if(ins->name() == "@return")
continue;

if(ins != last and ins->outputs().empty())
continue;

if(not f(ins))
continue;

// Make a copy so we can modify it while we iterate
auto args = ins->inputs();
auto new_args = args;
auto mod_args = ins->module_inputs();

for(auto arg : ins->inputs())
{
if(arg->name() == op_name)
if(arg->name() != op_name)
continue;
auto prev = arg->inputs().front();
replace(new_args, arg, prev);
if(try_compute_shape(ins, new_args, mod_args))
{
instruction::replace_argument(ins, arg, prev);
}
else if(prev->can_eval())
{
auto prev = arg->inputs().front();
replace(new_args, arg, prev);
if(try_compute_shape(ins, new_args, mod_args))
{
instruction::replace_argument(ins, arg, prev);
}
else if(prev->can_eval())
{
const_instruction.push_back(arg);
}
const_instructions.push_back(arg);
}
}
}

// Perform evaluations in parallel
std::vector<argument> literals(const_instruction.size());
par_for(const_instruction.size(), 1, [&](const auto i) {
std::vector<argument> literals(const_instructions.size());
par_for(const_instructions.size(), 1, [&](const auto i) {
auto c = op::contiguous{};
auto prev = const_instruction[i]->inputs().front();
auto prev = const_instructions[i]->inputs().front();
literals[i] = c.compute(c.compute_shape({prev->get_shape()}), {prev->eval()});
});

for(size_t i = 0; i < const_instruction.size(); i++)
for(size_t i = 0; i < const_instructions.size(); i++)
{
auto l = m.add_literal(literals[i].get_shape(), literals[i].data());
m.replace_instruction(const_instruction[i], l);
m.replace_instruction(const_instructions[i], l);
}
}

void eliminate_contiguous::apply(module& m) const
{
// Skip contiguous from splits first
remove_contiguous(op_name, m, [](auto ins) {
if(ins->name() != "slice")
return true;
return (ins->inputs().front()->outputs().size() == 1);
});
remove_contiguous(op_name, m, [](auto) { return true; });
}

} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
64 changes: 64 additions & 0 deletions src/simplify_reshapes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -601,6 +601,69 @@ struct find_transpose_contiguous_reshaper_unary
}
};

struct find_slice_transpose
{
auto matcher() const
{
return match::any(match::any_of[match::outputs()](
match::name("slice")(match::output(match::name("transpose")))));
}

static std::vector<int64_t> find_common_perm(const std::vector<instruction_ref>& transposes)
{
std::map<std::vector<int64_t>, int64_t> count;
for(auto t : transposes)
{
auto perm = t->get_operator().to_value()["permutation"].to_vector<int64_t>();
count[perm]++;
}
return std::max_element(
count.begin(), count.end(), by(std::less<>{}, [](auto&& p) { return p.second; }))
->first;
}

void apply(module& m, const match::matcher_result& r) const
{
auto ins = r.result;
std::vector<instruction_ref> splits;
std::copy_if(ins->outputs().begin(),
ins->outputs().end(),
std::back_inserter(splits),
[&](instruction_ref out) {
return out->name() == "slice" and out->outputs().size() == 1 and
out->outputs().front()->name() == "transpose";
});
if(splits.size() < 2)
return;
std::vector<instruction_ref> transposes;
std::transform(splits.begin(),
splits.end(),
std::back_inserter(transposes),
[](auto split) { return split->outputs().front(); });
auto perm = find_common_perm(transposes);
auto iperm = invert_permutation(perm);
auto pre = m.insert_instruction(
std::next(ins), make_op("transpose", {{"permutation", perm}}), ins);
for(auto i : range(transposes.size()))
{
auto split = splits[i];
auto t = transposes[i];
auto op = any_cast<op::slice>(split->get_operator());
std::transform(op.axes.begin(), op.axes.end(), op.axes.begin(), [&](auto axis) {
return iperm[axis];
});
auto new_ins = m.insert_instruction(t, op, pre);
if(t->get_operator() != pre->get_operator())
{
auto curr = t->get_operator().to_value()["permutation"].to_vector<int64_t>();
new_ins = m.insert_instruction(
t, make_op("transpose", {{"permutation", reorder_dims(iperm, curr)}}), new_ins);
}
m.replace_instruction(t, new_ins);
}
}
};

void simplify_reshapes::apply(module& m) const
{
for(int i = 0; i < 2; i++)
Expand All @@ -616,6 +679,7 @@ void simplify_reshapes::apply(module& m) const
find_nested_convert{},
find_nested_slice{},
find_nested_concat{},
find_slice_transpose{},
find_transpose_contiguous_reshaper_unary{});
dead_code_elimination{}.apply(m);
}
Expand Down
20 changes: 20 additions & 0 deletions test/eliminate_contiguous_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -205,4 +205,24 @@ TEST_CASE(contiguous_pointwise)
mm->begin(), mm->end(), [](auto&& ins) { return ins.name() == "contiguous"; }));
}

TEST_CASE(slice_contiguous)
{
migraphx::module m;

migraphx::shape s{migraphx::shape::float_type, {4, 2}};
auto x = m.add_parameter("x", s);
auto t = m.add_instruction(migraphx::make_op("transpose", {{"permutation", {1, 0}}}), x);
auto c = m.add_instruction(migraphx::make_op("contiguous"), t);
auto s1 = m.add_instruction(
migraphx::make_op("slice", {{"axes", {0}}, {"starts", {0}}, {"ends", {1}}}), c);
auto s2 = m.add_instruction(
migraphx::make_op("slice", {{"axes", {0}}, {"starts", {1}}, {"ends", {2}}}), c);
auto c1 = m.add_instruction(migraphx::make_op("contiguous"), s1);
auto c2 = m.add_instruction(migraphx::make_op("contiguous"), s2);
m.add_instruction(pass_standard_op{}, c1, c2);
run_pass(m);
EXPECT(std::count_if(
m.begin(), m.end(), [](auto&& ins) { return ins.name() == "contiguous"; }) == 1);
}

int main(int argc, const char* argv[]) { test::run(argc, argv); }
102 changes: 102 additions & 0 deletions test/simplify_reshapes_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1141,4 +1141,106 @@ TEST_CASE(transpose_contiguous_reshape_binary_broadcast)
EXPECT(m1 == m2);
}

TEST_CASE(transpose_slice)
{
migraphx::module m1;
{
auto x = m1.add_parameter("x", {migraphx::shape::float_type, {1, 384, 36, 64}});
auto slice1 = m1.add_instruction(
migraphx::make_op("slice", {{"axes", {2}}, {"starts", {0}}, {"ends", {12}}}), x);
auto transpose1 = m1.add_instruction(
migraphx::make_op("transpose", {{"permutation", {0, 2, 1, 3}}}), slice1);
auto slice2 = m1.add_instruction(
migraphx::make_op("slice", {{"axes", {2}}, {"starts", {12}}, {"ends", {24}}}), x);
auto transpose2 = m1.add_instruction(
migraphx::make_op("transpose", {{"permutation", {0, 2, 1, 3}}}), slice2);
auto slice3 = m1.add_instruction(
migraphx::make_op("slice", {{"axes", {2}}, {"starts", {24}}, {"ends", {36}}}), x);
auto transpose3 = m1.add_instruction(
migraphx::make_op("transpose", {{"permutation", {0, 2, 1, 3}}}), slice3);
m1.add_return({transpose1, transpose2, transpose3});
}
run_pass(m1);
migraphx::module m2;
{
auto x = m2.add_parameter("x", {migraphx::shape::float_type, {1, 384, 36, 64}});
auto transpose =
m2.add_instruction(migraphx::make_op("transpose", {{"permutation", {0, 2, 1, 3}}}), x);
auto slice1 = m2.add_instruction(
migraphx::make_op("slice", {{"axes", {1}}, {"starts", {0}}, {"ends", {12}}}),
transpose);
auto slice2 = m2.add_instruction(
migraphx::make_op("slice", {{"axes", {1}}, {"starts", {12}}, {"ends", {24}}}),
transpose);
auto slice3 = m2.add_instruction(
migraphx::make_op("slice", {{"axes", {1}}, {"starts", {24}}, {"ends", {36}}}),
transpose);
m2.add_return({slice1, slice2, slice3});
}
EXPECT(m1 == m2);
}

TEST_CASE(transpose_slice_diff_perm)
{
migraphx::module m1;
{
auto x = m1.add_parameter("x", {migraphx::shape::float_type, {1, 384, 36, 64}});
auto slice1 = m1.add_instruction(
migraphx::make_op("slice", {{"axes", {2}}, {"starts", {0}}, {"ends", {12}}}), x);
auto transpose1 = m1.add_instruction(
migraphx::make_op("transpose", {{"permutation", {0, 2, 1, 3}}}), slice1);
auto slice2 = m1.add_instruction(
migraphx::make_op("slice", {{"axes", {2}}, {"starts", {12}}, {"ends", {24}}}), x);
auto transpose2 = m1.add_instruction(
migraphx::make_op("transpose", {{"permutation", {0, 2, 3, 1}}}), slice2);
auto slice3 = m1.add_instruction(
migraphx::make_op("slice", {{"axes", {2}}, {"starts", {24}}, {"ends", {36}}}), x);
auto transpose3 = m1.add_instruction(
migraphx::make_op("transpose", {{"permutation", {0, 2, 1, 3}}}), slice3);
m1.add_return({transpose1, transpose2, transpose3});
}
run_pass(m1);
migraphx::module m2;
{
auto x = m2.add_parameter("x", {migraphx::shape::float_type, {1, 384, 36, 64}});
auto transpose =
m2.add_instruction(migraphx::make_op("transpose", {{"permutation", {0, 2, 1, 3}}}), x);
auto slice1 = m2.add_instruction(
migraphx::make_op("slice", {{"axes", {1}}, {"starts", {0}}, {"ends", {12}}}),
transpose);
auto slice2 = m2.add_instruction(
migraphx::make_op("slice", {{"axes", {1}}, {"starts", {12}}, {"ends", {24}}}),
transpose);
auto transpose2 = m2.add_instruction(
migraphx::make_op("transpose", {{"permutation", {0, 1, 3, 2}}}), slice2);
auto slice3 = m2.add_instruction(
migraphx::make_op("slice", {{"axes", {1}}, {"starts", {24}}, {"ends", {36}}}),
transpose);
m2.add_return({slice1, transpose2, slice3});
}
EXPECT(m1 == m2);
}

TEST_CASE(transpose_slice_single_transpose)
{
migraphx::module m1;
{
auto x = m1.add_parameter("x", {migraphx::shape::float_type, {1, 384, 36, 64}});
auto slice1 = m1.add_instruction(
migraphx::make_op("slice", {{"axes", {2}}, {"starts", {0}}, {"ends", {12}}}), x);
auto sqrt1 = m1.add_instruction(migraphx::make_op("sqrt"), slice1);
auto slice2 = m1.add_instruction(
migraphx::make_op("slice", {{"axes", {2}}, {"starts", {12}}, {"ends", {24}}}), x);
auto transpose = m1.add_instruction(
migraphx::make_op("transpose", {{"permutation", {0, 2, 1, 3}}}), slice2);
auto slice3 = m1.add_instruction(
migraphx::make_op("slice", {{"axes", {2}}, {"starts", {24}}, {"ends", {36}}}), x);
auto sqrt3 = m1.add_instruction(migraphx::make_op("sqrt"), slice3);
m1.add_return({sqrt1, transpose, sqrt3});
}
migraphx::module m2 = m1;
run_pass(m1);
EXPECT(m1 == m2);
}

int main(int argc, const char* argv[]) { test::run(argc, argv); }

0 comments on commit 27e980c

Please sign in to comment.