Skip to content

Commit 4467b4c

Browse files
author
Simon Dardis
committed
[mips] Materialize constants for multiplication
Previously, the MIPS backend would always break down constant multiplications into a series of shifts, adds, and subs. This patch changes that so the cost of doing so is estimated. The cost is estimated against worst case constant materialization and retrieving the results from the HI/LO registers. For cases where the value type of the multiplication is not legal, the cost of legalization is estimated and is accounted for before performing the optimization of breaking down the constant. This resolves PR36884. Thanks to npl for reporting the issue! Reviewers: abeserminji, smaksimovic Differential Revision: https://reviews.llvm.org/D45316 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@330037 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent cef8ffa commit 4467b4c

File tree

2 files changed

+140
-226
lines changed

2 files changed

+140
-226
lines changed

lib/Target/Mips/MipsSEISelLowering.cpp

Lines changed: 76 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -705,6 +705,77 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
705705
return SDValue();
706706
}
707707

708+
// Decide whether a scalar multiply by the constant C is worth rewriting as a
// sequence of shifts, adds and subs instead of being left as a multiply.
//
// \param C         The constant multiplier.
// \param VT        The value type of the multiplication.
// \param DAG       Used to look up the register type that VT is mapped to.
// \param Subtarget Used to select the step budget from the ABI in use.
// \returns true if the estimated cost of the shift/add/sub sequence is
//          acceptable, false if the multiply should be kept.
static bool shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT,
                                               SelectionDAG &DAG,
                                               const MipsSubtarget &Subtarget) {
  // Estimate the number of operations the below transform will turn a
  // constant multiply into. The number is approximately how many powers
  // of two summed together that the constant can be broken down into.
  //
  // If we have taken more than 12[1] / 8[2] steps to attempt the
  // optimization for a native sized value, it is more than likely that this
  // optimization will make things worse.
  //
  // [1] MIPS64 requires 6 instructions at most to materialize any constant,
  //     multiplication requires at least 4 cycles, but another cycle (or two)
  //     to retrieve the result from the HI/LO registers.
  //
  // [2] For MIPS32, more than 8 steps is expensive as the constant could be
  //     materialized in 2 instructions, multiplication requires at least 4
  //     cycles, but another cycle (or two) to retrieve the result from the
  //     HI/LO registers.

  // The ABI does not change while we iterate, so query it once up front.
  const bool HasBudgetOf12 = Subtarget.isABI_N32() || Subtarget.isABI_N64();
  const bool HasBudgetOf8 = Subtarget.isABI_O32();

  SmallVector<APInt, 16> WorkStack(1, C);
  unsigned Steps = 0;
  unsigned BitWidth = C.getBitWidth();

  while (!WorkStack.empty()) {
    APInt Val = WorkStack.pop_back_val();

    // Multiplying by 0 or 1 needs no operation at all.
    if (Val == 0 || Val == 1)
      continue;

    if (!Val.isPowerOf2()) {
      // Split Val around the nearest powers of two. Both bounds must be
      // derived from Val (the value currently being decomposed), not from the
      // original constant C, so that they actually bracket Val.
      APInt Floor = APInt(BitWidth, 1) << Val.logBase2();
      APInt Ceil = Val.isNegative() ? APInt(BitWidth, 0)
                                    : APInt(BitWidth, 1) << Val.ceilLogBase2();

      if ((Val - Floor).ule(Ceil - Val)) {
        // Val is closer to Floor: Val = Floor + (Val - Floor).
        WorkStack.push_back(Floor);
        WorkStack.push_back(Val - Floor);
      } else {
        // Val is closer to Ceil: Val = Ceil - (Ceil - Val).
        WorkStack.push_back(Ceil);
        WorkStack.push_back(Ceil - Val);
      }
    }

    // One step for the shift (power of two) or for the add/sub that joins the
    // two halves pushed above.
    ++Steps;

    // Enforce the budget after every counted step, whichever path produced
    // it, so an expensive decomposition is rejected as soon as it is detected.
    if (Steps > 12 && HasBudgetOf12)
      return false;

    if (Steps > 8 && HasBudgetOf8)
      return false;
  }

  // If the value being multiplied is not supported natively, we have to pay
  // an additional legalization cost, conservatively assume an increase in the
  // cost of 3 instructions per step. The values for this heuristic were
  // determined experimentally.
  unsigned RegisterSize = DAG.getTargetLoweringInfo()
                              .getRegisterType(*DAG.getContext(), VT)
                              .getSizeInBits();

  // Natively sized values have already been vetted by the budget above.
  if (VT.getSizeInBits() == RegisterSize)
    return true;

  return Steps * 3 <= 27;
}
778+
708779
static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT,
709780
EVT ShiftTy, SelectionDAG &DAG) {
710781
// Return 0.
@@ -743,11 +814,13 @@ static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT,
743814

744815
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
745816
const TargetLowering::DAGCombinerInfo &DCI,
746-
const MipsSETargetLowering *TL) {
817+
const MipsSETargetLowering *TL,
818+
const MipsSubtarget &Subtarget) {
747819
EVT VT = N->getValueType(0);
748820

749821
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
750-
if (!VT.isVector())
822+
if (!VT.isVector() && shouldTransformMulToShiftsAddsSubs(
823+
C->getAPIntValue(), VT, DAG, Subtarget))
751824
return genConstMult(N->getOperand(0), C->getAPIntValue(), SDLoc(N), VT,
752825
TL->getScalarShiftAmountTy(DAG.getDataLayout(), VT),
753826
DAG);
@@ -948,7 +1021,7 @@ MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
9481021
Val = performORCombine(N, DAG, DCI, Subtarget);
9491022
break;
9501023
case ISD::MUL:
951-
return performMULCombine(N, DAG, DCI, this);
1024+
return performMULCombine(N, DAG, DCI, this, Subtarget);
9521025
case ISD::SHL:
9531026
Val = performSHLCombine(N, DAG, DCI, Subtarget);
9541027
break;

0 commit comments

Comments
 (0)