
Commit 129fc8c

Author: Krzysztof Parzyszek

[Hexagon] Initial instruction cost model for auto-vectorization

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@330065 91177308-0d34-0410-b5e6-96231b3b80d8

1 parent f5fe785, commit 129fc8c

File tree

2 files changed: +195, -98 lines


lib/Target/Hexagon/HexagonTargetTransformInfo.cpp

Lines changed: 160 additions & 15 deletions
@@ -16,6 +16,7 @@
 #include "HexagonTargetTransformInfo.h"
 #include "HexagonSubtarget.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/User.h"
@@ -27,16 +28,35 @@ using namespace llvm;
 
 #define DEBUG_TYPE "hexagontti"
 
-static cl::opt<bool> HexagonAutoHVX("hexagon-autohvx", cl::init(false),
+static cl::opt<bool> HexagonAutoHVX("hexagon-autohvx", cl::init(true),
   cl::Hidden, cl::desc("Enable loop vectorizer for HVX"));
 
 static cl::opt<bool> EmitLookupTables("hexagon-emit-lookup-tables",
   cl::init(true), cl::Hidden,
   cl::desc("Control lookup table emission on Hexagon target"));
 
+
+bool HexagonTTIImpl::useHVX() const {
+  return ST.useHVXOps() && HexagonAutoHVX;
+}
+
+bool HexagonTTIImpl::isTypeForHVX(Type *VecTy) const {
+  assert(VecTy->isVectorTy());
+  // Avoid types like <2 x i32*>.
+  if (!cast<VectorType>(VecTy)->getElementType()->isIntegerTy())
+    return false;
+  EVT VecVT = EVT::getEVT(VecTy);
+  if (!VecVT.isSimple() || VecVT.getSizeInBits() <= 64)
+    return false;
+  if (ST.isHVXVectorType(VecVT.getSimpleVT()))
+    return true;
+  auto Action = TLI.getPreferredVectorAction(VecVT);
+  return Action == TargetLoweringBase::TypeWidenVector;
+}
+
 TargetTransformInfo::PopcntSupportKind
 HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const {
-  // Return Fast Hardware support as every input < 64 bits will be promoted
+  // Return fast hardware support as every input < 64 bits will be promoted
   // to 64 bits.
   return TargetTransformInfo::PSK_FastHardware;
 }
@@ -58,53 +78,178 @@ bool HexagonTTIImpl::shouldFavorPostInc() const {
   return true;
 }
 
+/// --- Vector TTI begin ---
+
 unsigned HexagonTTIImpl::getNumberOfRegisters(bool Vector) const {
   if (Vector)
-    return HexagonAutoHVX && getST()->useHVXOps() ? 32 : 0;
+    return useHVX() ? 32 : 0;
   return 32;
 }
 
 unsigned HexagonTTIImpl::getMaxInterleaveFactor(unsigned VF) {
-  return HexagonAutoHVX && getST()->useHVXOps() ? 64 : 0;
+  return useHVX() ? 2 : 0;
 }
 
 unsigned HexagonTTIImpl::getRegisterBitWidth(bool Vector) const {
   return Vector ? getMinVectorRegisterBitWidth() : 32;
 }
 
 unsigned HexagonTTIImpl::getMinVectorRegisterBitWidth() const {
-  return getST()->useHVXOps() ? getST()->getVectorLength()*8 : 0;
+  return useHVX() ? ST.getVectorLength()*8 : 0;
 }
 
 unsigned HexagonTTIImpl::getMinimumVF(unsigned ElemWidth) const {
-  return (8 * getST()->getVectorLength()) / ElemWidth;
+  return (8 * ST.getVectorLength()) / ElemWidth;
+}
+
+unsigned HexagonTTIImpl::getScalarizationOverhead(Type *Ty, bool Insert,
+      bool Extract) {
+  return BaseT::getScalarizationOverhead(Ty, Insert, Extract);
+}
+
+unsigned HexagonTTIImpl::getOperandsScalarizationOverhead(
+      ArrayRef<const Value*> Args, unsigned VF) {
+  return BaseT::getOperandsScalarizationOverhead(Args, VF);
+}
+
+unsigned HexagonTTIImpl::getCallInstrCost(Function *F, Type *RetTy,
+      ArrayRef<Type*> Tys) {
+  return BaseT::getCallInstrCost(F, RetTy, Tys);
+}
+
+unsigned HexagonTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+      ArrayRef<Value*> Args, FastMathFlags FMF, unsigned VF) {
+  return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
+}
+
+unsigned HexagonTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+      ArrayRef<Type*> Tys, FastMathFlags FMF,
+      unsigned ScalarizationCostPassed) {
+  if (ID == Intrinsic::bswap) {
+    std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, RetTy);
+    return LT.first + 2;
+  }
+  return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
+                                      ScalarizationCostPassed);
+}
+
+unsigned HexagonTTIImpl::getAddressComputationCost(Type *Tp,
+      ScalarEvolution *SE, const SCEV *S) {
+  return 0;
 }
 
 unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
       unsigned Alignment, unsigned AddressSpace, const Instruction *I) {
-  if (Opcode == Instruction::Load && Src->isVectorTy()) {
+  assert(Opcode == Instruction::Load || Opcode == Instruction::Store);
+  if (Opcode == Instruction::Store)
+    return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
+
+  if (Src->isVectorTy()) {
     VectorType *VecTy = cast<VectorType>(Src);
     unsigned VecWidth = VecTy->getBitWidth();
-    if (VecWidth > 64) {
-      // Assume that vectors longer than 64 bits are meant for HVX.
-      if (getNumberOfRegisters(true) > 0) {
-        if (VecWidth % getRegisterBitWidth(true) == 0)
-          return 1;
-      }
+    if (useHVX() && isTypeForHVX(VecTy)) {
+      unsigned RegWidth = getRegisterBitWidth(true);
+      Alignment = std::min(Alignment, RegWidth/8);
+      // Cost of HVX loads.
+      if (VecWidth % RegWidth == 0)
+        return VecWidth / RegWidth;
+      // Cost of constructing HVX vector from scalar loads.
       unsigned AlignWidth = 8 * std::max(1u, Alignment);
       unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
       return 3*NumLoads;
     }
+
+    // Non-HVX vectors.
+    // Add extra cost for floating point types.
+    unsigned Cost = VecTy->getElementType()->isFloatingPointTy() ? 4 : 1;
+
+    Alignment = std::min(Alignment, 8u);
+    unsigned AlignWidth = 8 * std::max(1u, Alignment);
+    unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
+    if (Alignment == 4 || Alignment == 8)
+      return Cost * NumLoads;
+    // Loads of less than 32 bits will need extra inserts to compose a vector.
+    unsigned LogA = Log2_32(Alignment);
+    return (3 - LogA) * Cost * NumLoads;
   }
+
   return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
 }
 
+unsigned HexagonTTIImpl::getMaskedMemoryOpCost(unsigned Opcode,
+      Type *Src, unsigned Alignment, unsigned AddressSpace) {
+  return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+}
+
+unsigned HexagonTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
+      int Index, Type *SubTp) {
+  return 1;
+}
+
+unsigned HexagonTTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
+      Value *Ptr, bool VariableMask, unsigned Alignment) {
+  return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
+                                       Alignment);
+}
+
+unsigned HexagonTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode,
+      Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+      unsigned Alignment, unsigned AddressSpace) {
+  return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+                                           Alignment, AddressSpace);
+}
+
+unsigned HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+      Type *CondTy, const Instruction *I) {
+  if (ValTy->isVectorTy()) {
+    auto *VecTy = dyn_cast<VectorType>(ValTy);
+    std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, ValTy);
+    if (Opcode == Instruction::FCmp)
+      return LT.first + 4 * VecTy->getNumElements();
+  }
+  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
+}
+
+unsigned HexagonTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+      TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info,
+      TTI::OperandValueProperties Opd1PropInfo,
+      TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value*> Args) {
+  return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+                                       Opd1PropInfo, Opd2PropInfo, Args);
+}
+
+unsigned HexagonTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
+      Type *Src, const Instruction *I) {
+  return 1;
+}
+
+unsigned HexagonTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
+      unsigned Index) {
+  Type *ElemTy = Val->isVectorTy() ? cast<VectorType>(Val)->getElementType()
+                                   : Val;
+  if (Opcode == Instruction::InsertElement) {
+    // Need two rotations for non-zero index.
+    unsigned Cost = (Index != 0) ? 2 : 0;
+    if (ElemTy->isIntegerTy(32))
+      return Cost;
+    // If it's not a 32-bit value, there will need to be an extract.
+    return Cost + getVectorInstrCost(Instruction::ExtractElement, Val, Index);
+  }
+
+  if (Opcode == Instruction::ExtractElement)
+    return 2;
+
+  return 1;
+}
+
+/// --- Vector TTI end ---
+
 unsigned HexagonTTIImpl::getPrefetchDistance() const {
-  return getST()->getL1PrefetchDistance();
+  return ST.getL1PrefetchDistance();
 }
 
 unsigned HexagonTTIImpl::getCacheLineSize() const {
-  return getST()->getL1CacheLineSize();
+  return ST.getL1CacheLineSize();
 }
 
 int HexagonTTIImpl::getUserCost(const User *U,