16
16
#include " HexagonTargetTransformInfo.h"
17
17
#include " HexagonSubtarget.h"
18
18
#include " llvm/Analysis/TargetTransformInfo.h"
19
+ #include " llvm/CodeGen/ValueTypes.h"
19
20
#include " llvm/IR/InstrTypes.h"
20
21
#include " llvm/IR/Instructions.h"
21
22
#include " llvm/IR/User.h"
@@ -27,16 +28,35 @@ using namespace llvm;
27
28
28
29
#define DEBUG_TYPE " hexagontti"
29
30
30
- static cl::opt<bool > HexagonAutoHVX (" hexagon-autohvx" , cl::init(false ),
31
+ static cl::opt<bool > HexagonAutoHVX (" hexagon-autohvx" , cl::init(true ),
31
32
cl::Hidden, cl::desc(" Enable loop vectorizer for HVX" ));
32
33
33
34
static cl::opt<bool > EmitLookupTables (" hexagon-emit-lookup-tables" ,
34
35
cl::init (true ), cl::Hidden,
35
36
cl::desc(" Control lookup table emission on Hexagon target" ));
36
37
38
+
39
+ bool HexagonTTIImpl::useHVX () const {
40
+ return ST.useHVXOps () && HexagonAutoHVX;
41
+ }
42
+
43
+ bool HexagonTTIImpl::isTypeForHVX (Type *VecTy) const {
44
+ assert (VecTy->isVectorTy ());
45
+ // Avoid types like <2 x i32*>.
46
+ if (!cast<VectorType>(VecTy)->getElementType ()->isIntegerTy ())
47
+ return false ;
48
+ EVT VecVT = EVT::getEVT (VecTy);
49
+ if (!VecVT.isSimple () || VecVT.getSizeInBits () <= 64 )
50
+ return false ;
51
+ if (ST.isHVXVectorType (VecVT.getSimpleVT ()))
52
+ return true ;
53
+ auto Action = TLI.getPreferredVectorAction (VecVT);
54
+ return Action == TargetLoweringBase::TypeWidenVector;
55
+ }
56
+
37
57
/// Reports the level of population-count support.
///
/// \p IntTyWidthInBit is not consulted: the answer is always
/// PSK_FastHardware.
TargetTransformInfo::PopcntSupportKind
HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const {
  // Fast hardware support is reported unconditionally, since every input
  // narrower than 64 bits will be promoted to 64 bits.
  return TargetTransformInfo::PSK_FastHardware;
}
@@ -58,53 +78,178 @@ bool HexagonTTIImpl::shouldFavorPostInc() const {
58
78
return true ;
59
79
}
60
80
81
/// --- Vector TTI begin ---
82
+
61
83
unsigned HexagonTTIImpl::getNumberOfRegisters (bool Vector) const {
62
84
if (Vector)
63
- return HexagonAutoHVX && getST ()-> useHVXOps () ? 32 : 0 ;
85
+ return useHVX () ? 32 : 0 ;
64
86
return 32 ;
65
87
}
66
88
67
89
unsigned HexagonTTIImpl::getMaxInterleaveFactor (unsigned VF) {
68
- return HexagonAutoHVX && getST ()-> useHVXOps () ? 64 : 0 ;
90
+ return useHVX () ? 2 : 0 ;
69
91
}
70
92
71
93
unsigned HexagonTTIImpl::getRegisterBitWidth (bool Vector) const {
72
94
return Vector ? getMinVectorRegisterBitWidth () : 32 ;
73
95
}
74
96
75
97
unsigned HexagonTTIImpl::getMinVectorRegisterBitWidth () const {
76
- return getST ()-> useHVXOps () ? getST ()-> getVectorLength ()*8 : 0 ;
98
+ return useHVX () ? ST. getVectorLength ()*8 : 0 ;
77
99
}
78
100
79
101
unsigned HexagonTTIImpl::getMinimumVF (unsigned ElemWidth) const {
80
- return (8 * getST ()->getVectorLength ()) / ElemWidth;
102
+ return (8 * ST.getVectorLength ()) / ElemWidth;
103
+ }
104
+
105
+ unsigned HexagonTTIImpl::getScalarizationOverhead (Type *Ty, bool Insert,
106
+ bool Extract) {
107
+ return BaseT::getScalarizationOverhead (Ty, Insert, Extract);
108
+ }
109
+
110
+ unsigned HexagonTTIImpl::getOperandsScalarizationOverhead (
111
+ ArrayRef<const Value*> Args, unsigned VF) {
112
+ return BaseT::getOperandsScalarizationOverhead (Args, VF);
113
+ }
114
+
115
+ unsigned HexagonTTIImpl::getCallInstrCost (Function *F, Type *RetTy,
116
+ ArrayRef<Type*> Tys) {
117
+ return BaseT::getCallInstrCost (F, RetTy, Tys);
118
+ }
119
+
120
+ unsigned HexagonTTIImpl::getIntrinsicInstrCost (Intrinsic::ID ID, Type *RetTy,
121
+ ArrayRef<Value*> Args, FastMathFlags FMF, unsigned VF) {
122
+ return BaseT::getIntrinsicInstrCost (ID, RetTy, Args, FMF, VF);
123
+ }
124
+
125
+ unsigned HexagonTTIImpl::getIntrinsicInstrCost (Intrinsic::ID ID, Type *RetTy,
126
+ ArrayRef<Type*> Tys, FastMathFlags FMF,
127
+ unsigned ScalarizationCostPassed) {
128
+ if (ID == Intrinsic::bswap) {
129
+ std::pair<int , MVT> LT = TLI.getTypeLegalizationCost (DL, RetTy);
130
+ return LT.first + 2 ;
131
+ }
132
+ return BaseT::getIntrinsicInstrCost (ID, RetTy, Tys, FMF,
133
+ ScalarizationCostPassed);
134
+ }
135
+
136
+ unsigned HexagonTTIImpl::getAddressComputationCost (Type *Tp,
137
+ ScalarEvolution *SE, const SCEV *S) {
138
+ return 0 ;
81
139
}
82
140
83
141
unsigned HexagonTTIImpl::getMemoryOpCost (unsigned Opcode, Type *Src,
84
142
unsigned Alignment, unsigned AddressSpace, const Instruction *I) {
85
- if (Opcode == Instruction::Load && Src->isVectorTy ()) {
143
+ assert (Opcode == Instruction::Load || Opcode == Instruction::Store);
144
+ if (Opcode == Instruction::Store)
145
+ return BaseT::getMemoryOpCost (Opcode, Src, Alignment, AddressSpace, I);
146
+
147
+ if (Src->isVectorTy ()) {
86
148
VectorType *VecTy = cast<VectorType>(Src);
87
149
unsigned VecWidth = VecTy->getBitWidth ();
88
- if (VecWidth > 64 ) {
89
- // Assume that vectors longer than 64 bits are meant for HVX.
90
- if (getNumberOfRegisters (true ) > 0 ) {
91
- if (VecWidth % getRegisterBitWidth (true ) == 0 )
92
- return 1 ;
93
- }
150
+ if (useHVX () && isTypeForHVX (VecTy)) {
151
+ unsigned RegWidth = getRegisterBitWidth (true );
152
+ Alignment = std::min (Alignment, RegWidth/8 );
153
+ // Cost of HVX loads.
154
+ if (VecWidth % RegWidth == 0 )
155
+ return VecWidth / RegWidth;
156
+ // Cost of constructing HVX vector from scalar loads.
94
157
unsigned AlignWidth = 8 * std::max (1u , Alignment);
95
158
unsigned NumLoads = alignTo (VecWidth, AlignWidth) / AlignWidth;
96
159
return 3 *NumLoads;
97
160
}
161
+
162
+ // Non-HVX vectors.
163
+ // Add extra cost for floating point types.
164
+ unsigned Cost = VecTy->getElementType ()->isFloatingPointTy () ? 4 : 1 ;
165
+
166
+ Alignment = std::min (Alignment, 8u );
167
+ unsigned AlignWidth = 8 * std::max (1u , Alignment);
168
+ unsigned NumLoads = alignTo (VecWidth, AlignWidth) / AlignWidth;
169
+ if (Alignment == 4 || Alignment == 8 )
170
+ return Cost * NumLoads;
171
+ // Loads of less than 32 bits will need extra inserts to compose a vector.
172
+ unsigned LogA = Log2_32 (Alignment);
173
+ return (3 - LogA) * Cost * NumLoads;
98
174
}
175
+
99
176
return BaseT::getMemoryOpCost (Opcode, Src, Alignment, AddressSpace, I);
100
177
}
101
178
179
+ unsigned HexagonTTIImpl::getMaskedMemoryOpCost (unsigned Opcode,
180
+ Type *Src, unsigned Alignment, unsigned AddressSpace) {
181
+ return BaseT::getMaskedMemoryOpCost (Opcode, Src, Alignment, AddressSpace);
182
+ }
183
+
184
+ unsigned HexagonTTIImpl::getShuffleCost (TTI::ShuffleKind Kind, Type *Tp,
185
+ int Index, Type *SubTp) {
186
+ return 1 ;
187
+ }
188
+
189
+ unsigned HexagonTTIImpl::getGatherScatterOpCost (unsigned Opcode, Type *DataTy,
190
+ Value *Ptr, bool VariableMask, unsigned Alignment) {
191
+ return BaseT::getGatherScatterOpCost (Opcode, DataTy, Ptr, VariableMask,
192
+ Alignment);
193
+ }
194
+
195
+ unsigned HexagonTTIImpl::getInterleavedMemoryOpCost (unsigned Opcode,
196
+ Type *VecTy, unsigned Factor, ArrayRef<unsigned > Indices,
197
+ unsigned Alignment, unsigned AddressSpace) {
198
+ return BaseT::getInterleavedMemoryOpCost (Opcode, VecTy, Factor, Indices,
199
+ Alignment, AddressSpace);
200
+ }
201
+
202
+ unsigned HexagonTTIImpl::getCmpSelInstrCost (unsigned Opcode, Type *ValTy,
203
+ Type *CondTy, const Instruction *I) {
204
+ if (ValTy->isVectorTy ()) {
205
+ auto *VecTy = dyn_cast<VectorType>(ValTy);
206
+ std::pair<int , MVT> LT = TLI.getTypeLegalizationCost (DL, ValTy);
207
+ if (Opcode == Instruction::FCmp)
208
+ return LT.first + 4 * VecTy->getNumElements ();
209
+ }
210
+ return BaseT::getCmpSelInstrCost (Opcode, ValTy, CondTy, I);
211
+ }
212
+
213
+ unsigned HexagonTTIImpl::getArithmeticInstrCost (unsigned Opcode, Type *Ty,
214
+ TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info,
215
+ TTI::OperandValueProperties Opd1PropInfo,
216
+ TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value*> Args) {
217
+ return BaseT::getArithmeticInstrCost (Opcode, Ty, Opd1Info, Opd2Info,
218
+ Opd1PropInfo, Opd2PropInfo, Args);
219
+ }
220
+
221
+ unsigned HexagonTTIImpl::getCastInstrCost (unsigned Opcode, Type *Dst,
222
+ Type *Src, const Instruction *I) {
223
+ return 1 ;
224
+ }
225
+
226
+ unsigned HexagonTTIImpl::getVectorInstrCost (unsigned Opcode, Type *Val,
227
+ unsigned Index) {
228
+ Type *ElemTy = Val->isVectorTy () ? cast<VectorType>(Val)->getElementType ()
229
+ : Val;
230
+ if (Opcode == Instruction::InsertElement) {
231
+ // Need two rotations for non-zero index.
232
+ unsigned Cost = (Index != 0 ) ? 2 : 0 ;
233
+ if (ElemTy->isIntegerTy (32 ))
234
+ return Cost;
235
+ // If it's not a 32-bit value, there will need to be an extract.
236
+ return Cost + getVectorInstrCost (Instruction::ExtractElement, Val, Index);
237
+ }
238
+
239
+ if (Opcode == Instruction::ExtractElement)
240
+ return 2 ;
241
+
242
+ return 1 ;
243
+ }
244
+
245
/// --- Vector TTI end ---
246
+
102
247
unsigned HexagonTTIImpl::getPrefetchDistance () const {
103
- return getST ()-> getL1PrefetchDistance ();
248
+ return ST. getL1PrefetchDistance ();
104
249
}
105
250
106
251
unsigned HexagonTTIImpl::getCacheLineSize () const {
107
- return getST ()-> getL1CacheLineSize ();
252
+ return ST. getL1CacheLineSize ();
108
253
}
109
254
110
255
int HexagonTTIImpl::getUserCost (const User *U,
0 commit comments