@@ -37,22 +37,23 @@ using namespace llvm;
37
37
namespace {
38
38
39
39
// Observer to apply a register bank to new registers created by LegalizerHelper.
40
- class ApplySALUMapping final : public GISelChangeObserver {
40
+ class ApplyRegBankMapping final : public GISelChangeObserver {
41
41
private:
42
42
MachineRegisterInfo &MRI;
43
+ const RegisterBank *NewBank;
43
44
SmallVector<MachineInstr *, 4 > NewInsts;
44
45
45
46
public:
46
- ApplySALUMapping (MachineRegisterInfo &MRI_)
47
- : MRI(MRI_) {}
47
+ ApplyRegBankMapping (MachineRegisterInfo &MRI_, const RegisterBank *RB )
48
+ : MRI(MRI_), NewBank(RB) {}
48
49
49
- ~ApplySALUMapping () {
50
+ ~ApplyRegBankMapping () {
50
51
for (MachineInstr *MI : NewInsts)
51
- applySALUBank (*MI);
52
+ applyBank (*MI);
52
53
}
53
54
54
55
/// Set any registers that don't have a set register class or bank to NewBank (s1 registers get SCC when NewBank is SGPR, otherwise VCC).
55
- void applySALUBank (MachineInstr &MI) {
56
+ void applyBank (MachineInstr &MI) {
56
57
for (MachineOperand &Op : MI.operands ()) {
57
58
if (!Op.isReg ())
58
59
continue ;
@@ -61,10 +62,13 @@ class ApplySALUMapping final : public GISelChangeObserver {
61
62
if (MRI.getRegClassOrRegBank (Reg))
62
63
continue ;
63
64
65
+ const RegisterBank *RB = NewBank;
64
66
// FIXME: This might not be enough to detect when SCC should be used.
65
- const RegisterBank &RB = MRI.getType (Reg) == LLT::scalar (1 ) ?
66
- AMDGPU::SCCRegBank : AMDGPU::SGPRRegBank;
67
- MRI.setRegBank (Reg, RB);
67
+ if (MRI.getType (Reg) == LLT::scalar (1 ))
68
+ RB = (NewBank == &AMDGPU::SGPRRegBank ?
69
+ &AMDGPU::SCCRegBank : &AMDGPU::VCCRegBank);
70
+
71
+ MRI.setRegBank (Reg, *RB);
68
72
}
69
73
}
70
74
@@ -80,7 +84,6 @@ class ApplySALUMapping final : public GISelChangeObserver {
80
84
};
81
85
82
86
}
83
-
84
87
AMDGPURegisterBankInfo::AMDGPURegisterBankInfo (const TargetRegisterInfo &TRI)
85
88
: AMDGPUGenRegisterBankInfo(),
86
89
TRI(static_cast <const SIRegisterInfo*>(&TRI)) {
@@ -128,6 +131,12 @@ unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst,
128
131
unsigned AMDGPURegisterBankInfo::getBreakDownCost (
129
132
const ValueMapping &ValMapping,
130
133
const RegisterBank *CurBank) const {
134
+ // Check if this is a breakdown for G_LOAD to move the pointer from SGPR to
135
+ // VGPR.
136
+ // FIXME: Is there a better way to do this?
137
+ if (ValMapping.NumBreakDowns >= 2 || ValMapping.BreakDown [0 ].Length >= 64 )
138
+ return 10 ; // This is expensive.
139
+
131
140
assert (ValMapping.NumBreakDowns == 2 &&
132
141
ValMapping.BreakDown [0 ].Length == 32 &&
133
142
ValMapping.BreakDown [0 ].StartIdx == 0 &&
@@ -302,6 +311,14 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsicWSideEffects(
302
311
}
303
312
}
304
313
314
// Returns false unless MI.hasOneMemOperand() holds; otherwise returns what
// AMDGPUInstrInfo::isUniformMMO reports for MI's first memory operand.
+ static bool isInstrUniform (const MachineInstr &MI) {
315
+ if (!MI.hasOneMemOperand ())
316
+ return false ;
317
+
318
+ const MachineMemOperand *MMO = *MI.memoperands_begin ();
319
+ return AMDGPUInstrInfo::isUniformMMO (MMO);
320
+ }
321
+
305
322
RegisterBankInfo::InstructionMappings
306
323
AMDGPURegisterBankInfo::getInstrAlternativeMappings (
307
324
const MachineInstr &MI) const {
@@ -356,29 +373,29 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
356
373
}
357
374
case TargetOpcode::G_LOAD: {
358
375
unsigned Size = getSizeInBits (MI.getOperand (0 ).getReg (), MRI, *TRI);
376
+ LLT LoadTy = MRI.getType (MI.getOperand (0 ).getReg ());
359
377
// FIXME: Should we be hard coding the size for these mappings?
360
- const InstructionMapping &SSMapping = getInstructionMapping (
361
- 1 , 1 , getOperandsMapping (
362
- {AMDGPU::getValueMapping (AMDGPU::SGPRRegBankID, Size),
363
- AMDGPU::getValueMapping (AMDGPU::SGPRRegBankID, 64 )}),
364
- 2 ); // Num Operands
365
- AltMappings.push_back (&SSMapping);
378
+ if (isInstrUniform (MI)) {
379
+ const InstructionMapping &SSMapping = getInstructionMapping (
380
+ 1 , 1 , getOperandsMapping (
381
+ {AMDGPU::getValueMapping (AMDGPU::SGPRRegBankID, Size),
382
+ AMDGPU::getValueMapping (AMDGPU::SGPRRegBankID, 64 )}),
383
+ 2 ); // Num Operands
384
+ AltMappings.push_back (&SSMapping);
385
+ }
366
386
367
387
const InstructionMapping &VVMapping = getInstructionMapping (
368
388
2 , 1 , getOperandsMapping (
369
- {AMDGPU::getValueMapping (AMDGPU::VGPRRegBankID, Size ),
389
+ {AMDGPU::getValueMappingLoadSGPROnly (AMDGPU::VGPRRegBankID, LoadTy ),
370
390
AMDGPU::getValueMapping (AMDGPU::VGPRRegBankID, 64 )}),
371
391
2 ); // Num Operands
372
392
AltMappings.push_back (&VVMapping);
373
393
374
- // FIXME: Should this be the pointer-size (64-bits) or the size of the
375
- // register that will hold the bufffer resourc (128-bits).
376
- const InstructionMapping &VSMapping = getInstructionMapping (
377
- 3 , 1 , getOperandsMapping (
378
- {AMDGPU::getValueMapping (AMDGPU::VGPRRegBankID, Size),
379
- AMDGPU::getValueMapping (AMDGPU::SGPRRegBankID, 64 )}),
380
- 2 ); // Num Operands
381
- AltMappings.push_back (&VSMapping);
394
+ // It may be possible to have a vgpr = load sgpr mapping here, because
395
+ // the mubuf instructions support this kind of load, but probably for only
396
+ // gfx7 and older. However, the addressing mode matching in the instruction
397
+ // selector should be able to do a better job of detecting and selecting
398
+ // these kinds of loads from the vgpr = load vgpr mapping.
382
399
383
400
return AltMappings;
384
401
@@ -874,6 +891,91 @@ void AMDGPURegisterBankInfo::constrainOpWithReadfirstlane(
874
891
MI.getOperand (OpIdx).setReg (SGPR);
875
892
}
876
893
894
+ // When regbankselect repairs registers, it will insert a repair instruction
895
+ // which defines the repaired register. Then it calls applyMapping and expects
896
+ // that the targets will either delete or rewrite the instruction that originally wrote to the
897
+ // repaired registers. Because of this, we end up in a situation where
898
+ // we have 2 instructions defining the same registers. This helper returns the first def instruction of Reg that is not MI, or nullptr if MI is the only def found.
899
+ static MachineInstr *getOtherVRegDef (const MachineRegisterInfo &MRI,
900
+ Register Reg,
901
+ const MachineInstr &MI) {
902
+ // FIXME: Is there some way we can assert that there are exactly 2 def instructions? As written, the first non-MI def wins and any further defs are ignored.
903
+ for (MachineInstr &Other : MRI.def_instructions (Reg)) {
904
+ if (&Other != &MI)
905
+ return &Other;
906
+ }
907
+
908
+ return nullptr ;
909
+ }
910
+
911
// Splits a load wider than 128 bits whose pointer operand was remapped
// (OpdMapper.getVRegs(1) is non-empty) into 128-bit vector pieces using
// LegalizerHelper::fewerElementsVector, with registers created during the
// split assigned to the VGPR bank through ApplyRegBankMapping.
// Returns true when MI was rewritten here. Returns false when nothing was
// done: the load is at most 128 bits, the pointer operand has no new vregs,
// or the legalizer did not report Legalized.
+ bool AMDGPURegisterBankInfo::applyMappingWideLoad (MachineInstr &MI,
912
+ const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
913
+ MachineRegisterInfo &MRI) const {
914
+ Register DstReg = MI.getOperand (0 ).getReg ();
915
+ const LLT LoadTy = MRI.getType (DstReg);
916
+ unsigned LoadSize = LoadTy.getSizeInBits ();
917
+ const unsigned MaxNonSmrdLoadSize = 128 ;
918
+ // 128-bit loads are supported for all instruction types.
919
+ if (LoadSize <= MaxNonSmrdLoadSize)
920
+ return false ;
921
+
922
+ SmallVector<unsigned , 16 > DefRegs (OpdMapper.getVRegs (0 ));
923
+ SmallVector<unsigned , 1 > SrcRegs (OpdMapper.getVRegs (1 ));
924
+
925
+ // If the pointer is an SGPR, we have nothing to do.
926
+ if (SrcRegs.empty ())
927
+ return false ;
928
+
929
+ assert (LoadSize % MaxNonSmrdLoadSize == 0 );
930
+
931
+ // We want to get the repair instruction now, because it will help us
932
+ // determine which instruction the legalizer inserts that will also
933
+ // write to DstReg.
934
+ MachineInstr *RepairInst = getOtherVRegDef (MRI, DstReg, MI);
935
+
936
+ // RegBankSelect only emits scalar types, so we need to reset the pointer
937
+ // operand to a pointer type.
938
+ Register BasePtrReg = SrcRegs[0 ];
939
+ LLT PtrTy = MRI.getType (MI.getOperand (1 ).getReg ());
940
+ MRI.setType (BasePtrReg, PtrTy);
941
+
942
+ MachineIRBuilder B (MI);
943
+
// SplitElts is the number of scalar elements that fit in one 128-bit piece.
944
+ unsigned SplitElts =
945
+ MaxNonSmrdLoadSize / LoadTy.getScalarType ().getSizeInBits ();
946
+ const LLT LoadSplitTy = LLT::vector (SplitElts, LoadTy.getScalarType ());
947
+ ApplyRegBankMapping O (MRI, &AMDGPU::VGPRRegBank);
948
+ GISelObserverWrapper Observer (&O);
949
+ B.setChangeObserver (Observer);
950
+ LegalizerHelper Helper (B.getMF (), Observer, B);
951
+ if (Helper.fewerElementsVector (MI, 0 , LoadSplitTy) != LegalizerHelper::Legalized)
952
+ return false ;
953
+
954
+ // At this point, the legalizer has split the original load into smaller
955
+ // loads. At the end of lowering, it inserts an instruction (LegalizedInst)
956
+ // that combines the outputs of the lower loads and writes it to DstReg.
957
+ // The register bank selector has also added the RepairInst which writes to
958
+ // DstReg as well. NOTE(review): RepairInst and LegalizedInst both come from getOtherVRegDef, which can return nullptr; both are dereferenced below without a check -- confirm a second def of DstReg always exists on this path.
959
+
960
+ MachineInstr *LegalizedInst = getOtherVRegDef (MRI, DstReg, *RepairInst);
961
+
962
+ // Replace the output of the LegalizedInst with a temporary register, since
963
+ // RepairInst already defines DstReg. The builder is then repositioned at RepairInst so each DefRegs entry can be re-extracted from TmpReg by constant index.
964
+ Register TmpReg = MRI.createGenericVirtualRegister (MRI.getType (DstReg));
965
+ LegalizedInst->getOperand (0 ).setReg (TmpReg);
966
+ B.setInsertPt (*RepairInst->getParent (), RepairInst);
967
+
968
+ for (unsigned DefIdx = 0 , e = DefRegs.size (); DefIdx != e; ++DefIdx) {
969
+ Register IdxReg = MRI.createGenericVirtualRegister (LLT::scalar (32 ));
970
+ B.buildConstant (IdxReg, DefIdx);
971
+ MRI.setRegBank (IdxReg, getRegBank (AMDGPU::VGPRRegBankID));
972
+ B.buildExtractVectorElement (DefRegs[DefIdx], TmpReg, IdxReg);
973
+ }
974
+
975
+ MRI.setRegBank (DstReg, getRegBank (AMDGPU::VGPRRegBankID));
976
+ return true ;
977
+ }
978
+
877
979
// For cases where only a single copy is inserted for matching register banks.
878
980
// Replace the register in the instruction operand
879
981
static void substituteSimpleCopyRegs (
@@ -1008,7 +1110,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
1008
1110
// 16-bit operations are VALU only, but can be promoted to 32-bit SALU.
1009
1111
MachineFunction *MF = MI.getParent ()->getParent ();
1010
1112
MachineIRBuilder B (MI);
1011
- ApplySALUMapping ApplySALU (MRI);
1113
+ ApplyRegBankMapping ApplySALU (MRI, &AMDGPU::SGPRRegBank );
1012
1114
GISelObserverWrapper Observer (&ApplySALU);
1013
1115
LegalizerHelper Helper (*MF, Observer, B);
1014
1116
@@ -1028,7 +1130,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
1028
1130
1029
1131
MachineFunction *MF = MI.getParent ()->getParent ();
1030
1132
MachineIRBuilder B (MI);
1031
- ApplySALUMapping ApplySALU (MRI);
1133
+ ApplyRegBankMapping ApplySALU (MRI, &AMDGPU::SGPRRegBank );
1032
1134
GISelObserverWrapper Observer (&ApplySALU);
1033
1135
LegalizerHelper Helper (*MF, Observer, B);
1034
1136
@@ -1212,21 +1314,18 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
1212
1314
}
1213
1315
break ;
1214
1316
}
1317
+ case AMDGPU::G_LOAD: {
1318
+ if (applyMappingWideLoad (MI, OpdMapper, MRI))
1319
+ return ;
1320
+ break ;
1321
+ }
1215
1322
default :
1216
1323
break ;
1217
1324
}
1218
1325
1219
1326
return applyDefaultMapping (OpdMapper);
1220
1327
}
1221
1328
1222
- static bool isInstrUniform (const MachineInstr &MI) {
1223
- if (!MI.hasOneMemOperand ())
1224
- return false ;
1225
-
1226
- const MachineMemOperand *MMO = *MI.memoperands_begin ();
1227
- return AMDGPUInstrInfo::isUniformMMO (MMO);
1228
- }
1229
-
1230
1329
bool AMDGPURegisterBankInfo::isSALUMapping (const MachineInstr &MI) const {
1231
1330
const MachineFunction &MF = *MI.getParent ()->getParent ();
1232
1331
const MachineRegisterInfo &MRI = MF.getRegInfo ();
@@ -1322,6 +1421,7 @@ AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {
1322
1421
const MachineRegisterInfo &MRI = MF.getRegInfo ();
1323
1422
SmallVector<const ValueMapping*, 8 > OpdsMapping (MI.getNumOperands ());
1324
1423
unsigned Size = getSizeInBits (MI.getOperand (0 ).getReg (), MRI, *TRI);
1424
+ LLT LoadTy = MRI.getType (MI.getOperand (0 ).getReg ());
1325
1425
unsigned PtrSize = getSizeInBits (MI.getOperand (1 ).getReg (), MRI, *TRI);
1326
1426
1327
1427
const ValueMapping *ValMapping;
@@ -1332,7 +1432,7 @@ AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {
1332
1432
ValMapping = AMDGPU::getValueMapping (AMDGPU::SGPRRegBankID, Size);
1333
1433
PtrMapping = AMDGPU::getValueMapping (AMDGPU::SGPRRegBankID, PtrSize);
1334
1434
} else {
1335
- ValMapping = AMDGPU::getValueMapping (AMDGPU::VGPRRegBankID, Size );
1435
+ ValMapping = AMDGPU::getValueMappingLoadSGPROnly (AMDGPU::VGPRRegBankID, LoadTy );
1336
1436
// FIXME: What would happen if we used SGPRRegBankID here?
1337
1437
PtrMapping = AMDGPU::getValueMapping (AMDGPU::VGPRRegBankID, PtrSize);
1338
1438
}
0 commit comments