@@ -59,8 +59,9 @@ AMDGPUInstructionSelector::AMDGPUInstructionSelector(
59
59
60
60
/// Returns the name of this instruction selector, which is whatever string
/// the DEBUG_TYPE macro expands to.
const char *AMDGPUInstructionSelector::getName() {
  const char *SelectorName = DEBUG_TYPE;
  return SelectorName;
}
61
61
62
- static bool isSCC (unsigned Reg, const MachineRegisterInfo &MRI) {
63
- assert (!TargetRegisterInfo::isPhysicalRegister (Reg));
62
+ static bool isSCC (Register Reg, const MachineRegisterInfo &MRI) {
63
+ if (TargetRegisterInfo::isPhysicalRegister (Reg))
64
+ return Reg == AMDGPU::SCC;
64
65
65
66
auto &RegClassOrBank = MRI.getRegClassOrRegBank (Reg);
66
67
const TargetRegisterClass *RC =
@@ -76,15 +77,16 @@ static bool isSCC(unsigned Reg, const MachineRegisterInfo &MRI) {
76
77
return RB->getID () == AMDGPU::SCCRegBankID;
77
78
}
78
79
79
- static bool isVCC (unsigned Reg, const MachineRegisterInfo &MRI,
80
- const SIRegisterInfo &TRI) {
81
- assert (!TargetRegisterInfo::isPhysicalRegister (Reg));
80
+ bool AMDGPUInstructionSelector::isVCC (Register Reg,
81
+ const MachineRegisterInfo &MRI) const {
82
+ if (TargetRegisterInfo::isPhysicalRegister (Reg))
83
+ return Reg == TRI.getVCC ();
82
84
83
85
auto &RegClassOrBank = MRI.getRegClassOrRegBank (Reg);
84
86
const TargetRegisterClass *RC =
85
87
RegClassOrBank.dyn_cast <const TargetRegisterClass*>();
86
88
if (RC) {
87
- return RC == TRI.getWaveMaskRegClass ( ) &&
89
+ return RC-> hasSuperClassEq ( TRI.getBoolRC () ) &&
88
90
MRI.getType (Reg).getSizeInBits () == 1 ;
89
91
}
90
92
@@ -106,7 +108,7 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
106
108
unsigned DstReg = I.getOperand (0 ).getReg ();
107
109
108
110
// Specially handle scc->vcc copies.
109
- if (isVCC (DstReg, MRI, TRI )) {
111
+ if (isVCC (DstReg, MRI)) {
110
112
const DebugLoc &DL = I.getDebugLoc ();
111
113
BuildMI (*BB, &I, DL, TII.get (AMDGPU::V_CMP_NE_U32_e64), DstReg)
112
114
.addImm (0 )
@@ -991,27 +993,41 @@ bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
991
993
Register CondReg = CondOp.getReg ();
992
994
const DebugLoc &DL = I.getDebugLoc ();
993
995
996
+ unsigned BrOpcode;
997
+ Register CondPhysReg;
998
+ const TargetRegisterClass *ConstrainRC;
999
+
1000
+ // In SelectionDAG, we inspect the IR block for uniformity metadata to decide
1001
+ // whether the branch is uniform when selecting the instruction. In
1002
+ // GlobalISel, we should push that decision into RegBankSelect. Assume for now
1003
+ // RegBankSelect knows what it's doing if the branch condition is scc, even
1004
+ // though it currently does not.
994
1005
if (isSCC (CondReg, MRI)) {
995
- // In SelectionDAG, we inspect the IR block for uniformity metadata to decide
996
- // whether the branch is uniform when selecting the instruction. In
997
- // GlobalISel, we should push that decision into RegBankSelect. Assume for now
998
- // RegBankSelect knows what it's doing if the branch condition is scc, even
999
- // though it currently does not.
1000
- BuildMI (*BB, &I, DL, TII.get (AMDGPU::COPY), AMDGPU::SCC)
1001
- .addReg (CondReg);
1002
- if (!MRI.getRegClassOrNull (CondReg)) {
1003
- const TargetRegisterClass *RC
1004
- = TRI.getConstrainedRegClassForOperand (CondOp, MRI);
1005
- MRI.setRegClass (CondReg, RC);
1006
- }
1006
+ CondPhysReg = AMDGPU::SCC;
1007
+ BrOpcode = AMDGPU::S_CBRANCH_SCC1;
1008
+ ConstrainRC = &AMDGPU::SReg_32_XM0RegClass;
1009
+ } else if (isVCC (CondReg, MRI)) {
1010
+ // FIXME: Do we have to insert an and with exec here, like in SelectionDAG?
1011
+ // We sort of know, based on the register bank, that a VCC producer ands
1012
+ // inactive lanes with 0. What if there was a logical operation with vcc
1013
+ // producers in different blocks/with different exec masks?
1014
+ // FIXME: Should scc->vcc copies and with exec?
1015
+ CondPhysReg = TRI.getVCC ();
1016
+ BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
1017
+ ConstrainRC = TRI.getBoolRC ();
1018
+ } else
1019
+ return false ;
1007
1020
1008
- BuildMI (*BB, &I, DL, TII.get (AMDGPU::S_CBRANCH_SCC1))
1009
- .addMBB (I.getOperand (1 ).getMBB ());
1010
- I.eraseFromParent ();
1011
- return true ;
1012
- }
1021
+ if (!MRI.getRegClassOrNull (CondReg))
1022
+ MRI.setRegClass (CondReg, ConstrainRC);
1013
1023
1014
- return false ;
1024
+ BuildMI (*BB, &I, DL, TII.get (AMDGPU::COPY), CondPhysReg)
1025
+ .addReg (CondReg);
1026
+ BuildMI (*BB, &I, DL, TII.get (BrOpcode))
1027
+ .addMBB (I.getOperand (1 ).getMBB ());
1028
+
1029
+ I.eraseFromParent ();
1030
+ return true ;
1015
1031
}
1016
1032
1017
1033
bool AMDGPUInstructionSelector::selectG_FRAME_INDEX (MachineInstr &I) const {
0 commit comments