Skip to content

Commit ebb2f49

Browse files
committed
AMDGPU/GlobalISel: Select G_BRCOND for vcc
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@364795 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent ce3ccf7 commit ebb2f49

File tree

3 files changed

+80
-36
lines changed

3 files changed

+80
-36
lines changed

lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 41 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,9 @@ AMDGPUInstructionSelector::AMDGPUInstructionSelector(
5959

6060
const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }
6161

62-
static bool isSCC(unsigned Reg, const MachineRegisterInfo &MRI) {
63-
assert(!TargetRegisterInfo::isPhysicalRegister(Reg));
62+
static bool isSCC(Register Reg, const MachineRegisterInfo &MRI) {
63+
if (TargetRegisterInfo::isPhysicalRegister(Reg))
64+
return Reg == AMDGPU::SCC;
6465

6566
auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
6667
const TargetRegisterClass *RC =
@@ -76,15 +77,16 @@ static bool isSCC(unsigned Reg, const MachineRegisterInfo &MRI) {
7677
return RB->getID() == AMDGPU::SCCRegBankID;
7778
}
7879

79-
static bool isVCC(unsigned Reg, const MachineRegisterInfo &MRI,
80-
const SIRegisterInfo &TRI) {
81-
assert(!TargetRegisterInfo::isPhysicalRegister(Reg));
80+
bool AMDGPUInstructionSelector::isVCC(Register Reg,
81+
const MachineRegisterInfo &MRI) const {
82+
if (TargetRegisterInfo::isPhysicalRegister(Reg))
83+
return Reg == TRI.getVCC();
8284

8385
auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
8486
const TargetRegisterClass *RC =
8587
RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
8688
if (RC) {
87-
return RC == TRI.getWaveMaskRegClass() &&
89+
return RC->hasSuperClassEq(TRI.getBoolRC()) &&
8890
MRI.getType(Reg).getSizeInBits() == 1;
8991
}
9092

@@ -106,7 +108,7 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
106108
unsigned DstReg = I.getOperand(0).getReg();
107109

108110
// Specially handle scc->vcc copies.
109-
if (isVCC(DstReg, MRI, TRI)) {
111+
if (isVCC(DstReg, MRI)) {
110112
const DebugLoc &DL = I.getDebugLoc();
111113
BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
112114
.addImm(0)
@@ -991,27 +993,41 @@ bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
991993
Register CondReg = CondOp.getReg();
992994
const DebugLoc &DL = I.getDebugLoc();
993995

996+
unsigned BrOpcode;
997+
Register CondPhysReg;
998+
const TargetRegisterClass *ConstrainRC;
999+
1000+
// In SelectionDAG, we inspect the IR block for uniformity metadata to decide
1001+
// whether the branch is uniform when selecting the instruction. In
1002+
// GlobalISel, we should push that decision into RegBankSelect. Assume for now
1003+
// RegBankSelect knows what it's doing if the branch condition is scc, even
1004+
// though it currently does not.
9941005
if (isSCC(CondReg, MRI)) {
995-
// In SelectionDAG, we inspect the IR block for uniformity metadata to decide
996-
// whether the branch is uniform when selecting the instruction. In
997-
// GlobalISel, we should push that decision into RegBankSelect. Assume for now
998-
// RegBankSelect knows what it's doing if the branch condition is scc, even
999-
// though it currently does not.
1000-
BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
1001-
.addReg(CondReg);
1002-
if (!MRI.getRegClassOrNull(CondReg)) {
1003-
const TargetRegisterClass *RC
1004-
= TRI.getConstrainedRegClassForOperand(CondOp, MRI);
1005-
MRI.setRegClass(CondReg, RC);
1006-
}
1006+
CondPhysReg = AMDGPU::SCC;
1007+
BrOpcode = AMDGPU::S_CBRANCH_SCC1;
1008+
ConstrainRC = &AMDGPU::SReg_32_XM0RegClass;
1009+
} else if (isVCC(CondReg, MRI)) {
1010+
// FIXME: Do we have to insert an and with exec here, like in SelectionDAG?
1011+
// We sort of know that a VCC producer based on the register bank, that ands
1012+
// inactive lanes with 0. What if there was a logical operation with vcc
1013+
// producers in different blocks/with different exec masks?
1014+
// FIXME: Should scc->vcc copies and with exec?
1015+
CondPhysReg = TRI.getVCC();
1016+
BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
1017+
ConstrainRC = TRI.getBoolRC();
1018+
} else
1019+
return false;
10071020

1008-
BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_CBRANCH_SCC1))
1009-
.addMBB(I.getOperand(1).getMBB());
1010-
I.eraseFromParent();
1011-
return true;
1012-
}
1021+
if (!MRI.getRegClassOrNull(CondReg))
1022+
MRI.setRegClass(CondReg, ConstrainRC);
10131023

1014-
return false;
1024+
BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
1025+
.addReg(CondReg);
1026+
BuildMI(*BB, &I, DL, TII.get(BrOpcode))
1027+
.addMBB(I.getOperand(1).getMBB());
1028+
1029+
I.eraseFromParent();
1030+
return true;
10151031
}
10161032

10171033
bool AMDGPUInstructionSelector::selectG_FRAME_INDEX(MachineInstr &I) const {

lib/Target/AMDGPU/AMDGPUInstructionSelector.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "AMDGPUArgumentUsageInfo.h"
1818
#include "llvm/ADT/ArrayRef.h"
1919
#include "llvm/ADT/SmallVector.h"
20+
#include "llvm/CodeGen/Register.h"
2021
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
2122
#include "llvm/IR/InstrTypes.h"
2223

@@ -59,6 +60,8 @@ class AMDGPUInstructionSelector : public InstructionSelector {
5960
};
6061

6162
bool isInstrUniform(const MachineInstr &MI) const;
63+
bool isVCC(Register Reg, const MachineRegisterInfo &MRI) const;
64+
6265
/// tblgen-erated 'select' implementation.
6366
bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
6467

test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir

Lines changed: 36 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
# ERR-NOT: remark:
66
# ERR: remark: <unknown>:0:0: cannot select: G_BRCOND %1:sgpr(s1), %bb.1 (in function: brcond_sgpr)
7-
# ERR-NEXT: remark: <unknown>:0:0: cannot select: G_BRCOND %2:vcc(s1), %bb.1 (in function: brcond_vcc)
7+
# ERR-NEXT: remark: <unknown>:0:0: cannot select: G_BRCOND %1:vgpr(s1), %bb.1 (in function: brcond_vgpr)
88
# ERR-NOT: remark:
99

1010
---
@@ -117,6 +117,34 @@ body: |
117117
118118
...
119119

120+
---
121+
122+
name: brcond_vcc
123+
legalized: true
124+
regBankSelected: true
125+
126+
body: |
127+
; GCN-LABEL: name: brcond_vcc
128+
; GCN: bb.0:
129+
; GCN: successors: %bb.1(0x80000000)
130+
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
131+
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
132+
; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
133+
; GCN: $vcc = COPY [[V_CMP_EQ_U32_e64_]]
134+
; GCN: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
135+
; GCN: bb.1:
136+
bb.0:
137+
liveins: $vgpr0, $vgpr1
138+
139+
%0:vgpr(s32) = COPY $vgpr0
140+
%1:vgpr(s32) = COPY $vgpr1
141+
%2:vcc(s1) = G_ICMP intpred(eq), %0, %1
142+
G_BRCOND %2, %bb.1
143+
144+
bb.1:
145+
146+
...
147+
120148
# Don't try to select this.
121149
---
122150

@@ -143,30 +171,27 @@ body: |
143171
144172
...
145173

146-
147-
# Don't try to select this for now.
174+
# Don't try to select this.
148175
---
149176

150-
name: brcond_vcc
177+
name: brcond_vgpr
151178
legalized: true
152179
regBankSelected: true
153180

154181
body: |
155-
; GCN-LABEL: name: brcond_vcc
182+
; GCN-LABEL: name: brcond_vgpr
156183
; GCN: bb.0:
157184
; GCN: successors: %bb.1(0x80000000)
158185
; GCN: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
159-
; GCN: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
160-
; GCN: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
161-
; GCN: G_BRCOND [[ICMP]](s1), %bb.1
186+
; GCN: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
187+
; GCN: G_BRCOND [[TRUNC]](s1), %bb.1
162188
; GCN: bb.1:
163189
bb.0:
164190
liveins: $vgpr0, $vgpr1
165191
166192
%0:vgpr(s32) = COPY $vgpr0
167-
%1:vgpr(s32) = COPY $vgpr1
168-
%2:vcc(s1) = G_ICMP intpred(eq), %0, %1
169-
G_BRCOND %2, %bb.1
193+
%1:vgpr(s1) = G_TRUNC %0
194+
G_BRCOND %1, %bb.1
170195
171196
bb.1:
172197

0 commit comments

Comments
 (0)