Skip to content

Commit 2d928b8

Browse files
committed
AMDGPU/GlobalISel: Implement select for G_ICMP and G_SELECT
Reviewers: arsenm Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, rovka, kristof.beyls, dstuttard, tpr, t-tye, hiraditya, Petar.Avramovic, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D60640 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@363576 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 1cb383e commit 2d928b8

File tree

5 files changed

+559
-5
lines changed

5 files changed

+559
-5
lines changed

lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,11 +59,52 @@ AMDGPUInstructionSelector::AMDGPUInstructionSelector(
5959

6060
const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }
6161

62+
static bool isSCC(unsigned Reg, const MachineRegisterInfo &MRI) {
63+
if (Reg == AMDGPU::SCC)
64+
return true;
65+
66+
if (TargetRegisterInfo::isPhysicalRegister(Reg))
67+
return false;
68+
69+
auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
70+
const TargetRegisterClass *RC =
71+
RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
72+
if (RC)
73+
return RC->getID() == AMDGPU::SReg_32_XM0RegClassID &&
74+
MRI.getType(Reg).getSizeInBits() == 1;
75+
76+
const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
77+
return RB->getID() == AMDGPU::SCCRegBankID;
78+
}
79+
6280
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
6381
MachineBasicBlock *BB = I.getParent();
6482
MachineFunction *MF = BB->getParent();
6583
MachineRegisterInfo &MRI = MF->getRegInfo();
6684
I.setDesc(TII.get(TargetOpcode::COPY));
85+
86+
// Special case for COPY from the scc register bank. The scc register bank
87+
// is modeled using 32-bit sgprs.
88+
const MachineOperand &Src = I.getOperand(1);
89+
unsigned SrcReg = Src.getReg();
90+
if (!TargetRegisterInfo::isPhysicalRegister(SrcReg) && isSCC(SrcReg, MRI)) {
91+
unsigned DstReg = TRI.getRegSizeInBits(I.getOperand(0).getReg(), MRI);
92+
unsigned DstSize = TRI.getRegSizeInBits(DstReg, MRI);
93+
94+
// We have a copy from a 32-bit to 64-bit register. This happens
95+
// when we are selecting scc->vcc copies.
96+
if (DstSize == 64) {
97+
const DebugLoc &DL = I.getDebugLoc();
98+
BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), I.getOperand(0).getReg())
99+
.addImm(0)
100+
.addReg(SrcReg);
101+
if (!MRI.getRegClassOrNull(SrcReg))
102+
MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI));
103+
I.eraseFromParent();
104+
return true;
105+
}
106+
}
107+
67108
for (const MachineOperand &MO : I.operands()) {
68109
if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
69110
continue;
@@ -262,6 +303,101 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I,
262303
return false;
263304
}
264305

306+
/// Maps an integer compare predicate to the VALU (e64 encoding) compare
/// opcode for the given operand width.
///
/// \param P    Integer predicate; any predicate without a case below is
///             treated as unreachable.
/// \param Size Operand width in bits; asserted to be 32 or 64. The 32-bit
///             opcode is returned when Size is 32, the 64-bit one otherwise.
static unsigned getV_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
  assert(Size == 32 || Size == 64);

  const bool Is32Bit = Size == 32;
  const auto Pick = [Is32Bit](unsigned Opc32, unsigned Opc64) -> unsigned {
    return Is32Bit ? Opc32 : Opc64;
  };

  switch (P) {
  // Equality.
  case CmpInst::ICMP_NE:
    return Pick(AMDGPU::V_CMP_NE_U32_e64, AMDGPU::V_CMP_NE_U64_e64);
  case CmpInst::ICMP_EQ:
    return Pick(AMDGPU::V_CMP_EQ_U32_e64, AMDGPU::V_CMP_EQ_U64_e64);

  // Signed ordering.
  case CmpInst::ICMP_SGT:
    return Pick(AMDGPU::V_CMP_GT_I32_e64, AMDGPU::V_CMP_GT_I64_e64);
  case CmpInst::ICMP_SGE:
    return Pick(AMDGPU::V_CMP_GE_I32_e64, AMDGPU::V_CMP_GE_I64_e64);
  case CmpInst::ICMP_SLT:
    return Pick(AMDGPU::V_CMP_LT_I32_e64, AMDGPU::V_CMP_LT_I64_e64);
  case CmpInst::ICMP_SLE:
    return Pick(AMDGPU::V_CMP_LE_I32_e64, AMDGPU::V_CMP_LE_I64_e64);

  // Unsigned ordering.
  case CmpInst::ICMP_UGT:
    return Pick(AMDGPU::V_CMP_GT_U32_e64, AMDGPU::V_CMP_GT_U64_e64);
  case CmpInst::ICMP_UGE:
    return Pick(AMDGPU::V_CMP_GE_U32_e64, AMDGPU::V_CMP_GE_U64_e64);
  case CmpInst::ICMP_ULT:
    return Pick(AMDGPU::V_CMP_LT_U32_e64, AMDGPU::V_CMP_LT_U64_e64);
  case CmpInst::ICMP_ULE:
    return Pick(AMDGPU::V_CMP_LE_U32_e64, AMDGPU::V_CMP_LE_U64_e64);

  default:
    llvm_unreachable("Unknown condition code!");
  }
}
333+
334+
/// Maps an integer compare predicate to the 32-bit scalar (SALU) compare
/// opcode.
///
/// \param P    Integer predicate; any predicate without a case below is
///             treated as unreachable.
/// \param Size Operand width in bits; asserted to be 32.
static unsigned getS_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
  // FIXME: VI supports 64-bit compares.
  assert(Size == 32);

  switch (P) {
  // Equality (note: "not equal" is spelled LG on the scalar unit).
  case CmpInst::ICMP_NE:  return AMDGPU::S_CMP_LG_U32;
  case CmpInst::ICMP_EQ:  return AMDGPU::S_CMP_EQ_U32;

  // Signed ordering.
  case CmpInst::ICMP_SGT: return AMDGPU::S_CMP_GT_I32;
  case CmpInst::ICMP_SGE: return AMDGPU::S_CMP_GE_I32;
  case CmpInst::ICMP_SLT: return AMDGPU::S_CMP_LT_I32;
  case CmpInst::ICMP_SLE: return AMDGPU::S_CMP_LE_I32;

  // Unsigned ordering.
  case CmpInst::ICMP_UGT: return AMDGPU::S_CMP_GT_U32;
  case CmpInst::ICMP_UGE: return AMDGPU::S_CMP_GE_U32;
  case CmpInst::ICMP_ULT: return AMDGPU::S_CMP_LT_U32;
  case CmpInst::ICMP_ULE: return AMDGPU::S_CMP_LE_U32;

  default:
    llvm_unreachable("Unknown condition code!");
  }
}
362+
363+
/// Selects a G_ICMP.
///
/// If the result register is an SCC value, a scalar S_CMP_* is emitted and
/// its SCC result is copied into the result register. Otherwise a V_CMP_*_e64
/// is emitted that defines the result register directly, and that register is
/// constrained to SReg_64.
///
/// \param I The G_ICMP to select; it is erased on every path.
/// \returns true only if every newly built instruction could be constrained.
bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();

  // Operand layout: 0 = result, 1 = predicate, 2/3 = values being compared.
  unsigned SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, MRI, TRI);
  const auto Pred =
      static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());

  unsigned CCReg = I.getOperand(0).getReg();
  if (isSCC(CCReg, MRI)) {
    // Only the scalar path is limited to 32 bits, so the restriction is
    // checked here rather than before the branch, where it would also
    // reject 64-bit vector compares.
    // FIXME: VI supports 64-bit compares.
    assert(Size == 32);

    unsigned Opcode = getS_CMPOpcode(Pred, Size);
    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
                             .add(I.getOperand(2))
                             .add(I.getOperand(3));
    MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
                             .addReg(AMDGPU::SCC);

    // Constrain both instructions unconditionally, in a fixed order, and
    // report success only if both succeeded.
    const bool CmpOK = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
    const bool CopyOK = constrainSelectedInstRegOperands(*Copy, TII, TRI, RBI);
    I.eraseFromParent();
    return CmpOK && CopyOK;
  }

  assert(Size == 32 || Size == 64);
  unsigned Opcode = getV_CMPOpcode(Pred, Size);
  MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode),
                               I.getOperand(0).getReg())
                           .add(I.getOperand(2))
                           .add(I.getOperand(3));
  // The vector compare writes its result to a 64-bit sgpr pair.
  RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
                               AMDGPU::SReg_64RegClass, MRI);
  bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}
400+
265401
static MachineInstr *
266402
buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
267403
unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3,
@@ -325,6 +461,53 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
325461
return false;
326462
}
327463

464+
/// Selects a G_SELECT.
///
/// If the condition is an SCC value, it is copied into the physical SCC
/// register and an S_CSELECT_B32/B64 is emitted. Otherwise a 32-bit
/// V_CNDMASK_B32_e64 is emitted with the condition as its last operand.
///
/// \param I The G_SELECT to select; it is erased on every path.
/// \returns true only if every newly built instruction could be constrained.
bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();

  // Operand layout: 0 = result, 1 = condition, 2 = value if true,
  // 3 = value if false.
  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
  assert(Size == 32 || Size == 64);
  const MachineOperand &CCOp = I.getOperand(1);
  unsigned CCReg = CCOp.getReg();
  if (isSCC(CCReg, MRI)) {
    unsigned SelectOpcode = Size == 32 ? AMDGPU::S_CSELECT_B32 :
                                         AMDGPU::S_CSELECT_B64;
    MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
                                .addReg(CCReg);

    // The generic constrainSelectedInstRegOperands doesn't work for the scc
    // register bank, because it does not cover the register class used to
    // represent that bank. So the register class is set manually here.
    if (!MRI.getRegClassOrNull(CCReg))
      MRI.setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, MRI));
    MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg)
                               .add(I.getOperand(2))
                               .add(I.getOperand(3));

    // Constrain both instructions unconditionally, in a fixed order, and
    // report success only if both succeeded.
    const bool SelectOK =
        constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
    const bool CopyOK =
        constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI);
    I.eraseFromParent();
    return SelectOK && CopyOK;
  }

  assert(Size == 32);
  // FIXME: Support 64-bit select
  // V_CNDMASK takes the false value before the true value, each preceded by
  // a (zero) source-modifier immediate, and the condition last.
  MachineInstr *Select =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
          .addImm(0)
          .add(I.getOperand(3))
          .addImm(0)
          .add(I.getOperand(2))
          .add(I.getOperand(1));

  bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}
510+
328511
bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
329512
MachineBasicBlock *BB = I.getParent();
330513
MachineFunction *MF = BB->getParent();
@@ -573,10 +756,14 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I,
573756
return selectG_INTRINSIC(I, CoverageInfo);
574757
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
575758
return selectG_INTRINSIC_W_SIDE_EFFECTS(I, CoverageInfo);
759+
case TargetOpcode::G_ICMP:
760+
return selectG_ICMP(I);
576761
case TargetOpcode::G_LOAD:
577762
if (selectImpl(I, CoverageInfo))
578763
return true;
579764
return selectG_LOAD(I);
765+
case TargetOpcode::G_SELECT:
766+
return selectG_SELECT(I);
580767
case TargetOpcode::G_STORE:
581768
return selectG_STORE(I);
582769
}

lib/Target/AMDGPU/AMDGPUInstructionSelector.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,11 +72,13 @@ class AMDGPUInstructionSelector : public InstructionSelector {
7272
bool selectG_INTRINSIC(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
7373
bool selectG_INTRINSIC_W_SIDE_EFFECTS(MachineInstr &I,
7474
CodeGenCoverage &CoverageInfo) const;
75+
bool selectG_ICMP(MachineInstr &I) const;
7576
bool hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const;
7677
void getAddrModeInfo(const MachineInstr &Load, const MachineRegisterInfo &MRI,
7778
SmallVectorImpl<GEPInfo> &AddrInfo) const;
7879
bool selectSMRD(MachineInstr &I, ArrayRef<GEPInfo> AddrInfo) const;
7980
bool selectG_LOAD(MachineInstr &I) const;
81+
bool selectG_SELECT(MachineInstr &I) const;
8082
bool selectG_STORE(MachineInstr &I) const;
8183

8284
InstructionSelector::ComplexRendererFns

lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1688,6 +1688,10 @@ SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
16881688

16891689
Size = PowerOf2Ceil(Size);
16901690
switch (Size) {
1691+
case 1:
1692+
if (RB->getID() == AMDGPU::SCCRegBankID)
1693+
return &AMDGPU::SReg_32_XM0RegClass;
1694+
break;
16911695
case 32:
16921696
return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
16931697
&AMDGPU::SReg_32_XM0RegClass;
@@ -1710,8 +1714,9 @@ SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
17101714
return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_512RegClass :
17111715
&AMDGPU::SReg_512RegClass;
17121716
default:
1713-
llvm_unreachable("not implemented");
1717+
break;
17141718
}
1719+
llvm_unreachable("not implemented");
17151720
}
17161721

17171722
unsigned SIRegisterInfo::getVCC() const {

test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir

Lines changed: 55 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
22
# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN
33

4-
--- |
5-
define amdgpu_kernel void @copy(i32 addrspace(1)* %global0) {ret void}
6-
...
74
---
85

96
name: copy
@@ -22,6 +19,60 @@ body: |
2219
%0:sgpr(p1) = COPY $sgpr2_sgpr3
2320
%1:vgpr(p1) = COPY %0
2421
%2:vgpr(s32) = G_IMPLICIT_DEF
25-
G_STORE %2, %1 :: (store 4 into %ir.global0)
22+
G_STORE %2, %1 :: (store 4, addrspace 1)
23+
...
24+
---
25+
26+
name: copy_vcc_scc
27+
legalized: true
28+
regBankSelected: true
29+
30+
body: |
31+
bb.0:
32+
liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $scc
33+
; GCN-LABEL: name: copy_vcc_scc
34+
; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
35+
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
36+
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
37+
; GCN: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc
38+
; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
39+
; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec
40+
; GCN: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
41+
%0:vgpr(p1) = COPY $vgpr0_vgpr1
42+
%1:vgpr(s32) = COPY $vgpr2
43+
%2:vgpr(s32) = COPY $vgpr3
44+
%3:scc(s1) = COPY $scc
45+
%4:vcc(s1) = COPY %3
46+
%5:vgpr(s32) = G_SELECT %4, %1, %2
47+
G_STORE %5, %0 :: (store 4, addrspace 1)
48+
...
49+
---
50+
51+
name: copy_vcc_scc_2_uses
52+
legalized: true
53+
regBankSelected: true
54+
55+
body: |
56+
bb.0:
57+
liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $scc
58+
; GCN-LABEL: name: copy_vcc_scc_2_uses
59+
; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
60+
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
61+
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
62+
; GCN: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc
63+
; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
64+
; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec
65+
; GCN: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
66+
; GCN: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_1]], implicit $exec
67+
; GCN: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
68+
%0:vgpr(p1) = COPY $vgpr0_vgpr1
69+
%1:vgpr(s32) = COPY $vgpr2
70+
%2:vgpr(s32) = COPY $vgpr3
71+
%3:scc(s1) = COPY $scc
72+
%4:vcc(s1) = COPY %3
73+
%5:vgpr(s32) = G_SELECT %4, %1, %2
74+
%6:vcc(s1) = COPY %3
75+
%7:vgpr(s32) = G_SELECT %6, %1, %5
76+
G_STORE %7, %0 :: (store 4, addrspace 1)
2677
...
2778
---

0 commit comments

Comments
 (0)