@@ -59,11 +59,52 @@ AMDGPUInstructionSelector::AMDGPUInstructionSelector(
59
59
60
60
const char *AMDGPUInstructionSelector::getName () { return DEBUG_TYPE; }
61
61
62
+ static bool isSCC (unsigned Reg, const MachineRegisterInfo &MRI) {
63
+ if (Reg == AMDGPU::SCC)
64
+ return true ;
65
+
66
+ if (TargetRegisterInfo::isPhysicalRegister (Reg))
67
+ return false ;
68
+
69
+ auto &RegClassOrBank = MRI.getRegClassOrRegBank (Reg);
70
+ const TargetRegisterClass *RC =
71
+ RegClassOrBank.dyn_cast <const TargetRegisterClass*>();
72
+ if (RC)
73
+ return RC->getID () == AMDGPU::SReg_32_XM0RegClassID &&
74
+ MRI.getType (Reg).getSizeInBits () == 1 ;
75
+
76
+ const RegisterBank *RB = RegClassOrBank.get <const RegisterBank *>();
77
+ return RB->getID () == AMDGPU::SCCRegBankID;
78
+ }
79
+
62
80
bool AMDGPUInstructionSelector::selectCOPY (MachineInstr &I) const {
63
81
MachineBasicBlock *BB = I.getParent ();
64
82
MachineFunction *MF = BB->getParent ();
65
83
MachineRegisterInfo &MRI = MF->getRegInfo ();
66
84
I.setDesc (TII.get (TargetOpcode::COPY));
85
+
86
+ // Special case for COPY from the scc register bank. The scc register bank
87
+ // is modeled using 32-bit sgprs.
88
+ const MachineOperand &Src = I.getOperand (1 );
89
+ unsigned SrcReg = Src.getReg ();
90
+ if (!TargetRegisterInfo::isPhysicalRegister (SrcReg) && isSCC (SrcReg, MRI)) {
91
+ unsigned DstReg = TRI.getRegSizeInBits (I.getOperand (0 ).getReg (), MRI);
92
+ unsigned DstSize = TRI.getRegSizeInBits (DstReg, MRI);
93
+
94
+ // We have a copy from a 32-bit to 64-bit register. This happens
95
+ // when we are selecting scc->vcc copies.
96
+ if (DstSize == 64 ) {
97
+ const DebugLoc &DL = I.getDebugLoc ();
98
+ BuildMI (*BB, &I, DL, TII.get (AMDGPU::V_CMP_NE_U32_e64), I.getOperand (0 ).getReg ())
99
+ .addImm (0 )
100
+ .addReg (SrcReg);
101
+ if (!MRI.getRegClassOrNull (SrcReg))
102
+ MRI.setRegClass (SrcReg, TRI.getConstrainedRegClassForOperand (Src, MRI));
103
+ I.eraseFromParent ();
104
+ return true ;
105
+ }
106
+ }
107
+
67
108
for (const MachineOperand &MO : I.operands ()) {
68
109
if (TargetRegisterInfo::isPhysicalRegister (MO.getReg ()))
69
110
continue ;
@@ -262,6 +303,101 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I,
262
303
return false ;
263
304
}
264
305
306
+ static unsigned getV_CMPOpcode (CmpInst::Predicate P, unsigned Size) {
307
+ assert (Size == 32 || Size == 64 );
308
+ switch (P) {
309
+ default :
310
+ llvm_unreachable (" Unknown condition code!" );
311
+ case CmpInst::ICMP_NE:
312
+ return Size == 32 ? AMDGPU::V_CMP_NE_U32_e64 : AMDGPU::V_CMP_NE_U64_e64;
313
+ case CmpInst::ICMP_EQ:
314
+ return Size == 32 ? AMDGPU::V_CMP_EQ_U32_e64 : AMDGPU::V_CMP_EQ_U64_e64;
315
+ case CmpInst::ICMP_SGT:
316
+ return Size == 32 ? AMDGPU::V_CMP_GT_I32_e64 : AMDGPU::V_CMP_GT_I64_e64;
317
+ case CmpInst::ICMP_SGE:
318
+ return Size == 32 ? AMDGPU::V_CMP_GE_I32_e64 : AMDGPU::V_CMP_GE_I64_e64;
319
+ case CmpInst::ICMP_SLT:
320
+ return Size == 32 ? AMDGPU::V_CMP_LT_I32_e64 : AMDGPU::V_CMP_LT_I64_e64;
321
+ case CmpInst::ICMP_SLE:
322
+ return Size == 32 ? AMDGPU::V_CMP_LE_I32_e64 : AMDGPU::V_CMP_LE_I64_e64;
323
+ case CmpInst::ICMP_UGT:
324
+ return Size == 32 ? AMDGPU::V_CMP_GT_U32_e64 : AMDGPU::V_CMP_GT_U64_e64;
325
+ case CmpInst::ICMP_UGE:
326
+ return Size == 32 ? AMDGPU::V_CMP_GE_U32_e64 : AMDGPU::V_CMP_GE_U64_e64;
327
+ case CmpInst::ICMP_ULT:
328
+ return Size == 32 ? AMDGPU::V_CMP_LT_U32_e64 : AMDGPU::V_CMP_LT_U64_e64;
329
+ case CmpInst::ICMP_ULE:
330
+ return Size == 32 ? AMDGPU::V_CMP_LE_U32_e64 : AMDGPU::V_CMP_LE_U64_e64;
331
+ }
332
+ }
333
+
334
+ static unsigned getS_CMPOpcode (CmpInst::Predicate P, unsigned Size) {
335
+ // FIXME: VI supports 64-bit comparse.
336
+ assert (Size == 32 );
337
+ switch (P) {
338
+ default :
339
+ llvm_unreachable (" Unknown condition code!" );
340
+ case CmpInst::ICMP_NE:
341
+ return AMDGPU::S_CMP_LG_U32;
342
+ case CmpInst::ICMP_EQ:
343
+ return AMDGPU::S_CMP_EQ_U32;
344
+ case CmpInst::ICMP_SGT:
345
+ return AMDGPU::S_CMP_GT_I32;
346
+ case CmpInst::ICMP_SGE:
347
+ return AMDGPU::S_CMP_GE_I32;
348
+ case CmpInst::ICMP_SLT:
349
+ return AMDGPU::S_CMP_LT_I32;
350
+ case CmpInst::ICMP_SLE:
351
+ return AMDGPU::S_CMP_LE_I32;
352
+ case CmpInst::ICMP_UGT:
353
+ return AMDGPU::S_CMP_GT_U32;
354
+ case CmpInst::ICMP_UGE:
355
+ return AMDGPU::S_CMP_GE_U32;
356
+ case CmpInst::ICMP_ULT:
357
+ return AMDGPU::S_CMP_LT_U32;
358
+ case CmpInst::ICMP_ULE:
359
+ return AMDGPU::S_CMP_LE_U32;
360
+ }
361
+ }
362
+
363
+ bool AMDGPUInstructionSelector::selectG_ICMP (MachineInstr &I) const {
364
+ MachineBasicBlock *BB = I.getParent ();
365
+ MachineFunction *MF = BB->getParent ();
366
+ MachineRegisterInfo &MRI = MF->getRegInfo ();
367
+ DebugLoc DL = I.getDebugLoc ();
368
+
369
+ unsigned SrcReg = I.getOperand (2 ).getReg ();
370
+ unsigned Size = RBI.getSizeInBits (SrcReg, MRI, TRI);
371
+ // FIXME: VI supports 64-bit compares.
372
+ assert (Size == 32 );
373
+
374
+ unsigned CCReg = I.getOperand (0 ).getReg ();
375
+ if (isSCC (CCReg, MRI)) {
376
+ unsigned Opcode = getS_CMPOpcode ((CmpInst::Predicate)I.getOperand (1 ).getPredicate (), Size);
377
+ MachineInstr *ICmp = BuildMI (*BB, &I, DL, TII.get (Opcode))
378
+ .add (I.getOperand (2 ))
379
+ .add (I.getOperand (3 ));
380
+ MachineInstr *Copy = BuildMI (*BB, &I, DL, TII.get (AMDGPU::COPY), CCReg)
381
+ .addReg (AMDGPU::SCC);
382
+ bool Ret = constrainSelectedInstRegOperands (*ICmp, TII, TRI, RBI) |
383
+ constrainSelectedInstRegOperands (*Copy, TII, TRI, RBI);
384
+ I.eraseFromParent ();
385
+ return Ret;
386
+ }
387
+
388
+ assert (Size == 32 || Size == 64 );
389
+ unsigned Opcode = getV_CMPOpcode ((CmpInst::Predicate)I.getOperand (1 ).getPredicate (), Size);
390
+ MachineInstr *ICmp = BuildMI (*BB, &I, DL, TII.get (Opcode),
391
+ I.getOperand (0 ).getReg ())
392
+ .add (I.getOperand (2 ))
393
+ .add (I.getOperand (3 ));
394
+ RBI.constrainGenericRegister (ICmp->getOperand (0 ).getReg (),
395
+ AMDGPU::SReg_64RegClass, MRI);
396
+ bool Ret = constrainSelectedInstRegOperands (*ICmp, TII, TRI, RBI);
397
+ I.eraseFromParent ();
398
+ return Ret;
399
+ }
400
+
265
401
static MachineInstr *
266
402
buildEXP (const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
267
403
unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3,
@@ -325,6 +461,53 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
325
461
return false ;
326
462
}
327
463
464
+ bool AMDGPUInstructionSelector::selectG_SELECT (MachineInstr &I) const {
465
+ MachineBasicBlock *BB = I.getParent ();
466
+ MachineFunction *MF = BB->getParent ();
467
+ MachineRegisterInfo &MRI = MF->getRegInfo ();
468
+ const DebugLoc &DL = I.getDebugLoc ();
469
+
470
+ unsigned DstReg = I.getOperand (0 ).getReg ();
471
+ unsigned Size = RBI.getSizeInBits (DstReg, MRI, TRI);
472
+ assert (Size == 32 || Size == 64 );
473
+ const MachineOperand &CCOp = I.getOperand (1 );
474
+ unsigned CCReg = CCOp.getReg ();
475
+ if (isSCC (CCReg, MRI)) {
476
+ unsigned SelectOpcode = Size == 32 ? AMDGPU::S_CSELECT_B32 :
477
+ AMDGPU::S_CSELECT_B64;
478
+ MachineInstr *CopySCC = BuildMI (*BB, &I, DL, TII.get (AMDGPU::COPY), AMDGPU::SCC)
479
+ .addReg (CCReg);
480
+
481
+ // The generic constrainSelectedInstRegOperands doesn't work for the scc register
482
+ // bank, because it does not cover the register class that we used to represent
483
+ // for it. So we need to manually set the register class here.
484
+ if (!MRI.getRegClassOrNull (CCReg))
485
+ MRI.setRegClass (CCReg, TRI.getConstrainedRegClassForOperand (CCOp, MRI));
486
+ MachineInstr *Select = BuildMI (*BB, &I, DL, TII.get (SelectOpcode), DstReg)
487
+ .add (I.getOperand (2 ))
488
+ .add (I.getOperand (3 ));
489
+
490
+ bool Ret = constrainSelectedInstRegOperands (*Select, TII, TRI, RBI) |
491
+ constrainSelectedInstRegOperands (*CopySCC, TII, TRI, RBI);
492
+ I.eraseFromParent ();
493
+ return Ret;
494
+ }
495
+
496
+ assert (Size == 32 );
497
+ // FIXME: Support 64-bit select
498
+ MachineInstr *Select =
499
+ BuildMI (*BB, &I, DL, TII.get (AMDGPU::V_CNDMASK_B32_e64), DstReg)
500
+ .addImm (0 )
501
+ .add (I.getOperand (3 ))
502
+ .addImm (0 )
503
+ .add (I.getOperand (2 ))
504
+ .add (I.getOperand (1 ));
505
+
506
+ bool Ret = constrainSelectedInstRegOperands (*Select, TII, TRI, RBI);
507
+ I.eraseFromParent ();
508
+ return Ret;
509
+ }
510
+
328
511
bool AMDGPUInstructionSelector::selectG_STORE (MachineInstr &I) const {
329
512
MachineBasicBlock *BB = I.getParent ();
330
513
MachineFunction *MF = BB->getParent ();
@@ -573,10 +756,14 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I,
573
756
return selectG_INTRINSIC (I, CoverageInfo);
574
757
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
575
758
return selectG_INTRINSIC_W_SIDE_EFFECTS (I, CoverageInfo);
759
+ case TargetOpcode::G_ICMP:
760
+ return selectG_ICMP (I);
576
761
case TargetOpcode::G_LOAD:
577
762
if (selectImpl (I, CoverageInfo))
578
763
return true ;
579
764
return selectG_LOAD (I);
765
+ case TargetOpcode::G_SELECT:
766
+ return selectG_SELECT (I);
580
767
case TargetOpcode::G_STORE:
581
768
return selectG_STORE (I);
582
769
}
0 commit comments