Skip to content

Commit e14caa7

Browse files
committed
GlobalISel: Use the original flags when lowering fneg to fsub
This was ignoring the flags on the fneg itself and instead using the flags of the instruction that defines its source operand. Also fixes tests missing from r358702. Note that the expansion itself isn't correct without nnan, but that should be fixed separately. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@363637 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 644422a commit e14caa7

File tree

4 files changed

+108
-2
lines changed

4 files changed

+108
-2
lines changed

lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1463,9 +1463,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
14631463
auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
14641464
unsigned SubByReg = MI.getOperand(1).getReg();
14651465
unsigned ZeroReg = Zero->getOperand(0).getReg();
1466-
MachineInstr *SrcMI = MRI.getVRegDef(SubByReg);
14671466
MIRBuilder.buildInstr(TargetOpcode::G_FSUB, {Res}, {ZeroReg, SubByReg},
1468-
SrcMI->getFlags());
1467+
MI.getFlags());
14691468
MI.eraseFromParent();
14701469
return Legalized;
14711470
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
; RUN: llc -march=amdgcn -mcpu=fiji -O0 -stop-after=irtranslator -global-isel %s -o - | FileCheck %s
2+
3+
; Check flags are preserved for a regular instruction.
4+
; CHECK-LABEL: name: fadd_nnan
5+
; CHECK: nnan G_FADD
6+
define amdgpu_kernel void @fadd_nnan(float %arg0, float %arg1) {
7+
%res = fadd nnan float %arg0, %arg1
8+
store float %res, float addrspace(1)* undef
9+
ret void
10+
}
11+
12+
; Check flags are preserved for a specially handled intrinsic
13+
; CHECK-LABEL: name: fma_fast
14+
; CHECK: nnan ninf nsz arcp contract afn reassoc G_FMA
15+
define amdgpu_kernel void @fma_fast(float %arg0, float %arg1, float %arg2) {
16+
%res = call fast float @llvm.fma.f32(float %arg0, float %arg1, float %arg2)
17+
store float %res, float addrspace(1)* undef
18+
ret void
19+
}
20+
21+
; Check flags are preserved for an arbitrary target intrinsic
22+
; CHECK-LABEL: name: rcp_nsz
23+
; CHECK: = nsz G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %8(s32)
24+
define amdgpu_kernel void @rcp_nsz(float %arg0) {
25+
%res = call nsz float @llvm.amdgcn.rcp.f32 (float %arg0)
26+
store float %res, float addrspace(1)* undef
27+
ret void
28+
}
29+
30+
declare float @llvm.fma.f32(float, float, float)
31+
declare float @llvm.amdgcn.rcp.f32(float)

test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,36 @@ body: |
5959
$vgpr0_vgpr1 = COPY %2
6060
...
6161

62+
---
63+
name: test_fsub_s64_fmf
64+
body: |
65+
bb.0:
66+
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
67+
68+
; SI-LABEL: name: test_fsub_s64_fmf
69+
; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
70+
; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
71+
; SI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY1]]
72+
; SI: %2:_(s64) = nnan nsz G_FADD [[COPY]], [[FNEG]]
73+
; SI: $vgpr0_vgpr1 = COPY %2(s64)
74+
; VI-LABEL: name: test_fsub_s64_fmf
75+
; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
76+
; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
77+
; VI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY1]]
78+
; VI: %2:_(s64) = nnan nsz G_FADD [[COPY]], [[FNEG]]
79+
; VI: $vgpr0_vgpr1 = COPY %2(s64)
80+
; GFX9-LABEL: name: test_fsub_s64_fmf
81+
; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
82+
; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
83+
; GFX9: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY1]]
84+
; GFX9: %2:_(s64) = nnan nsz G_FADD [[COPY]], [[FNEG]]
85+
; GFX9: $vgpr0_vgpr1 = COPY %2(s64)
86+
%0:_(s64) = COPY $vgpr0_vgpr1
87+
%1:_(s64) = COPY $vgpr2_vgpr3
88+
%2:_(s64) = nnan nsz G_FSUB %0, %1
89+
$vgpr0_vgpr1 = COPY %2
90+
...
91+
6292
---
6393
name: test_fsub_s16
6494
body: |

unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -729,4 +729,50 @@ TEST_F(GISelMITest, FewerElementsPhi) {
729729

730730
EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF;
731731
}
732+
733+
// FNEG expansion in terms of FSUB
734+
TEST_F(GISelMITest, LowerFNEG) {
735+
if (!TM)
736+
return;
737+
738+
// Declare your legalization info
739+
DefineLegalizerInfo(A, {
740+
getActionDefinitionsBuilder(G_FSUB).legalFor({s64});
741+
});
742+
743+
// Build Instr. Make sure FMF are preserved.
744+
auto FAdd =
745+
B.buildInstr(TargetOpcode::G_FADD, {LLT::scalar(64)}, {Copies[0], Copies[1]},
746+
MachineInstr::MIFlag::FmNsz);
747+
748+
// Should not propagate the flags of the source instruction.
749+
auto FNeg0 =
750+
B.buildInstr(TargetOpcode::G_FNEG, {LLT::scalar(64)}, {FAdd.getReg(0)},
751+
{MachineInstr::MIFlag::FmArcp});
752+
753+
// Preserve the one flag.
754+
auto FNeg1 =
755+
B.buildInstr(TargetOpcode::G_FNEG, {LLT::scalar(64)}, {Copies[0]},
756+
MachineInstr::MIFlag::FmNoInfs);
757+
758+
AInfo Info(MF->getSubtarget());
759+
DummyGISelObserver Observer;
760+
LegalizerHelper Helper(*MF, Info, Observer, B);
761+
// Perform Legalization
762+
EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized,
763+
Helper.lower(*FNeg0, 0, LLT::scalar(64)));
764+
EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized,
765+
Helper.lower(*FNeg1, 0, LLT::scalar(64)));
766+
767+
auto CheckStr = R"(
768+
CHECK: [[FADD:%[0-9]+]]:_(s64) = nsz G_FADD %0:_, %1:_
769+
CHECK: [[CONST0:%[0-9]+]]:_(s64) = G_FCONSTANT double -0.000000e+00
770+
CHECK: [[FSUB0:%[0-9]+]]:_(s64) = arcp G_FSUB [[CONST0]]:_, [[FADD]]:_
771+
CHECK: [[CONST1:%[0-9]+]]:_(s64) = G_FCONSTANT double -0.000000e+00
772+
CHECK: [[FSUB1:%[0-9]+]]:_(s64) = ninf G_FSUB [[CONST1]]:_, %0:_
773+
)";
774+
775+
// Check
776+
EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF;
777+
}
732778
} // namespace

0 commit comments

Comments
 (0)