Skip to content

Commit 62b274e

Browse files
committed
AMDGPU/GlobalISel: Remove manual store select code
This regresses the weird types that are newly treated as legal load types, but fixes incorrectly using flat instructions on SI. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@367512 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent f1377e4 commit 62b274e

10 files changed

+395
-447
lines changed

lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 2 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -842,56 +842,8 @@ bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
842842

843843
bool AMDGPUInstructionSelector::selectG_STORE(
844844
MachineInstr &I, CodeGenCoverage &CoverageInfo) const {
845-
MachineBasicBlock *BB = I.getParent();
846-
MachineFunction *MF = BB->getParent();
847-
MachineRegisterInfo &MRI = MF->getRegInfo();
848-
const DebugLoc &DL = I.getDebugLoc();
849-
850-
LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
851-
if (PtrTy.getSizeInBits() != 64) {
852-
initM0(I);
853-
return selectImpl(I, CoverageInfo);
854-
}
855-
856-
if (selectImpl(I, CoverageInfo))
857-
return true;
858-
859-
unsigned StoreSize = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
860-
unsigned Opcode;
861-
862-
// FIXME: Remove this when integers > s32 naturally selected.
863-
switch (StoreSize) {
864-
default:
865-
return false;
866-
case 32:
867-
Opcode = AMDGPU::FLAT_STORE_DWORD;
868-
break;
869-
case 64:
870-
Opcode = AMDGPU::FLAT_STORE_DWORDX2;
871-
break;
872-
case 96:
873-
Opcode = AMDGPU::FLAT_STORE_DWORDX3;
874-
break;
875-
case 128:
876-
Opcode = AMDGPU::FLAT_STORE_DWORDX4;
877-
break;
878-
}
879-
880-
MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
881-
.add(I.getOperand(1))
882-
.add(I.getOperand(0))
883-
.addImm(0) // offset
884-
.addImm(0) // glc
885-
.addImm(0) // slc
886-
.addImm(0); // dlc
887-
888-
889-
// Now that we selected an opcode, we need to constrain the register
890-
// operands to use appropriate classes.
891-
bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
892-
893-
I.eraseFromParent();
894-
return Ret;
845+
initM0(I);
846+
return selectImpl(I, CoverageInfo);
895847
}
896848

897849
static int sizeToSubRegIndex(unsigned Size) {

lib/Target/AMDGPU/FLATInstructions.td

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -777,8 +777,6 @@ def : FlatLoadPat <FLAT_LOAD_USHORT, extloadi16_flat, i32>;
777777
def : FlatLoadPat <FLAT_LOAD_USHORT, zextloadi16_flat, i32>;
778778
def : FlatLoadPat <FLAT_LOAD_USHORT, load_flat, i16>;
779779
def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>;
780-
def : FlatLoadPat <FLAT_LOAD_DWORD, load_flat, i32>;
781-
def : FlatLoadPat <FLAT_LOAD_DWORDX2, load_flat, v2i32>;
782780
def : FlatLoadPat <FLAT_LOAD_DWORDX3, load_flat, v3i32>;
783781
def : FlatLoadPat <FLAT_LOAD_DWORDX4, load_flat, v4i32>;
784782

@@ -787,8 +785,17 @@ def : FlatLoadAtomicPat <FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>;
787785

788786
def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i32>;
789787
def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_flat, i32>;
790-
def : FlatStorePat <FLAT_STORE_DWORD, store_flat, i32>;
791-
def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, v2i32, VReg_64>;
788+
789+
foreach vt = [i32, f32, v2i16, v2f16] in {
790+
def : FlatLoadPat <FLAT_LOAD_DWORD, load_flat, vt>;
791+
def : FlatStorePat <FLAT_STORE_DWORD, store_flat, vt>;
792+
}
793+
794+
foreach vt = VReg_64.RegTypes in {
795+
def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, vt, VReg_64>;
796+
def : FlatLoadPat <FLAT_LOAD_DWORDX2, load_flat, vt>;
797+
}
798+
792799
def : FlatStorePat <FLAT_STORE_DWORDX3, store_flat, v3i32, VReg_96>;
793800
def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, v4i32, VReg_128>;
794801

@@ -860,8 +867,16 @@ def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, zextloadi16_global, i32>;
860867
def : FlatLoadSignedPat <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>;
861868
def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, load_global, i16>;
862869

863-
def : FlatLoadSignedPat <GLOBAL_LOAD_DWORD, load_global, i32>;
864-
def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX2, load_global, v2i32>;
870+
foreach vt = [i32, f32, v2i16, v2f16] in {
871+
def : FlatLoadSignedPat <GLOBAL_LOAD_DWORD, load_global, vt>;
872+
def : FlatStoreSignedPat <GLOBAL_STORE_DWORD, store_global, vt, VGPR_32>;
873+
}
874+
875+
foreach vt = VReg_64.RegTypes in {
876+
def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX2, load_global, vt>;
877+
def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX2, store_global, vt, VReg_64>;
878+
}
879+
865880
def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX3, load_global, v3i32>;
866881
def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX4, load_global, v4i32>;
867882

@@ -872,8 +887,6 @@ def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i32, VGPR_32>;
872887
def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i16, VGPR_32>;
873888
def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, truncstorei16_global, i32, VGPR_32>;
874889
def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, store_global, i16, VGPR_32>;
875-
def : FlatStoreSignedPat <GLOBAL_STORE_DWORD, store_global, i32, VGPR_32>;
876-
def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX2, store_global, v2i32, VReg_64>;
877890
def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX3, store_global, v3i32, VReg_96>;
878891
def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX4, store_global, v4i32, VReg_128>;
879892

test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir

Lines changed: 39 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -242,24 +242,25 @@ body: |
242242
243243
; GFX7-LABEL: name: load_flat_s64
244244
; GFX7: liveins: $vgpr0_vgpr1
245-
; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
246-
; GFX7: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load 8)
247-
; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
245+
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
246+
; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
247+
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
248248
; GFX8-LABEL: name: load_flat_s64
249249
; GFX8: liveins: $vgpr0_vgpr1
250-
; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
251-
; GFX8: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load 8)
252-
; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
250+
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
251+
; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
252+
; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
253253
; GFX9-LABEL: name: load_flat_s64
254254
; GFX9: liveins: $vgpr0_vgpr1
255-
; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
256-
; GFX9: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load 8)
257-
; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
255+
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
256+
; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
257+
; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
258258
; GFX10-LABEL: name: load_flat_s64
259259
; GFX10: liveins: $vgpr0_vgpr1
260-
; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
261-
; GFX10: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load 8)
262-
; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
260+
; GFX10: $vcc_hi = IMPLICIT_DEF
261+
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
262+
; GFX10: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
263+
; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
263264
%0:vgpr(p1) = COPY $vgpr0_vgpr1
264265
%1:vgpr(s64) = G_LOAD %0 :: (load 8, align 8, addrspace 0)
265266
$vgpr0_vgpr1 = COPY %1
@@ -571,24 +572,25 @@ body: |
571572
572573
; GFX7-LABEL: name: load_flat_v2s16
573574
; GFX7: liveins: $vgpr0_vgpr1
574-
; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
575-
; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4)
576-
; GFX7: $vgpr0 = COPY [[LOAD]](<2 x s16>)
575+
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
576+
; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
577+
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
577578
; GFX8-LABEL: name: load_flat_v2s16
578579
; GFX8: liveins: $vgpr0_vgpr1
579-
; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
580-
; GFX8: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4)
581-
; GFX8: $vgpr0 = COPY [[LOAD]](<2 x s16>)
580+
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
581+
; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
582+
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
582583
; GFX9-LABEL: name: load_flat_v2s16
583584
; GFX9: liveins: $vgpr0_vgpr1
584-
; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
585-
; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4)
586-
; GFX9: $vgpr0 = COPY [[LOAD]](<2 x s16>)
585+
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
586+
; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
587+
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
587588
; GFX10-LABEL: name: load_flat_v2s16
588589
; GFX10: liveins: $vgpr0_vgpr1
589-
; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
590-
; GFX10: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4)
591-
; GFX10: $vgpr0 = COPY [[LOAD]](<2 x s16>)
590+
; GFX10: $vcc_hi = IMPLICIT_DEF
591+
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
592+
; GFX10: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
593+
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
592594
%0:vgpr(p1) = COPY $vgpr0_vgpr1
593595
%1:vgpr(<2 x s16>) = G_LOAD %0 :: (load 4, align 4, addrspace 0)
594596
$vgpr0 = COPY %1
@@ -608,24 +610,25 @@ body: |
608610
609611
; GFX7-LABEL: name: load_flat_v4s16
610612
; GFX7: liveins: $vgpr0_vgpr1
611-
; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
612-
; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8)
613-
; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
613+
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
614+
; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
615+
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
614616
; GFX8-LABEL: name: load_flat_v4s16
615617
; GFX8: liveins: $vgpr0_vgpr1
616-
; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
617-
; GFX8: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8)
618-
; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
618+
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
619+
; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
620+
; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
619621
; GFX9-LABEL: name: load_flat_v4s16
620622
; GFX9: liveins: $vgpr0_vgpr1
621-
; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
622-
; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8)
623-
; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
623+
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
624+
; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
625+
; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
624626
; GFX10-LABEL: name: load_flat_v4s16
625627
; GFX10: liveins: $vgpr0_vgpr1
626-
; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
627-
; GFX10: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8)
628-
; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
628+
; GFX10: $vcc_hi = IMPLICIT_DEF
629+
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
630+
; GFX10: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
631+
; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
629632
%0:vgpr(p1) = COPY $vgpr0_vgpr1
630633
%1:vgpr(<4 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 0)
631634
$vgpr0_vgpr1 = COPY %1

test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir

Lines changed: 39 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -248,24 +248,25 @@ body: |
248248
249249
; GFX7-LABEL: name: load_global_s64
250250
; GFX7: liveins: $vgpr0_vgpr1
251-
; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
252-
; GFX7: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
253-
; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
251+
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
252+
; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
253+
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
254254
; GFX8-LABEL: name: load_global_s64
255255
; GFX8: liveins: $vgpr0_vgpr1
256-
; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
257-
; GFX8: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
258-
; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
256+
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
257+
; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
258+
; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
259259
; GFX9-LABEL: name: load_global_s64
260260
; GFX9: liveins: $vgpr0_vgpr1
261-
; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
262-
; GFX9: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
263-
; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
261+
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
262+
; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
263+
; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
264264
; GFX10-LABEL: name: load_global_s64
265265
; GFX10: liveins: $vgpr0_vgpr1
266-
; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
267-
; GFX10: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
268-
; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
266+
; GFX10: $vcc_hi = IMPLICIT_DEF
267+
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
268+
; GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
269+
; GFX10: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
269270
%0:vgpr(p1) = COPY $vgpr0_vgpr1
270271
%1:vgpr(s64) = G_LOAD %0 :: (load 8, align 8, addrspace 1)
271272
$vgpr0_vgpr1 = COPY %1
@@ -581,24 +582,25 @@ body: |
581582
582583
; GFX7-LABEL: name: load_global_v2s16
583584
; GFX7: liveins: $vgpr0_vgpr1
584-
; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
585-
; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
586-
; GFX7: $vgpr0 = COPY [[LOAD]](<2 x s16>)
585+
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
586+
; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
587+
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
587588
; GFX8-LABEL: name: load_global_v2s16
588589
; GFX8: liveins: $vgpr0_vgpr1
589-
; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
590-
; GFX8: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
591-
; GFX8: $vgpr0 = COPY [[LOAD]](<2 x s16>)
590+
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
591+
; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
592+
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
592593
; GFX9-LABEL: name: load_global_v2s16
593594
; GFX9: liveins: $vgpr0_vgpr1
594-
; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
595-
; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
596-
; GFX9: $vgpr0 = COPY [[LOAD]](<2 x s16>)
595+
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
596+
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
597+
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
597598
; GFX10-LABEL: name: load_global_v2s16
598599
; GFX10: liveins: $vgpr0_vgpr1
599-
; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
600-
; GFX10: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
601-
; GFX10: $vgpr0 = COPY [[LOAD]](<2 x s16>)
600+
; GFX10: $vcc_hi = IMPLICIT_DEF
601+
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
602+
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
603+
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
602604
%0:vgpr(p1) = COPY $vgpr0_vgpr1
603605
%1:vgpr(<2 x s16>) = G_LOAD %0 :: (load 4, align 4, addrspace 1)
604606
$vgpr0 = COPY %1
@@ -618,24 +620,25 @@ body: |
618620
619621
; GFX7-LABEL: name: load_global_v4s16
620622
; GFX7: liveins: $vgpr0_vgpr1
621-
; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
622-
; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
623-
; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
623+
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
624+
; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
625+
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
624626
; GFX8-LABEL: name: load_global_v4s16
625627
; GFX8: liveins: $vgpr0_vgpr1
626-
; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
627-
; GFX8: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
628-
; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
628+
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
629+
; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
630+
; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
629631
; GFX9-LABEL: name: load_global_v4s16
630632
; GFX9: liveins: $vgpr0_vgpr1
631-
; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
632-
; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
633-
; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
633+
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
634+
; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
635+
; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
634636
; GFX10-LABEL: name: load_global_v4s16
635637
; GFX10: liveins: $vgpr0_vgpr1
636-
; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
637-
; GFX10: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
638-
; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
638+
; GFX10: $vcc_hi = IMPLICIT_DEF
639+
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
640+
; GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
641+
; GFX10: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
639642
%0:vgpr(p1) = COPY $vgpr0_vgpr1
640643
%1:vgpr(<4 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 1)
641644
$vgpr0_vgpr1 = COPY %1

0 commit comments

Comments
 (0)