Skip to content

Commit

Permalink
AMDGPU: Don't spill SP reg like a normal CSR
Browse files Browse the repository at this point in the history
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@313217 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
arsenm committed Sep 13, 2017
1 parent b004b50 commit a1a4168
Show file tree
Hide file tree
Showing 5 changed files with 26 additions and 5 deletions.
9 changes: 9 additions & 0 deletions lib/Target/AMDGPU/SIFrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -594,6 +594,15 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
}
}

/// Determine which callee-saved registers need to be saved for \p MF.
///
/// Defers to TargetFrameLowering::determineCalleeSaves to populate
/// \p SavedRegs, then clears the bit for the stack pointer offset register
/// reported by SIMachineFunctionInfo.
void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                           BitVector &SavedRegs,
                                           RegScavenger *RS) const {
  // Let the generic implementation fill in the set first.
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);

  // The SP is specifically managed, so drop it from the set to avoid an
  // extra spill of it.
  const SIMachineFunctionInfo &FuncInfo = *MF.getInfo<SIMachineFunctionInfo>();
  SavedRegs.reset(FuncInfo.getStackPtrOffsetReg());
}

MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
MachineFunction &MF,
MachineBasicBlock &MBB,
Expand Down
3 changes: 3 additions & 0 deletions lib/Target/AMDGPU/SIFrameLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ class SIFrameLowering final : public AMDGPUFrameLowering {
int getFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg) const override;

/// Compute the callee-saved register set via the base TargetFrameLowering
/// implementation, then clear the stack pointer offset register from
/// \p SavedRegs so that no extra spill of it is requested.
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
RegScavenger *RS = nullptr) const override;

void processFunctionBeforeFrameFinalized(
MachineFunction &MF,
RegScavenger *RS = nullptr) const override;
Expand Down
4 changes: 4 additions & 0 deletions lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -818,6 +818,10 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineFrameInfo &FrameInfo = MF->getFrameInfo();
DebugLoc DL = MBB.findDebugLoc(MI);

assert(SrcReg != MFI->getStackPtrOffsetReg() &&
SrcReg != MFI->getFrameOffsetReg() &&
SrcReg != MFI->getScratchWaveOffsetReg());

unsigned Size = FrameInfo.getObjectSize(FrameIndex);
unsigned Align = FrameInfo.getObjectAlignment(FrameIndex);
MachinePointerInfo PtrInfo
Expand Down
13 changes: 8 additions & 5 deletions test/CodeGen/AMDGPU/byval-frame-setup.ll
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ entry:
; GCN: s_mov_b32 s5, s32
; GCN-DAG: buffer_store_dword v32
; GCN-DAG: buffer_store_dword v33
; GCN-NOT: v_writelane_b32 v{{[0-9]+}}, s32
; GCN: v_writelane_b32

; GCN-DAG: s_add_u32 s32, s32, 0xb00{{$}}
Expand All @@ -48,6 +49,7 @@ entry:
; GCN: buffer_store_dword [[ADD1]], off, s[0:3], s5 offset:20{{$}}

; GCN: v_readlane_b32
; GCN-NOT: v_readlane_b32 s32
; GCN: buffer_load_dword v32,
; GCN: buffer_load_dword v33,
; GCN: s_sub_u32 s32, s32, 0xb00{{$}}
Expand All @@ -69,8 +71,8 @@ entry:

; GCN-LABEL: {{^}}call_void_func_byval_struct_func:
; GCN: s_mov_b32 s5, s32
; GCN: s_add_u32 s32, s32, 0xc00{{$}}
; GCN: v_writelane_b32
; GCN-DAG: s_add_u32 s32, s32, 0xc00{{$}}
; GCN-DAG: v_writelane_b32

; GCN-DAG: s_add_u32 s32, s32, 0x800{{$}}
; GCN-DAG: v_mov_b32_e32 [[NINE:v[0-9]+]], 9
Expand Down Expand Up @@ -101,11 +103,12 @@ entry:
; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:32

; GCN: s_swappc_b64
; GCN-NEXT: s_sub_u32 s32, s32, 0x800{{$}}

; GCN-NOT: v_readlane_b32 s32
; GCN: v_readlane_b32
; GCN-NOT: v_readlane_b32 s32

; GCN: s_sub_u32 s32, s32, 0xc00{{$}}
; GCN: s_sub_u32 s32, s32, 0x800{{$}}
; GCN-NEXT: s_sub_u32 s32, s32, 0xc00{{$}}
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @call_void_func_byval_struct_func() #0 {
Expand Down
2 changes: 2 additions & 0 deletions test/CodeGen/AMDGPU/sibling-call.ll
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,10 @@ define fastcc i32 @i32_fastcc_i32_byval_i32(i32 %arg0, i32* byval align 4 %arg1)

; Tail call disallowed with byval in parent.
; GCN-LABEL: {{^}}sibling_call_i32_fastcc_i32_byval_i32_byval_parent:
; GCN-NOT: v_writelane_b32 v{{[0-9]+}}, s32
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32 offset:4
; GCN: s_swappc_b64
; GCN-NOT: v_readlane_b32 s32
; GCN: s_setpc_b64
define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32_byval_parent(i32 %a, i32* byval %b.byval, i32 %c) #1 {
entry:
Expand Down

0 comments on commit a1a4168

Please sign in to comment.