Skip to content

Commit 962a797

Browse files
committed
--- Merging r127981 into '.':
U include/llvm/Target/TargetLowering.h U lib/Target/X86/X86ISelLowering.cpp U lib/Target/X86/X86ISelLowering.h U lib/Target/ARM/ARMISelLowering.h U lib/Target/ARM/ARMISelLowering.cpp U lib/Transforms/Scalar/CodeGenPrepare.cpp --- Merging r128194 into '.': G lib/Transforms/Scalar/CodeGenPrepare.cpp --- Merging r128196 into '.': G lib/Transforms/Scalar/CodeGenPrepare.cpp --- Merging r128197 into '.': A test/CodeGen/X86/tailcall-returndup-void.ll G lib/Transforms/Scalar/CodeGenPrepare.cpp git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_29@128200 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 450fcbb commit 962a797

File tree

7 files changed

+215
-8
lines changed

7 files changed

+215
-8
lines changed

include/llvm/Target/TargetLowering.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1287,6 +1287,14 @@ class TargetLowering {
12871287
return false;
12881288
}
12891289

1290+
/// mayBeEmittedAsTailCall - Return true if the target may be able to emit the
1291+
/// call instruction as a tail call. This is used by optimization passes to
1292+
/// determine if it's profitable to duplicate return instructions to enable
1293+
/// tailcall optimization.
1294+
virtual bool mayBeEmittedAsTailCall(CallInst *CI) const {
1295+
return false;
1296+
}
1297+
12901298
/// LowerOperationWrapper - This callback is invoked by the type legalizer
12911299
/// to legalize nodes with an illegal operand type but legal result types.
12921300
/// It replaces the LowerOperation callback in the type Legalizer.

lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1803,6 +1803,16 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N) const {
18031803
return HasRet;
18041804
}
18051805

1806+
bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
1807+
if (!EnableARMTailCalls)
1808+
return false;
1809+
1810+
if (!CI->isTailCall())
1811+
return false;
1812+
1813+
return !Subtarget->isThumb1Only();
1814+
}
1815+
18061816
// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
18071817
// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
18081818
// one of the above mentioned nodes. It has to be wrapped because otherwise

lib/Target/ARM/ARMISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -455,6 +455,8 @@ namespace llvm {
455455

456456
virtual bool isUsedByReturnOnly(SDNode *N) const;
457457

458+
virtual bool mayBeEmittedAsTailCall(CallInst *CI) const;
459+
458460
SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
459461
SDValue &ARMcc, SelectionDAG &DAG, DebugLoc dl) const;
460462
SDValue getVFPCmp(SDValue LHS, SDValue RHS,

lib/Target/X86/X86ISelLowering.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
#include "llvm/ADT/Statistic.h"
4646
#include "llvm/ADT/StringExtras.h"
4747
#include "llvm/ADT/VectorExtras.h"
48+
#include "llvm/Support/CallSite.h"
4849
#include "llvm/Support/Debug.h"
4950
#include "llvm/Support/Dwarf.h"
5051
#include "llvm/Support/ErrorHandling.h"
@@ -1580,6 +1581,18 @@ static bool IsTailCallConvention(CallingConv::ID CC) {
15801581
return (CC == CallingConv::Fast || CC == CallingConv::GHC);
15811582
}
15821583

1584+
bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
1585+
if (!CI->isTailCall())
1586+
return false;
1587+
1588+
CallSite CS(CI);
1589+
CallingConv::ID CalleeCC = CS.getCallingConv();
1590+
if (!IsTailCallConvention(CalleeCC) && CalleeCC != CallingConv::C)
1591+
return false;
1592+
1593+
return true;
1594+
}
1595+
15831596
/// FuncIsMadeTailCallSafe - Return true if the function is being made into
15841597
/// a tailcall target by changing its ABI.
15851598
static bool FuncIsMadeTailCallSafe(CallingConv::ID CC) {

lib/Target/X86/X86ISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -843,6 +843,8 @@ namespace llvm {
843843

844844
virtual bool isUsedByReturnOnly(SDNode *N) const;
845845

846+
virtual bool mayBeEmittedAsTailCall(CallInst *CI) const;
847+
846848
virtual bool
847849
CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
848850
const SmallVectorImpl<ISD::OutputArg> &Outs,

lib/Transforms/Scalar/CodeGenPrepare.cpp

Lines changed: 143 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -47,16 +47,17 @@ using namespace llvm;
4747
using namespace llvm::PatternMatch;
4848

4949
STATISTIC(NumBlocksElim, "Number of blocks eliminated");
50-
STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
51-
STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
50+
STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
51+
STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
5252
STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
5353
"sunken Cmps");
5454
STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
5555
"of sunken Casts");
5656
STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
5757
"computations were sunk");
58-
STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
59-
STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
58+
STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
59+
STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
60+
STATISTIC(NumRetsDup, "Number of return instructions duplicated");
6061

6162
namespace {
6263
class CodeGenPrepare : public FunctionPass {
@@ -71,11 +72,15 @@ namespace {
7172
/// update it.
7273
BasicBlock::iterator CurInstIterator;
7374

74-
// Keeps track of non-local addresses that have been sunk into a block. This
75-
// allows us to avoid inserting duplicate code for blocks with multiple
76-
// load/stores of the same address.
75+
/// Keeps track of non-local addresses that have been sunk into a block.
76+
/// This allows us to avoid inserting duplicate code for blocks with
77+
/// multiple load/stores of the same address.
7778
DenseMap<Value*, Value*> SunkAddrs;
7879

80+
/// UpdateDT - If the CFG is modified in any way, the dominator tree may need to
81+
/// be updated.
82+
bool UpdateDT;
83+
7984
public:
8085
static char ID; // Pass identification, replacement for typeid
8186
explicit CodeGenPrepare(const TargetLowering *tli = 0)
@@ -100,6 +105,7 @@ namespace {
100105
bool OptimizeCallInst(CallInst *CI);
101106
bool MoveExtToFormExtLoad(Instruction *I);
102107
bool OptimizeExtUses(Instruction *I);
108+
bool DupRetToEnableTailCallOpts(ReturnInst *RI);
103109
};
104110
}
105111

@@ -114,22 +120,29 @@ FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
114120
bool CodeGenPrepare::runOnFunction(Function &F) {
115121
bool EverMadeChange = false;
116122

123+
UpdateDT = false;
117124
DT = getAnalysisIfAvailable<DominatorTree>();
118125
PFI = getAnalysisIfAvailable<ProfileInfo>();
126+
119127
// First pass, eliminate blocks that contain only PHI nodes and an
120128
// unconditional branch.
121129
EverMadeChange |= EliminateMostlyEmptyBlocks(F);
122130

123131
bool MadeChange = true;
124132
while (MadeChange) {
125133
MadeChange = false;
126-
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
134+
for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
135+
BasicBlock *BB = I++;
127136
MadeChange |= OptimizeBlock(*BB);
137+
}
128138
EverMadeChange |= MadeChange;
129139
}
130140

131141
SunkAddrs.clear();
132142

143+
if (UpdateDT && DT)
144+
DT->DT->recalculate(F);
145+
133146
return EverMadeChange;
134147
}
135148

@@ -533,6 +546,125 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
533546
return Simplifier.fold(CI, TD);
534547
}
535548

549+
/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return
550+
/// instructions to the predecessor to enable tail call optimizations. The
551+
/// case it is currently looking for is:
552+
/// bb0:
553+
/// %tmp0 = tail call i32 @f0()
554+
/// br label %return
555+
/// bb1:
556+
/// %tmp1 = tail call i32 @f1()
557+
/// br label %return
558+
/// bb2:
559+
/// %tmp2 = tail call i32 @f2()
560+
/// br label %return
561+
/// return:
562+
/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
563+
/// ret i32 %retval
564+
///
565+
/// =>
566+
///
567+
/// bb0:
568+
/// %tmp0 = tail call i32 @f0()
569+
/// ret i32 %tmp0
570+
/// bb1:
571+
/// %tmp1 = tail call i32 @f1()
572+
/// ret i32 %tmp1
573+
/// bb2:
574+
/// %tmp2 = tail call i32 @f2()
575+
/// ret i32 %tmp2
576+
///
577+
bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
578+
if (!TLI)
579+
return false;
580+
581+
Value *V = RI->getReturnValue();
582+
PHINode *PN = V ? dyn_cast<PHINode>(V) : NULL;
583+
if (V && !PN)
584+
return false;
585+
586+
BasicBlock *BB = RI->getParent();
587+
if (PN && PN->getParent() != BB)
588+
return false;
589+
590+
// It's not safe to eliminate the sign / zero extension of the return value.
591+
// See llvm::isInTailCallPosition().
592+
const Function *F = BB->getParent();
593+
unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
594+
if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
595+
return false;
596+
597+
// Make sure there are no instructions between the PHI and return, or that the
598+
// return is the first instruction in the block.
599+
if (PN) {
600+
BasicBlock::iterator BI = BB->begin();
601+
do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
602+
if (&*BI != RI)
603+
return false;
604+
} else {
605+
if (&*BB->begin() != RI)
606+
return false;
607+
}
608+
609+
/// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
610+
/// call.
611+
SmallVector<CallInst*, 4> TailCalls;
612+
if (PN) {
613+
for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
614+
CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I));
615+
// Make sure the phi value is indeed produced by the tail call.
616+
if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) &&
617+
TLI->mayBeEmittedAsTailCall(CI))
618+
TailCalls.push_back(CI);
619+
}
620+
} else {
621+
SmallPtrSet<BasicBlock*, 4> VisitedBBs;
622+
for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
623+
if (!VisitedBBs.insert(*PI))
624+
continue;
625+
626+
BasicBlock::InstListType &InstList = (*PI)->getInstList();
627+
BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin();
628+
BasicBlock::InstListType::reverse_iterator RE = InstList.rend();
629+
if (++RI == RE)
630+
continue;
631+
CallInst *CI = dyn_cast<CallInst>(&*RI);
632+
if (CI && CI->getType()->isVoidTy() && TLI->mayBeEmittedAsTailCall(CI))
633+
TailCalls.push_back(CI);
634+
}
635+
}
636+
637+
bool Changed = false;
638+
for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) {
639+
CallInst *CI = TailCalls[i];
640+
CallSite CS(CI);
641+
642+
// Conservatively require the attributes of the call to match those of the
643+
// return. Ignore noalias because it doesn't affect the call sequence.
644+
unsigned CalleeRetAttr = CS.getAttributes().getRetAttributes();
645+
if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
646+
continue;
647+
648+
// Make sure the call instruction is followed by an unconditional branch to
649+
// the return block.
650+
BasicBlock *CallBB = CI->getParent();
651+
BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator());
652+
if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
653+
continue;
654+
655+
// Duplicate the return into CallBB.
656+
(void)FoldReturnIntoUncondBranch(RI, BB, CallBB);
657+
UpdateDT = Changed = true;
658+
++NumRetsDup;
659+
}
660+
661+
// If we eliminated all predecessors of the block, delete the block now.
662+
if (Changed && pred_begin(BB) == pred_end(BB))
663+
BB->eraseFromParent();
664+
665+
return Changed;
666+
}
667+
536668
//===----------------------------------------------------------------------===//
537669
// Memory Optimization
538670
//===----------------------------------------------------------------------===//
@@ -956,6 +1088,9 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
9561088
if (CallInst *CI = dyn_cast<CallInst>(I))
9571089
return OptimizeCallInst(CI);
9581090

1091+
if (ReturnInst *RI = dyn_cast<ReturnInst>(I))
1092+
return DupRetToEnableTailCallOpts(RI);
1093+
9591094
return false;
9601095
}
9611096

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
; RUN: llc < %s -march=x86-64 | FileCheck %s
2+
; CHECK: rBM_info
3+
; CHECK-NOT: ret
4+
5+
@sES_closure = external global [0 x i64]
6+
declare cc10 void @sEH_info(i64* noalias nocapture, i64* noalias nocapture, i64* noalias nocapture, i64, i64, i64) align 8
7+
8+
define cc10 void @rBM_info(i64* noalias nocapture %Base_Arg, i64* noalias nocapture %Sp_Arg, i64* noalias nocapture %Hp_Arg, i64 %R1_Arg, i64 %R2_Arg, i64 %R3_Arg) nounwind align 8 {
9+
c263:
10+
%ln265 = getelementptr inbounds i64* %Sp_Arg, i64 -2
11+
%ln266 = ptrtoint i64* %ln265 to i64
12+
%ln268 = icmp ult i64 %ln266, %R3_Arg
13+
br i1 %ln268, label %c26a, label %n26p
14+
15+
n26p: ; preds = %c263
16+
br i1 icmp ne (i64 and (i64 ptrtoint ([0 x i64]* @sES_closure to i64), i64 7), i64 0), label %c1ZP.i, label %n1ZQ.i
17+
18+
n1ZQ.i: ; preds = %n26p
19+
%ln1ZT.i = load i64* getelementptr inbounds ([0 x i64]* @sES_closure, i64 0, i64 0), align 8
20+
%ln1ZU.i = inttoptr i64 %ln1ZT.i to void (i64*, i64*, i64*, i64, i64, i64)*
21+
tail call cc10 void %ln1ZU.i(i64* %Base_Arg, i64* %Sp_Arg, i64* %Hp_Arg, i64 ptrtoint ([0 x i64]* @sES_closure to i64), i64 ptrtoint ([0 x i64]* @sES_closure to i64), i64 %R3_Arg) nounwind
22+
br label %rBL_info.exit
23+
24+
c1ZP.i: ; preds = %n26p
25+
tail call cc10 void @sEH_info(i64* %Base_Arg, i64* %Sp_Arg, i64* %Hp_Arg, i64 ptrtoint ([0 x i64]* @sES_closure to i64), i64 ptrtoint ([0 x i64]* @sES_closure to i64), i64 %R3_Arg) nounwind
26+
br label %rBL_info.exit
27+
28+
rBL_info.exit: ; preds = %c1ZP.i, %n1ZQ.i
29+
ret void
30+
31+
c26a: ; preds = %c263
32+
%ln27h = getelementptr inbounds i64* %Base_Arg, i64 -2
33+
%ln27j = load i64* %ln27h, align 8
34+
%ln27k = inttoptr i64 %ln27j to void (i64*, i64*, i64*, i64, i64, i64)*
35+
tail call cc10 void %ln27k(i64* %Base_Arg, i64* %Sp_Arg, i64* %Hp_Arg, i64 %R1_Arg, i64 %R2_Arg, i64 %R3_Arg) nounwind
36+
ret void
37+
}

0 commit comments

Comments
 (0)