@@ -47,16 +47,17 @@ using namespace llvm;
47
47
using namespace llvm ::PatternMatch;
48
48
49
49
STATISTIC (NumBlocksElim, " Number of blocks eliminated" );
50
- STATISTIC (NumPHIsElim, " Number of trivial PHIs eliminated" );
51
- STATISTIC (NumGEPsElim, " Number of GEPs converted to casts" );
50
+ STATISTIC (NumPHIsElim, " Number of trivial PHIs eliminated" );
51
+ STATISTIC (NumGEPsElim, " Number of GEPs converted to casts" );
52
52
STATISTIC (NumCmpUses, " Number of uses of Cmp expressions replaced with uses of "
53
53
" sunken Cmps" );
54
54
STATISTIC (NumCastUses, " Number of uses of Cast expressions replaced with uses "
55
55
" of sunken Casts" );
56
56
STATISTIC (NumMemoryInsts, " Number of memory instructions whose address "
57
57
" computations were sunk" );
58
- STATISTIC (NumExtsMoved, " Number of [s|z]ext instructions combined with loads" );
59
- STATISTIC (NumExtUses, " Number of uses of [s|z]ext instructions optimized" );
58
+ STATISTIC (NumExtsMoved, " Number of [s|z]ext instructions combined with loads" );
59
+ STATISTIC (NumExtUses, " Number of uses of [s|z]ext instructions optimized" );
60
+ STATISTIC (NumRetsDup, " Number of return instructions duplicated" );
60
61
61
62
namespace {
62
63
class CodeGenPrepare : public FunctionPass {
@@ -71,11 +72,15 @@ namespace {
71
72
// / update it.
72
73
BasicBlock::iterator CurInstIterator;
73
74
74
- // Keeps track of non-local addresses that have been sunk into a block. This
75
- // allows us to avoid inserting duplicate code for blocks with multiple
76
- // load/stores of the same address.
75
+ // / Keeps track of non-local addresses that have been sunk into a block.
76
+ // / This allows us to avoid inserting duplicate code for blocks with
77
+ // / multiple load/stores of the same address.
77
78
DenseMap<Value*, Value*> SunkAddrs;
78
79
80
+ // / UpdateDT - If CFG is modified in anyway, dominator tree may need to
81
+ // / be updated.
82
+ bool UpdateDT;
83
+
79
84
public:
80
85
static char ID; // Pass identification, replacement for typeid
81
86
explicit CodeGenPrepare (const TargetLowering *tli = 0 )
@@ -100,6 +105,7 @@ namespace {
100
105
bool OptimizeCallInst (CallInst *CI);
101
106
bool MoveExtToFormExtLoad (Instruction *I);
102
107
bool OptimizeExtUses (Instruction *I);
108
+ bool DupRetToEnableTailCallOpts (ReturnInst *RI);
103
109
};
104
110
}
105
111
@@ -114,22 +120,29 @@ FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
114
120
/// runOnFunction - Top-level driver for the CodeGenPrepare pass. Returns true
/// if the function was modified in any way.
bool CodeGenPrepare::runOnFunction(Function &F) {
  bool EverMadeChange = false;

  // No CFG edits have happened yet; DupRetToEnableTailCallOpts sets this when
  // it duplicates returns / erases blocks, so the dominator tree can be
  // recomputed once at the end instead of incrementally.
  UpdateDT = false;
  DT = getAnalysisIfAvailable<DominatorTree>();
  PFI = getAnalysisIfAvailable<ProfileInfo>();

  // First pass, eliminate blocks that contain only PHI nodes and an
  // unconditional branch.
  EverMadeChange |= EliminateMostlyEmptyBlocks(F);

  // Iterate to a fixed point: optimizing one block can expose opportunities
  // in another.
  bool MadeChange = true;
  while (MadeChange) {
    MadeChange = false;
    for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
      // Advance the iterator before optimizing: OptimizeBlock may erase the
      // current block (e.g. DupRetToEnableTailCallOpts deletes the return
      // block once all predecessors have been rewritten).
      BasicBlock *BB = I++;
      MadeChange |= OptimizeBlock(*BB);
    }
    EverMadeChange |= MadeChange;
  }

  SunkAddrs.clear();

  // If the CFG changed, rebuild the cached dominator tree wholesale.
  // NOTE(review): DT->DT appears to reach the underlying DominatorTreeBase of
  // the analysis wrapper — confirm against the DominatorTree class definition.
  if (UpdateDT && DT)
    DT->DT->recalculate(F);

  return EverMadeChange;
}
135
148
@@ -533,6 +546,125 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
533
546
return Simplifier.fold (CI, TD);
534
547
}
535
548
549
/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return
/// instructions to the predecessor to enable tail call optimizations. The
/// case it is currently looking for is:
/// bb0:
///   %tmp0 = tail call i32 @f0()
///   br label %return
/// bb1:
///   %tmp1 = tail call i32 @f1()
///   br label %return
/// bb2:
///   %tmp2 = tail call i32 @f2()
///   br label %return
/// return:
///   %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
///   ret i32 %retval
///
/// =>
///
/// bb0:
///   %tmp0 = tail call i32 @f0()
///   ret i32 %tmp0
/// bb1:
///   %tmp1 = tail call i32 @f1()
///   ret i32 %tmp1
/// bb2:
///   %tmp2 = tail call i32 @f2()
///   ret i32 %tmp2
///
bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
  // Tail-call profitability is a target question; without a TargetLowering
  // there is nothing to ask.
  if (!TLI)
    return false;

  // V is null for 'ret void'. Otherwise the returned value must come straight
  // from a PHI; any other expression means the return isn't a simple forward
  // of a call result.
  Value *V = RI->getReturnValue();
  PHINode *PN = V ? dyn_cast<PHINode>(V) : NULL;
  if (V && !PN)
    return false;

  // The PHI must live in the same block as the return, i.e. merge the
  // incoming call results directly at the return point.
  BasicBlock *BB = RI->getParent();
  if (PN && PN->getParent() != BB)
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  // See llvm::isInTailCallPosition().
  const Function *F = BB->getParent();
  unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
  if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
    return false;

  // Make sure there are no instructions between the PHI and return, or that the
  // return is the first instruction in the block.
  if (PN) {
    // Skip over debug intrinsics between the PHI and the return; they don't
    // block the transformation.
    BasicBlock::iterator BI = BB->begin();
    do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
    if (&*BI != RI)
      return false;
  } else {
    if (&*BB->begin() != RI)
      return false;
  }

  /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
  /// call.
  SmallVector<CallInst*, 4> TailCalls;
  if (PN) {
    for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
      CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I));
      // Make sure the phi value is indeed produced by the tail call.
      // hasOneUse ensures the call result feeds only this PHI (and hence only
      // the return), so duplicating the return doesn't leave another user.
      if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) &&
          TLI->mayBeEmittedAsTailCall(CI))
        TailCalls.push_back(CI);
    }
  } else {
    // 'ret void' case: scan each predecessor for a void call immediately
    // before its terminator.
    SmallPtrSet<BasicBlock*, 4> VisitedBBs;
    for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
      // insert() returns false for an already-visited predecessor (a block
      // can appear multiple times in the predecessor list).
      if (!VisitedBBs.insert(*PI))
        continue;

      // Walk backwards: rbegin() is the terminator; ++RI is the instruction
      // just before it, which is the candidate call.
      BasicBlock::InstListType &InstList = (*PI)->getInstList();
      BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin();
      BasicBlock::InstListType::reverse_iterator RE = InstList.rend();
      if (++RI == RE)
        continue;
      CallInst *CI = dyn_cast<CallInst>(&*RI);
      if (CI && CI->getType()->isVoidTy() && TLI->mayBeEmittedAsTailCall(CI))
        TailCalls.push_back(CI);
    }
  }

  bool Changed = false;
  for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) {
    CallInst *CI = TailCalls[i];
    CallSite CS(CI);

    // Conservatively require the attributes of the call to match those of the
    // return. Ignore noalias because it doesn't affect the call sequence.
    unsigned CalleeRetAttr = CS.getAttributes().getRetAttributes();
    if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
      continue;

    // Make sure the call instruction is followed by an unconditional branch to
    // the return block.
    BasicBlock *CallBB = CI->getParent();
    BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator());
    if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
      continue;

    // Duplicate the return into CallBB.
    (void)FoldReturnIntoUncondBranch(RI, BB, CallBB);
    UpdateDT = Changed = true;
    ++NumRetsDup;
  }

  // If we eliminated all predecessors of the block, delete the block now.
  if (Changed && pred_begin(BB) == pred_end(BB))
    BB->eraseFromParent();

  return Changed;
}
667
+
536
668
// ===----------------------------------------------------------------------===//
537
669
// Memory Optimization
538
670
// ===----------------------------------------------------------------------===//
@@ -956,6 +1088,9 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
956
1088
if (CallInst *CI = dyn_cast<CallInst>(I))
957
1089
return OptimizeCallInst (CI);
958
1090
1091
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(I))
1092
+ return DupRetToEnableTailCallOpts (RI);
1093
+
959
1094
return false ;
960
1095
}
961
1096
0 commit comments