Skip to content

Commit f9ab36c

Browse files
committed
[PGO] Handle cases of non-instrument BBs
As shown in PR41279, some basic blocks (such as catchswitch) cannot be instrumented. This patch filters out these BBs in PGO instrumentation. It also sets the profile count to the fail-to-instrument edge, so that we can propagate the counts in the CFG. Differential Revision: https://reviews.llvm.org/D62700 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@362995 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 150fff8 commit f9ab36c

File tree

5 files changed

+191
-43
lines changed

5 files changed

+191
-43
lines changed

lib/Transforms/Instrumentation/PGOInstrumentation.cpp

Lines changed: 85 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -544,6 +544,12 @@ struct BBInfo {
544544
const std::string infoString() const {
545545
return (Twine("Index=") + Twine(Index)).str();
546546
}
547+
548+
// Empty function -- only applicable to UseBBInfo.
549+
void addOutEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {}
550+
551+
// Empty function -- only applicable to UseBBInfo.
552+
void addInEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {}
547553
};
548554

549555
// This class implements the CFG edges. Note the CFG can be a multi-graph.
@@ -748,7 +754,7 @@ void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
748754
}
749755

750756
// Collect all the BBs that will be instrumented and return them in
751-
// InstrumentBBs.
757+
// InstrumentBBs and set up InEdges/OutEdges for UseBBInfo.
752758
template <class Edge, class BBInfo>
753759
void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
754760
std::vector<BasicBlock *> &InstrumentBBs) {
@@ -763,6 +769,18 @@ void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
763769
if (InstrBB)
764770
InstrumentBBs.push_back(InstrBB);
765771
}
772+
773+
// Set up InEdges/OutEdges for all BBs.
774+
for (auto &E : MST.AllEdges) {
775+
if (E->Removed)
776+
continue;
777+
const BasicBlock *SrcBB = E->SrcBB;
778+
const BasicBlock *DestBB = E->DestBB;
779+
BBInfo &SrcInfo = getBBInfo(SrcBB);
780+
BBInfo &DestInfo = getBBInfo(DestBB);
781+
SrcInfo.addOutEdge(E.get());
782+
DestInfo.addInEdge(E.get());
783+
}
766784
}
767785

768786
// Given a CFG E to be instrumented, find which BB to place the instrumented
@@ -780,34 +798,45 @@ BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
780798
if (DestBB == nullptr)
781799
return SrcBB;
782800

801+
auto canInstrument = [this](BasicBlock *BB) -> BasicBlock * {
802+
// Some basic blocks (such as catchswitch) cannot be instrumented.
803+
// If the returned first insertion point is the end of BB, skip this BB.
804+
if (BB->getFirstInsertionPt() == BB->end()) {
805+
LLVM_DEBUG(dbgs() << "Cannot instrument BB index=" << getBBInfo(BB).Index
806+
<< "\n");
807+
return nullptr;
808+
}
809+
return BB;
810+
};
811+
783812
// Instrument the SrcBB if it has a single successor,
784813
// otherwise, the DestBB if this is not a critical edge.
785814
Instruction *TI = SrcBB->getTerminator();
786815
if (TI->getNumSuccessors() <= 1)
787-
return SrcBB;
816+
return canInstrument(SrcBB);
788817
if (!E->IsCritical)
789-
return DestBB;
818+
return canInstrument(DestBB);
790819

791-
// For a critical edge, we have to split. Instrument the newly
792-
// created BB.
793-
IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
794-
LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index
795-
<< " --> " << getBBInfo(DestBB).Index << "\n");
796820
unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
797821
BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum);
798822
if (!InstrBB) {
799823
LLVM_DEBUG(
800824
dbgs() << "Fail to split critical edge: not instrument this edge.\n");
801825
return nullptr;
802826
}
827+
// For a critical edge, we have to split. Instrument the newly
828+
// created BB.
829+
IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
830+
LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index
831+
<< " --> " << getBBInfo(DestBB).Index << "\n");
803832
// Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
804833
MST.addEdge(SrcBB, InstrBB, 0);
805834
// Second one: Add new edge of InstrBB->DestBB.
806835
Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0);
807836
NewEdge1.InMST = true;
808837
E->Removed = true;
809838

810-
return InstrBB;
839+
return canInstrument(InstrBB);
811840
}
812841

813842
// Visit all edges and instrument the edges not in MST, and do value profiling.
@@ -925,6 +954,18 @@ struct UseBBInfo : public BBInfo {
925954
return BBInfo::infoString();
926955
return (Twine(BBInfo::infoString()) + " Count=" + Twine(CountValue)).str();
927956
}
957+
958+
// Add an OutEdge and update the edge count.
959+
void addOutEdge(PGOUseEdge *E) {
960+
OutEdges.push_back(E);
961+
UnknownCountOutEdge++;
962+
}
963+
964+
// Add an InEdge and update the edge count.
965+
void addInEdge(PGOUseEdge *E) {
966+
InEdges.push_back(E);
967+
UnknownCountInEdge++;
968+
}
928969
};
929970

930971
} // end anonymous namespace
@@ -1069,24 +1110,50 @@ bool PGOUseFunc::setInstrumentedCounts(
10691110
if (NumCounters != CountFromProfile.size()) {
10701111
return false;
10711112
}
1113+
// Set the profile count to the Instrumented BBs.
10721114
uint32_t I = 0;
10731115
for (BasicBlock *InstrBB : InstrumentBBs) {
10741116
uint64_t CountValue = CountFromProfile[I++];
10751117
UseBBInfo &Info = getBBInfo(InstrBB);
10761118
Info.setBBInfoCount(CountValue);
1077-
// If only one in-edge, the edge profile count should be the same as BB
1078-
// profile count.
1079-
if (Info.InEdges.size() == 1) {
1080-
Info.InEdges[0]->setEdgeCount(CountValue);
1081-
}
1119+
}
1120+
ProfileCountSize = CountFromProfile.size();
1121+
CountPosition = I;
1122+
1123+
// Set the edge count and update the count of unknown edges for BBs.
1124+
auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void {
1125+
E->setEdgeCount(Value);
1126+
this->getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1127+
this->getBBInfo(E->DestBB).UnknownCountInEdge--;
1128+
};
1129+
1130+
// Set the profile count for the instrumented edges. Some edges are not in
1131+
// the MST yet are not instrumented. Set the edge count value so that we can
1132+
// populate the profile counts later.
1133+
for (auto &E : FuncInfo.MST.AllEdges) {
1134+
if (E->Removed || E->InMST)
1135+
continue;
1136+
const BasicBlock *SrcBB = E->SrcBB;
1137+
UseBBInfo &SrcInfo = getBBInfo(SrcBB);
1138+
10821139
// If only one out-edge, the edge profile count should be the same as BB
10831140
// profile count.
1084-
if (Info.OutEdges.size() == 1) {
1085-
Info.OutEdges[0]->setEdgeCount(CountValue);
1141+
if (SrcInfo.CountValid && SrcInfo.OutEdges.size() == 1)
1142+
setEdgeCount(E.get(), SrcInfo.CountValue);
1143+
else {
1144+
const BasicBlock *DestBB = E->DestBB;
1145+
UseBBInfo &DestInfo = getBBInfo(DestBB);
1146+
// If only one in-edge, the edge profile count should be the same as BB
1147+
// profile count.
1148+
if (DestInfo.CountValid && DestInfo.InEdges.size() == 1)
1149+
setEdgeCount(E.get(), DestInfo.CountValue);
10861150
}
1151+
if (E->CountValid)
1152+
continue;
1153+
// E's count should have been set from profile. If not, this means E skips
1154+
// the instrumentation. We set the count to 0.
1155+
setEdgeCount(E.get(), 0);
10871156
}
1088-
ProfileCountSize = CountFromProfile.size();
1089-
CountPosition = I;
10901157
return true;
10911158
}
10921159

@@ -1180,26 +1247,6 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros)
11801247
// Populate the counters from instrumented BBs to all BBs.
11811248
// In the end of this operation, all BBs should have a valid count value.
11821249
void PGOUseFunc::populateCounters() {
1183-
// First set up Count variable for all BBs.
1184-
for (auto &E : FuncInfo.MST.AllEdges) {
1185-
if (E->Removed)
1186-
continue;
1187-
1188-
const BasicBlock *SrcBB = E->SrcBB;
1189-
const BasicBlock *DestBB = E->DestBB;
1190-
UseBBInfo &SrcInfo = getBBInfo(SrcBB);
1191-
UseBBInfo &DestInfo = getBBInfo(DestBB);
1192-
SrcInfo.OutEdges.push_back(E.get());
1193-
DestInfo.InEdges.push_back(E.get());
1194-
SrcInfo.UnknownCountOutEdge++;
1195-
DestInfo.UnknownCountInEdge++;
1196-
1197-
if (!E->CountValid)
1198-
continue;
1199-
DestInfo.UnknownCountInEdge--;
1200-
SrcInfo.UnknownCountOutEdge--;
1201-
}
1202-
12031250
bool Changes = true;
12041251
unsigned NumPasses = 0;
12051252
while (Changes) {
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
:ir
2+
foo
3+
60927483247
4+
4
5+
3
6+
2
7+
3
8+
2
9+
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
:ir
2+
f
3+
62077759478
4+
2
5+
3
6+
2
7+

test/Transforms/PGOProfile/PR41279.ll

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
; Test that instrumentation works fine for the case of failing to split critical edges.
22
; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
33
; RUN: opt < %s -passes=pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
4+
; RUN: llvm-profdata merge %S/Inputs/PR41279.proftext -o %t.profdata
5+
; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
6+
; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
47

58
declare void @f3({ i8*, i64 }*, { i8*, i64 }*, i64)
69
declare { i8*, i64 } @f0({ i8*, i64 }*)
@@ -9,17 +12,22 @@ declare void @invok2({ i8*, i64 }*, i8* noalias readonly align 1, i64)
912
declare void @invok1({ i8*, i64 }*, { i8*, i64 }*, i64)
1013
declare i32 @__CxxFrameHandler3(...)
1114

12-
define internal void @foo({ i8*, i64 }*, { i8*, i64 }*) personality i32 (...)* @__CxxFrameHandler3 {
15+
define void @foo({ i8*, i64 }*, { i8*, i64 }*) personality i32 (...)* @__CxxFrameHandler3 {
16+
; USE-LABEL: @foo
17+
; USE-SAME: !prof ![[FUNC_ENTRY_COUNT:[0-9]+]]
18+
1319
%3 = alloca i8, align 1
1420
store i8 0, i8* %3, align 1
1521
%4 = call i64 @f1()
1622
%5 = icmp ult i64 %4, 32
1723
br i1 %5, label %7, label %13
24+
; USE: br i1 %5, label %7, label %13
25+
; USE-SAME: !prof ![[BW_ENTRY1:[0-9]+]]
1826

1927
6:
2028
cleanupret from %17 unwind to caller
2129
; GEN: 6:
22-
; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__foo, i32 0, i32 0), i64 60927483247, i32 4, i32 2)
30+
; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 60927483247, i32 4, i32 2)
2331

2432
7:
2533
store i8 1, i8* %3, align 1
@@ -42,13 +50,13 @@ define internal void @foo({ i8*, i64 }*, { i8*, i64 }*) personality i32 (...)* @
4250
store i8 0, i8* %3, align 1
4351
br label %14
4452
; GEN: 12:
45-
; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__foo, i32 0, i32 0), i64 60927483247, i32 4, i32 1)
53+
; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 60927483247, i32 4, i32 1)
4654

4755
13:
4856
call void @f3({ i8*, i64 }* %0, { i8*, i64 }* %1, i64 1)
4957
br label %14
5058
; GEN: 13:
51-
; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__foo, i32 0, i32 0), i64 60927483247, i32 4, i32 0)
59+
; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 60927483247, i32 4, i32 0)
5260

5361
14:
5462
ret void
@@ -57,11 +65,20 @@ define internal void @foo({ i8*, i64 }*, { i8*, i64 }*) personality i32 (...)* @
5765
store i8 0, i8* %3, align 1
5866
br label %6
5967
; GEN: 15:
60-
; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__foo, i32 0, i32 0), i64 60927483247, i32 4, i32 3)
68+
; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 60927483247, i32 4, i32 3)
6169

6270
16:
6371
%17 = cleanuppad within none []
6472
%18 = load i8, i8* %3, align 1
6573
%19 = trunc i8 %18 to i1
6674
br i1 %19, label %15, label %6
75+
; USE: br i1 %19, label %15, label %6
76+
; USE-SAME: !prof ![[BW_ENTRY2:[0-9]+]]
6777
}
78+
79+
; USE-DAG: {{![0-9]+}} = !{i32 1, !"ProfileSummary", {{![0-9]+}}}
80+
; USE-DAG: {{![0-9]+}} = !{!"DetailedSummary", {{![0-9]+}}}
81+
; USE-DAG: ![[FUNC_ENTRY_COUNT]] = !{!"function_entry_count", i64 8}
82+
; USE_DAG: ![[BW_ENTRY1]] = !{!"branch_weights", i32 5, i32 3}
83+
; USE_DAG: ![[BW_ENTRY2]] = !{!"branch_weights", i32 2, i32 1}
84+
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
; Test that instrumentation works fine for the case of catchswitch stmts.
2+
; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
3+
; RUN: opt < %s -passes=pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
4+
; RUN: llvm-profdata merge %S/Inputs/PR41279_2.proftext -o %t.profdata
5+
; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
6+
; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
7+
8+
9+
define dso_local void @f() personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*) {
10+
; USE-LABEL: @f
11+
; USE-SAME: !prof ![[FUNC_ENTRY_COUNT:[0-9]+]]
12+
; USE-DAG: {{![0-9]+}} = !{i32 1, !"ProfileSummary", {{![0-9]+}}}
13+
; USE-DAG: {{![0-9]+}} = !{!"DetailedSummary", {{![0-9]+}}}
14+
; USE-DAG: ![[FUNC_ENTRY_COUNT]] = !{!"function_entry_count", i64 5}
15+
entry:
16+
%__exception_code = alloca i32, align 4
17+
%__exception_code2 = alloca i32, align 4
18+
invoke void @f() #2
19+
to label %invoke.cont unwind label %catch.dispatch
20+
21+
catch.dispatch:
22+
%0 = catchswitch within none [label %__except] unwind to caller
23+
24+
__except:
25+
%1 = catchpad within %0 [i8* null]
26+
catchret from %1 to label %__except1
27+
28+
__except1:
29+
%2 = call i32 @llvm.eh.exceptioncode(token %1)
30+
store i32 %2, i32* %__exception_code, align 4
31+
br label %__try.cont7
32+
;GEN: _except1:
33+
;GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([1 x i8], [1 x i8]* @__profn_f, i32 0, i32 0), i64 62077759478, i32 2, i32 1)
34+
35+
invoke.cont:
36+
br label %__try.cont
37+
38+
__try.cont:
39+
invoke void @f()
40+
to label %invoke.cont3 unwind label %catch.dispatch4
41+
42+
catch.dispatch4:
43+
%3 = catchswitch within none [label %__except5] unwind to caller
44+
; GEN: catch.dispatch4:
45+
; GEN-NOT: call void @llvm.instrprof.increment
46+
47+
__except5:
48+
%4 = catchpad within %3 [i8* null]
49+
catchret from %4 to label %__except6
50+
51+
__except6:
52+
%5 = call i32 @llvm.eh.exceptioncode(token %4)
53+
store i32 %5, i32* %__exception_code2, align 4
54+
br label %__try.cont7
55+
56+
__try.cont7:
57+
ret void
58+
59+
invoke.cont3:
60+
br label %__try.cont7
61+
;GEN: invoke.cont3:
62+
;GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([1 x i8], [1 x i8]* @__profn_f, i32 0, i32 0), i64 62077759478, i32 2, i32 0)
63+
64+
}
65+
66+
declare dso_local i32 @__C_specific_handler(...)
67+
68+
declare i32 @llvm.eh.exceptioncode(token)

0 commit comments

Comments
 (0)