From 32bc0430f70b057d1bba623252e92ab9f279028d Mon Sep 17 00:00:00 2001 From: Zac Medico Date: Fri, 25 Aug 2017 09:41:07 -0700 Subject: [PATCH 1/2] interval_set: optimize intersect_of insert operations Use the std::map insert method with hint iterator to optimize inserts. This increases performance by more than 3.5 times for large numbers of intervals. This will help performance especially in the PGPool::update method, where profiling data has shown that intersection operations are a hot spot. The following benchmark data is for 400000 intervals: 4 +-+--+----+----+----+----+----+----+----+----+--+-+ P + + + + + + + + ************* E | ******** | R 3.5 +-+ **** +-+ F | ****** | O | ** | R 3 +-+ **** +-+ M | *** | A | ** | N 2.5 +-+ * +-+ C | ** | E | * | 2 +-+ ** +-+ R | ** | A | ** | T 1.5 +** +-+ I |** | O +* + + + + + + + + + + 1 +*+--+----+----+----+----+----+----+----+----+--+-+ 0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 SET SIZE RATIO The above chart was generated using benchmark results from the following program: #include <iostream> #include <sys/timeb.h> #include "include/interval_set.h" int main(int argc, char *argv[]) { const int interval_count = std::stoi(argv[1]); const int interval_distance = 4; const int interval_size = 2; const int sample_count = 8; const int max_offset = interval_count * interval_distance; interval_set<int> a, b, intersection; for (int i = 0; i < max_offset; i+=interval_distance) { a.insert(i, interval_size); } for (int m = 1; m < 100; m++) { float ratio = 1 / float(m); for (int i = 0; i < max_offset; i+=interval_distance*m) { b.insert(i, interval_size); } struct timeb start, end; int ms = 0; for (int i = 0; i < sample_count; i++) { ftime(&start); intersection.intersection_of(a, b); ftime(&end); ms += (int) (1000.0 * (end.time - start.time) + (end.millitm - start.millitm)); intersection.clear(); } b.clear(); std::cout << ratio << "\t" << ms << std::endl << std::flush; } } Signed-off-by: Zac Medico --- src/include/interval_set.h | 7 +++++-- 1 file changed, 5 
insertions(+), 2 deletions(-) diff --git a/src/include/interval_set.h b/src/include/interval_set.h index b84d8187052d6..46e37de79928d 100644 --- a/src/include/interval_set.h +++ b/src/include/interval_set.h @@ -463,7 +463,8 @@ class interval_set { typename std::map<T,T>::const_iterator pa = a.m.begin(); typename std::map<T,T>::const_iterator pb = b.m.begin(); - + typename decltype(m)::iterator mi = m.begin(); + while (pa != a.m.end() && pb != b.m.end()) { // passing? if (pa->first + pa->second <= pb->first) @@ -473,7 +474,9 @@ class interval_set { T start = MAX(pa->first, pb->first); T en = MIN(pa->first+pa->second, pb->first+pb->second); assert(en > start); - insert(start, en-start); + typename decltype(m)::value_type i{start, en - start}; + mi = m.insert(mi, i); + _size += i.second; if (pa->first+pa->second > pb->first+pb->second) pb++; else From b6a035666c2765f8895ee9991348dbc025613ed7 Mon Sep 17 00:00:00 2001 From: Zac Medico Date: Sun, 27 Aug 2017 05:25:01 -0700 Subject: [PATCH 2/2] interval_set: optimize intersect_of for identical spans Optimize comparisons for identical spans of intervals. When this patch is combined with the previous map insert optimization, a benchmark using 400000 identical intervals shows a 7-fold performance improvement compared to the code without the patches. Signed-off-by: Zac Medico --- src/include/interval_set.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/include/interval_set.h b/src/include/interval_set.h index 46e37de79928d..e05eacae41efa 100644 --- a/src/include/interval_set.h +++ b/src/include/interval_set.h @@ -471,6 +471,17 @@ class interval_set { { pa++; continue; } if (pb->first + pb->second <= pa->first) { pb++; continue; } + + if (*pa == *pb) { + do { + mi = m.insert(mi, *pa); + _size += pa->second; + ++pa; + ++pb; + } while (pa != a.m.end() && pb != b.m.end() && *pa == *pb); + continue; + } + T start = MAX(pa->first, pb->first); T en = MIN(pa->first+pa->second, pb->first+pb->second); assert(en > start);