forked from envoyproxy/envoy
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
utility: Add performance annotation library (envoyproxy#2626)
This is broken out from envoyproxy#2615 as it likely deserves its own review. This provides a mechanism to annotate and measure the costs of functions that are data-dependent, e.g. regexes. This is step 3a-ish, in the plan to improve startup performance. This is another step toward addressing envoyproxy#2373 Adds perf annotation library that can be used to instrument code but disappear completely from generated code unless enabled with bazel --define=perf_annotation=enabled Produces tables in this format. Duration(us) # Calls Mean(ns) StdDev(ns) Min(ns) Max(ns) Category Description 4600 4 1150000 129099 1000000 1300000 alpha 1 200 1 200000 nan 200000 200000 gamma 2 87 3 29000 1000 28000 30000 beta 3 Instrumentation can be coded into the system but is turned off via compiler macros so there is zero cost in production. Risk Level: Low -- new utility library not used by anything yet. Release Notes: N/A Signed-off-by: Joshua Marantz <[email protected]>
- Loading branch information
Showing
12 changed files
with
560 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,151 @@ | ||
#ifndef ENVOY_PERF_ANNOTATION | ||
#define ENVOY_PERF_ANNOTATION | ||
#endif | ||
|
||
#include "common/common/perf_annotation.h" | ||
|
||
#include <unistd.h> | ||
|
||
#include <chrono> | ||
#include <iostream> | ||
#include <string> | ||
|
||
#include "common/common/utility.h" | ||
|
||
#include "absl/strings/str_cat.h" | ||
|
||
namespace Envoy { | ||
|
||
PerfOperation::PerfOperation() | ||
: start_time_(ProdMonotonicTimeSource::instance_.currentTime()), | ||
context_(PerfAnnotationContext::getOrCreate()) {} | ||
|
||
void PerfOperation::record(absl::string_view category, absl::string_view description) { | ||
const MonotonicTime end_time = ProdMonotonicTimeSource::instance_.currentTime(); | ||
const std::chrono::nanoseconds duration = | ||
std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time_); | ||
context_->record(duration, category, description); | ||
} | ||
|
||
// The ctor is explicitly declared private to encourage clients to use getOrCreate(), at | ||
// least for now. Given that it's declared it must be instantiated. It's not inlined | ||
// because the contructor is non-trivial due to the contained unordered_map. | ||
PerfAnnotationContext::PerfAnnotationContext() {} | ||
|
||
void PerfAnnotationContext::record(std::chrono::nanoseconds duration, absl::string_view category, | ||
absl::string_view description) { | ||
CategoryDescription key((std::string(category)), (std::string(description))); | ||
{ | ||
#if PERF_THREAD_SAFE | ||
std::unique_lock<std::mutex> lock(mutex_); | ||
#endif | ||
DurationStats& stats = duration_stats_map_[key]; | ||
stats.stddev_.update(static_cast<double>(duration.count())); | ||
if ((stats.stddev_.count() == 1) || (duration < stats.min_)) { | ||
stats.min_ = duration; | ||
} | ||
stats.max_ = std::max(stats.max_, duration); | ||
stats.total_ += duration; | ||
} | ||
} | ||
|
||
// TODO(jmarantz): Consider hooking up perf information-dump into admin console, if | ||
// we find a performance problem we want to annotate with a live server. | ||
void PerfAnnotationContext::dump() { std::cout << toString() << std::endl; } | ||
|
||
std::string PerfAnnotationContext::toString() { | ||
PerfAnnotationContext* context = getOrCreate(); | ||
std::string out; | ||
#if PERF_THREAD_SAFE | ||
std::unique_lock<std::mutex> lock(context->mutex_); | ||
#endif | ||
|
||
// The map is from category/description -> [duration, time]. Reverse-sort by duration. | ||
std::vector<const DurationStatsMap::value_type*> sorted_values; | ||
sorted_values.reserve(context->duration_stats_map_.size()); | ||
for (const auto& iter : context->duration_stats_map_) { | ||
sorted_values.push_back(&iter); | ||
} | ||
std::sort( | ||
sorted_values.begin(), sorted_values.end(), | ||
[](const DurationStatsMap::value_type* a, const DurationStatsMap::value_type* b) -> bool { | ||
const DurationStats& a_stats = a->second; | ||
const DurationStats& b_stats = b->second; | ||
return a_stats.total_ > b_stats.total_; | ||
}); | ||
|
||
// Organize the report so it lines up in columns. Note that the widest duration comes first, | ||
// though that may not be descending order of calls or per_call time, so we need two passes | ||
// to compute column widths. First collect the column headers and their widths. | ||
// | ||
// TODO(jmarantz): Add a mechanism for dumping to HTML for viewing results in web browser. | ||
static const char* headers[] = {"Duration(us)", "# Calls", "Mean(ns)", "StdDev(ns)", | ||
"Min(ns)", "Max(ns)", "Category", "Description"}; | ||
constexpr int num_columns = ARRAY_SIZE(headers); | ||
size_t widths[num_columns]; | ||
std::vector<std::string> columns[num_columns]; | ||
for (size_t i = 0; i < num_columns; ++i) { | ||
std::string column(headers[i]); | ||
widths[i] = column.size(); | ||
columns[i].emplace_back(column); | ||
} | ||
|
||
// Compute all the column strings and their max widths. | ||
for (const auto& p : sorted_values) { | ||
const DurationStats& stats = p->second; | ||
const auto microseconds_string = [](std::chrono::nanoseconds ns) -> std::string { | ||
return std::to_string(std::chrono::duration_cast<std::chrono::microseconds>(ns).count()); | ||
}; | ||
const auto nanoseconds_string = [](std::chrono::nanoseconds ns) -> std::string { | ||
return std::to_string(std::chrono::duration_cast<std::chrono::nanoseconds>(ns).count()); | ||
}; | ||
columns[0].push_back(microseconds_string(stats.total_)); | ||
const uint64_t count = stats.stddev_.count(); | ||
columns[1].push_back(std::to_string(count)); | ||
columns[2].push_back( | ||
(count == 0) | ||
? "NaN" | ||
: std::to_string( | ||
std::chrono::duration_cast<std::chrono::nanoseconds>(stats.total_).count() / | ||
count)); | ||
columns[3].push_back(fmt::format("{}", stats.stddev_.computeStandardDeviation())); | ||
columns[4].push_back(nanoseconds_string(stats.min_)); | ||
columns[5].push_back(nanoseconds_string(stats.max_)); | ||
const CategoryDescription& category_description = p->first; | ||
columns[6].push_back(category_description.first); | ||
columns[7].push_back(category_description.second); | ||
for (size_t i = 0; i < num_columns; ++i) { | ||
widths[i] = std::max(widths[i], columns[i].back().size()); | ||
} | ||
} | ||
|
||
// Create format-strings to right justify each column, e.g. {:>14} for a column of width 14. | ||
std::vector<std::string> formats; | ||
for (size_t i = 0; i < num_columns; ++i) { | ||
formats.push_back(absl::StrCat("{:>", widths[i], "}")); | ||
} | ||
|
||
// Write out the table. | ||
for (size_t row = 0; row < columns[0].size(); ++row) { | ||
for (size_t i = 0; i < num_columns; ++i) { | ||
const std::string& str = columns[i][row]; | ||
absl::StrAppend(&out, fmt::format(formats[i], str), (i != (num_columns - 1) ? " " : "\n")); | ||
} | ||
} | ||
return out; | ||
} | ||
|
||
void PerfAnnotationContext::clear() { | ||
PerfAnnotationContext* context = getOrCreate(); | ||
#if PERF_THREAD_SAFE | ||
std::unique_lock<std::mutex> lock(context->mutex_); | ||
#endif | ||
context->duration_stats_map_.clear(); | ||
} | ||
|
||
PerfAnnotationContext* PerfAnnotationContext::getOrCreate() { | ||
static PerfAnnotationContext* context = new PerfAnnotationContext(); | ||
return context; | ||
} | ||
|
||
} // namespace Envoy |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,187 @@ | ||
#pragma once | ||
|
||
#ifdef ENVOY_PERF_ANNOTATION | ||
|
||
#include <chrono> | ||
#include <cstdint> | ||
#include <mutex> | ||
#include <unordered_map> | ||
|
||
#include "common/common/utility.h" | ||
|
||
#include "absl/strings/string_view.h" | ||
|
||
// Performance Annotation system, enabled with | ||
// bazel --define=perf_annotation=enabled ... | ||
// or, in individual .cc files: | ||
// #define ENVOY_PERF_ANNOTATION | ||
// In the absense of such directives, the support classes are built and tested. | ||
// However, the macros for instrumenting code for performance analysis will expand | ||
// to nothing. | ||
// | ||
// See also: https://github.com/LLNL/Caliper -- it may be worth integrating with | ||
// that for added functionality, partiicularly around loops. | ||
// | ||
// See also, for a much more comprehensive study in performance annotation: | ||
// https://labs.vmware.com/vmtj/methodology-for-performance-analysis-of-vmware-vsphere-under-tier-1-applications | ||
// https://dl.acm.org/citation.cfm?id=1899945&dl=ACM&coll=DL | ||
|
||
/** | ||
* Initiates a performance operation, storing its state in perf_var. A perf_var | ||
* can then be reported multiple times. | ||
*/ | ||
#define PERF_OPERATION(perf_var) Envoy::PerfOperation(perf_var) | ||
|
||
/** | ||
* Records performance data initiated with PERF_OPERATION. The category and description | ||
* are joined with in the library, but only if perf is enabled. This way, any concatenation | ||
* overhead is skipped when perf-annotation is disabled. | ||
*/ | ||
#define PERF_RECORD(perf, category, description) \ | ||
do { \ | ||
perf.record(category, description); \ | ||
} while (false) | ||
|
||
/** | ||
* Dumps recorded performance data to stdout. Expands to nothing if not enabled. | ||
*/ | ||
#define PERF_DUMP() Envoy::PerfAnnotationContext::dump() | ||
|
||
/** | ||
* Returns the aggregated performance data as a formatted multi-line string, showing a | ||
* formatted table of values. Returns "" if perf-annotation is disabled. | ||
*/ | ||
#define PERF_TO_STRING() Envoy::PerfAnnotationContext::toString() | ||
|
||
/** | ||
* Clears all performance data. | ||
*/ | ||
#define PERF_CLEAR() Envoy::PerfAnnotationContext::clear() | ||
|
||
/** | ||
* Controls whether performacne collection and reporting is thread safe. For now, | ||
* leaving this enabled for predictability across multiiple applications, on the assumption | ||
* that an uncontended mutex lock has vanishingly small cost. In the future we may try | ||
* to make this system thread-unsafe if mutext contention disturbs the metrics. | ||
*/ | ||
#define PERF_THREAD_SAFE true | ||
|
||
namespace Envoy { | ||
|
||
/** | ||
* Defines a context for collecting performance data. Note that this class is | ||
* fully declared and defined even if ENVOY_PERF_AUTOMATION is off. We depend on | ||
* the macros to disable performance collection for production. | ||
*/ | ||
class PerfAnnotationContext { | ||
public: | ||
/** | ||
* Records time consumed by a category and description, which are shown as separate | ||
* columns in the generated output table. | ||
* | ||
* @param duration the duration. | ||
* @param category the name of a category for the recording. | ||
* @param description the name of description for the recording. | ||
*/ | ||
void record(std::chrono::nanoseconds duration, absl::string_view category, | ||
absl::string_view description); | ||
|
||
/** | ||
* Renders the aggregated statistics as a string. | ||
* @return std::string the performance data as a formatted string. | ||
*/ | ||
static std::string toString(); | ||
|
||
/** | ||
* Dumps aggregated statistics (if any) to stdout. | ||
*/ | ||
static void dump(); | ||
|
||
/** | ||
* Thread-safe lazy-initialization of a PerfAnnotationContext on first use. | ||
* @return PerfAnnotationContext* the context. | ||
*/ | ||
static PerfAnnotationContext* getOrCreate(); | ||
|
||
/** | ||
* Clears out all aggregated statistics. | ||
*/ | ||
static void clear(); | ||
|
||
private: | ||
/** | ||
* PerfAnnotationContext construction should be done via getOrCreate(). | ||
*/ | ||
PerfAnnotationContext(); | ||
|
||
using CategoryDescription = std::pair<std::string, std::string>; | ||
|
||
struct DurationStats { | ||
std::chrono::nanoseconds total_{0}; | ||
std::chrono::nanoseconds min_{0}; | ||
std::chrono::nanoseconds max_{0}; | ||
WelfordStandardDeviation stddev_; | ||
}; | ||
|
||
struct Hash { | ||
size_t operator()(const CategoryDescription& a) const { | ||
return std::hash<std::string>()(a.first) + 13 * std::hash<std::string>()(a.second); | ||
} | ||
}; | ||
|
||
using DurationStatsMap = std::unordered_map<CategoryDescription, DurationStats, Hash>; | ||
|
||
DurationStatsMap duration_stats_map_; // Maps {category, description} to DurationStats. | ||
#if PERF_THREAD_SAFE | ||
std::mutex mutex_; | ||
#endif | ||
}; | ||
|
||
/** | ||
* Represents an operation for reporting timing to the perf system. Usage: | ||
* | ||
* f() { | ||
* PerfOperation perf_op; | ||
* computeIntensiveWork(); | ||
* perf_op.record("category", "description"); | ||
* } | ||
*/ | ||
class PerfOperation { | ||
public: | ||
PerfOperation(); | ||
|
||
/** | ||
* Report an event relative to the operation in progress. Note report can be called | ||
* multiple times on a single PerfOperation, with distinct category/description combinations. | ||
* @param category the name of a category for the recording. | ||
* @param description the name of description for the recording. | ||
*/ | ||
void record(absl::string_view category, absl::string_view description); | ||
|
||
private: | ||
MonotonicTime start_time_; | ||
PerfAnnotationContext* context_; | ||
}; | ||
|
||
} // namespace Envoy | ||
|
||
#else | ||
|
||
// Macros that expand to nothing when performance collection is disabled. These are contrived to | ||
// work syntactically as a C++ statement (e.g. if (foo) PERF_RECORD(...) else PERF_RECORD(...)). | ||
|
||
#define PERF_OPERATION(perf_var) \ | ||
do { \ | ||
} while (false) | ||
#define PERF_RECORD(perf, category, description) \ | ||
do { \ | ||
} while (false) | ||
#define PERF_DUMP() \ | ||
do { \ | ||
} while (false) | ||
#define PERF_TO_STRING() "" | ||
#define PERF_CLEAR() \ | ||
do { \ | ||
} while (false) | ||
|
||
#endif |
Oops, something went wrong.