Skip to content

Commit

Permalink
prometheus.cc: Optimize reporting
Browse files Browse the repository at this point in the history
This patch adds multiple functionality to Prometheus reporting:
1. Add summary reporting. Summaries are used in seastar to report aggregated
   percentile information (for example p95 and p99)
   The main usage is to report per-shard summary of a latency
   histograms.
2. Support aggregated metrics. With an aggregated metrics,
   Prometheus would aggregate multiple metrics based on labels and
   would report the result. Usually this would be for reporting a single
   latency histogram per node instead of per shard. But it could be used
   for counters and gauge as well.
3. Skip empty counters, histograms and summaries. It's a common practice to
   register lots of metrics even if they are not being used.
   Histograms have a huge effect on performance, so not reporting an empty
   histogram is a great performance boost both for the application and
   for the Prometheus server.
   This is true for Summaries and Counters as well, marking a metrics
   with skip_when_empty would mean Prometheus will not report those
   metrics.
4. As an optimization, the stringstream that is used per metric is
   reused and clear insted of recreated.

Signed-off-by: Amnon Heiman <amnon@scylladb.com>
  • Loading branch information
amnonh authored and Vlad Lazar committed Jun 24, 2022
1 parent 9c84d52 commit 3f35820
Showing 1 changed file with 136 additions and 37 deletions.
173 changes: 136 additions & 37 deletions src/core/prometheus.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ static std::string to_str(seastar::metrics::impl::data_type dt) {
return "counter";
case seastar::metrics::impl::data_type::HISTOGRAM:
return "histogram";
case seastar::metrics::impl::data_type::SUMMARY:
return "summary";
}
return "untyped";
}
Expand All @@ -63,6 +65,7 @@ static std::string to_str(const seastar::metrics::impl::metric_value& v) {
case seastar::metrics::impl::data_type::COUNTER:
return std::to_string(v.i());
case seastar::metrics::impl::data_type::HISTOGRAM:
case seastar::metrics::impl::data_type::SUMMARY:
break;
}
return ""; // we should never get here but it makes the compiler happy
Expand Down Expand Up @@ -478,63 +481,159 @@ metric_family_range get_range(const metrics_families_per_shard& mf, const sstrin

}

void write_histogram(std::stringstream& s, const config& ctx, const sstring& name, const seastar::metrics::histogram& h, std::map<sstring, sstring> labels) {
add_name(s, name + "_sum", labels, ctx);
s << h.sample_sum;
s << "\n";
add_name(s, name + "_count", labels, ctx);
s << h.sample_count;
s << "\n";

auto& le = labels["le"];
auto bucket = name + "_bucket";
for (auto i : h.buckets) {
le = std::to_string(i.upper_bound);
add_name(s, bucket, labels, ctx);
s << i.count;
s << "\n";
}
labels["le"] = "+Inf";
add_name(s, bucket, labels, ctx);
s << h.sample_count;
s << "\n";
}

void write_summary(std::stringstream& s, const config& ctx, const sstring& name, const seastar::metrics::histogram& h, std::map<sstring, sstring> labels) {
if (h.sample_sum) {
add_name(s, name + "_sum", labels, ctx);
s << h.sample_sum;
s << "\n";
}
if (h.sample_count) {
add_name(s, name + "_count", labels, ctx);
s << h.sample_count;
s << "\n";
}
auto& le = labels["quantile"];
for (auto i : h.buckets) {
le = std::to_string(i.upper_bound);
add_name(s, name, labels, ctx);
s << i.count;
s << "\n";
}
}
/*!
* \brief a helper class to aggregate metrics over labels
*
* This class sum multiple metrics based on a list of labels.
* It returns one or more metrics each aggregated by the aggregate_by labels.
*
* To use it, you define what labels it should aggregate by and then pass to
* it metrics with their labels.
* For example if a metrics has a 'shard' and 'name' labels and you aggregate by 'shard'
* it would return a map of metrics each with only the 'name' label
*
*/
class metric_aggregate_by_labels {
std::vector<std::string> _labels_to_aggregate_by;
std::unordered_map<std::map<sstring, sstring>, seastar::metrics::impl::metric_value> _values;
public:
metric_aggregate_by_labels(std::vector<std::string> labels) : _labels_to_aggregate_by(std::move(labels)) {
}
/*!
* \brief add a metric
*
* This method gets a metric and its labels and add it to the aggregated metric.
* For example, if a metric has the labels {'shard':'0', 'name':'myhist'} and we are aggregating
* over 'shard'
* The metric would be added to the aggregated metric with labels {'name':'myhist'}.
*
*/
void add(const seastar::metrics::impl::metric_value& m, std::map<sstring, sstring> labels) {
for (auto&& l : _labels_to_aggregate_by) {
labels.erase(l);
}
if (_values.find(labels) == _values.end()) {
_values.emplace(labels, m);
} else {
_values[labels] += m;
}
}
const std::unordered_map<std::map<sstring, sstring>, seastar::metrics::impl::metric_value>& get_values() const {
return _values;
}
bool empty() const {
return _values.empty();
}
};

std::string get_value_as_string(std::stringstream& s, const mi::metric_value& value) {
std::string value_str;
try {
value_str = to_str(value);
} catch (const std::range_error& e) {
seastar_logger.debug("prometheus: write_text_representation: {}: {}", s.str(), e.what());
value_str = "NaN";
} catch (...) {
auto ex = std::current_exception();
// print this error as it's ignored later on by `connection::start_response`
seastar_logger.error("prometheus: write_text_representation: {}: {}", s.str(), ex);
std::rethrow_exception(std::move(ex));
}
return value_str;
}

future<> write_text_representation(output_stream<char>& out, const config& ctx, const metric_family_range& m) {
return seastar::async([&ctx, &out, &m] () mutable {
bool found = false;
std::stringstream s;
for (metric_family& metric_family : m) {
auto name = ctx.prefix + "_" + metric_family.name();
found = false;
metric_family.foreach_metric([&out, &ctx, &found, &name, &metric_family](auto value, auto value_info) mutable {
std::stringstream s;
metric_aggregate_by_labels aggregated_values(metric_family.metadata().aggregate_labels);
bool should_aggregate = !metric_family.metadata().aggregate_labels.empty();
metric_family.foreach_metric([&s, &out, &ctx, &found, &name, &metric_family, &aggregated_values, should_aggregate](auto value, auto value_info) mutable {
s.clear();
s.str("");
if (value_info.skip_when_empty && value.is_empty()) {
return;
}
if (!found) {
if (metric_family.metadata().d.str() != "") {
s << "# HELP " << name << " " << metric_family.metadata().d.str() << "\n";
}
s << "# TYPE " << name << " " << to_str(metric_family.metadata().type) << "\n";
found = true;
}
if (value.type() == mi::data_type::HISTOGRAM) {
auto&& h = value.get_histogram();
std::map<sstring, sstring> labels = value_info.id.labels();
add_name(s, name + "_sum", labels, ctx);
s << h.sample_sum;
s << "\n";
add_name(s, name + "_count", labels, ctx);
s << h.sample_count;
s << "\n";

auto& le = labels["le"];
auto bucket = name + "_bucket";
for (auto i : h.buckets) {
le = std::to_string(i.upper_bound);
add_name(s, bucket, labels, ctx);
s << i.count;
s << "\n";
}
labels["le"] = "+Inf";
add_name(s, bucket, labels, ctx);
s << h.sample_count;
s << "\n";
if (should_aggregate) {
aggregated_values.add(value, value_info.id.labels());
} else if (value.type() == mi::data_type::SUMMARY) {
write_summary(s, ctx, name, value.get_histogram(), value_info.id.labels());
} else if (value.type() == mi::data_type::HISTOGRAM) {
write_histogram(s, ctx, name, value.get_histogram(), value_info.id.labels());
} else {
add_name(s, name, value_info.id.labels(), ctx);
std::string value_str;
try {
value_str = to_str(value);
} catch (const std::range_error& e) {
seastar_logger.debug("prometheus: write_text_representation: {}: {}", s.str(), e.what());
value_str = "NaN";
} catch (...) {
auto ex = std::current_exception();
// print this error as it's ignored later on by `connection::start_response`
seastar_logger.error("prometheus: write_text_representation: {}: {}", s.str(), ex);
std::rethrow_exception(std::move(ex));
}
s << value_str;
s << get_value_as_string(s, value);
s << "\n";
}
out.write(s.str()).get();
thread::maybe_yield();
});
if (!aggregated_values.empty()) {
for (auto&& h : aggregated_values.get_values()) {
s.clear();
s.str("");
if (h.second.type() == mi::data_type::HISTOGRAM) {
write_histogram(s, ctx, name, h.second.get_histogram(), h.first);
} else {
add_name(s, name, h.first, ctx);
s << get_value_as_string(s, h.second);
s << "\n";
}
out.write(s.str()).get();
thread::maybe_yield();
}
}
}
});
}
Expand Down

0 comments on commit 3f35820

Please sign in to comment.