Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Aggregate metrics to reduce cardinality #5166

Merged
merged 10 commits into from
Jul 4, 2022
44 changes: 31 additions & 13 deletions src/v/raft/probe.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,77 +33,95 @@ probe::create_metric_labels(const model::ntp& ntp) {
/// Registers this probe's raft counters in the metrics group named by
/// `prometheus_sanitize::metrics_name("raft")`.
///
/// Every counter is tagged with the labels produced by
/// `create_metric_labels(ntp)` and reports the current value of one of this
/// probe's member counters through a getter that captures `this`.
///
/// When the `aggregate_metrics` configuration property is set, each counter
/// is additionally asked to aggregate over the shard label and the
/// "partition" label; otherwise an empty aggregation list is passed.
///
/// NOTE(review): the getters capture `this`, so the probe must outlive the
/// registration held by `_metrics` -- presumably guaranteed because
/// `_metrics` is a member of the probe; confirm against the class definition.
///
/// @param ntp identifies the partition the labels are derived from.
void probe::setup_metrics(const model::ntp& ntp) {
    namespace sm = ss::metrics;
    auto labels = create_metric_labels(ntp);

    // Labels to aggregate over. An empty vector is passed when aggregation is
    // disabled (presumably a no-op for `.aggregate()` -- confirm in Seastar).
    auto aggregate_labels
      = config::shard_local_cfg().aggregate_metrics()
          ? std::vector<sm::label>{sm::shard_label, sm::label("partition")}
          : std::vector<sm::label>{};

    _metrics.add_group(
      prometheus_sanitize::metrics_name("raft"),
      {sm::make_counter(
         "received_vote_requests",
         [this] { return _vote_requests; },
         sm::description("Number of vote requests received"),
         labels)
         .aggregate(aggregate_labels),
       sm::make_counter(
         "received_append_requests",
         [this] { return _append_requests; },
         sm::description("Number of append requests received"),
         labels)
         .aggregate(aggregate_labels),
       sm::make_counter(
         "sent_vote_requests",
         [this] { return _vote_requests_sent; },
         sm::description("Number of vote requests sent"),
         labels)
         .aggregate(aggregate_labels),
       sm::make_counter(
         "replicate_ack_all_requests",
         [this] { return _replicate_requests_ack_all; },
         sm::description(
           "Number of replicate requests with quorum ack consistency"),
         labels)
         .aggregate(aggregate_labels),
       sm::make_counter(
         "replicate_ack_leader_requests",
         [this] { return _replicate_requests_ack_leader; },
         sm::description(
           "Number of replicate requests with leader ack consistency"),
         labels)
         .aggregate(aggregate_labels),
       sm::make_counter(
         "replicate_ack_none_requests",
         [this] { return _replicate_requests_ack_none; },
         sm::description(
           "Number of replicate requests with no ack consistency"),
         labels)
         .aggregate(aggregate_labels),
       sm::make_counter(
         "done_replicate_requests",
         [this] { return _replicate_requests_done; },
         sm::description("Number of finished replicate requests"),
         labels)
         .aggregate(aggregate_labels),
       sm::make_counter(
         "log_flushes",
         [this] { return _log_flushes; },
         sm::description("Number of log flushes"),
         labels)
         .aggregate(aggregate_labels),
       sm::make_counter(
         "log_truncations",
         [this] { return _log_truncations; },
         sm::description("Number of log truncations"),
         labels)
         .aggregate(aggregate_labels),
       sm::make_counter(
         "leadership_changes",
         [this] { return _leadership_changes; },
         sm::description("Number of leadership changes"),
         labels)
         .aggregate(aggregate_labels),
       sm::make_counter(
         "replicate_request_errors",
         [this] { return _replicate_request_error; },
         sm::description("Number of failed replicate requests"),
         labels)
         .aggregate(aggregate_labels),
       sm::make_counter(
         "heartbeat_requests_errors",
         [this] { return _heartbeat_request_error; },
         sm::description("Number of failed heartbeat requests"),
         labels)
         .aggregate(aggregate_labels),
       sm::make_counter(
         "recovery_requests_errors",
         [this] { return _recovery_request_error; },
         sm::description("Number of failed recovery requests"),
         labels)
         .aggregate(aggregate_labels)});
}

} // namespace raft