Skip to content

Commit

Permalink
query-scheduler: instrument number of cancelled requests (#3696)
Browse files Browse the repository at this point in the history
To help determine the impact of issue 3695, I am adding a counter to
measure the number of requests that end up being discarded because they
are cancelled but still occupy capacity in the queue.
  • Loading branch information
dimitarvdimitrov committed Dec 12, 2022
1 parent 8db451c commit 520ae78
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
* [ENHANCEMENT] Distributor: remove labels with empty values #2439
* [ENHANCEMENT] Query-frontend: track query HTTP requests in the Activity Tracker. #3561
* [ENHANCEMENT] Store-gateway: Add experimental alternate implementation of index-header reader that does not use mmap. This reader is expected to improve stability of the store-gateway. This implementation can be enabled with the flag `-blocks-storage.bucket-store.index-header.stream-reader-enabled`. #3639
* [ENHANCEMENT] Query-scheduler: add `cortex_query_scheduler_cancelled_requests_total` metric to track the number of requests that are already cancelled when dequeued. #3696
* [BUGFIX] Log the names of services that are not yet running rather than `unsupported value type` when calling `/ready` and some services are not running. #3625
* [BUGFIX] Alertmanager: Fix template spurious deletion with relative data dir. #3604

Expand Down
7 changes: 7 additions & 0 deletions pkg/scheduler/scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ type Scheduler struct {
// Metrics.
queueLength *prometheus.GaugeVec
discardedRequests *prometheus.CounterVec
cancelledRequests *prometheus.CounterVec
connectedQuerierClients prometheus.GaugeFunc
connectedFrontendClients prometheus.GaugeFunc
queueDuration prometheus.Histogram
Expand Down Expand Up @@ -125,6 +126,10 @@ func NewScheduler(cfg Config, limits Limits, log log.Logger, registerer promethe
Help: "Number of queries in the queue.",
}, []string{"user"})

s.cancelledRequests = promauto.With(registerer).NewCounterVec(prometheus.CounterOpts{
Name: "cortex_query_scheduler_cancelled_requests_total",
Help: "Total number of query requests that were cancelled after enqueuing.",
}, []string{"user"})
s.discardedRequests = promauto.With(registerer).NewCounterVec(prometheus.CounterOpts{
Name: "cortex_query_scheduler_discarded_requests_total",
Help: "Total number of query requests discarded.",
Expand Down Expand Up @@ -462,6 +467,7 @@ func (s *Scheduler) forwardRequestToQuerier(querier schedulerpb.SchedulerForQuer
// If the upstream request is cancelled (e.g. the frontend issued CANCEL or closed the connection),
// we need to cancel the downstream request. The only way to do that is to close the stream (by returning an error here).
// The querier expects these semantics.
s.cancelledRequests.WithLabelValues(req.userID).Inc()
return req.ctx.Err()

case err := <-errCh:
Expand Down Expand Up @@ -565,6 +571,7 @@ func (s *Scheduler) stopping(_ error) error {
// cleanupMetricsForInactiveUser removes the series labelled with the given
// user from every per-user metric vector owned by the scheduler: the queue
// length gauge and the discarded and cancelled request counters.
func (s *Scheduler) cleanupMetricsForInactiveUser(user string) {
	// Both GaugeVec and CounterVec expose DeleteLabelValues, so the vectors
	// can be handled uniformly. The order matches the field declarations.
	perUserVecs := []interface {
		DeleteLabelValues(lvs ...string) bool
	}{
		s.queueLength,
		s.discardedRequests,
		s.cancelledRequests,
	}

	for _, vec := range perUserVecs {
		// The boolean result (whether a series was actually removed) is
		// intentionally ignored: a missing series is not an error here.
		vec.DeleteLabelValues(user)
	}
}

func (s *Scheduler) getConnectedFrontendClientsMetric() float64 {
Expand Down

0 comments on commit 520ae78

Please sign in to comment.