diff --git a/CHANGELOG.md b/CHANGELOG.md index ffe14ab9c2a..37af58df165 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,7 +22,7 @@ * The following metric is exposed to tell how many requests have been rejected: * `cortex_discarded_requests_total` * [ENHANCEMENT] Store-gateway: Add the experimental ability to run requests in a dedicated OS thread pool. This feature can be configured using `-store-gateway.thread-pool-size` and is disabled by default. Replaces the ability to run index header operations in a dedicated thread pool. #1660 #1812 -* [ENHANCEMENT] Improved error messages to make them easier to understand; each now have a unique, global identifier that you can use to look up in the runbooks for more information. #1907 #1919 #1888 #1939 #1984 #2009 +* [ENHANCEMENT] Improved error messages to make them easier to understand; each now have a unique, global identifier that you can use to look up in the runbooks for more information. #1907 #1919 #1888 #1939 #1984 #2009 #2104 * [ENHANCEMENT] Memberlist KV: incoming messages are now processed on per-key goroutine. This may reduce loss of "maintanance" packets in busy memberlist installations, but use more CPU. New `memberlist_client_received_broadcasts_dropped_total` counter tracks number of dropped per-key messages. #1912 * [ENHANCEMENT] Blocks Storage, Alertmanager, Ruler: add support a prefix to the bucket store (`*_storage.storage_prefix`). This enables using the same bucket for the three components. #1686 #1951 * [ENHANCEMENT] Upgrade Docker base images to `alpine:3.16.0`. #2028 diff --git a/pkg/distributor/distributor.go b/pkg/distributor/distributor.go index 1e26b84ad58..7d81f9714bc 100644 --- a/pkg/distributor/distributor.go +++ b/pkg/distributor/distributor.go @@ -598,7 +598,7 @@ func (d *Distributor) PushWithCleanup(ctx context.Context, req *mimirpb.WriteReq // Return a 429 here to tell the client it is going too fast. // Client may discard the data or slow down and re-send. // Prometheus v2.26 added a remote-write option 'retry_on_http_429'. - return nil, httpgrpc.Errorf(http.StatusTooManyRequests, validation.NewRequestRateLimitedError(d.requestRateLimiter.Limit(now, userID), d.requestRateLimiter.Burst(now, userID)).Error()) + return nil, httpgrpc.Errorf(http.StatusTooManyRequests, validation.NewRequestRateLimitedError(d.limits.RequestRate(userID), d.limits.RequestBurstSize(userID)).Error()) } d.activeUsers.UpdateUserTimestamp(userID, now) @@ -795,7 +795,7 @@ func (d *Distributor) PushWithCleanup(ctx context.Context, req *mimirpb.WriteReq // Return a 429 here to tell the client it is going too fast. // Client may discard the data or slow down and re-send. // Prometheus v2.26 added a remote-write option 'retry_on_http_429'. - return nil, httpgrpc.Errorf(http.StatusTooManyRequests, validation.NewIngestionRateLimitedError(d.ingestionRateLimiter.Limit(now, userID), d.ingestionRateLimiter.Burst(now, userID), validatedSamples, validatedExemplars, len(validatedMetadata)).Error()) + return nil, httpgrpc.Errorf(http.StatusTooManyRequests, validation.NewIngestionRateLimitedError(d.limits.IngestionRate(userID), d.limits.IngestionBurstSize(userID)).Error()) } // totalN included samples and metadata. Ingester follows this pattern when computing its ingestion rate. diff --git a/pkg/distributor/distributor_test.go b/pkg/distributor/distributor_test.go index 459bcd7fa56..d705b066a68 100644 --- a/pkg/distributor/distributor_test.go +++ b/pkg/distributor/distributor_test.go @@ -184,7 +184,7 @@ func TestDistributor_Push(t *testing.T) { happyIngesters: 3, samples: samplesIn{num: 25, startTimestampMs: 123456789000}, metadata: 5, - expectedError: httpgrpc.Errorf(http.StatusTooManyRequests, validation.NewIngestionRateLimitedError(20, 20, 25, 0, 5).Error()), + expectedError: httpgrpc.Errorf(http.StatusTooManyRequests, validation.NewIngestionRateLimitedError(20, 20).Error()), metricNames: []string{lastSeenTimestamp}, expectedMetrics: ` # HELP cortex_distributor_latest_seen_sample_timestamp_seconds Unix timestamp of latest received sample per user. @@ -431,7 +431,7 @@ func TestDistributor_PushRequestRateLimiter(t *testing.T) { pushes: []testPush{ {expectedError: nil}, {expectedError: nil}, - {expectedError: httpgrpc.Errorf(http.StatusTooManyRequests, validation.NewRequestRateLimitedError(2, 2).Error())}, + {expectedError: httpgrpc.Errorf(http.StatusTooManyRequests, validation.NewRequestRateLimitedError(4, 2).Error())}, }, }, "request limit is disabled when set to 0": { @@ -452,7 +452,7 @@ func TestDistributor_PushRequestRateLimiter(t *testing.T) { {expectedError: nil}, {expectedError: nil}, {expectedError: nil}, - {expectedError: httpgrpc.Errorf(http.StatusTooManyRequests, validation.NewRequestRateLimitedError(1, 3).Error())}, + {expectedError: httpgrpc.Errorf(http.StatusTooManyRequests, validation.NewRequestRateLimitedError(2, 3).Error())}, }, }, } @@ -512,10 +512,10 @@ func TestDistributor_PushIngestionRateLimiter(t *testing.T) { pushes: []testPush{ {samples: 2, expectedError: nil}, {samples: 1, expectedError: nil}, - {samples: 2, metadata: 1, expectedError: httpgrpc.Errorf(http.StatusTooManyRequests, validation.NewIngestionRateLimitedError(5, 5, 2, 0, 1).Error())}, + {samples: 2, metadata: 1, expectedError: httpgrpc.Errorf(http.StatusTooManyRequests, validation.NewIngestionRateLimitedError(10, 5).Error())}, {samples: 2, expectedError: nil}, - {samples: 1, expectedError: httpgrpc.Errorf(http.StatusTooManyRequests, validation.NewIngestionRateLimitedError(5, 5, 1, 0, 0).Error())}, - {metadata: 1, expectedError: httpgrpc.Errorf(http.StatusTooManyRequests, validation.NewIngestionRateLimitedError(5, 5, 0, 0, 1).Error())}, + {samples: 1, expectedError: httpgrpc.Errorf(http.StatusTooManyRequests, validation.NewIngestionRateLimitedError(10, 5).Error())}, + {metadata: 1, expectedError: httpgrpc.Errorf(http.StatusTooManyRequests, validation.NewIngestionRateLimitedError(10, 5).Error())}, }, }, "for each distributor, set an ingestion burst limit.": { @@ -525,10 +525,10 @@ func TestDistributor_PushIngestionRateLimiter(t *testing.T) { pushes: []testPush{ {samples: 10, expectedError: nil}, {samples: 5, expectedError: nil}, - {samples: 5, metadata: 1, expectedError: httpgrpc.Errorf(http.StatusTooManyRequests, validation.NewIngestionRateLimitedError(5, 20, 5, 0, 1).Error())}, + {samples: 5, metadata: 1, expectedError: httpgrpc.Errorf(http.StatusTooManyRequests, validation.NewIngestionRateLimitedError(10, 20).Error())}, {samples: 5, expectedError: nil}, - {samples: 1, expectedError: httpgrpc.Errorf(http.StatusTooManyRequests, validation.NewIngestionRateLimitedError(5, 20, 1, 0, 0).Error())}, - {metadata: 1, expectedError: httpgrpc.Errorf(http.StatusTooManyRequests, validation.NewIngestionRateLimitedError(5, 20, 0, 0, 1).Error())}, + {samples: 1, expectedError: httpgrpc.Errorf(http.StatusTooManyRequests, validation.NewIngestionRateLimitedError(10, 20).Error())}, + {metadata: 1, expectedError: httpgrpc.Errorf(http.StatusTooManyRequests, validation.NewIngestionRateLimitedError(10, 20).Error())}, }, }, } diff --git a/pkg/util/validation/errors.go b/pkg/util/validation/errors.go index 43b06385c16..040f2afad6a 100644 --- a/pkg/util/validation/errors.go +++ b/pkg/util/validation/errors.go @@ -277,13 +277,13 @@ func NewMaxQueryLengthError(actualQueryLen, maxQueryLength time.Duration) LimitE func NewRequestRateLimitedError(limit float64, burst int) LimitError { return LimitError(globalerror.RequestRateLimited.MessageWithLimitConfig( - fmt.Sprintf("the request has been rejected because the tenant exceeded the request rate limit, set to %v req/s with a maximum allowed burst of %d", limit, burst), + fmt.Sprintf("the request has been rejected because the tenant exceeded the request rate limit, set to %v requests/s across all distributors with a maximum allowed burst of %d", limit, burst), requestRateFlag, requestBurstSizeFlag)) } -func NewIngestionRateLimitedError(limit float64, burst, numSamples, numExemplars, numMetadata int) LimitError { +func NewIngestionRateLimitedError(limit float64, burst int) LimitError { return LimitError(globalerror.IngestionRateLimited.MessageWithLimitConfig( - fmt.Sprintf("the request has been rejected because the tenant exceeded the ingestion rate limit, set to %v items/s with a maximum allowed burst of %d, while adding %d samples, %d exemplars and %d metadata", limit, burst, numSamples, numExemplars, numMetadata), + fmt.Sprintf("the request has been rejected because the tenant exceeded the ingestion rate limit, set to %v items/s with a maximum allowed burst of %d. This limit is applied on the total number of samples, exemplars and metadata received across all distributors.", limit, burst), ingestionRateFlag, ingestionBurstSizeFlag)) }