Query Sample Statistics #4708

Merged (7 commits, Apr 7, 2022)
CHANGELOG.md (1 addition, 0 deletions)

@@ -7,6 +7,7 @@
* [CHANGE] Distributor: Apply `max_fetched_series_per_query` limit for `/series` API. #4683
* [FEATURE] Ruler: Add `external_labels` option to tag all alerts with a given set of labels.
* [FEATURE] Compactor: Add `-compactor.skip-blocks-with-out-of-order-chunks-enabled` configuration to mark blocks containing index with out-of-order chunks for no compact instead of halting the compaction
* [FEATURE] Querier/Query-Frontend: Add `-querier.per-step-stats-enabled` and `-frontend.cache-queryable-samples-stats` configurations to enable query sample statistics

## 1.12.0 in progress

docs/blocks-storage/querier.md (4 additions, 0 deletions)

@@ -136,6 +136,10 @@ querier:
# CLI flag: -querier.at-modifier-enabled
[at_modifier_enabled: <boolean> | default = false]

# Enable returning samples stats per step in query response.
# CLI flag: -querier.per-step-stats-enabled
[per_step_stats_enabled: <boolean> | default = false]

# The time after which a metric should be queried from storage and not just
# ingesters. 0 means all queries are sent to store. When running the blocks
# storage, if this option is enabled, the time range of the query sent to the
docs/configuration/config-file-reference.md (8 additions, 0 deletions)

@@ -889,6 +889,10 @@ The `querier_config` configures the Cortex querier.
# CLI flag: -querier.at-modifier-enabled
[at_modifier_enabled: <boolean> | default = false]

# Enable returning samples stats per step in query response.
# CLI flag: -querier.per-step-stats-enabled
[per_step_stats_enabled: <boolean> | default = false]

# The time after which a metric should be queried from storage and not just
# ingesters. 0 means all queries are sent to store. When running the blocks
# storage, if this option is enabled, the time range of the query sent to the
@@ -1153,6 +1157,10 @@ results_cache:
# CLI flag: -frontend.compression
[compression: <string> | default = ""]

# Cache queryable samples statistics in the results cache.
# CLI flag: -frontend.cache-queryable-samples-stats
[cache_queryable_samples_stats: <boolean> | default = false]

# Cache query results.
# CLI flag: -querier.cache-results
[cache_results: <boolean> | default = false]
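For context, the two new options work together: the frontend can only cache queryable-samples stats that the querier actually computes. A minimal config sketch enabling both (an illustration only; surrounding sections are omitted, and key names are taken from the docs diffs above):

# Sketch only: enable per-step stats in the querier and cache them in the
# query-frontend results cache.
querier:
  per_step_stats_enabled: true

query_range:
  results_cache:
    cache_queryable_samples_stats: true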
pkg/cortex/cortex.go (1 addition, 1 deletion)

@@ -224,7 +224,7 @@ func (c *Config) Validate(log log.Logger) error {
if err := c.Worker.Validate(log); err != nil {
return errors.Wrap(err, "invalid frontend_worker config")
}
- if err := c.QueryRange.Validate(); err != nil {
+ if err := c.QueryRange.Validate(c.Querier); err != nil {
return errors.Wrap(err, "invalid query_range config")
}
if err := c.TableManager.Validate(); err != nil {
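The widened signature above suggests why this change is needed: the frontend can only cache per-step stats when the querier is configured to produce them, so the query-range config now needs sight of the querier config to validate that dependency. Below is a standalone sketch of that kind of cross-config check, with stand-in types and an assumed error message (the PR's actual Validate body is not shown in this diff):

package main

import (
	"errors"
	"fmt"
)

// Stand-ins for the relevant slices of the real configs, which live in
// pkg/querier and pkg/querier/queryrange.
type querierConfig struct {
	EnablePerStepStats bool
}

type queryRangeConfig struct {
	CacheQueryableSamplesStats bool
}

// validate mirrors the dependency implied by Validate(c.Querier): caching
// queryable-samples stats requires per-step stats to be enabled.
func (cfg queryRangeConfig) validate(q querierConfig) error {
	if cfg.CacheQueryableSamplesStats && !q.EnablePerStepStats {
		return errors.New("-frontend.cache-queryable-samples-stats requires -querier.per-step-stats-enabled")
	}
	return nil
}

func main() {
	cfg := queryRangeConfig{CacheQueryableSamplesStats: true}
	fmt.Println(cfg.validate(querierConfig{EnablePerStepStats: false})) // error
	fmt.Println(cfg.validate(querierConfig{EnablePerStepStats: true}))  // <nil>
}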
pkg/cortex/modules.go (6 additions, 5 deletions)

@@ -523,11 +523,12 @@ func (t *Cortex) initQueryFrontendTripperware() (serv services.Service, err erro
queryrange.PrometheusResponseExtractor{},
t.Cfg.Schema,
promql.EngineOpts{
- Logger: util_log.Logger,
- Reg: prometheus.DefaultRegisterer,
- MaxSamples: t.Cfg.Querier.MaxSamples,
- Timeout: t.Cfg.Querier.Timeout,
- EnableAtModifier: t.Cfg.Querier.AtModifierEnabled,
+ Logger: util_log.Logger,
+ Reg: prometheus.DefaultRegisterer,
+ MaxSamples: t.Cfg.Querier.MaxSamples,
+ Timeout: t.Cfg.Querier.Timeout,
+ EnableAtModifier: t.Cfg.Querier.AtModifierEnabled,
+ EnablePerStepStats: t.Cfg.Querier.EnablePerStepStats,
NoStepSubqueryIntervalFn: func(int64) int64 {
return t.Cfg.Querier.DefaultEvaluationInterval.Milliseconds()
},
pkg/querier/querier.go (3 additions, 0 deletions)

@@ -45,6 +45,7 @@ type Config struct {
QueryIngestersWithin time.Duration `yaml:"query_ingesters_within"`
QueryStoreForLabels bool `yaml:"query_store_for_labels_enabled"`
AtModifierEnabled bool `yaml:"at_modifier_enabled"`
EnablePerStepStats bool `yaml:"per_step_stats_enabled"`

// QueryStoreAfter the time after which queries should also be sent to the store and not just ingesters.
QueryStoreAfter time.Duration `yaml:"query_store_after"`
@@ -92,6 +93,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
f.DurationVar(&cfg.QueryIngestersWithin, "querier.query-ingesters-within", 0, "Maximum lookback beyond which queries are not sent to ingester. 0 means all queries are sent to ingester.")
f.BoolVar(&cfg.QueryStoreForLabels, "querier.query-store-for-labels-enabled", false, "Query long-term store for series, label values and label names APIs. Works only with blocks engine.")
f.BoolVar(&cfg.AtModifierEnabled, "querier.at-modifier-enabled", false, "Enable the @ modifier in PromQL.")
f.BoolVar(&cfg.EnablePerStepStats, "querier.per-step-stats-enabled", false, "Enable returning samples stats per step in query response.")
f.DurationVar(&cfg.MaxQueryIntoFuture, "querier.max-query-into-future", 10*time.Minute, "Maximum duration into the future you can query. 0 to disable.")
f.DurationVar(&cfg.DefaultEvaluationInterval, "querier.default-evaluation-interval", time.Minute, "The default evaluation interval or step size for subqueries.")
f.DurationVar(&cfg.QueryStoreAfter, "querier.query-store-after", 0, "The time after which a metric should be queried from storage and not just ingesters. 0 means all queries are sent to store. When running the blocks storage, if this option is enabled, the time range of the query sent to the store will be manipulated to ensure the query end is not more recent than 'now - query-store-after'.")
@@ -174,6 +176,7 @@ func New(cfg Config, limits *validation.Overrides, distributor Distributor, stor
MaxSamples: cfg.MaxSamples,
Timeout: cfg.Timeout,
LookbackDelta: cfg.LookbackDelta,
EnablePerStepStats: cfg.EnablePerStepStats,
EnableAtModifier: cfg.AtModifierEnabled,
NoStepSubqueryIntervalFn: func(int64) int64 {
return cfg.DefaultEvaluationInterval.Milliseconds()
pkg/querier/queryrange/query_range.go (93 additions, 0 deletions)

@@ -12,6 +12,7 @@ import (
"strconv"
"strings"
"time"
"unsafe"

"github.com/gogo/protobuf/proto"
"github.com/gogo/status"
@@ -88,6 +89,10 @@ type Request interface {
proto.Message
// LogToSpan writes information about this request to an OpenTracing span
LogToSpan(opentracing.Span)
// GetStats returns the stats of the request.
GetStats() string
// WithStats clones the current request with new stats.
WithStats(stats string) Request
}

// Response represents a query range response.
@@ -114,6 +119,13 @@ func (q *PrometheusRequest) WithQuery(query string) Request {
return &new
}

// WithStats clones the current `PrometheusRequest` with new stats.
func (q *PrometheusRequest) WithStats(stats string) Request {
new := *q
new.Stats = stats
return &new
}

// LogToSpan logs the current `PrometheusRequest` parameters to the specified span.
func (q *PrometheusRequest) LogToSpan(sp opentracing.Span) {
sp.LogFields(
@@ -174,6 +186,7 @@ func (prometheusCodec) MergeResponse(responses ...Response) (Response, error) {
Data: PrometheusData{
ResultType: model.ValMatrix.String(),
Result: matrixMerge(promResponses),
Stats: statsMerge(promResponses),
},
}

@@ -220,6 +233,7 @@ func (prometheusCodec) DecodeRequest(_ context.Context, r *http.Request, forward
}

result.Query = r.FormValue("query")
result.Stats = r.FormValue("stats")
result.Path = r.URL.Path

// Include the specified headers from http request in prometheusRequest.
@@ -252,6 +266,7 @@ func (prometheusCodec) EncodeRequest(ctx context.Context, r Request) (*http.Requ
"end": []string{encodeTime(promReq.End)},
"step": []string{encodeDurationMs(promReq.Step)},
"query": []string{promReq.Query},
"stats": []string{promReq.Stats},
}
u := &url.URL{
Path: promReq.Path,
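With the new field, the frontend round-trips the Prometheus stats form parameter to downstream queriers. A small standalone illustration of the resulting query string (the values are made up; "stats=all" is the value Prometheus understands for enabling stats):

package main

import (
	"fmt"
	"net/url"
)

func main() {
	// Mirrors how EncodeRequest serializes the range-query parameters,
	// now including "stats".
	params := url.Values{
		"start": []string{"1609746000"},
		"end":   []string{"1609749600"},
		"step":  []string{"60"},
		"query": []string{"up"},
		"stats": []string{"all"},
	}
	u := url.URL{Path: "/api/v1/query_range", RawQuery: params.Encode()}
	fmt.Println(u.String())
	// Output: /api/v1/query_range?end=1609749600&query=up&start=1609746000&stats=all&step=60
}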
@@ -380,6 +395,46 @@ func (s *SampleStream) MarshalJSON() ([]byte, error) {
return json.Marshal(stream)
}

Review comment (Member): It can merge more than 2 responses right?
Reply (Author): done

// statsMerge merges the stats from the given responses.
// This function is similar to matrixMerge.
func statsMerge(resps []*PrometheusResponse) *PrometheusResponseStats {
output := map[int64]*PrometheusResponseQueryableSamplesStatsPerStep{}
hasStats := false
for _, resp := range resps {
if resp.Data.Stats == nil {
continue
}

hasStats = true
if resp.Data.Stats.Samples == nil {
continue
}

for _, s := range resp.Data.Stats.Samples.TotalQueryableSamplesPerStep {
// Dedupe by step timestamp; entries from later responses overwrite
// earlier ones for the same step.
output[s.GetTimestampMs()] = s
}
}

if !hasStats {
return nil
}

keys := make([]int64, 0, len(output))
for key := range output {
keys = append(keys, key)
}

sort.Slice(keys, func(i, j int) bool { return keys[i] < keys[j] })

result := &PrometheusResponseStats{Samples: &PrometheusResponseSamplesStats{}}
for _, key := range keys {
result.Samples.TotalQueryableSamplesPerStep = append(result.Samples.TotalQueryableSamplesPerStep, output[key])
result.Samples.TotalQueryableSamples += output[key].Value
}

return result
}
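To make the merge semantics concrete: per-step entries are deduplicated by timestamp (an entry from a later response wins on overlap), re-sorted, and the total is recomputed from the kept entries. A standalone sketch with stand-in types follows (the real ones are generated from the PR's protobuf definitions):

package main

import (
	"fmt"
	"sort"
)

// stepStat stands in for PrometheusResponseQueryableSamplesStatsPerStep.
type stepStat struct {
	TimestampMs int64
	Value       int64
}

// mergeSteps mirrors the core of statsMerge above: dedupe by timestamp with
// later inputs winning, sort by timestamp, recompute the total.
func mergeSteps(partials ...[]stepStat) (steps []stepStat, total int64) {
	byTs := map[int64]stepStat{}
	for _, part := range partials {
		for _, s := range part {
			byTs[s.TimestampMs] = s
		}
	}
	keys := make([]int64, 0, len(byTs))
	for ts := range byTs {
		keys = append(keys, ts)
	}
	sort.Slice(keys, func(i, j int) bool { return keys[i] < keys[j] })
	for _, ts := range keys {
		steps = append(steps, byTs[ts])
		total += byTs[ts].Value
	}
	return steps, total
}

func main() {
	// Two partial responses overlapping at t=120000ms; the later one wins.
	a := []stepStat{{60000, 10}, {120000, 7}}
	b := []stepStat{{120000, 12}, {180000, 5}}
	steps, total := mergeSteps(a, b)
	fmt.Println(steps, total) // [{60000 10} {120000 12} {180000 5}] 27
}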

func matrixMerge(resps []*PrometheusResponse) []SampleStream {
output := map[string]*SampleStream{}
for _, resp := range resps {
@@ -473,3 +528,41 @@ func decorateWithParamName(err error, field string) error {
}
return fmt.Errorf(errTmpl, field, err)
}

func PrometheusResponseQueryableSamplesStatsPerStepJsoniterDecode(ptr unsafe.Pointer, iter *jsoniter.Iterator) {
if !iter.ReadArray() {
iter.ReportError("queryrange.PrometheusResponseQueryableSamplesStatsPerStep", "expected [")
return
}

// The step timestamp arrives as float seconds; convert to model.Time (milliseconds).
t := model.Time(iter.ReadFloat64() * float64(time.Second/time.Millisecond))

if !iter.ReadArray() {
iter.ReportError("queryrange.PrometheusResponseQueryableSamplesStatsPerStep", "expected ,")
return
}
v := iter.ReadInt64()

if iter.ReadArray() {
iter.ReportError("queryrange.PrometheusResponseQueryableSamplesStatsPerStep", "expected ]")
}

*(*PrometheusResponseQueryableSamplesStatsPerStep)(ptr) = PrometheusResponseQueryableSamplesStatsPerStep{
TimestampMs: int64(t),
Value: v,
}
}

func PrometheusResponseQueryableSamplesStatsPerStepJsoniterEncode(ptr unsafe.Pointer, stream *jsoniter.Stream) {
stats := (*PrometheusResponseQueryableSamplesStatsPerStep)(ptr)
stream.WriteArrayStart()
stream.WriteFloat64(float64(stats.TimestampMs) / float64(time.Second/time.Millisecond))
stream.WriteMore()
stream.WriteInt64(stats.Value)
stream.WriteArrayEnd()
}

func init() {
jsoniter.RegisterTypeEncoderFunc("queryrange.PrometheusResponseQueryableSamplesStatsPerStep", PrometheusResponseQueryableSamplesStatsPerStepJsoniterEncode, func(unsafe.Pointer) bool { return false })
jsoniter.RegisterTypeDecoderFunc("queryrange.PrometheusResponseQueryableSamplesStatsPerStep", PrometheusResponseQueryableSamplesStatsPerStepJsoniterDecode)
}
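The custom codec keeps the wire format aligned with Prometheus's stats JSON, where each per-step entry is a [unix_seconds, value] pair rather than an object. Here is a standalone round-trip of the same idea, registered for a stand-in type (the PR registers it for the generated queryrange type):

package main

import (
	"fmt"
	"time"
	"unsafe"

	jsoniter "github.com/json-iterator/go"
)

// StepStat stands in for the generated PrometheusResponseQueryableSamplesStatsPerStep.
type StepStat struct {
	TimestampMs int64
	Value       int64
}

func encodeStepStat(ptr unsafe.Pointer, stream *jsoniter.Stream) {
	s := (*StepStat)(ptr)
	stream.WriteArrayStart()
	// Milliseconds back to (possibly fractional) seconds on the wire.
	stream.WriteFloat64(float64(s.TimestampMs) / float64(time.Second/time.Millisecond))
	stream.WriteMore()
	stream.WriteInt64(s.Value)
	stream.WriteArrayEnd()
}

func decodeStepStat(ptr unsafe.Pointer, iter *jsoniter.Iterator) {
	if !iter.ReadArray() {
		iter.ReportError("StepStat", "expected [")
		return
	}
	ts := int64(iter.ReadFloat64() * float64(time.Second/time.Millisecond))
	if !iter.ReadArray() {
		iter.ReportError("StepStat", "expected ,")
		return
	}
	v := iter.ReadInt64()
	if iter.ReadArray() {
		iter.ReportError("StepStat", "expected ]")
	}
	*(*StepStat)(ptr) = StepStat{TimestampMs: ts, Value: v}
}

func main() {
	jsoniter.RegisterTypeEncoderFunc("main.StepStat", encodeStepStat, func(unsafe.Pointer) bool { return false })
	jsoniter.RegisterTypeDecoderFunc("main.StepStat", decodeStepStat)

	out, _ := jsoniter.Marshal(StepStat{TimestampMs: 1609746000000, Value: 5})
	fmt.Println(string(out)) // [1609746000,5]

	var back StepStat
	_ = jsoniter.Unmarshal(out, &back)
	fmt.Println(back) // {1609746000000 5}
}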