diff --git a/CHANGELOG.md b/CHANGELOG.md index 180b4934dfc..111c2cdcf25 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,7 @@ * [FEATURE] Continuous-test: now runable as a module with `mimir -target=continuous-test`. #7747 * [FEATURE] Store-gateway: Allow specific tenants to be enabled or disabled via `-store-gateway.enabled-tenants` or `-store-gateway.disabled-tenants` CLI flags or their corresponding YAML settings. #7653 * [FEATURE] New `-.s3.bucket-lookup-type` flag configures lookup style type, used to access bucket in s3 compatible providers. #7684 -* [FEATURE] Querier: add experimental streaming PromQL engine, enabled with `-querier.promql-engine=streaming`. #7693 #7898 #7899 #8023 #8058 #8096 +* [FEATURE] Querier: add experimental streaming PromQL engine, enabled with `-querier.promql-engine=streaming`. #7693 #7898 #7899 #8023 #8058 #8096 #8121 * [FEATURE] New `/ingester/unregister-on-shutdown` HTTP endpoint allows dynamic access to ingesters' `-ingester.ring.unregister-on-shutdown` configuration. #7739 * [FEATURE] Server: added experimental [PROXY protocol support](https://www.haproxy.org/download/2.3/doc/proxy-protocol.txt). The PROXY protocol support can be enabled via `-server.proxy-protocol-enabled=true`. When enabled, the support is added both to HTTP and gRPC listening ports. #7698 * [FEATURE] mimirtool: Add `runtime-config verify` sub-command, for verifying Mimir runtime config files. #8123 diff --git a/pkg/streamingpromql/benchmarks/benchmarks.go b/pkg/streamingpromql/benchmarks/benchmarks.go index c94781ee7b9..7e84b54f4f0 100644 --- a/pkg/streamingpromql/benchmarks/benchmarks.go +++ b/pkg/streamingpromql/benchmarks/benchmarks.go @@ -69,13 +69,18 @@ func (c BenchCase) Run(ctx context.Context, t testing.TB, start, end time.Time, return res, qry.Close } -// These test cases are taken from https://github.com/prometheus/prometheus/blob/main/promql/bench_test.go. +// These test cases are taken from https://github.com/prometheus/prometheus/blob/main/promql/bench_test.go +// and enhanced/added to. func TestCases(metricSizes []int) []BenchCase { cases := []BenchCase{ // Plain retrieval. { Expr: "a_X", }, + // Histogram retrieval + { + Expr: "nh_X", + }, // Range vector selector. { Expr: "a_X[1m]", diff --git a/pkg/streamingpromql/benchmarks/comparison_test.go b/pkg/streamingpromql/benchmarks/comparison_test.go index 7d7d7e4dfa0..df5b2d38b59 100644 --- a/pkg/streamingpromql/benchmarks/comparison_test.go +++ b/pkg/streamingpromql/benchmarks/comparison_test.go @@ -163,6 +163,28 @@ func TestBenchmarkSetup(t *testing.T) { } require.Equal(t, expectedPoints, series.Floats) + + // Check native histograms are set up correctly + query, err = streamingEngine.NewRangeQuery(ctx, q, nil, "nh_1", time.Unix(0, 0), time.Unix(int64(15*intervalSeconds), 0), interval) + require.NoError(t, err) + + t.Cleanup(query.Close) + result = query.Exec(ctx) + require.NoError(t, result.Err) + + matrix, err = result.Matrix() + require.NoError(t, err) + + require.Len(t, matrix, 1) + series = matrix[0] + require.Equal(t, labels.FromStrings("__name__", "nh_1"), series.Metric) + require.Len(t, series.Floats, 0) + require.Len(t, series.Histograms, 16) + + // Check one histogram point is as expected + require.Equal(t, int64(0), series.Histograms[0].T) + require.Equal(t, 12.0, series.Histograms[0].H.Count) + require.Equal(t, 18.4, series.Histograms[0].H.Sum) } // Why do we do this rather than require.Equal(t, expected, actual)? @@ -194,7 +216,11 @@ func requireEqualResults(t testing.TB, expected, actual *promql.Result) { require.Equal(t, expectedSample.Metric, actualSample.Metric) require.Equal(t, expectedSample.T, actualSample.T) require.Equal(t, expectedSample.H, actualSample.H) - require.InEpsilon(t, expectedSample.F, actualSample.F, 1e-10) + if expectedSample.F == 0 { + require.Equal(t, expectedSample.F, actualSample.F) + } else { + require.InEpsilon(t, expectedSample.F, actualSample.F, 1e-10) + } } case parser.ValueTypeMatrix: expectedMatrix, err := expected.Matrix() @@ -214,7 +240,11 @@ func requireEqualResults(t testing.TB, expected, actual *promql.Result) { actualPoint := actualSeries.Floats[j] require.Equal(t, expectedPoint.T, actualPoint.T) - require.InEpsilonf(t, expectedPoint.F, actualPoint.F, 1e-10, "expected series %v to have points %v, but result is %v", expectedSeries.Metric.String(), expectedSeries.Floats, actualSeries.Floats) + if expectedPoint.F == 0 { + require.Equal(t, expectedPoint.F, actualPoint.F) + } else { + require.InEpsilonf(t, expectedPoint.F, actualPoint.F, 1e-10, "expected series %v to have points %v, but result is %v", expectedSeries.Metric.String(), expectedSeries.Floats, actualSeries.Floats) + } } } default: diff --git a/pkg/streamingpromql/benchmarks/ingester.go b/pkg/streamingpromql/benchmarks/ingester.go index 61b5621d942..5c6947c949e 100644 --- a/pkg/streamingpromql/benchmarks/ingester.go +++ b/pkg/streamingpromql/benchmarks/ingester.go @@ -14,6 +14,7 @@ import ( "net" "path/filepath" "strconv" + "strings" "time" "github.com/go-kit/log" @@ -174,7 +175,7 @@ func pushTestData(ing *ingester.Ingester, metricSizes []int) error { totalMetrics := 0 for _, size := range metricSizes { - totalMetrics += (2 + histogramBuckets + 1) * size // 2 non-histogram metrics + 5 metrics for histogram buckets + 1 metric for +Inf histogram bucket + totalMetrics += (2 + histogramBuckets + 1 + 1) * size // 2 non-histogram metrics + 5 metrics for histogram buckets + 1 metric for +Inf histogram bucket + 1 metric for native-histograms } metrics := make([]labels.Labels, 0, totalMetrics) @@ -183,6 +184,7 @@ func pushTestData(ing *ingester.Ingester, metricSizes []int) error { aName := "a_" + strconv.Itoa(size) bName := "b_" + strconv.Itoa(size) histogramName := "h_" + strconv.Itoa(size) + nativeHistogramName := "nh_" + strconv.Itoa(size) if size == 1 { // We don't want a "l" label on metrics with one series (some test cases rely on this label not being present). @@ -192,6 +194,7 @@ func pushTestData(ing *ingester.Ingester, metricSizes []int) error { metrics = append(metrics, labels.FromStrings("__name__", histogramName, "le", strconv.Itoa(le))) } metrics = append(metrics, labels.FromStrings("__name__", histogramName, "le", "+Inf")) + metrics = append(metrics, labels.FromStrings("__name__", nativeHistogramName)) } else { for i := 0; i < size; i++ { metrics = append(metrics, labels.FromStrings("__name__", aName, "l", strconv.Itoa(i))) @@ -200,34 +203,69 @@ func pushTestData(ing *ingester.Ingester, metricSizes []int) error { metrics = append(metrics, labels.FromStrings("__name__", histogramName, "l", strconv.Itoa(i), "le", strconv.Itoa(le))) } metrics = append(metrics, labels.FromStrings("__name__", histogramName, "l", strconv.Itoa(i), "le", "+Inf")) + metrics = append(metrics, labels.FromStrings("__name__", nativeHistogramName, "l", strconv.Itoa(i))) } } } ctx := user.InjectOrgID(context.Background(), UserID) - req := &mimirpb.WriteRequest{ - Timeseries: make([]mimirpb.PreallocTimeseries, len(metrics)), - } - for i, m := range metrics { - series := mimirpb.PreallocTimeseries{TimeSeries: &mimirpb.TimeSeries{ - Labels: mimirpb.FromLabelsToLabelAdapters(m), - Samples: make([]mimirpb.Sample, NumIntervals), - }} + // Batch samples into separate requests + // There is no precise science behind this number currently. + // A quick run locally found batching by 100 did not increase the loading time by any noticeable amount. + // Additionally memory usage maxed about 4GB for the whole process. + batchSize := 100 + for start := 0; start < NumIntervals; start += batchSize { + end := start + batchSize + if end > NumIntervals { + end = NumIntervals + } - for s := 0; s < NumIntervals; s++ { - series.Samples[s].TimestampMs = int64(s) * interval.Milliseconds() - series.Samples[s].Value = float64(s) + float64(i)/float64(len(metrics)) + req := &mimirpb.WriteRequest{ + Timeseries: make([]mimirpb.PreallocTimeseries, len(metrics)), } - req.Timeseries[i] = series - } + for metricIdx, m := range metrics { + if strings.HasPrefix(m.Get("__name__"), "nh_") { + series := mimirpb.PreallocTimeseries{TimeSeries: &mimirpb.TimeSeries{ + Labels: mimirpb.FromLabelsToLabelAdapters(m.Copy()), + Histograms: make([]mimirpb.Histogram, end-start), + }} + + for ts := start; ts < end; ts++ { + // TODO(jhesketh): Fix this with some better data + series.Histograms[ts-start].Timestamp = int64(ts) * interval.Milliseconds() + series.Histograms[ts-start].Count = &mimirpb.Histogram_CountInt{CountInt: 12} + series.Histograms[ts-start].ZeroCount = &mimirpb.Histogram_ZeroCountInt{ZeroCountInt: 2} + series.Histograms[ts-start].ZeroThreshold = 0.001 + series.Histograms[ts-start].Sum = 18.4 + series.Histograms[ts-start].Schema = 0 + series.Histograms[ts-start].NegativeSpans = []mimirpb.BucketSpan{{Offset: 0, Length: 2}, {Offset: 1, Length: 2}} + series.Histograms[ts-start].NegativeDeltas = []int64{1, 1, -1, 0} + series.Histograms[ts-start].PositiveSpans = []mimirpb.BucketSpan{{Offset: 0, Length: 2}, {Offset: 1, Length: 2}} + series.Histograms[ts-start].PositiveDeltas = []int64{1, 1, -1, 0} + } - if _, err := ing.Push(ctx, req); err != nil { - return fmt.Errorf("failed to push samples to ingester: %w", err) - } + req.Timeseries[metricIdx] = series + } else { + series := mimirpb.PreallocTimeseries{TimeSeries: &mimirpb.TimeSeries{ + Labels: mimirpb.FromLabelsToLabelAdapters(m.Copy()), + Samples: make([]mimirpb.Sample, end-start), + }} - ing.Flush() + for ts := start; ts < end; ts++ { + series.Samples[ts-start].TimestampMs = int64(ts) * interval.Milliseconds() + series.Samples[ts-start].Value = float64(ts) + float64(metricIdx)/float64(len(metrics)) + } + + req.Timeseries[metricIdx] = series + } + } + if _, err := ing.Push(ctx, req); err != nil { + return fmt.Errorf("failed to push samples to ingester: %w", err) + } + ing.Flush() + } return nil } diff --git a/pkg/streamingpromql/operator/instant_vector_selector.go b/pkg/streamingpromql/operator/instant_vector_selector.go index 5fd06435bc3..d4903272bb4 100644 --- a/pkg/streamingpromql/operator/instant_vector_selector.go +++ b/pkg/streamingpromql/operator/instant_vector_selector.go @@ -7,7 +7,6 @@ package operator import ( "context" - "errors" "fmt" "github.com/prometheus/prometheus/model/histogram" @@ -48,13 +47,11 @@ func (v *InstantVectorSelector) NextSeries(_ context.Context) (InstantVectorSeri v.memoizedIterator.Reset(v.chunkIterator) - data := InstantVectorSeriesData{ - Floats: GetFPointSlice(v.numSteps), // TODO: only allocate this if we have any floats (once we support native histograms) - } + data := InstantVectorSeriesData{} for stepT := v.Selector.Start; stepT <= v.Selector.End; stepT += v.Selector.Interval { var t int64 - var val float64 + var f float64 var h *histogram.FloatHistogram ts := stepT @@ -70,26 +67,42 @@ func (v *InstantVectorSelector) NextSeries(_ context.Context) (InstantVectorSeri return InstantVectorSeriesData{}, v.memoizedIterator.Err() } case chunkenc.ValFloat: - t, val = v.memoizedIterator.At() + t, f = v.memoizedIterator.At() + case chunkenc.ValHistogram, chunkenc.ValFloatHistogram: + t, h = v.memoizedIterator.AtFloatHistogram() default: return InstantVectorSeriesData{}, fmt.Errorf("streaming PromQL engine: unknown value type %s", valueType.String()) } if valueType == chunkenc.ValNone || t > ts { var ok bool - t, val, h, ok = v.memoizedIterator.PeekPrev() - if h != nil { - return InstantVectorSeriesData{}, errors.New("streaming PromQL engine doesn't support histograms yet") - } + t, f, h, ok = v.memoizedIterator.PeekPrev() if !ok || t < ts-v.Selector.LookbackDelta.Milliseconds() { continue } } - if value.IsStaleNaN(val) || (h != nil && value.IsStaleNaN(h.Sum)) { + if value.IsStaleNaN(f) || (h != nil && value.IsStaleNaN(h.Sum)) { continue } - data.Floats = append(data.Floats, promql.FPoint{T: stepT, F: val}) + // if (f, h) have been set by PeekPrev, we do not know if f is 0 because that's the actual value, or because + // the previous value had a histogram. + // PeekPrev will set the histogram to nil, or the value to 0 if the other type exists. + // So check if histograms is nil first. If we don't have a histogram, then we should have a value and vice-versa. + if h != nil { + if len(data.Histograms) == 0 { + // Only create the slice once we know the series is a histogram or not. + // (It is possible to over-allocate in the case where we have both floats and histograms, but that won't be common). + data.Histograms = GetHPointSlice(v.numSteps) + } + data.Histograms = append(data.Histograms, promql.HPoint{T: stepT, H: h}) + } else { + if len(data.Floats) == 0 { + // Only create the slice once we know the series is a histogram or not + data.Floats = GetFPointSlice(v.numSteps) + } + data.Floats = append(data.Floats, promql.FPoint{T: stepT, F: f}) + } } if v.memoizedIterator.Err() != nil { diff --git a/pkg/streamingpromql/operator/pool.go b/pkg/streamingpromql/operator/pool.go index adb259dada1..b33f90db58e 100644 --- a/pkg/streamingpromql/operator/pool.go +++ b/pkg/streamingpromql/operator/pool.go @@ -21,6 +21,10 @@ var ( return make([]promql.FPoint, 0, size) }) + hPointSlicePool = pool.NewBucketedPool(1, maxExpectedPointsPerSeries, seriesPerResultBucketFactor, func(size int) []promql.HPoint { + return make([]promql.HPoint, 0, size) + }) + matrixPool = pool.NewBucketedPool(1, maxExpectedSeriesPerResult, seriesPerResultBucketFactor, func(size int) promql.Matrix { return make(promql.Matrix, 0, size) }) @@ -51,6 +55,14 @@ func PutFPointSlice(s []promql.FPoint) { fPointSlicePool.Put(s) } +func GetHPointSlice(size int) []promql.HPoint { + return hPointSlicePool.Get(size) +} + +func PutHPointSlice(s []promql.HPoint) { + hPointSlicePool.Put(s) +} + func GetMatrix(size int) promql.Matrix { return matrixPool.Get(size) } diff --git a/pkg/streamingpromql/query.go b/pkg/streamingpromql/query.go index d74cae7efd9..93f424049b9 100644 --- a/pkg/streamingpromql/query.go +++ b/pkg/streamingpromql/query.go @@ -278,6 +278,11 @@ func (q *Query) Exec(ctx context.Context) *promql.Result { return q.result } +func returnSeriesDataSlices(d operator.InstantVectorSeriesData) { + operator.PutFPointSlice(d.Floats) + operator.PutHPointSlice(d.Histograms) +} + func (q *Query) populateVectorFromInstantVectorOperator(ctx context.Context, o operator.InstantVectorOperator, series []operator.SeriesMetadata) (promql.Vector, error) { ts := timeMilliseconds(q.statement.Start) v := operator.GetVector(len(series)) @@ -292,26 +297,29 @@ func (q *Query) populateVectorFromInstantVectorOperator(ctx context.Context, o o return nil, err } - if len(d.Floats)+len(d.Histograms) != 1 { - operator.PutFPointSlice(d.Floats) - // TODO: put histogram point slice back in pool - - if len(d.Floats)+len(d.Histograms) == 0 { + if len(d.Floats) == 1 && len(d.Histograms) == 0 { + point := d.Floats[0] + v = append(v, promql.Sample{ + Metric: s.Labels, + T: ts, + F: point.F, + }) + } else if len(d.Floats) == 0 && len(d.Histograms) == 1 { + point := d.Histograms[0] + v = append(v, promql.Sample{ + Metric: s.Labels, + T: ts, + H: point.H, + }) + } else { + returnSeriesDataSlices(d) + // A series may have no data points. + if len(d.Floats) == 0 && len(d.Histograms) == 0 { continue } - - return nil, fmt.Errorf("expected exactly one sample for series %s, but got %v", s.Labels.String(), len(d.Floats)) + return nil, fmt.Errorf("expected exactly one sample for series %s, but got %v floats, %v histograms", s.Labels.String(), len(d.Floats), len(d.Histograms)) } - - point := d.Floats[0] - v = append(v, promql.Sample{ - Metric: s.Labels, - T: ts, - F: point.F, - }) - - operator.PutFPointSlice(d.Floats) - // TODO: put histogram point slice back in pool + returnSeriesDataSlices(d) } return v, nil @@ -331,9 +339,7 @@ func (q *Query) populateMatrixFromInstantVectorOperator(ctx context.Context, o o } if len(d.Floats) == 0 && len(d.Histograms) == 0 { - operator.PutFPointSlice(d.Floats) - // TODO: put histogram point slice back in pool - + returnSeriesDataSlices(d) continue } @@ -394,7 +400,7 @@ func (q *Query) Close() { case promql.Matrix: for _, s := range v { operator.PutFPointSlice(s.Floats) - // TODO: put histogram point slice back in pool + operator.PutHPointSlice(s.Histograms) } operator.PutMatrix(v) diff --git a/pkg/streamingpromql/testdata/ours/native_histograms.test b/pkg/streamingpromql/testdata/ours/native_histograms.test new file mode 100644 index 00000000000..6ada96ef656 --- /dev/null +++ b/pkg/streamingpromql/testdata/ours/native_histograms.test @@ -0,0 +1,23 @@ +# SPDX-License-Identifier: AGPL-3.0-only +# Provenance-includes-location: https://github.com/prometheus/prometheus/tree/main/promql/testdata/native_histograms.test +# Provenance-includes-license: Apache-2.0 +# Provenance-includes-copyright: The Prometheus Authors + +# buckets:[1 2 1] means 1 observation in the 1st bucket, 2 observations in the 2nd and 1 observation in the 3rd (total 4). +load 5m + single_histogram {{schema:0 sum:5 count:4 buckets:[1 2 1]}} {{schema:0 sum:20 count:7 buckets:[9 10 1]}} + +eval instant at 5m single_histogram + {__name__="single_histogram"} {{schema:0 sum:20 count:7 buckets:[9 10 1]}} + +eval range from 0 to 5m step 1m single_histogram + {__name__="single_histogram"} {{schema:0 sum:5 count:4 buckets:[1 2 1]}} {{schema:0 sum:5 count:4 buckets:[1 2 1]}} {{schema:0 sum:5 count:4 buckets:[1 2 1]}} {{schema:0 sum:5 count:4 buckets:[1 2 1]}} {{schema:0 sum:5 count:4 buckets:[1 2 1]}} {{schema:0 sum:20 count:7 buckets:[9 10 1]}} + +clear + +# Test metric with mixed floats and histograms +load 1m + mixed_metric 1 2 3 {{schema:0 sum:5 count:4 buckets:[1 2 1]}} {{schema:0 sum:8 count:6 buckets:[1 4 1]}} + +eval range from 0 to 4m step 1m mixed_metric + {__name__="mixed_metric"} 1 2 3 {{count:4 sum:5 buckets:[1 2 1]}} {{count:6 sum:8 buckets:[1 4 1]}} diff --git a/pkg/streamingpromql/testdata/upstream/native_histograms.test b/pkg/streamingpromql/testdata/upstream/native_histograms.test new file mode 100644 index 00000000000..ff107a7c20c --- /dev/null +++ b/pkg/streamingpromql/testdata/upstream/native_histograms.test @@ -0,0 +1,335 @@ +# SPDX-License-Identifier: AGPL-3.0-only +# Provenance-includes-location: https://github.com/prometheus/prometheus/tree/main/promql/testdata/native_histograms.test +# Provenance-includes-license: Apache-2.0 +# Provenance-includes-copyright: The Prometheus Authors + +# Minimal valid case: an empty histogram. +load 5m + empty_histogram {{}} + +eval instant at 5m empty_histogram + {__name__="empty_histogram"} {{}} + +# Unsupported by streaming engine. +# eval instant at 5m histogram_count(empty_histogram) +# {} 0 + +# Unsupported by streaming engine. +# eval instant at 5m histogram_sum(empty_histogram) +# {} 0 + +# Unsupported by streaming engine. +# eval instant at 5m histogram_avg(empty_histogram) +# {} NaN + +# Unsupported by streaming engine. +# eval instant at 5m histogram_fraction(-Inf, +Inf, empty_histogram) +# {} NaN + +# Unsupported by streaming engine. +# eval instant at 5m histogram_fraction(0, 8, empty_histogram) +# {} NaN + + + +# buckets:[1 2 1] means 1 observation in the 1st bucket, 2 observations in the 2nd and 1 observation in the 3rd (total 4). +load 5m + single_histogram {{schema:0 sum:5 count:4 buckets:[1 2 1]}} + +# histogram_count extracts the count property from the histogram. +# Unsupported by streaming engine. +# eval instant at 5m histogram_count(single_histogram) +# {} 4 + +# histogram_sum extracts the sum property from the histogram. +# Unsupported by streaming engine. +# eval instant at 5m histogram_sum(single_histogram) +# {} 5 + +# histogram_avg calculates the average from sum and count properties. +# Unsupported by streaming engine. +# eval instant at 5m histogram_avg(single_histogram) +# {} 1.25 + +# We expect half of the values to fall in the range 1 < x <= 2. +# Unsupported by streaming engine. +# eval instant at 5m histogram_fraction(1, 2, single_histogram) +# {} 0.5 + +# We expect all values to fall in the range 0 < x <= 8. +# Unsupported by streaming engine. +# eval instant at 5m histogram_fraction(0, 8, single_histogram) +# {} 1 + +# Median is 1.5 due to linear estimation of the midpoint of the middle bucket, whose values are within range 1 < x <= 2. +# Unsupported by streaming engine. +# eval instant at 5m histogram_quantile(0.5, single_histogram) +# {} 1.5 + + + +# Repeat the same histogram 10 times. +load 5m + multi_histogram {{schema:0 sum:5 count:4 buckets:[1 2 1]}}x10 + +# Unsupported by streaming engine. +# eval instant at 5m histogram_count(multi_histogram) +# {} 4 + +# Unsupported by streaming engine. +# eval instant at 5m histogram_sum(multi_histogram) +# {} 5 + +# Unsupported by streaming engine. +# eval instant at 5m histogram_avg(multi_histogram) +# {} 1.25 + +# Unsupported by streaming engine. +# eval instant at 5m histogram_fraction(1, 2, multi_histogram) +# {} 0.5 + +# Unsupported by streaming engine. +# eval instant at 5m histogram_quantile(0.5, multi_histogram) +# {} 1.5 + + +# Each entry should look the same as the first. +# Unsupported by streaming engine. +# eval instant at 50m histogram_count(multi_histogram) +# {} 4 + +# Unsupported by streaming engine. +# eval instant at 50m histogram_sum(multi_histogram) +# {} 5 + +# Unsupported by streaming engine. +# eval instant at 50m histogram_avg(multi_histogram) +# {} 1.25 + +# Unsupported by streaming engine. +# eval instant at 50m histogram_fraction(1, 2, multi_histogram) +# {} 0.5 + +# Unsupported by streaming engine. +# eval instant at 50m histogram_quantile(0.5, multi_histogram) +# {} 1.5 + + + +# Accumulate the histogram addition for 10 iterations, offset is a bucket position where offset:0 is always the bucket +# with an upper limit of 1 and offset:1 is the bucket which follows to the right. Negative offsets represent bucket +# positions for upper limits <1 (tending toward zero), where offset:-1 is the bucket to the left of offset:0. +load 5m + incr_histogram {{schema:0 sum:4 count:4 buckets:[1 2 1]}}+{{sum:2 count:1 buckets:[1] offset:1}}x10 + +# Unsupported by streaming engine. +# eval instant at 5m histogram_count(incr_histogram) +# {} 5 + +# Unsupported by streaming engine. +# eval instant at 5m histogram_sum(incr_histogram) +# {} 6 + +# Unsupported by streaming engine. +# eval instant at 5m histogram_avg(incr_histogram) +# {} 1.2 + +# We expect 3/5ths of the values to fall in the range 1 < x <= 2. +# Unsupported by streaming engine. +# eval instant at 5m histogram_fraction(1, 2, incr_histogram) +# {} 0.6 + +# Unsupported by streaming engine. +# eval instant at 5m histogram_quantile(0.5, incr_histogram) +# {} 1.5 + + +eval instant at 50m incr_histogram + {__name__="incr_histogram"} {{count:14 sum:24 buckets:[1 12 1]}} + +# Unsupported by streaming engine. +# eval instant at 50m histogram_count(incr_histogram) +# {} 14 + +# Unsupported by streaming engine. +# eval instant at 50m histogram_sum(incr_histogram) +# {} 24 + +# Unsupported by streaming engine. +# eval instant at 50m histogram_avg(incr_histogram) +# {} 1.7142857142857142 + +# We expect 12/14ths of the values to fall in the range 1 < x <= 2. +# Unsupported by streaming engine. +# eval instant at 50m histogram_fraction(1, 2, incr_histogram) +# {} 0.8571428571428571 + +# Unsupported by streaming engine. +# eval instant at 50m histogram_quantile(0.5, incr_histogram) +# {} 1.5 + +# Per-second average rate of increase should be 1/(5*60) for count and buckets, then 2/(5*60) for sum. +# Unsupported by streaming engine. +# eval instant at 50m rate(incr_histogram[5m]) +# {} {{count:0.0033333333333333335 sum:0.006666666666666667 offset:1 buckets:[0.0033333333333333335]}} + +# Calculate the 50th percentile of observations over the last 10m. +# Unsupported by streaming engine. +# eval instant at 50m histogram_quantile(0.5, rate(incr_histogram[10m])) +# {} 1.5 + + + +# Schema represents the histogram resolution, different schema have compatible bucket boundaries, e.g.: +# 0: 1 2 4 8 16 32 64 (higher resolution) +# -1: 1 4 16 64 (lower resolution) +# +# Histograms can be merged as long as the histogram to the right is same resolution or higher. +load 5m + low_res_histogram {{schema:-1 sum:4 count:1 buckets:[1] offset:1}}+{{schema:0 sum:4 count:4 buckets:[2 2] offset:1}}x1 + +eval instant at 5m low_res_histogram + {__name__="low_res_histogram"} {{schema:-1 count:5 sum:8 offset:1 buckets:[5]}} + +# Unsupported by streaming engine. +# eval instant at 5m histogram_count(low_res_histogram) +# {} 5 + +# Unsupported by streaming engine. +# eval instant at 5m histogram_sum(low_res_histogram) +# {} 8 + +# Unsupported by streaming engine. +# eval instant at 5m histogram_avg(low_res_histogram) +# {} 1.6 + +# We expect all values to fall into the lower-resolution bucket with the range 1 < x <= 4. +# Unsupported by streaming engine. +# eval instant at 5m histogram_fraction(1, 4, low_res_histogram) +# {} 1 + + + +# z_bucket:1 means there is one observation in the zero bucket and z_bucket_w:0.5 means the zero bucket has the range +# 0 < x <= 0.5. Sum and count are expected to represent all observations in the histogram, including those in the zero bucket. +load 5m + single_zero_histogram {{schema:0 z_bucket:1 z_bucket_w:0.5 sum:0.25 count:1}} + +# Unsupported by streaming engine. +# eval instant at 5m histogram_count(single_zero_histogram) +# {} 1 + +# Unsupported by streaming engine. +# eval instant at 5m histogram_sum(single_zero_histogram) +# {} 0.25 + +# Unsupported by streaming engine. +# eval instant at 5m histogram_avg(single_zero_histogram) +# {} 0.25 + +# When only the zero bucket is populated, or there are negative buckets, the distribution is assumed to be equally +# distributed around zero; i.e. that there are an equal number of positive and negative observations. Therefore the +# entire distribution must lie within the full range of the zero bucket, in this case: -0.5 < x <= +0.5. +# Unsupported by streaming engine. +# eval instant at 5m histogram_fraction(-0.5, 0.5, single_zero_histogram) +# {} 1 + +# Half of the observations are estimated to be zero, as this is the midpoint between -0.5 and +0.5. +# Unsupported by streaming engine. +# eval instant at 5m histogram_quantile(0.5, single_zero_histogram) +# {} 0 + + + +# Let's turn single_histogram upside-down. +load 5m + negative_histogram {{schema:0 sum:-5 count:4 n_buckets:[1 2 1]}} + +# Unsupported by streaming engine. +# eval instant at 5m histogram_count(negative_histogram) +# {} 4 + +# Unsupported by streaming engine. +# eval instant at 5m histogram_sum(negative_histogram) +# {} -5 + +# Unsupported by streaming engine. +# eval instant at 5m histogram_avg(negative_histogram) +# {} -1.25 + +# We expect half of the values to fall in the range -2 < x <= -1. +# Unsupported by streaming engine. +# eval instant at 5m histogram_fraction(-2, -1, negative_histogram) +# {} 0.5 + +# Unsupported by streaming engine. +# eval instant at 5m histogram_quantile(0.5, negative_histogram) +# {} -1.5 + + + +# Two histogram samples. +load 5m + two_samples_histogram {{schema:0 sum:4 count:4 buckets:[1 2 1]}} {{schema:0 sum:-4 count:4 n_buckets:[1 2 1]}} + +# We expect to see the newest sample. +# Unsupported by streaming engine. +# eval instant at 10m histogram_count(two_samples_histogram) +# {} 4 + +# Unsupported by streaming engine. +# eval instant at 10m histogram_sum(two_samples_histogram) +# {} -4 + +# Unsupported by streaming engine. +# eval instant at 10m histogram_avg(two_samples_histogram) +# {} -1 + +# Unsupported by streaming engine. +# eval instant at 10m histogram_fraction(-2, -1, two_samples_histogram) +# {} 0.5 + +# Unsupported by streaming engine. +# eval instant at 10m histogram_quantile(0.5, two_samples_histogram) +# {} -1.5 + + + +# Add two histograms with negated data. +load 5m + balanced_histogram {{schema:0 sum:4 count:4 buckets:[1 2 1]}}+{{schema:0 sum:-4 count:4 n_buckets:[1 2 1]}}x1 + +# Unsupported by streaming engine. +# eval instant at 5m histogram_count(balanced_histogram) +# {} 8 + +# Unsupported by streaming engine. +# eval instant at 5m histogram_sum(balanced_histogram) +# {} 0 + +# Unsupported by streaming engine. +# eval instant at 5m histogram_avg(balanced_histogram) +# {} 0 + +# Unsupported by streaming engine. +# eval instant at 5m histogram_fraction(0, 4, balanced_histogram) +# {} 0.5 + +# If the quantile happens to be located in a span of empty buckets, the actually returned value is the lower bound of +# the first populated bucket after the span of empty buckets. +# Unsupported by streaming engine. +# eval instant at 5m histogram_quantile(0.5, balanced_histogram) +# {} 0.5 + +# Add histogram to test sum(last_over_time) regression +load 5m + incr_sum_histogram{number="1"} {{schema:0 sum:0 count:0 buckets:[1]}}+{{schema:0 sum:1 count:1 buckets:[1]}}x10 + incr_sum_histogram{number="2"} {{schema:0 sum:0 count:0 buckets:[1]}}+{{schema:0 sum:2 count:1 buckets:[1]}}x10 + +# Unsupported by streaming engine. +# eval instant at 50m histogram_sum(sum(incr_sum_histogram)) +# {} 30 + +# Unsupported by streaming engine. +# eval instant at 50m histogram_sum(sum(last_over_time(incr_sum_histogram[5m]))) +# {} 30 diff --git a/pkg/streamingpromql/testdata/upstream/native_histograms.test.disabled b/pkg/streamingpromql/testdata/upstream/native_histograms.test.disabled deleted file mode 100644 index 392294edd3c..00000000000 --- a/pkg/streamingpromql/testdata/upstream/native_histograms.test.disabled +++ /dev/null @@ -1,276 +0,0 @@ -# SPDX-License-Identifier: AGPL-3.0-only -# Provenance-includes-location: https://github.com/prometheus/prometheus/tree/main/promql/testdata/native_histograms.test -# Provenance-includes-license: Apache-2.0 -# Provenance-includes-copyright: The Prometheus Authors - -# Minimal valid case: an empty histogram. -load 5m - empty_histogram {{}} - -eval instant at 5m empty_histogram - {__name__="empty_histogram"} {{}} - -eval instant at 5m histogram_count(empty_histogram) - {} 0 - -eval instant at 5m histogram_sum(empty_histogram) - {} 0 - -eval instant at 5m histogram_avg(empty_histogram) - {} NaN - -eval instant at 5m histogram_fraction(-Inf, +Inf, empty_histogram) - {} NaN - -eval instant at 5m histogram_fraction(0, 8, empty_histogram) - {} NaN - - - -# buckets:[1 2 1] means 1 observation in the 1st bucket, 2 observations in the 2nd and 1 observation in the 3rd (total 4). -load 5m - single_histogram {{schema:0 sum:5 count:4 buckets:[1 2 1]}} - -# histogram_count extracts the count property from the histogram. -eval instant at 5m histogram_count(single_histogram) - {} 4 - -# histogram_sum extracts the sum property from the histogram. -eval instant at 5m histogram_sum(single_histogram) - {} 5 - -# histogram_avg calculates the average from sum and count properties. -eval instant at 5m histogram_avg(single_histogram) - {} 1.25 - -# We expect half of the values to fall in the range 1 < x <= 2. -eval instant at 5m histogram_fraction(1, 2, single_histogram) - {} 0.5 - -# We expect all values to fall in the range 0 < x <= 8. -eval instant at 5m histogram_fraction(0, 8, single_histogram) - {} 1 - -# Median is 1.5 due to linear estimation of the midpoint of the middle bucket, whose values are within range 1 < x <= 2. -eval instant at 5m histogram_quantile(0.5, single_histogram) - {} 1.5 - - - -# Repeat the same histogram 10 times. -load 5m - multi_histogram {{schema:0 sum:5 count:4 buckets:[1 2 1]}}x10 - -eval instant at 5m histogram_count(multi_histogram) - {} 4 - -eval instant at 5m histogram_sum(multi_histogram) - {} 5 - -eval instant at 5m histogram_avg(multi_histogram) - {} 1.25 - -eval instant at 5m histogram_fraction(1, 2, multi_histogram) - {} 0.5 - -eval instant at 5m histogram_quantile(0.5, multi_histogram) - {} 1.5 - - -# Each entry should look the same as the first. -eval instant at 50m histogram_count(multi_histogram) - {} 4 - -eval instant at 50m histogram_sum(multi_histogram) - {} 5 - -eval instant at 50m histogram_avg(multi_histogram) - {} 1.25 - -eval instant at 50m histogram_fraction(1, 2, multi_histogram) - {} 0.5 - -eval instant at 50m histogram_quantile(0.5, multi_histogram) - {} 1.5 - - - -# Accumulate the histogram addition for 10 iterations, offset is a bucket position where offset:0 is always the bucket -# with an upper limit of 1 and offset:1 is the bucket which follows to the right. Negative offsets represent bucket -# positions for upper limits <1 (tending toward zero), where offset:-1 is the bucket to the left of offset:0. -load 5m - incr_histogram {{schema:0 sum:4 count:4 buckets:[1 2 1]}}+{{sum:2 count:1 buckets:[1] offset:1}}x10 - -eval instant at 5m histogram_count(incr_histogram) - {} 5 - -eval instant at 5m histogram_sum(incr_histogram) - {} 6 - -eval instant at 5m histogram_avg(incr_histogram) - {} 1.2 - -# We expect 3/5ths of the values to fall in the range 1 < x <= 2. -eval instant at 5m histogram_fraction(1, 2, incr_histogram) - {} 0.6 - -eval instant at 5m histogram_quantile(0.5, incr_histogram) - {} 1.5 - - -eval instant at 50m incr_histogram - {__name__="incr_histogram"} {{count:14 sum:24 buckets:[1 12 1]}} - -eval instant at 50m histogram_count(incr_histogram) - {} 14 - -eval instant at 50m histogram_sum(incr_histogram) - {} 24 - -eval instant at 50m histogram_avg(incr_histogram) - {} 1.7142857142857142 - -# We expect 12/14ths of the values to fall in the range 1 < x <= 2. -eval instant at 50m histogram_fraction(1, 2, incr_histogram) - {} 0.8571428571428571 - -eval instant at 50m histogram_quantile(0.5, incr_histogram) - {} 1.5 - -# Per-second average rate of increase should be 1/(5*60) for count and buckets, then 2/(5*60) for sum. -eval instant at 50m rate(incr_histogram[5m]) - {} {{count:0.0033333333333333335 sum:0.006666666666666667 offset:1 buckets:[0.0033333333333333335]}} - -# Calculate the 50th percentile of observations over the last 10m. -eval instant at 50m histogram_quantile(0.5, rate(incr_histogram[10m])) - {} 1.5 - - - -# Schema represents the histogram resolution, different schema have compatible bucket boundaries, e.g.: -# 0: 1 2 4 8 16 32 64 (higher resolution) -# -1: 1 4 16 64 (lower resolution) -# -# Histograms can be merged as long as the histogram to the right is same resolution or higher. -load 5m - low_res_histogram {{schema:-1 sum:4 count:1 buckets:[1] offset:1}}+{{schema:0 sum:4 count:4 buckets:[2 2] offset:1}}x1 - -eval instant at 5m low_res_histogram - {__name__="low_res_histogram"} {{schema:-1 count:5 sum:8 offset:1 buckets:[5]}} - -eval instant at 5m histogram_count(low_res_histogram) - {} 5 - -eval instant at 5m histogram_sum(low_res_histogram) - {} 8 - -eval instant at 5m histogram_avg(low_res_histogram) - {} 1.6 - -# We expect all values to fall into the lower-resolution bucket with the range 1 < x <= 4. -eval instant at 5m histogram_fraction(1, 4, low_res_histogram) - {} 1 - - - -# z_bucket:1 means there is one observation in the zero bucket and z_bucket_w:0.5 means the zero bucket has the range -# 0 < x <= 0.5. Sum and count are expected to represent all observations in the histogram, including those in the zero bucket. -load 5m - single_zero_histogram {{schema:0 z_bucket:1 z_bucket_w:0.5 sum:0.25 count:1}} - -eval instant at 5m histogram_count(single_zero_histogram) - {} 1 - -eval instant at 5m histogram_sum(single_zero_histogram) - {} 0.25 - -eval instant at 5m histogram_avg(single_zero_histogram) - {} 0.25 - -# When only the zero bucket is populated, or there are negative buckets, the distribution is assumed to be equally -# distributed around zero; i.e. that there are an equal number of positive and negative observations. Therefore the -# entire distribution must lie within the full range of the zero bucket, in this case: -0.5 < x <= +0.5. -eval instant at 5m histogram_fraction(-0.5, 0.5, single_zero_histogram) - {} 1 - -# Half of the observations are estimated to be zero, as this is the midpoint between -0.5 and +0.5. -eval instant at 5m histogram_quantile(0.5, single_zero_histogram) - {} 0 - - - -# Let's turn single_histogram upside-down. -load 5m - negative_histogram {{schema:0 sum:-5 count:4 n_buckets:[1 2 1]}} - -eval instant at 5m histogram_count(negative_histogram) - {} 4 - -eval instant at 5m histogram_sum(negative_histogram) - {} -5 - -eval instant at 5m histogram_avg(negative_histogram) - {} -1.25 - -# We expect half of the values to fall in the range -2 < x <= -1. -eval instant at 5m histogram_fraction(-2, -1, negative_histogram) - {} 0.5 - -eval instant at 5m histogram_quantile(0.5, negative_histogram) - {} -1.5 - - - -# Two histogram samples. -load 5m - two_samples_histogram {{schema:0 sum:4 count:4 buckets:[1 2 1]}} {{schema:0 sum:-4 count:4 n_buckets:[1 2 1]}} - -# We expect to see the newest sample. -eval instant at 10m histogram_count(two_samples_histogram) - {} 4 - -eval instant at 10m histogram_sum(two_samples_histogram) - {} -4 - -eval instant at 10m histogram_avg(two_samples_histogram) - {} -1 - -eval instant at 10m histogram_fraction(-2, -1, two_samples_histogram) - {} 0.5 - -eval instant at 10m histogram_quantile(0.5, two_samples_histogram) - {} -1.5 - - - -# Add two histograms with negated data. -load 5m - balanced_histogram {{schema:0 sum:4 count:4 buckets:[1 2 1]}}+{{schema:0 sum:-4 count:4 n_buckets:[1 2 1]}}x1 - -eval instant at 5m histogram_count(balanced_histogram) - {} 8 - -eval instant at 5m histogram_sum(balanced_histogram) - {} 0 - -eval instant at 5m histogram_avg(balanced_histogram) - {} 0 - -eval instant at 5m histogram_fraction(0, 4, balanced_histogram) - {} 0.5 - -# If the quantile happens to be located in a span of empty buckets, the actually returned value is the lower bound of -# the first populated bucket after the span of empty buckets. -eval instant at 5m histogram_quantile(0.5, balanced_histogram) - {} 0.5 - -# Add histogram to test sum(last_over_time) regression -load 5m - incr_sum_histogram{number="1"} {{schema:0 sum:0 count:0 buckets:[1]}}+{{schema:0 sum:1 count:1 buckets:[1]}}x10 - incr_sum_histogram{number="2"} {{schema:0 sum:0 count:0 buckets:[1]}}+{{schema:0 sum:2 count:1 buckets:[1]}}x10 - -eval instant at 50m histogram_sum(sum(incr_sum_histogram)) - {} 30 - -eval instant at 50m histogram_sum(sum(last_over_time(incr_sum_histogram[5m]))) - {} 30 diff --git a/tools/benchmark-query-engine/README.md b/tools/benchmark-query-engine/README.md index 96171d4d8f4..057f963d582 100644 --- a/tools/benchmark-query-engine/README.md +++ b/tools/benchmark-query-engine/README.md @@ -4,7 +4,7 @@ Each benchmark is run in a separate process to provide some kind of guarantee th An ingester is started in the `benchmark-query-engine` process (ie. not the benchmark process) to ensure the TSDB does not skew results. -Results from `benchmark-query-engine` can be summarised with `benchstat`, as well as [`compare.sh`](../../pkg/querier/engine/streaming/compare.sh). +Results from `benchmark-query-engine` can be summarised with `benchstat`, as well as [`compare.sh`](./compare.sh). Usage: diff --git a/pkg/streamingpromql/compare.sh b/tools/benchmark-query-engine/compare.sh similarity index 90% rename from pkg/streamingpromql/compare.sh rename to tools/benchmark-query-engine/compare.sh index 5c7cbbf345d..de21345e39d 100755 --- a/pkg/streamingpromql/compare.sh +++ b/tools/benchmark-query-engine/compare.sh @@ -4,7 +4,7 @@ set -euo pipefail -RESULTS_FILE="$1" # Should be the path to a file produced by a command like `go run ./tools/benchmark-query-engine -count=6 | tee output.txt` +RESULTS_FILE="$1" # Should be the path to a file produced by a command like `go run . -count=6 | tee output.txt` PROMETHEUS_RESULTS_FILE=$(mktemp /tmp/prometheus.XXXX) STREAMING_RESULTS_FILE=$(mktemp /tmp/streaming.XXXX)