diff --git a/CHANGELOG.md b/CHANGELOG.md
index 180b4934dfc..111c2cdcf25 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,7 +14,7 @@
 * [FEATURE] Continuous-test: now runable as a module with `mimir -target=continuous-test`. #7747
 * [FEATURE] Store-gateway: Allow specific tenants to be enabled or disabled via `-store-gateway.enabled-tenants` or `-store-gateway.disabled-tenants` CLI flags or their corresponding YAML settings. #7653
 * [FEATURE] New `-<prefix>.s3.bucket-lookup-type` flag configures lookup style type, used to access bucket in s3 compatible providers. #7684
-* [FEATURE] Querier: add experimental streaming PromQL engine, enabled with `-querier.promql-engine=streaming`. #7693 #7898 #7899 #8023 #8058 #8096
+* [FEATURE] Querier: add experimental streaming PromQL engine, enabled with `-querier.promql-engine=streaming`. #7693 #7898 #7899 #8023 #8058 #8096 #8121
 * [FEATURE] New `/ingester/unregister-on-shutdown` HTTP endpoint allows dynamic access to ingesters' `-ingester.ring.unregister-on-shutdown` configuration. #7739
 * [FEATURE] Server: added experimental [PROXY protocol support](https://www.haproxy.org/download/2.3/doc/proxy-protocol.txt). The PROXY protocol support can be enabled via `-server.proxy-protocol-enabled=true`. When enabled, the support is added both to HTTP and gRPC listening ports. #7698
 * [FEATURE] mimirtool: Add `runtime-config verify` sub-command, for verifying Mimir runtime config files. #8123
diff --git a/pkg/streamingpromql/benchmarks/benchmarks.go b/pkg/streamingpromql/benchmarks/benchmarks.go
index c94781ee7b9..7e84b54f4f0 100644
--- a/pkg/streamingpromql/benchmarks/benchmarks.go
+++ b/pkg/streamingpromql/benchmarks/benchmarks.go
@@ -69,13 +69,18 @@ func (c BenchCase) Run(ctx context.Context, t testing.TB, start, end time.Time,
 	return res, qry.Close
 }
 
-// These test cases are taken from https://github.com/prometheus/prometheus/blob/main/promql/bench_test.go.
+// These test cases are taken from https://github.com/prometheus/prometheus/blob/main/promql/bench_test.go
+// and enhanced/added to.
 func TestCases(metricSizes []int) []BenchCase {
 	cases := []BenchCase{
 		// Plain retrieval.
 		{
 			Expr: "a_X",
 		},
+		// Histogram retrieval
+		{
+			Expr: "nh_X",
+		},
 		// Range vector selector.
 		{
 			Expr:             "a_X[1m]",
diff --git a/pkg/streamingpromql/benchmarks/comparison_test.go b/pkg/streamingpromql/benchmarks/comparison_test.go
index 7d7d7e4dfa0..df5b2d38b59 100644
--- a/pkg/streamingpromql/benchmarks/comparison_test.go
+++ b/pkg/streamingpromql/benchmarks/comparison_test.go
@@ -163,6 +163,28 @@ func TestBenchmarkSetup(t *testing.T) {
 	}
 
 	require.Equal(t, expectedPoints, series.Floats)
+
+	// Check native histograms are set up correctly
+	query, err = streamingEngine.NewRangeQuery(ctx, q, nil, "nh_1", time.Unix(0, 0), time.Unix(int64(15*intervalSeconds), 0), interval)
+	require.NoError(t, err)
+
+	t.Cleanup(query.Close)
+	result = query.Exec(ctx)
+	require.NoError(t, result.Err)
+
+	matrix, err = result.Matrix()
+	require.NoError(t, err)
+
+	require.Len(t, matrix, 1)
+	series = matrix[0]
+	require.Equal(t, labels.FromStrings("__name__", "nh_1"), series.Metric)
+	require.Len(t, series.Floats, 0)
+	require.Len(t, series.Histograms, 16)
+
+	// Check one histogram point is as expected
+	require.Equal(t, int64(0), series.Histograms[0].T)
+	require.Equal(t, 12.0, series.Histograms[0].H.Count)
+	require.Equal(t, 18.4, series.Histograms[0].H.Sum)
 }
 
 // Why do we do this rather than require.Equal(t, expected, actual)?
@@ -194,7 +216,11 @@ func requireEqualResults(t testing.TB, expected, actual *promql.Result) {
 			require.Equal(t, expectedSample.Metric, actualSample.Metric)
 			require.Equal(t, expectedSample.T, actualSample.T)
 			require.Equal(t, expectedSample.H, actualSample.H)
-			require.InEpsilon(t, expectedSample.F, actualSample.F, 1e-10)
+			if expectedSample.F == 0 {
+				require.Equal(t, expectedSample.F, actualSample.F)
+			} else {
+				require.InEpsilon(t, expectedSample.F, actualSample.F, 1e-10)
+			}
 		}
 	case parser.ValueTypeMatrix:
 		expectedMatrix, err := expected.Matrix()
@@ -214,7 +240,11 @@ func requireEqualResults(t testing.TB, expected, actual *promql.Result) {
 				actualPoint := actualSeries.Floats[j]
 
 				require.Equal(t, expectedPoint.T, actualPoint.T)
-				require.InEpsilonf(t, expectedPoint.F, actualPoint.F, 1e-10, "expected series %v to have points %v, but result is %v", expectedSeries.Metric.String(), expectedSeries.Floats, actualSeries.Floats)
+				if expectedPoint.F == 0 {
+					require.Equal(t, expectedPoint.F, actualPoint.F)
+				} else {
+					require.InEpsilonf(t, expectedPoint.F, actualPoint.F, 1e-10, "expected series %v to have points %v, but result is %v", expectedSeries.Metric.String(), expectedSeries.Floats, actualSeries.Floats)
+				}
 			}
 		}
 	default:
diff --git a/pkg/streamingpromql/benchmarks/ingester.go b/pkg/streamingpromql/benchmarks/ingester.go
index 61b5621d942..5c6947c949e 100644
--- a/pkg/streamingpromql/benchmarks/ingester.go
+++ b/pkg/streamingpromql/benchmarks/ingester.go
@@ -14,6 +14,7 @@ import (
 	"net"
 	"path/filepath"
 	"strconv"
+	"strings"
 	"time"
 
 	"github.com/go-kit/log"
@@ -174,7 +175,7 @@ func pushTestData(ing *ingester.Ingester, metricSizes []int) error {
 	totalMetrics := 0
 
 	for _, size := range metricSizes {
-		totalMetrics += (2 + histogramBuckets + 1) * size // 2 non-histogram metrics + 5 metrics for histogram buckets + 1 metric for +Inf histogram bucket
+		totalMetrics += (2 + histogramBuckets + 1 + 1) * size // 2 non-histogram metrics + 5 metrics for histogram buckets + 1 metric for +Inf histogram bucket + 1 metric for native-histograms
 	}
 
 	metrics := make([]labels.Labels, 0, totalMetrics)
@@ -183,6 +184,7 @@ func pushTestData(ing *ingester.Ingester, metricSizes []int) error {
 		aName := "a_" + strconv.Itoa(size)
 		bName := "b_" + strconv.Itoa(size)
 		histogramName := "h_" + strconv.Itoa(size)
+		nativeHistogramName := "nh_" + strconv.Itoa(size)
 
 		if size == 1 {
 			// We don't want a "l" label on metrics with one series (some test cases rely on this label not being present).
@@ -192,6 +194,7 @@ func pushTestData(ing *ingester.Ingester, metricSizes []int) error {
 				metrics = append(metrics, labels.FromStrings("__name__", histogramName, "le", strconv.Itoa(le)))
 			}
 			metrics = append(metrics, labels.FromStrings("__name__", histogramName, "le", "+Inf"))
+			metrics = append(metrics, labels.FromStrings("__name__", nativeHistogramName))
 		} else {
 			for i := 0; i < size; i++ {
 				metrics = append(metrics, labels.FromStrings("__name__", aName, "l", strconv.Itoa(i)))
@@ -200,34 +203,69 @@ func pushTestData(ing *ingester.Ingester, metricSizes []int) error {
 					metrics = append(metrics, labels.FromStrings("__name__", histogramName, "l", strconv.Itoa(i), "le", strconv.Itoa(le)))
 				}
 				metrics = append(metrics, labels.FromStrings("__name__", histogramName, "l", strconv.Itoa(i), "le", "+Inf"))
+				metrics = append(metrics, labels.FromStrings("__name__", nativeHistogramName, "l", strconv.Itoa(i)))
 			}
 		}
 	}
 
 	ctx := user.InjectOrgID(context.Background(), UserID)
-	req := &mimirpb.WriteRequest{
-		Timeseries: make([]mimirpb.PreallocTimeseries, len(metrics)),
-	}
 
-	for i, m := range metrics {
-		series := mimirpb.PreallocTimeseries{TimeSeries: &mimirpb.TimeSeries{
-			Labels:  mimirpb.FromLabelsToLabelAdapters(m),
-			Samples: make([]mimirpb.Sample, NumIntervals),
-		}}
+	// Batch samples into separate requests
+	// There is no precise science behind this number currently.
+	// A quick run locally found batching by 100 did not increase the loading time by any noticeable amount.
+	// Additionally memory usage maxed about 4GB for the whole process.
+	batchSize := 100
+	for start := 0; start < NumIntervals; start += batchSize {
+		end := start + batchSize
+		if end > NumIntervals {
+			end = NumIntervals
+		}
 
-		for s := 0; s < NumIntervals; s++ {
-			series.Samples[s].TimestampMs = int64(s) * interval.Milliseconds()
-			series.Samples[s].Value = float64(s) + float64(i)/float64(len(metrics))
+		req := &mimirpb.WriteRequest{
+			Timeseries: make([]mimirpb.PreallocTimeseries, len(metrics)),
 		}
 
-		req.Timeseries[i] = series
-	}
+		for metricIdx, m := range metrics {
+			if strings.HasPrefix(m.Get("__name__"), "nh_") {
+				series := mimirpb.PreallocTimeseries{TimeSeries: &mimirpb.TimeSeries{
+					Labels:     mimirpb.FromLabelsToLabelAdapters(m.Copy()),
+					Histograms: make([]mimirpb.Histogram, end-start),
+				}}
+
+				for ts := start; ts < end; ts++ {
+					// TODO(jhesketh): Fix this with some better data
+					series.Histograms[ts-start].Timestamp = int64(ts) * interval.Milliseconds()
+					series.Histograms[ts-start].Count = &mimirpb.Histogram_CountInt{CountInt: 12}
+					series.Histograms[ts-start].ZeroCount = &mimirpb.Histogram_ZeroCountInt{ZeroCountInt: 2}
+					series.Histograms[ts-start].ZeroThreshold = 0.001
+					series.Histograms[ts-start].Sum = 18.4
+					series.Histograms[ts-start].Schema = 0
+					series.Histograms[ts-start].NegativeSpans = []mimirpb.BucketSpan{{Offset: 0, Length: 2}, {Offset: 1, Length: 2}}
+					series.Histograms[ts-start].NegativeDeltas = []int64{1, 1, -1, 0}
+					series.Histograms[ts-start].PositiveSpans = []mimirpb.BucketSpan{{Offset: 0, Length: 2}, {Offset: 1, Length: 2}}
+					series.Histograms[ts-start].PositiveDeltas = []int64{1, 1, -1, 0}
+				}
 
-	if _, err := ing.Push(ctx, req); err != nil {
-		return fmt.Errorf("failed to push samples to ingester: %w", err)
-	}
+				req.Timeseries[metricIdx] = series
+			} else {
+				series := mimirpb.PreallocTimeseries{TimeSeries: &mimirpb.TimeSeries{
+					Labels:  mimirpb.FromLabelsToLabelAdapters(m.Copy()),
+					Samples: make([]mimirpb.Sample, end-start),
+				}}
 
-	ing.Flush()
+				for ts := start; ts < end; ts++ {
+					series.Samples[ts-start].TimestampMs = int64(ts) * interval.Milliseconds()
+					series.Samples[ts-start].Value = float64(ts) + float64(metricIdx)/float64(len(metrics))
+				}
+
+				req.Timeseries[metricIdx] = series
+			}
+		}
+		if _, err := ing.Push(ctx, req); err != nil {
+			return fmt.Errorf("failed to push samples to ingester: %w", err)
+		}
+		ing.Flush()
+	}
 
 	return nil
 }
diff --git a/pkg/streamingpromql/operator/instant_vector_selector.go b/pkg/streamingpromql/operator/instant_vector_selector.go
index 5fd06435bc3..d4903272bb4 100644
--- a/pkg/streamingpromql/operator/instant_vector_selector.go
+++ b/pkg/streamingpromql/operator/instant_vector_selector.go
@@ -7,7 +7,6 @@ package operator
 
 import (
 	"context"
-	"errors"
 	"fmt"
 
 	"github.com/prometheus/prometheus/model/histogram"
@@ -48,13 +47,11 @@ func (v *InstantVectorSelector) NextSeries(_ context.Context) (InstantVectorSeri
 
 	v.memoizedIterator.Reset(v.chunkIterator)
 
-	data := InstantVectorSeriesData{
-		Floats: GetFPointSlice(v.numSteps), // TODO: only allocate this if we have any floats (once we support native histograms)
-	}
+	data := InstantVectorSeriesData{}
 
 	for stepT := v.Selector.Start; stepT <= v.Selector.End; stepT += v.Selector.Interval {
 		var t int64
-		var val float64
+		var f float64
 		var h *histogram.FloatHistogram
 
 		ts := stepT
@@ -70,26 +67,42 @@ func (v *InstantVectorSelector) NextSeries(_ context.Context) (InstantVectorSeri
 				return InstantVectorSeriesData{}, v.memoizedIterator.Err()
 			}
 		case chunkenc.ValFloat:
-			t, val = v.memoizedIterator.At()
+			t, f = v.memoizedIterator.At()
+		case chunkenc.ValHistogram, chunkenc.ValFloatHistogram:
+			t, h = v.memoizedIterator.AtFloatHistogram()
 		default:
 			return InstantVectorSeriesData{}, fmt.Errorf("streaming PromQL engine: unknown value type %s", valueType.String())
 		}
 
 		if valueType == chunkenc.ValNone || t > ts {
 			var ok bool
-			t, val, h, ok = v.memoizedIterator.PeekPrev()
-			if h != nil {
-				return InstantVectorSeriesData{}, errors.New("streaming PromQL engine doesn't support histograms yet")
-			}
+			t, f, h, ok = v.memoizedIterator.PeekPrev()
 			if !ok || t < ts-v.Selector.LookbackDelta.Milliseconds() {
 				continue
 			}
 		}
-		if value.IsStaleNaN(val) || (h != nil && value.IsStaleNaN(h.Sum)) {
+		if value.IsStaleNaN(f) || (h != nil && value.IsStaleNaN(h.Sum)) {
 			continue
 		}
 
-		data.Floats = append(data.Floats, promql.FPoint{T: stepT, F: val})
+		// if (f, h) have been set by PeekPrev, we do not know if f is 0 because that's the actual value, or because
+		// the previous value had a histogram.
+		// PeekPrev will set the histogram to nil, or the value to 0 if the other type exists.
+		// So check if histograms is nil first. If we don't have a histogram, then we should have a value and vice-versa.
+		if h != nil {
+			if len(data.Histograms) == 0 {
+				// Only create the slice once we know the series is a histogram or not.
+				// (It is possible to over-allocate in the case where we have both floats and histograms, but that won't be common).
+				data.Histograms = GetHPointSlice(v.numSteps)
+			}
+			data.Histograms = append(data.Histograms, promql.HPoint{T: stepT, H: h})
+		} else {
+			if len(data.Floats) == 0 {
+				// Only create the slice once we know the series is a histogram or not
+				data.Floats = GetFPointSlice(v.numSteps)
+			}
+			data.Floats = append(data.Floats, promql.FPoint{T: stepT, F: f})
+		}
 	}
 
 	if v.memoizedIterator.Err() != nil {
diff --git a/pkg/streamingpromql/operator/pool.go b/pkg/streamingpromql/operator/pool.go
index adb259dada1..b33f90db58e 100644
--- a/pkg/streamingpromql/operator/pool.go
+++ b/pkg/streamingpromql/operator/pool.go
@@ -21,6 +21,10 @@ var (
 		return make([]promql.FPoint, 0, size)
 	})
 
+	hPointSlicePool = pool.NewBucketedPool(1, maxExpectedPointsPerSeries, seriesPerResultBucketFactor, func(size int) []promql.HPoint {
+		return make([]promql.HPoint, 0, size)
+	})
+
 	matrixPool = pool.NewBucketedPool(1, maxExpectedSeriesPerResult, seriesPerResultBucketFactor, func(size int) promql.Matrix {
 		return make(promql.Matrix, 0, size)
 	})
@@ -51,6 +55,14 @@ func PutFPointSlice(s []promql.FPoint) {
 	fPointSlicePool.Put(s)
 }
 
+func GetHPointSlice(size int) []promql.HPoint {
+	return hPointSlicePool.Get(size)
+}
+
+func PutHPointSlice(s []promql.HPoint) {
+	hPointSlicePool.Put(s)
+}
+
 func GetMatrix(size int) promql.Matrix {
 	return matrixPool.Get(size)
 }
diff --git a/pkg/streamingpromql/query.go b/pkg/streamingpromql/query.go
index d74cae7efd9..93f424049b9 100644
--- a/pkg/streamingpromql/query.go
+++ b/pkg/streamingpromql/query.go
@@ -278,6 +278,11 @@ func (q *Query) Exec(ctx context.Context) *promql.Result {
 	return q.result
 }
 
+func returnSeriesDataSlices(d operator.InstantVectorSeriesData) {
+	operator.PutFPointSlice(d.Floats)
+	operator.PutHPointSlice(d.Histograms)
+}
+
 func (q *Query) populateVectorFromInstantVectorOperator(ctx context.Context, o operator.InstantVectorOperator, series []operator.SeriesMetadata) (promql.Vector, error) {
 	ts := timeMilliseconds(q.statement.Start)
 	v := operator.GetVector(len(series))
@@ -292,26 +297,29 @@ func (q *Query) populateVectorFromInstantVectorOperator(ctx context.Context, o o
 			return nil, err
 		}
 
-		if len(d.Floats)+len(d.Histograms) != 1 {
-			operator.PutFPointSlice(d.Floats)
-			// TODO: put histogram point slice back in pool
-
-			if len(d.Floats)+len(d.Histograms) == 0 {
+		if len(d.Floats) == 1 && len(d.Histograms) == 0 {
+			point := d.Floats[0]
+			v = append(v, promql.Sample{
+				Metric: s.Labels,
+				T:      ts,
+				F:      point.F,
+			})
+		} else if len(d.Floats) == 0 && len(d.Histograms) == 1 {
+			point := d.Histograms[0]
+			v = append(v, promql.Sample{
+				Metric: s.Labels,
+				T:      ts,
+				H:      point.H,
+			})
+		} else {
+			returnSeriesDataSlices(d)
+			// A series may have no data points.
+			if len(d.Floats) == 0 && len(d.Histograms) == 0 {
 				continue
 			}
-
-			return nil, fmt.Errorf("expected exactly one sample for series %s, but got %v", s.Labels.String(), len(d.Floats))
+			return nil, fmt.Errorf("expected exactly one sample for series %s, but got %v floats, %v histograms", s.Labels.String(), len(d.Floats), len(d.Histograms))
 		}
-
-		point := d.Floats[0]
-		v = append(v, promql.Sample{
-			Metric: s.Labels,
-			T:      ts,
-			F:      point.F,
-		})
-
-		operator.PutFPointSlice(d.Floats)
-		// TODO: put histogram point slice back in pool
+		returnSeriesDataSlices(d)
 	}
 
 	return v, nil
@@ -331,9 +339,7 @@ func (q *Query) populateMatrixFromInstantVectorOperator(ctx context.Context, o o
 		}
 
 		if len(d.Floats) == 0 && len(d.Histograms) == 0 {
-			operator.PutFPointSlice(d.Floats)
-			// TODO: put histogram point slice back in pool
-
+			returnSeriesDataSlices(d)
 			continue
 		}
 
@@ -394,7 +400,7 @@ func (q *Query) Close() {
 	case promql.Matrix:
 		for _, s := range v {
 			operator.PutFPointSlice(s.Floats)
-			// TODO: put histogram point slice back in pool
+			operator.PutHPointSlice(s.Histograms)
 		}
 
 		operator.PutMatrix(v)
diff --git a/pkg/streamingpromql/testdata/ours/native_histograms.test b/pkg/streamingpromql/testdata/ours/native_histograms.test
new file mode 100644
index 00000000000..6ada96ef656
--- /dev/null
+++ b/pkg/streamingpromql/testdata/ours/native_histograms.test
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: AGPL-3.0-only
+# Provenance-includes-location: https://github.com/prometheus/prometheus/tree/main/promql/testdata/native_histograms.test
+# Provenance-includes-license: Apache-2.0
+# Provenance-includes-copyright: The Prometheus Authors
+
+# buckets:[1 2 1] means 1 observation in the 1st bucket, 2 observations in the 2nd and 1 observation in the 3rd (total 4).
+load 5m
+	single_histogram {{schema:0 sum:5 count:4 buckets:[1 2 1]}} {{schema:0 sum:20 count:7 buckets:[9 10 1]}}
+
+eval instant at 5m single_histogram
+	{__name__="single_histogram"} {{schema:0 sum:20 count:7 buckets:[9 10 1]}}
+
+eval range from 0 to 5m step 1m single_histogram
+	{__name__="single_histogram"} {{schema:0 sum:5 count:4 buckets:[1 2 1]}} {{schema:0 sum:5 count:4 buckets:[1 2 1]}} {{schema:0 sum:5 count:4 buckets:[1 2 1]}} {{schema:0 sum:5 count:4 buckets:[1 2 1]}} {{schema:0 sum:5 count:4 buckets:[1 2 1]}} {{schema:0 sum:20 count:7 buckets:[9 10 1]}}
+
+clear
+
+# Test metric with mixed floats and histograms
+load 1m
+	mixed_metric 1 2 3 {{schema:0 sum:5 count:4 buckets:[1 2 1]}} {{schema:0 sum:8 count:6 buckets:[1 4 1]}}
+
+eval range from 0 to 4m step 1m mixed_metric
+	{__name__="mixed_metric"} 1 2 3 {{count:4 sum:5 buckets:[1 2 1]}} {{count:6 sum:8 buckets:[1 4 1]}}
diff --git a/pkg/streamingpromql/testdata/upstream/native_histograms.test b/pkg/streamingpromql/testdata/upstream/native_histograms.test
new file mode 100644
index 00000000000..ff107a7c20c
--- /dev/null
+++ b/pkg/streamingpromql/testdata/upstream/native_histograms.test
@@ -0,0 +1,335 @@
+# SPDX-License-Identifier: AGPL-3.0-only
+# Provenance-includes-location: https://github.com/prometheus/prometheus/tree/main/promql/testdata/native_histograms.test
+# Provenance-includes-license: Apache-2.0
+# Provenance-includes-copyright: The Prometheus Authors
+
+# Minimal valid case: an empty histogram.
+load 5m
+	empty_histogram	{{}}
+
+eval instant at 5m empty_histogram
+	{__name__="empty_histogram"} {{}}
+
+# Unsupported by streaming engine.
+# eval instant at 5m histogram_count(empty_histogram)
+# 	{} 0
+
+# Unsupported by streaming engine.
+# eval instant at 5m histogram_sum(empty_histogram)
+# 	{} 0
+
+# Unsupported by streaming engine.
+# eval instant at 5m histogram_avg(empty_histogram)
+# 	{} NaN
+
+# Unsupported by streaming engine.
+# eval instant at 5m histogram_fraction(-Inf, +Inf, empty_histogram)
+# 	{} NaN
+
+# Unsupported by streaming engine.
+# eval instant at 5m histogram_fraction(0, 8, empty_histogram)
+# 	{} NaN
+
+
+
+# buckets:[1 2 1] means 1 observation in the 1st bucket, 2 observations in the 2nd and 1 observation in the 3rd (total 4).
+load 5m
+	single_histogram	{{schema:0 sum:5 count:4 buckets:[1 2 1]}}
+
+# histogram_count extracts the count property from the histogram.
+# Unsupported by streaming engine.
+# eval instant at 5m histogram_count(single_histogram)
+# 	{} 4
+
+# histogram_sum extracts the sum property from the histogram.
+# Unsupported by streaming engine.
+# eval instant at 5m histogram_sum(single_histogram)
+# 	{} 5
+
+# histogram_avg calculates the average from sum and count properties.
+# Unsupported by streaming engine.
+# eval instant at 5m histogram_avg(single_histogram)
+# 	{} 1.25
+
+# We expect half of the values to fall in the range 1 < x <= 2.
+# Unsupported by streaming engine.
+# eval instant at 5m histogram_fraction(1, 2, single_histogram)
+# 	{} 0.5
+
+# We expect all values to fall in the range 0 < x <= 8.
+# Unsupported by streaming engine.
+# eval instant at 5m histogram_fraction(0, 8, single_histogram)
+# 	{} 1
+
+# Median is 1.5 due to linear estimation of the midpoint of the middle bucket, whose values are within range 1 < x <= 2.
+# Unsupported by streaming engine.
+# eval instant at 5m histogram_quantile(0.5, single_histogram)
+# 	{} 1.5
+
+
+
+# Repeat the same histogram 10 times.
+load 5m
+	multi_histogram	{{schema:0 sum:5 count:4 buckets:[1 2 1]}}x10
+
+# Unsupported by streaming engine.
+# eval instant at 5m histogram_count(multi_histogram)
+# 	{} 4
+
+# Unsupported by streaming engine.
+# eval instant at 5m histogram_sum(multi_histogram)
+# 	{} 5
+
+# Unsupported by streaming engine.
+# eval instant at 5m histogram_avg(multi_histogram)
+# 	{} 1.25
+
+# Unsupported by streaming engine.
+# eval instant at 5m histogram_fraction(1, 2, multi_histogram)
+# 	{} 0.5
+
+# Unsupported by streaming engine.
+# eval instant at 5m histogram_quantile(0.5, multi_histogram)
+# 	{} 1.5
+
+
+# Each entry should look the same as the first.
+# Unsupported by streaming engine.
+# eval instant at 50m histogram_count(multi_histogram)
+# 	{} 4
+
+# Unsupported by streaming engine.
+# eval instant at 50m histogram_sum(multi_histogram)
+# 	{} 5
+
+# Unsupported by streaming engine.
+# eval instant at 50m histogram_avg(multi_histogram)
+# 	{} 1.25
+
+# Unsupported by streaming engine.
+# eval instant at 50m histogram_fraction(1, 2, multi_histogram)
+# 	{} 0.5
+
+# Unsupported by streaming engine.
+# eval instant at 50m histogram_quantile(0.5, multi_histogram)
+# 	{} 1.5
+
+
+
+# Accumulate the histogram addition for 10 iterations, offset is a bucket position where offset:0 is always the bucket
+# with an upper limit of 1 and offset:1 is the bucket which follows to the right. Negative offsets represent bucket
+# positions for upper limits <1 (tending toward zero), where offset:-1 is the bucket to the left of offset:0.
+load 5m
+	incr_histogram	{{schema:0 sum:4 count:4 buckets:[1 2 1]}}+{{sum:2 count:1 buckets:[1] offset:1}}x10
+
+# Unsupported by streaming engine.
+# eval instant at 5m histogram_count(incr_histogram)
+# 	{} 5
+
+# Unsupported by streaming engine.
+# eval instant at 5m histogram_sum(incr_histogram)
+# 	{} 6
+
+# Unsupported by streaming engine.
+# eval instant at 5m histogram_avg(incr_histogram)
+# 	{} 1.2
+
+# We expect 3/5ths of the values to fall in the range 1 < x <= 2.
+# Unsupported by streaming engine.
+# eval instant at 5m histogram_fraction(1, 2, incr_histogram)
+# 	{} 0.6
+
+# Unsupported by streaming engine.
+# eval instant at 5m histogram_quantile(0.5, incr_histogram)
+# 	{} 1.5
+
+
+eval instant at 50m incr_histogram
+	{__name__="incr_histogram"} {{count:14 sum:24 buckets:[1 12 1]}}
+
+# Unsupported by streaming engine.
+# eval instant at 50m histogram_count(incr_histogram)
+# 	{} 14
+
+# Unsupported by streaming engine.
+# eval instant at 50m histogram_sum(incr_histogram)
+# 	{} 24
+
+# Unsupported by streaming engine.
+# eval instant at 50m histogram_avg(incr_histogram)
+#    {} 1.7142857142857142
+
+# We expect 12/14ths of the values to fall in the range 1 < x <= 2.
+# Unsupported by streaming engine.
+# eval instant at 50m histogram_fraction(1, 2, incr_histogram)
+# 	{} 0.8571428571428571
+
+# Unsupported by streaming engine.
+# eval instant at 50m histogram_quantile(0.5, incr_histogram)
+# 	{} 1.5
+
+# Per-second average rate of increase should be 1/(5*60) for count and buckets, then 2/(5*60) for sum.
+# Unsupported by streaming engine.
+# eval instant at 50m rate(incr_histogram[5m])
+# 	{} {{count:0.0033333333333333335 sum:0.006666666666666667 offset:1 buckets:[0.0033333333333333335]}}
+
+# Calculate the 50th percentile of observations over the last 10m.
+# Unsupported by streaming engine.
+# eval instant at 50m histogram_quantile(0.5, rate(incr_histogram[10m]))
+# 	{} 1.5
+
+
+
+# Schema represents the histogram resolution, different schema have compatible bucket boundaries, e.g.:
+#  0: 1 2 4 8 16 32 64 (higher resolution)
+# -1: 1   4   16    64  (lower resolution)
+#
+# Histograms can be merged as long as the histogram to the right is same resolution or higher.
+load 5m
+	low_res_histogram	{{schema:-1 sum:4 count:1 buckets:[1] offset:1}}+{{schema:0 sum:4 count:4 buckets:[2 2] offset:1}}x1
+
+eval instant at 5m low_res_histogram
+	{__name__="low_res_histogram"} {{schema:-1 count:5 sum:8 offset:1 buckets:[5]}}
+
+# Unsupported by streaming engine.
+# eval instant at 5m histogram_count(low_res_histogram)
+# 	{} 5
+
+# Unsupported by streaming engine.
+# eval instant at 5m histogram_sum(low_res_histogram)
+# 	{} 8
+
+# Unsupported by streaming engine.
+# eval instant at 5m histogram_avg(low_res_histogram)
+# 	{} 1.6
+
+# We expect all values to fall into the lower-resolution bucket with the range 1 < x <= 4.
+# Unsupported by streaming engine.
+# eval instant at 5m histogram_fraction(1, 4, low_res_histogram)
+# 	{} 1
+
+
+
+# z_bucket:1 means there is one observation in the zero bucket and z_bucket_w:0.5 means the zero bucket has the range
+# 0 < x <= 0.5. Sum and count are expected to represent all observations in the histogram, including those in the zero bucket.
+load 5m
+	single_zero_histogram {{schema:0 z_bucket:1 z_bucket_w:0.5 sum:0.25 count:1}}
+
+# Unsupported by streaming engine.
+# eval instant at 5m histogram_count(single_zero_histogram)
+# 	{} 1
+
+# Unsupported by streaming engine.
+# eval instant at 5m histogram_sum(single_zero_histogram)
+# 	{} 0.25
+
+# Unsupported by streaming engine.
+# eval instant at 5m histogram_avg(single_zero_histogram)
+# 	{} 0.25
+
+# When only the zero bucket is populated, or there are negative buckets, the distribution is assumed to be equally
+# distributed around zero; i.e. that there are an equal number of positive and negative observations. Therefore the
+# entire distribution must lie within the full range of the zero bucket, in this case: -0.5 < x <= +0.5.
+# Unsupported by streaming engine.
+# eval instant at 5m histogram_fraction(-0.5, 0.5, single_zero_histogram)
+# 	{} 1
+
+# Half of the observations are estimated to be zero, as this is the midpoint between -0.5 and +0.5.
+# Unsupported by streaming engine.
+# eval instant at 5m histogram_quantile(0.5, single_zero_histogram)
+# 	{} 0
+
+
+
+# Let's turn single_histogram upside-down.
+load 5m
+	negative_histogram {{schema:0 sum:-5 count:4 n_buckets:[1 2 1]}}
+
+# Unsupported by streaming engine.
+# eval instant at 5m histogram_count(negative_histogram)
+# 	{} 4
+
+# Unsupported by streaming engine.
+# eval instant at 5m histogram_sum(negative_histogram)
+# 	{} -5
+
+# Unsupported by streaming engine.
+# eval instant at 5m histogram_avg(negative_histogram)
+# 	{} -1.25
+
+# We expect half of the values to fall in the range -2 < x <= -1.
+# Unsupported by streaming engine.
+# eval instant at 5m histogram_fraction(-2, -1, negative_histogram)
+# 	{} 0.5
+
+# Unsupported by streaming engine.
+# eval instant at 5m histogram_quantile(0.5, negative_histogram)
+# 	{} -1.5
+
+
+
+# Two histogram samples.
+load 5m
+	two_samples_histogram {{schema:0 sum:4 count:4 buckets:[1 2 1]}} {{schema:0 sum:-4 count:4 n_buckets:[1 2 1]}}
+
+# We expect to see the newest sample.
+# Unsupported by streaming engine.
+# eval instant at 10m histogram_count(two_samples_histogram)
+# 	{} 4
+
+# Unsupported by streaming engine.
+# eval instant at 10m histogram_sum(two_samples_histogram)
+# 	{} -4
+
+# Unsupported by streaming engine.
+# eval instant at 10m histogram_avg(two_samples_histogram)
+# 	{} -1
+
+# Unsupported by streaming engine.
+# eval instant at 10m histogram_fraction(-2, -1, two_samples_histogram)
+# 	{} 0.5
+
+# Unsupported by streaming engine.
+# eval instant at 10m histogram_quantile(0.5, two_samples_histogram)
+# 	{} -1.5
+
+
+
+# Add two histograms with negated data.
+load 5m
+	balanced_histogram {{schema:0 sum:4 count:4 buckets:[1 2 1]}}+{{schema:0 sum:-4 count:4 n_buckets:[1 2 1]}}x1
+
+# Unsupported by streaming engine.
+# eval instant at 5m histogram_count(balanced_histogram)
+# 	{} 8
+
+# Unsupported by streaming engine.
+# eval instant at 5m histogram_sum(balanced_histogram)
+# 	{} 0
+
+# Unsupported by streaming engine.
+# eval instant at 5m histogram_avg(balanced_histogram)
+# 	{} 0
+
+# Unsupported by streaming engine.
+# eval instant at 5m histogram_fraction(0, 4, balanced_histogram)
+# 	{} 0.5
+
+# If the quantile happens to be located in a span of empty buckets, the actually returned value is the lower bound of
+# the first populated bucket after the span of empty buckets.
+# Unsupported by streaming engine.
+# eval instant at 5m histogram_quantile(0.5, balanced_histogram)
+# 	{} 0.5
+
+# Add histogram to test sum(last_over_time) regression
+load 5m
+    incr_sum_histogram{number="1"} {{schema:0 sum:0 count:0 buckets:[1]}}+{{schema:0 sum:1 count:1 buckets:[1]}}x10
+    incr_sum_histogram{number="2"} {{schema:0 sum:0 count:0 buckets:[1]}}+{{schema:0 sum:2 count:1 buckets:[1]}}x10
+
+# Unsupported by streaming engine.
+# eval instant at 50m histogram_sum(sum(incr_sum_histogram))
+#    {} 30
+
+# Unsupported by streaming engine.
+# eval instant at 50m histogram_sum(sum(last_over_time(incr_sum_histogram[5m])))
+#    {} 30
diff --git a/pkg/streamingpromql/testdata/upstream/native_histograms.test.disabled b/pkg/streamingpromql/testdata/upstream/native_histograms.test.disabled
deleted file mode 100644
index 392294edd3c..00000000000
--- a/pkg/streamingpromql/testdata/upstream/native_histograms.test.disabled
+++ /dev/null
@@ -1,276 +0,0 @@
-# SPDX-License-Identifier: AGPL-3.0-only
-# Provenance-includes-location: https://github.com/prometheus/prometheus/tree/main/promql/testdata/native_histograms.test
-# Provenance-includes-license: Apache-2.0
-# Provenance-includes-copyright: The Prometheus Authors
-
-# Minimal valid case: an empty histogram.
-load 5m
-	empty_histogram	{{}}
-
-eval instant at 5m empty_histogram
-	{__name__="empty_histogram"} {{}}
-
-eval instant at 5m histogram_count(empty_histogram)
-	{} 0
-
-eval instant at 5m histogram_sum(empty_histogram)
-	{} 0
-
-eval instant at 5m histogram_avg(empty_histogram)
-	{} NaN
-
-eval instant at 5m histogram_fraction(-Inf, +Inf, empty_histogram)
-	{} NaN
-
-eval instant at 5m histogram_fraction(0, 8, empty_histogram)
-	{} NaN
-
-
-
-# buckets:[1 2 1] means 1 observation in the 1st bucket, 2 observations in the 2nd and 1 observation in the 3rd (total 4).
-load 5m
-	single_histogram	{{schema:0 sum:5 count:4 buckets:[1 2 1]}}
-
-# histogram_count extracts the count property from the histogram.
-eval instant at 5m histogram_count(single_histogram)
-	{} 4
-
-# histogram_sum extracts the sum property from the histogram.
-eval instant at 5m histogram_sum(single_histogram)
-	{} 5
-
-# histogram_avg calculates the average from sum and count properties.
-eval instant at 5m histogram_avg(single_histogram)
-	{} 1.25
-
-# We expect half of the values to fall in the range 1 < x <= 2.
-eval instant at 5m histogram_fraction(1, 2, single_histogram)
-	{} 0.5
-
-# We expect all values to fall in the range 0 < x <= 8.
-eval instant at 5m histogram_fraction(0, 8, single_histogram)
-	{} 1
-
-# Median is 1.5 due to linear estimation of the midpoint of the middle bucket, whose values are within range 1 < x <= 2.
-eval instant at 5m histogram_quantile(0.5, single_histogram)
-	{} 1.5
-
-
-
-# Repeat the same histogram 10 times.
-load 5m
-	multi_histogram	{{schema:0 sum:5 count:4 buckets:[1 2 1]}}x10
-
-eval instant at 5m histogram_count(multi_histogram)
-	{} 4
-
-eval instant at 5m histogram_sum(multi_histogram)
-	{} 5
-
-eval instant at 5m histogram_avg(multi_histogram)
-	{} 1.25
-
-eval instant at 5m histogram_fraction(1, 2, multi_histogram)
-	{} 0.5
-
-eval instant at 5m histogram_quantile(0.5, multi_histogram)
-	{} 1.5
-
-
-# Each entry should look the same as the first.
-eval instant at 50m histogram_count(multi_histogram)
-	{} 4
-
-eval instant at 50m histogram_sum(multi_histogram)
-	{} 5
-
-eval instant at 50m histogram_avg(multi_histogram)
-	{} 1.25
-
-eval instant at 50m histogram_fraction(1, 2, multi_histogram)
-	{} 0.5
-
-eval instant at 50m histogram_quantile(0.5, multi_histogram)
-	{} 1.5
-
-
-
-# Accumulate the histogram addition for 10 iterations, offset is a bucket position where offset:0 is always the bucket
-# with an upper limit of 1 and offset:1 is the bucket which follows to the right. Negative offsets represent bucket
-# positions for upper limits <1 (tending toward zero), where offset:-1 is the bucket to the left of offset:0.
-load 5m
-	incr_histogram	{{schema:0 sum:4 count:4 buckets:[1 2 1]}}+{{sum:2 count:1 buckets:[1] offset:1}}x10
-
-eval instant at 5m histogram_count(incr_histogram)
-	{} 5
-
-eval instant at 5m histogram_sum(incr_histogram)
-	{} 6
-
-eval instant at 5m histogram_avg(incr_histogram)
-	{} 1.2
-
-# We expect 3/5ths of the values to fall in the range 1 < x <= 2.
-eval instant at 5m histogram_fraction(1, 2, incr_histogram)
-	{} 0.6
-
-eval instant at 5m histogram_quantile(0.5, incr_histogram)
-	{} 1.5
-
-
-eval instant at 50m incr_histogram
-	{__name__="incr_histogram"} {{count:14 sum:24 buckets:[1 12 1]}}
-
-eval instant at 50m histogram_count(incr_histogram)
-	{} 14
-
-eval instant at 50m histogram_sum(incr_histogram)
-	{} 24
-
-eval instant at 50m histogram_avg(incr_histogram)
-    {} 1.7142857142857142
-
-# We expect 12/14ths of the values to fall in the range 1 < x <= 2.
-eval instant at 50m histogram_fraction(1, 2, incr_histogram)
-	{} 0.8571428571428571
-
-eval instant at 50m histogram_quantile(0.5, incr_histogram)
-	{} 1.5
-
-# Per-second average rate of increase should be 1/(5*60) for count and buckets, then 2/(5*60) for sum.
-eval instant at 50m rate(incr_histogram[5m])
-	{} {{count:0.0033333333333333335 sum:0.006666666666666667 offset:1 buckets:[0.0033333333333333335]}}
-
-# Calculate the 50th percentile of observations over the last 10m.
-eval instant at 50m histogram_quantile(0.5, rate(incr_histogram[10m]))
-	{} 1.5
-
-
-
-# Schema represents the histogram resolution, different schema have compatible bucket boundaries, e.g.:
-#  0: 1 2 4 8 16 32 64 (higher resolution)
-# -1: 1   4   16    64  (lower resolution)
-#
-# Histograms can be merged as long as the histogram to the right is same resolution or higher.
-load 5m
-	low_res_histogram	{{schema:-1 sum:4 count:1 buckets:[1] offset:1}}+{{schema:0 sum:4 count:4 buckets:[2 2] offset:1}}x1
-
-eval instant at 5m low_res_histogram
-	{__name__="low_res_histogram"} {{schema:-1 count:5 sum:8 offset:1 buckets:[5]}}
-
-eval instant at 5m histogram_count(low_res_histogram)
-	{} 5
-
-eval instant at 5m histogram_sum(low_res_histogram)
-	{} 8
-
-eval instant at 5m histogram_avg(low_res_histogram)
-	{} 1.6
-
-# We expect all values to fall into the lower-resolution bucket with the range 1 < x <= 4.
-eval instant at 5m histogram_fraction(1, 4, low_res_histogram)
-	{} 1
-
-
-
-# z_bucket:1 means there is one observation in the zero bucket and z_bucket_w:0.5 means the zero bucket has the range
-# 0 < x <= 0.5. Sum and count are expected to represent all observations in the histogram, including those in the zero bucket.
-load 5m
-	single_zero_histogram {{schema:0 z_bucket:1 z_bucket_w:0.5 sum:0.25 count:1}}
-
-eval instant at 5m histogram_count(single_zero_histogram)
-	{} 1
-
-eval instant at 5m histogram_sum(single_zero_histogram)
-	{} 0.25
-
-eval instant at 5m histogram_avg(single_zero_histogram)
-	{} 0.25
-
-# When only the zero bucket is populated, or there are negative buckets, the distribution is assumed to be equally
-# distributed around zero; i.e. that there are an equal number of positive and negative observations. Therefore the
-# entire distribution must lie within the full range of the zero bucket, in this case: -0.5 < x <= +0.5.
-eval instant at 5m histogram_fraction(-0.5, 0.5, single_zero_histogram)
-	{} 1
-
-# Half of the observations are estimated to be zero, as this is the midpoint between -0.5 and +0.5.
-eval instant at 5m histogram_quantile(0.5, single_zero_histogram)
-	{} 0
-
-
-
-# Let's turn single_histogram upside-down.
-load 5m
-	negative_histogram {{schema:0 sum:-5 count:4 n_buckets:[1 2 1]}}
-
-eval instant at 5m histogram_count(negative_histogram)
-	{} 4
-
-eval instant at 5m histogram_sum(negative_histogram)
-	{} -5
-
-eval instant at 5m histogram_avg(negative_histogram)
-	{} -1.25
-
-# We expect half of the values to fall in the range -2 < x <= -1.
-eval instant at 5m histogram_fraction(-2, -1, negative_histogram)
-	{} 0.5
-
-eval instant at 5m histogram_quantile(0.5, negative_histogram)
-	{} -1.5
-
-
-
-# Two histogram samples.
-load 5m
-	two_samples_histogram {{schema:0 sum:4 count:4 buckets:[1 2 1]}} {{schema:0 sum:-4 count:4 n_buckets:[1 2 1]}}
-
-# We expect to see the newest sample.
-eval instant at 10m histogram_count(two_samples_histogram)
-	{} 4
-
-eval instant at 10m histogram_sum(two_samples_histogram)
-	{} -4
-
-eval instant at 10m histogram_avg(two_samples_histogram)
-	{} -1
-
-eval instant at 10m histogram_fraction(-2, -1, two_samples_histogram)
-	{} 0.5
-
-eval instant at 10m histogram_quantile(0.5, two_samples_histogram)
-	{} -1.5
-
-
-
-# Add two histograms with negated data.
-load 5m
-	balanced_histogram {{schema:0 sum:4 count:4 buckets:[1 2 1]}}+{{schema:0 sum:-4 count:4 n_buckets:[1 2 1]}}x1
-
-eval instant at 5m histogram_count(balanced_histogram)
-	{} 8
-
-eval instant at 5m histogram_sum(balanced_histogram)
-	{} 0
-
-eval instant at 5m histogram_avg(balanced_histogram)
-	{} 0
-
-eval instant at 5m histogram_fraction(0, 4, balanced_histogram)
-	{} 0.5
-
-# If the quantile happens to be located in a span of empty buckets, the actually returned value is the lower bound of
-# the first populated bucket after the span of empty buckets.
-eval instant at 5m histogram_quantile(0.5, balanced_histogram)
-	{} 0.5
-
-# Add histogram to test sum(last_over_time) regression
-load 5m
-    incr_sum_histogram{number="1"} {{schema:0 sum:0 count:0 buckets:[1]}}+{{schema:0 sum:1 count:1 buckets:[1]}}x10
-    incr_sum_histogram{number="2"} {{schema:0 sum:0 count:0 buckets:[1]}}+{{schema:0 sum:2 count:1 buckets:[1]}}x10
-
-eval instant at 50m histogram_sum(sum(incr_sum_histogram))
-    {} 30
-
-eval instant at 50m histogram_sum(sum(last_over_time(incr_sum_histogram[5m])))
-    {} 30
diff --git a/tools/benchmark-query-engine/README.md b/tools/benchmark-query-engine/README.md
index 96171d4d8f4..057f963d582 100644
--- a/tools/benchmark-query-engine/README.md
+++ b/tools/benchmark-query-engine/README.md
@@ -4,7 +4,7 @@ Each benchmark is run in a separate process to provide some kind of guarantee th
 
 An ingester is started in the `benchmark-query-engine` process (ie. not the benchmark process) to ensure the TSDB does not skew results.
 
-Results from `benchmark-query-engine` can be summarised with `benchstat`, as well as [`compare.sh`](../../pkg/querier/engine/streaming/compare.sh).
+Results from `benchmark-query-engine` can be summarised with `benchstat`, as well as [`compare.sh`](./compare.sh).
 
 Usage:
 
diff --git a/pkg/streamingpromql/compare.sh b/tools/benchmark-query-engine/compare.sh
similarity index 90%
rename from pkg/streamingpromql/compare.sh
rename to tools/benchmark-query-engine/compare.sh
index 5c7cbbf345d..de21345e39d 100755
--- a/pkg/streamingpromql/compare.sh
+++ b/tools/benchmark-query-engine/compare.sh
@@ -4,7 +4,7 @@
 
 set -euo pipefail
 
-RESULTS_FILE="$1" # Should be the path to a file produced by a command like `go run ./tools/benchmark-query-engine -count=6 | tee output.txt`
+RESULTS_FILE="$1" # Should be the path to a file produced by a command like `go run . -count=6 | tee output.txt`
 
 PROMETHEUS_RESULTS_FILE=$(mktemp /tmp/prometheus.XXXX)
 STREAMING_RESULTS_FILE=$(mktemp /tmp/streaming.XXXX)