diff --git a/CHANGELOG.md b/CHANGELOG.md index 88870e9fb03..1778deeacab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ ### Jsonnet ### Mimirtool +* [BUGFIX] Fix out of bounds error on export with large timespans and/or series count. #5700 ### Mimir Continuous Test diff --git a/pkg/mimirtool/backfill/backfill.go b/pkg/mimirtool/backfill/backfill.go index c3320c9b319..fe20400bd24 100644 --- a/pkg/mimirtool/backfill/backfill.go +++ b/pkg/mimirtool/backfill/backfill.go @@ -90,7 +90,7 @@ func CreateBlocks(input IteratorCreator, mint, maxt int64, maxSamplesInAppender var wroteHeader bool - for t := mint; t <= maxt; t = t + blockDuration { + for t := mint; t <= maxt; t = t + blockDuration/2 { err := func() error { w, err := tsdb.NewBlockWriter(log.NewNopLogger(), outputDir, blockDuration) if err != nil { @@ -106,7 +106,7 @@ func CreateBlocks(input IteratorCreator, mint, maxt int64, maxSamplesInAppender ctx := context.Background() app := w.Appender(ctx) i := input() - tsUpper := t + blockDuration + tsUpper := t + blockDuration/2 samplesCount := 0 for { err := i.Next() diff --git a/pkg/mimirtool/commands/remote_read.go b/pkg/mimirtool/commands/remote_read.go index ec6d6d8f862..4bfb8bd117b 100644 --- a/pkg/mimirtool/commands/remote_read.go +++ b/pkg/mimirtool/commands/remote_read.go @@ -416,8 +416,7 @@ func (c *RemoteReadCommand) export(_ *kingpin.ParseContext) error { if err != nil { return err } - - iterator := func() backfill.Iterator { + iteratorCreator := func() backfill.Iterator { return newTimeSeriesIterator(timeseries) } @@ -434,7 +433,7 @@ func (c *RemoteReadCommand) export(_ *kingpin.ParseContext) error { defer pipeR.Close() log.Infof("Store TSDB blocks in '%s'", c.tsdbPath) - if err := backfill.CreateBlocks(iterator, int64(mint), int64(maxt), 1000, c.tsdbPath, true, pipeW); err != nil { + if err := backfill.CreateBlocks(iteratorCreator, int64(mint), int64(maxt), 1000, c.tsdbPath, true, pipeW); err != nil { return err } diff --git 
a/pkg/mimirtool/commands/remote_read_test.go b/pkg/mimirtool/commands/remote_read_test.go
index 45ae17b943a..8c589fa7782 100644
--- a/pkg/mimirtool/commands/remote_read_test.go
+++ b/pkg/mimirtool/commands/remote_read_test.go
@@ -6,12 +6,16 @@
 package commands
 
 import (
+	"fmt"
 	"io"
 	"testing"
+	"time"
 
 	"github.com/pkg/errors"
 	"github.com/prometheus/prometheus/prompb"
 	"github.com/stretchr/testify/assert"
+
+	"github.com/grafana/mimir/pkg/mimirtool/backfill"
 )
 
 func TestTimeSeriesIterator(t *testing.T) {
@@ -150,3 +154,46 @@ func TestTimeSeriesIterator(t *testing.T) {
 	}
 
 }
+
+// TestEarlyCommit writes samples of many series that don't fit into the same
+// append commit. It makes sure that batching the samples into many commits
+// doesn't cause the appends to advance the head block too far and make future
+// appends invalid.
+func TestEarlyCommit(t *testing.T) {
+	const (
+		// 100 series x 140 samples is far more than the per-appender sample
+		// limit handed to CreateBlocks, so the samples are split over commits.
+		maxSamplesInAppender = 1000
+		seriesCount          = 100
+		samplesPerSeries     = 140
+	)
+
+	// The step between samples is expressed in milliseconds, so the start
+	// timestamp is taken in milliseconds as well to keep the units consistent.
+	start := time.Date(2023, 8, 30, 11, 42, 17, 0, time.UTC).UnixMilli()
+	step := time.Minute.Milliseconds()
+	end := start + step*samplesPerSeries
+
+	ts := make([]*prompb.TimeSeries, seriesCount)
+	for i := range ts {
+		samples := make([]prompb.Sample, samplesPerSeries)
+		for j := range samples {
+			samples[j] = prompb.Sample{
+				Value:     float64(j),
+				Timestamp: start + step*int64(j),
+			}
+		}
+		ts[i] = &prompb.TimeSeries{
+			Labels: []prompb.Label{
+				{Name: "__name__", Value: fmt.Sprintf("metric_%d", i)},
+			},
+			Samples: samples,
+		}
+	}
+
+	iteratorCreator := func() backfill.Iterator {
+		return newTimeSeriesIterator(ts)
+	}
+	err := backfill.CreateBlocks(iteratorCreator, start, end, maxSamplesInAppender, t.TempDir(), true, io.Discard)
+	assert.NoError(t, err)
+}