diff --git a/CHANGELOG.md b/CHANGELOG.md index f05885dbbe..c9be78a554 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re - [#5995](https://github.com/thanos-io/thanos/pull/5995) Sidecar: Loads the TLS certificate during startup. - [#6044](https://github.com/thanos-io/thanos/pull/6044) Receive: mark ouf of window errors as conflict, if out-of-window samples ingestion is activated +- [#6050](https://github.com/thanos-io/thanos/pull/6050) Store: Retry bucket store initial sync upon failure. - [#6066](https://github.com/thanos-io/thanos/pull/6066) Tracing: fixed panic because of nil sampler - [#6067](https://github.com/thanos-io/thanos/pull/6067) Receive: fixed panic when querying uninitialized TSDBs. diff --git a/cmd/thanos/store.go b/cmd/thanos/store.go index 30df09ba5e..abae34d955 100644 --- a/cmd/thanos/store.go +++ b/cmd/thanos/store.go @@ -49,6 +49,11 @@ import ( "github.com/thanos-io/thanos/pkg/ui" ) +const ( + retryTimeoutDuration = 30 + retryIntervalDuration = 10 +) + type storeConfig struct { indexCacheConfigs extflag.PathOrContent objStoreConfig extflag.PathOrContent @@ -381,14 +386,25 @@ func runStore( level.Info(logger).Log("msg", "initializing bucket store") begin := time.Now() - if err := bs.InitialSync(ctx); err != nil { + + // Stop retrying once retryTimeoutDuration seconds have elapsed; each sync attempt itself still runs on ctx. + initialSyncCtx, cancel := context.WithTimeout(ctx, retryTimeoutDuration*time.Second) + defer cancel() + + // Retry in case of error.
+ err := runutil.Retry(retryIntervalDuration*time.Second, initialSyncCtx.Done(), func() error { + return bs.InitialSync(ctx) + }) + + if err != nil { close(bucketStoreReady) return errors.Wrap(err, "bucket store initial sync") } + level.Info(logger).Log("msg", "bucket store ready", "init_duration", time.Since(begin).String()) close(bucketStoreReady) - err := runutil.Repeat(conf.syncInterval, ctx.Done(), func() error { + err = runutil.Repeat(conf.syncInterval, ctx.Done(), func() error { if err := bs.SyncBlocks(ctx); err != nil { level.Warn(logger).Log("msg", "syncing blocks failed", "err", err) }