Introduce SkipBlocksWithOutOfOrderChunksEnabled feature #4707

Merged
3 changes: 2 additions & 1 deletion CHANGELOG.md
@@ -2,10 +2,11 @@

## master / unreleased

* [FEATURE] Ruler: Add `external_labels` option to tag all alerts with a given set of labels.
Member: seems that those changes were not intentional

Member Author: Oh, this one is intentional because FEATURE should go after CHANGE (as I added the PR number).

Member Author: I will fix the unintended change (from '*' to '-' on four bullets).

* [CHANGE] Fix incorrectly named `cortex_cache_fetched_keys` and `cortex_cache_hits` metrics. Renamed to `cortex_cache_fetched_keys_total` and `cortex_cache_hits_total` respectively. #4686
* [CHANGE] Enable Thanos series limiter in store-gateway. #4702
* [CHANGE] Distributor: Apply `max_fetched_series_per_query` limit for `/series` API. #4683
* [FEATURE] Ruler: Add `external_labels` option to tag all alerts with a given set of labels.
* [FEATURE] Compactor: Add `-compactor.skip-blocks-with-out-of-order-chunks-enabled` configuration to mark blocks containing an index with out-of-order chunks for no-compaction instead of halting the compaction

## 1.12.0 in progress

5 changes: 5 additions & 0 deletions docs/blocks-storage/compactor.md
@@ -147,6 +147,11 @@ compactor:
# CLI flag: -compactor.tenant-cleanup-delay
[tenant_cleanup_delay: <duration> | default = 6h]

# When enabled, mark blocks containing an index with out-of-order chunks for
# no-compaction instead of halting the compaction.
# CLI flag: -compactor.skip-blocks-with-out-of-order-chunks-enabled
[skip_blocks_with_out_of_order_chunks_enabled: <boolean> | default = false]

# When enabled, at compactor startup the bucket will be scanned and all found
# deletion marks inside the block location will be copied to the markers
# global location too. This option can (and should) be safely disabled as soon
5 changes: 5 additions & 0 deletions docs/configuration/config-file-reference.md
@@ -5294,6 +5294,11 @@ The `compactor_config` configures the compactor for the blocks storage.
# CLI flag: -compactor.tenant-cleanup-delay
[tenant_cleanup_delay: <duration> | default = 6h]

# When enabled, mark blocks containing an index with out-of-order chunks for
# no-compaction instead of halting the compaction.
# CLI flag: -compactor.skip-blocks-with-out-of-order-chunks-enabled
[skip_blocks_with_out_of_order_chunks_enabled: <boolean> | default = false]

# When enabled, at compactor startup the bucket will be scanned and all found
# deletion marks inside the block location will be copied to the markers global
# location too. This option can (and should) be safely disabled as soon as the
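The "mark for no-compaction instead of halting" behavior described in the docs above happens at compaction time inside the Thanos bucket compactor that Cortex embeds. The following is a hedged sketch of the decision the flag controls, assuming Thanos's `block.MarkForNoCompact` helper and its `metadata.OutOfOrderChunksNoCompactReason` constant (exact signatures and names may differ across Thanos versions):

package compactor

import (
	"context"

	"github.com/go-kit/log"
	"github.com/oklog/ulid"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/thanos-io/thanos/pkg/block"
	"github.com/thanos-io/thanos/pkg/block/metadata"
	"github.com/thanos-io/thanos/pkg/objstore"
)

// maybeSkipBlock sketches the behavior toggled by
// -compactor.skip-blocks-with-out-of-order-chunks-enabled.
func maybeSkipBlock(ctx context.Context, logger log.Logger, bkt objstore.Bucket,
	id ulid.ULID, skipEnabled bool, marked prometheus.Counter, compactErr error) error {
	if !skipEnabled {
		// Previous behavior: the out-of-order-chunks error propagates
		// and halts the whole compaction run.
		return compactErr
	}
	// New behavior: write a no-compact mark next to the block and bump
	// the blocks-marked-for-no-compaction counter, so the run can
	// continue with the remaining blocks.
	return block.MarkForNoCompact(ctx, logger, bkt, id,
		metadata.OutOfOrderChunksNoCompactReason,
		"block contains out-of-order chunks", marked)
}

With the flag left at its default (false), the error path is unchanged and the compactor halts as before.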
84 changes: 55 additions & 29 deletions pkg/compactor/compactor.go
@@ -51,7 +51,7 @@ var (
supportedShardingStrategies = []string{util.ShardingStrategyDefault, util.ShardingStrategyShuffle}
errInvalidShardingStrategy = errors.New("invalid sharding strategy")

DefaultBlocksGrouperFactory = func(ctx context.Context, cfg Config, bkt objstore.Bucket, logger log.Logger, reg prometheus.Registerer, blocksMarkedForDeletion prometheus.Counter, garbageCollectedBlocks prometheus.Counter) compact.Grouper {
DefaultBlocksGrouperFactory = func(ctx context.Context, cfg Config, bkt objstore.Bucket, logger log.Logger, reg prometheus.Registerer, blocksMarkedForDeletion, blocksMarkedForNoCompaction, garbageCollectedBlocks prometheus.Counter) compact.Grouper {
return compact.NewDefaultGrouper(
logger,
bkt,
@@ -60,42 +60,48 @@ var (
reg,
blocksMarkedForDeletion,
garbageCollectedBlocks,
prometheus.NewCounter(prometheus.CounterOpts{}),
blocksMarkedForNoCompaction,
metadata.NoneFunc)
}

ShuffleShardingGrouperFactory = func(ctx context.Context, cfg Config, bkt objstore.Bucket, logger log.Logger, reg prometheus.Registerer, blocksMarkedForDeletion prometheus.Counter, garbageCollectedBlocks prometheus.Counter) compact.Grouper {
ShuffleShardingGrouperFactory = func(ctx context.Context, cfg Config, bkt objstore.Bucket, logger log.Logger, reg prometheus.Registerer, blocksMarkedForDeletion, blocksMarkedForNoCompaction, garbageCollectedBlocks prometheus.Counter) compact.Grouper {
return NewShuffleShardingGrouper(
logger,
bkt,
false, // Do not accept malformed indexes
true, // Enable vertical compaction
reg,
blocksMarkedForDeletion,
prometheus.NewCounter(prometheus.CounterOpts{}),
blocksMarkedForNoCompaction,
garbageCollectedBlocks,
metadata.NoneFunc,
cfg)
}

DefaultBlocksCompactorFactory = func(ctx context.Context, cfg Config, logger log.Logger, reg prometheus.Registerer) (compact.Compactor, compact.Planner, error) {
DefaultBlocksCompactorFactory = func(ctx context.Context, cfg Config, logger log.Logger, reg prometheus.Registerer) (compact.Compactor, PlannerFactory, error) {
compactor, err := tsdb.NewLeveledCompactor(ctx, reg, logger, cfg.BlockRanges.ToMilliseconds(), downsample.NewPool(), nil)
if err != nil {
return nil, nil, err
}

planner := compact.NewTSDBBasedPlanner(logger, cfg.BlockRanges.ToMilliseconds())
return compactor, planner, nil
plannerFactory := func(logger log.Logger, cfg Config, noCompactionMarkFilter *compact.GatherNoCompactionMarkFilter) compact.Planner {
return compact.NewPlanner(logger, cfg.BlockRanges.ToMilliseconds(), noCompactionMarkFilter)
}

return compactor, plannerFactory, nil
}

ShuffleShardingBlocksCompactorFactory = func(ctx context.Context, cfg Config, logger log.Logger, reg prometheus.Registerer) (compact.Compactor, compact.Planner, error) {
ShuffleShardingBlocksCompactorFactory = func(ctx context.Context, cfg Config, logger log.Logger, reg prometheus.Registerer) (compact.Compactor, PlannerFactory, error) {
compactor, err := tsdb.NewLeveledCompactor(ctx, reg, logger, cfg.BlockRanges.ToMilliseconds(), downsample.NewPool(), nil)
if err != nil {
return nil, nil, err
}

planner := NewShuffleShardingPlanner(logger, cfg.BlockRanges.ToMilliseconds())
return compactor, planner, nil
plannerFactory := func(logger log.Logger, cfg Config, noCompactionMarkFilter *compact.GatherNoCompactionMarkFilter) compact.Planner {

return NewShuffleShardingPlanner(logger, cfg.BlockRanges.ToMilliseconds(), noCompactionMarkFilter.NoCompactMarkedBlocks)
}
return compactor, plannerFactory, nil
}
)
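// Note: the grouper constructors expect a "blocks marked for no compaction"
// counter; previously a fresh, unregistered prometheus.NewCounter was passed
// in that slot, so those marks were never exported. The factories now
// receive the registered blocksMarkedForNoCompaction counter defined below.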

@@ -107,6 +113,7 @@ type BlocksGrouperFactory func(
logger log.Logger,
reg prometheus.Registerer,
blocksMarkedForDeletion prometheus.Counter,
blocksMarkedForNoCompact prometheus.Counter,
garbageCollectedBlocks prometheus.Counter,
) compact.Grouper

@@ -116,22 +123,29 @@ type BlocksCompactorFactory func(
cfg Config,
logger log.Logger,
reg prometheus.Registerer,
) (compact.Compactor, compact.Planner, error)
) (compact.Compactor, PlannerFactory, error)

type PlannerFactory func(
logger log.Logger,
cfg Config,
noCompactionMarkFilter *compact.GatherNoCompactionMarkFilter,
) compact.Planner
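// A PlannerFactory gives each per-tenant compaction run a planner built
// around that run's no-compaction-mark filter. As an illustration (not part
// of this diff), a factory returning the stock Thanos planner, mirroring
// DefaultBlocksCompactorFactory above, could be written as:
//
//	var defaultPlanner PlannerFactory = func(logger log.Logger, cfg Config,
//		filter *compact.GatherNoCompactionMarkFilter) compact.Planner {
//		return compact.NewPlanner(logger, cfg.BlockRanges.ToMilliseconds(), filter)
//	}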

// Config holds the Compactor config.
type Config struct {
BlockRanges cortex_tsdb.DurationList `yaml:"block_ranges"`
BlockSyncConcurrency int `yaml:"block_sync_concurrency"`
MetaSyncConcurrency int `yaml:"meta_sync_concurrency"`
ConsistencyDelay time.Duration `yaml:"consistency_delay"`
DataDir string `yaml:"data_dir"`
CompactionInterval time.Duration `yaml:"compaction_interval"`
CompactionRetries int `yaml:"compaction_retries"`
CompactionConcurrency int `yaml:"compaction_concurrency"`
CleanupInterval time.Duration `yaml:"cleanup_interval"`
CleanupConcurrency int `yaml:"cleanup_concurrency"`
DeletionDelay time.Duration `yaml:"deletion_delay"`
TenantCleanupDelay time.Duration `yaml:"tenant_cleanup_delay"`
BlockRanges cortex_tsdb.DurationList `yaml:"block_ranges"`
BlockSyncConcurrency int `yaml:"block_sync_concurrency"`
MetaSyncConcurrency int `yaml:"meta_sync_concurrency"`
ConsistencyDelay time.Duration `yaml:"consistency_delay"`
DataDir string `yaml:"data_dir"`
CompactionInterval time.Duration `yaml:"compaction_interval"`
CompactionRetries int `yaml:"compaction_retries"`
CompactionConcurrency int `yaml:"compaction_concurrency"`
CleanupInterval time.Duration `yaml:"cleanup_interval"`
CleanupConcurrency int `yaml:"cleanup_concurrency"`
DeletionDelay time.Duration `yaml:"deletion_delay"`
TenantCleanupDelay time.Duration `yaml:"tenant_cleanup_delay"`
SkipBlocksWithOutOfOrderChunksEnabled bool `yaml:"skip_blocks_with_out_of_order_chunks_enabled"`

// Whether the migration of block deletion marks to the global markers location is enabled.
BlockDeletionMarksMigrationEnabled bool `yaml:"block_deletion_marks_migration_enabled"`
@@ -180,6 +194,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
"If 0, blocks will be deleted straight away. Note that deleting blocks immediately can cause query failures.")
f.DurationVar(&cfg.TenantCleanupDelay, "compactor.tenant-cleanup-delay", 6*time.Hour, "For tenants marked for deletion, this is time between deleting of last block, and doing final cleanup (marker files, debug files) of the tenant.")
f.BoolVar(&cfg.BlockDeletionMarksMigrationEnabled, "compactor.block-deletion-marks-migration-enabled", false, "When enabled, at compactor startup the bucket will be scanned and all found deletion marks inside the block location will be copied to the markers global location too. This option can (and should) be safely disabled as soon as the compactor has successfully run at least once.")
f.BoolVar(&cfg.SkipBlocksWithOutOfOrderChunksEnabled, "compactor.skip-blocks-with-out-of-order-chunks-enabled", false, "When enabled, mark blocks containing an index with out-of-order chunks for no-compaction instead of halting the compaction.")

f.Var(&cfg.EnabledTenants, "compactor.enabled-tenants", "Comma separated list of tenants that can be compacted. If specified, only these tenants will be compacted by compactor, otherwise all tenants can be compacted. Subject to sharding.")
f.Var(&cfg.DisabledTenants, "compactor.disabled-tenants", "Comma separated list of tenants that cannot be compacted by this compactor. If specified, and compactor would normally pick given tenant for compaction (via -compactor.enabled-tenants or sharding), it will be ignored instead.")
@@ -231,9 +246,10 @@ type Compactor struct {
// Blocks cleaner is responsible to hard delete blocks marked for deletion.
blocksCleaner *BlocksCleaner

// Underlying compactor and planner used to compact TSDB blocks.
// Underlying compactor used to compact TSDB blocks.
blocksCompactor compact.Compactor
blocksPlanner compact.Planner

blocksPlannerFactory PlannerFactory

// Client used to run operations on the bucket storing blocks.
bucketClient objstore.Bucket
@@ -255,6 +271,7 @@ type Compactor struct {
compactionRunFailedTenants prometheus.Gauge
compactionRunInterval prometheus.Gauge
blocksMarkedForDeletion prometheus.Counter
blocksMarkedForNoCompaction prometheus.Counter
garbageCollectedBlocks prometheus.Counter

// TSDB syncer metrics
@@ -357,6 +374,10 @@ func newCompactor(
Help: blocksMarkedForDeletionHelp,
ConstLabels: prometheus.Labels{"reason": "compaction"},
}),
blocksMarkedForNoCompaction: promauto.With(registerer).NewCounter(prometheus.CounterOpts{
Name: "cortex_compactor_blocks_marked_for_no_compaction_total",
Help: "Total number of blocks marked for no compact during a compaction run.",
}),
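// Blocks counted here were skipped, not compacted; an (illustrative) alert
// such as rate(cortex_compactor_blocks_marked_for_no_compaction_total[1h]) > 0
// is one way to surface tenants whose blocks are being marked.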
garbageCollectedBlocks: promauto.With(registerer).NewCounter(prometheus.CounterOpts{
Name: "cortex_compactor_garbage_collected_blocks_total",
Help: "Total number of blocks marked for deletion by compactor.",
@@ -389,7 +410,7 @@ func (c *Compactor) starting(ctx context.Context) error {
}

// Create blocks compactor dependencies.
c.blocksCompactor, c.blocksPlanner, err = c.blocksCompactorFactory(ctx, c.compactorCfg, c.logger, c.registerer)
c.blocksCompactor, c.blocksPlannerFactory, err = c.blocksCompactorFactory(ctx, c.compactorCfg, c.logger, c.registerer)
if err != nil {
return errors.Wrap(err, "failed to initialize compactor dependencies")
}
@@ -657,6 +678,10 @@ func (c *Compactor) compactUser(ctx context.Context, userID string) error {
0,
c.compactorCfg.MetaSyncConcurrency)

// Gathers blocks with a no-compaction marker; blocks can be marked for no
// compaction for reasons like out-of-order chunks or an index file that is too big.
noCompactMarkerFilter := compact.NewGatherNoCompactionMarkFilter(ulogger, bucket, c.compactorCfg.MetaSyncConcurrency)
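// The same filter instance is passed to the meta fetcher below, so marks are
// (re)gathered on every sync, and to blocksPlannerFactory, whose planner
// consults noCompactMarkerFilter.NoCompactMarkedBlocks to keep marked blocks
// out of compaction plans.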

fetcher, err := block.NewMetaFetcher(
ulogger,
c.compactorCfg.MetaSyncConcurrency,
@@ -671,6 +696,7 @@
block.NewConsistencyDelayMetaFilter(ulogger, c.compactorCfg.ConsistencyDelay, reg),
ignoreDeletionMarkFilter,
deduplicateBlocksFilter,
noCompactMarkerFilter,
},
nil,
)
@@ -696,13 +722,13 @@
compactor, err := compact.NewBucketCompactor(
ulogger,
syncer,
c.blocksGrouperFactory(ctx, c.compactorCfg, bucket, ulogger, reg, c.blocksMarkedForDeletion, c.garbageCollectedBlocks),
c.blocksPlanner,
c.blocksGrouperFactory(ctx, c.compactorCfg, bucket, ulogger, reg, c.blocksMarkedForDeletion, c.blocksMarkedForNoCompaction, c.garbageCollectedBlocks),
c.blocksPlannerFactory(ulogger, c.compactorCfg, noCompactMarkerFilter),
c.blocksCompactor,
path.Join(c.compactorCfg.DataDir, "compact"),
bucket,
c.compactorCfg.CompactionConcurrency,
false,
c.compactorCfg.SkipBlocksWithOutOfOrderChunksEnabled,
)
if err != nil {
return errors.Wrap(err, "failed to create bucket compactor")
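For reference, a block skipped by this feature gets a `no-compact-mark.json` object uploaded alongside its other files. Assuming the Thanos marker format (field names, the version, and the reason string here are from memory and worth verifying against the vendored Thanos release; the ULID and timestamp are placeholders), the marker looks roughly like:

{
  "id": "01FXE0Q7E4BCA8CQN5AXYGD4A1",
  "version": 1,
  "details": "block contains out-of-order chunks",
  "no_compact_time": 1647514800,
  "reason": "block-index-out-of-order-chunk"
}

The GatherNoCompactionMarkFilter created in compactUser picks these markers up on the next sync, which is what keeps marked blocks out of future compaction plans.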