diff --git a/HISTORY.md b/HISTORY.md index 214e8a6f887..1297cf2e11f 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -26,6 +26,7 @@ * In leveled compaction with dynamic levelling, level multiplier is not anymore adjusted due to oversized L0. Instead, compaction score is adjusted by increasing size level target by adding incoming bytes from upper levels. This would deprioritize compactions from upper levels if more data from L0 is coming. This is to fix some unnecessary full stalling due to drastic change of level targets, while not wasting write bandwidth for compaction while writes are overloaded. * For level compaction with `level_compaction_dynamic_level_bytes=true`, RocksDB now trivially moves levels down to fill LSM starting from bottommost level during DB open. See more in comments for option `level_compaction_dynamic_level_bytes`. * For level compaction with `level_compaction_dynamic_level_bytes=true`, RocksDB now drains unnecessary levels through background compaction automatically (#11340). This together with #11321 makes it automatic to migrate other compaction settings to level compaction with `level_compaction_dynamic_level_bytes=true`. In addition, a live DB that becomes smaller will now have unnecessary levels drained which can help to reduce read and space amp. +* If `CompactRange()` is called with `CompactRangeOptions::bottommost_level_compaction=kForce*` to compact from L0 to L1, RocksDB will now try to do a trivial move from L0 to L1 followed by an intra-L1 compaction, instead of an L0 to L1 compaction with trivial move disabled (#11375). ## 6.29.5 (03/29/2022) ### Bug Fixes diff --git a/db/db_compaction_filter_test.cc b/db/db_compaction_filter_test.cc index abf8b6c8f30..0c4aa16f8cb 100644 --- a/db/db_compaction_filter_test.cc +++ b/db/db_compaction_filter_test.cc @@ -753,7 +753,7 @@ TEST_F(DBTestCompactionFilter, CompactionFilterContextCfId) { } #ifndef ROCKSDB_LITE -// Compaction filters aplies to all records, regardless snapshots. 
+// Compaction filters apply to all records, regardless of snapshots. TEST_F(DBTestCompactionFilter, CompactionFilterIgnoreSnapshot) { std::string five = ToString(5); Options options = CurrentOptions(); diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc index 4c18966d445..66ea698d535 100644 --- a/db/db_compaction_test.cc +++ b/db/db_compaction_test.cc @@ -56,11 +56,12 @@ class DBCompactionTestWithParam class DBCompactionTestWithBottommostParam : public DBTestBase, - public testing::WithParamInterface<BottommostLevelCompaction> { + public testing::WithParamInterface< + std::tuple<BottommostLevelCompaction, bool>> { public: DBCompactionTestWithBottommostParam() : DBTestBase("db_compaction_test", /*env_do_fsync=*/true) { - bottommost_level_compaction_ = GetParam(); + bottommost_level_compaction_ = std::get<0>(GetParam()); } BottommostLevelCompaction bottommost_level_compaction_; @@ -5678,6 +5679,9 @@ TEST_P(DBCompactionTestWithBottommostParam, SequenceKeysManualCompaction) { constexpr int kSstNum = 10; Options options = CurrentOptions(); options.disable_auto_compactions = true; + options.num_levels = 7; + const bool dynamic_level = std::get<1>(GetParam()); + options.level_compaction_dynamic_level_bytes = dynamic_level; DestroyAndReopen(options); // Generate some sst files on level 0 with sequence keys (no overlap) @@ -5695,25 +5699,42 @@ TEST_P(DBCompactionTestWithBottommostParam, SequenceKeysManualCompaction) { auto cro = CompactRangeOptions(); cro.bottommost_level_compaction = bottommost_level_compaction_; + bool trivial_moved = false; + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( + "DBImpl::BackgroundCompaction:TrivialMove", + [&](void* /*arg*/) { trivial_moved = true; }); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); + // All bottommost_level_compaction options should allow l0 -> l1 trivial move. 
ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); + ASSERT_TRUE(trivial_moved); if (bottommost_level_compaction_ == BottommostLevelCompaction::kForce || bottommost_level_compaction_ == BottommostLevelCompaction::kForceOptimized) { - // Real compaction to compact all sst files from level 0 to 1 file on level - // 1 - ASSERT_EQ("0,1", FilesPerLevel(0)); + // bottommost level should go through intra-level compaction + // and has only 1 file + if (dynamic_level) { + ASSERT_EQ("0,0,0,0,0,0,1", FilesPerLevel(0)); + } else { + ASSERT_EQ("0,1", FilesPerLevel(0)); + } } else { - // Just trivial move from level 0 -> 1 - ASSERT_EQ("0," + ToString(kSstNum), FilesPerLevel(0)); + // Just trivial move from level 0 -> 1/base + if (dynamic_level) { + ASSERT_EQ("0,0,0,0,0,0," + ToString(kSstNum), FilesPerLevel(0)); + } else { + ASSERT_EQ("0," + ToString(kSstNum), FilesPerLevel(0)); + } } } INSTANTIATE_TEST_CASE_P( DBCompactionTestWithBottommostParam, DBCompactionTestWithBottommostParam, - ::testing::Values(BottommostLevelCompaction::kSkip, - BottommostLevelCompaction::kIfHaveCompactionFilter, - BottommostLevelCompaction::kForce, - BottommostLevelCompaction::kForceOptimized)); + ::testing::Combine( + ::testing::Values(BottommostLevelCompaction::kSkip, + BottommostLevelCompaction::kIfHaveCompactionFilter, + BottommostLevelCompaction::kForce, + BottommostLevelCompaction::kForceOptimized), + ::testing::Bool())); TEST_F(DBCompactionTest, UpdateLevelSubCompactionTest) { Options options = CurrentOptions(); diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index f7d7e4b172a..509d0308665 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -670,12 +670,16 
@@ class DBImpl : public DB { // max_file_num_to_ignore allows bottom level compaction to filter out newly // compacted SST files. Setting max_file_num_to_ignore to kMaxUint64 will // disable the filtering + // If `final_output_level` is not nullptr, it is set to manual compaction's + // output level if returned status is OK, and it may or may not be set to + // manual compaction's output level if returned status is not OK. Status RunManualCompaction(ColumnFamilyData* cfd, int input_level, int output_level, const CompactRangeOptions& compact_range_options, const Slice* begin, const Slice* end, bool exclusive, bool disallow_trivial_move, - uint64_t max_file_num_to_ignore); + uint64_t max_file_num_to_ignore, + int* final_output_level = nullptr); // Return an internal iterator over the current state of the database. // The keys of this iterator are internal keys (see format.h). diff --git a/db/db_impl/db_impl_compaction_flush.cc b/db/db_impl/db_impl_compaction_flush.cc index b069db609f1..d8d11dc6114 100644 --- a/db/db_impl/db_impl_compaction_flush.cc +++ b/db/db_impl/db_impl_compaction_flush.cc @@ -1082,7 +1082,7 @@ Status DBImpl::CompactRangeInternal(const CompactRangeOptions& options, } s = RunManualCompaction(cfd, ColumnFamilyData::kCompactAllLevels, final_output_level, options, begin, end, exclusive, - false, port::kMaxUint64); + false /* disallow_trivial_move */, port::kMaxUint64); } else { int first_overlapped_level = kInvalidLevel; int max_overlapped_level = kInvalidLevel; @@ -1117,70 +1117,81 @@ Status DBImpl::CompactRangeInternal(const CompactRangeOptions& options, CleanupSuperVersion(super_version); } if (s.ok() && first_overlapped_level != kInvalidLevel) { - // max_file_num_to_ignore can be used to filter out newly created SST - // files, useful for bottom level compaction in a manual compaction - uint64_t max_file_num_to_ignore = port::kMaxUint64; - uint64_t next_file_number = versions_->current_next_file_number(); - final_output_level = 
max_overlapped_level; - int output_level; - for (int level = first_overlapped_level; level <= max_overlapped_level; - level++) { - bool disallow_trivial_move = false; - // in case the compaction is universal or if we're compacting the - // bottom-most level, the output level will be the same as input one. - // level 0 can never be the bottommost level (i.e. if all files are in - // level 0, we will compact to level 1) - if (cfd->ioptions()->compaction_style == kCompactionStyleUniversal || - cfd->ioptions()->compaction_style == kCompactionStyleFIFO) { - output_level = level; - } else if (level == max_overlapped_level && level > 0) { - if (options.bottommost_level_compaction == - BottommostLevelCompaction::kSkip) { - // Skip bottommost level compaction - continue; - } else if (options.bottommost_level_compaction == - BottommostLevelCompaction::kIfHaveCompactionFilter && - cfd->ioptions()->compaction_filter == nullptr && - cfd->ioptions()->compaction_filter_factory == nullptr) { - // Skip bottommost level compaction since we don't have a compaction - // filter - continue; - } - output_level = level; - // update max_file_num_to_ignore only for bottom level compaction - // because data in newly compacted files in middle levels may still - // need to be pushed down - max_file_num_to_ignore = next_file_number; - } else { + if (cfd->ioptions()->compaction_style == kCompactionStyleUniversal || + cfd->ioptions()->compaction_style == kCompactionStyleFIFO) { + assert(first_overlapped_level == 0); + s = RunManualCompaction( + cfd, first_overlapped_level, first_overlapped_level, options, begin, + end, exclusive, true /* disallow_trivial_move */, + port::kMaxUint64 /* max_file_num_to_ignore */); + final_output_level = max_overlapped_level; + } else { + assert(cfd->ioptions()->compaction_style == kCompactionStyleLevel); + uint64_t next_file_number = versions_->current_next_file_number(); + // Start compaction from `first_overlapped_level`, one level down at a 
+ // time, until output level >= max_overlapped_level. + // When max_overlapped_level == 0, we will still compact from L0 -> L1 + // (or LBase), and followed by a bottommost level intra-level compaction + // at L1 (or LBase), if applicable. + int level = first_overlapped_level; + final_output_level = level; + int output_level, base_level; + while (level < max_overlapped_level || level == 0) { output_level = level + 1; - if (cfd->ioptions()->compaction_style == kCompactionStyleLevel && - cfd->ioptions()->level_compaction_dynamic_level_bytes && + if (cfd->ioptions()->level_compaction_dynamic_level_bytes && level == 0) { output_level = ColumnFamilyData::kCompactToBaseLevel; } - // if it's a BottommostLevel compaction and `kForce*` compaction is - // set, disallow trivial move - if (level == max_overlapped_level && - (options.bottommost_level_compaction == - BottommostLevelCompaction::kForce || - options.bottommost_level_compaction == - BottommostLevelCompaction::kForceOptimized)) { - disallow_trivial_move = true; + // Use max value for `max_file_num_to_ignore` to always compact + // files down. + s = RunManualCompaction( + cfd, level, output_level, options, begin, end, exclusive, + false /* disallow_trivial_move */, + port::kMaxUint64 /* max_file_num_to_ignore */, + output_level == ColumnFamilyData::kCompactToBaseLevel + ? 
&base_level + : nullptr); + if (!s.ok()) { + break; } + if (output_level == ColumnFamilyData::kCompactToBaseLevel) { + assert(base_level > 0); + level = base_level; + } else { + ++level; + } + final_output_level = level; + TEST_SYNC_POINT("DBImpl::RunManualCompaction()::1"); + TEST_SYNC_POINT("DBImpl::RunManualCompaction()::2"); } - s = RunManualCompaction(cfd, level, output_level, options, begin, end, - exclusive, disallow_trivial_move, - max_file_num_to_ignore); - if (!s.ok()) { - break; - } - if (output_level == ColumnFamilyData::kCompactToBaseLevel) { - final_output_level = cfd->NumberLevels() - 1; - } else if (output_level > final_output_level) { - final_output_level = output_level; + if (s.ok()) { + assert(final_output_level > 0); + // bottommost level intra-level compaction + // TODO(cbi): this preserves earlier behavior where if + // max_overlapped_level = 0 and bottommost_level_compaction is + // kIfHaveCompactionFilter, we only do a L0 -> LBase compaction + // and do not do intra-LBase compaction even when user configures + // compaction filter. We may want to still do a LBase -> LBase + // compaction in case there is some file in LBase that did not go + // through L0 -> LBase compaction, and hence did not go through + // compaction filter. + if ((options.bottommost_level_compaction == + BottommostLevelCompaction::kIfHaveCompactionFilter && + max_overlapped_level != 0 && + (cfd->ioptions()->compaction_filter != nullptr || + cfd->ioptions()->compaction_filter_factory != nullptr)) || + options.bottommost_level_compaction == + BottommostLevelCompaction::kForceOptimized || + options.bottommost_level_compaction == + BottommostLevelCompaction::kForce) { + // Use `next_file_number` as `max_file_num_to_ignore` to avoid + // rewriting newly compacted files when it is kForceOptimized. 
+ s = RunManualCompaction( + cfd, final_output_level, final_output_level, options, begin, + end, exclusive, false /* disallow_trivial_move */, + next_file_number /* max_file_num_to_ignore */); + } } - TEST_SYNC_POINT("DBImpl::RunManualCompaction()::1"); - TEST_SYNC_POINT("DBImpl::RunManualCompaction()::2"); } } } @@ -1798,7 +1809,7 @@ Status DBImpl::RunManualCompaction( ColumnFamilyData* cfd, int input_level, int output_level, const CompactRangeOptions& compact_range_options, const Slice* begin, const Slice* end, bool exclusive, bool disallow_trivial_move, - uint64_t max_file_num_to_ignore) { + uint64_t max_file_num_to_ignore, int* final_output_level) { assert(input_level == ColumnFamilyData::kCompactAllLevels || input_level >= 0); @@ -1953,6 +1964,15 @@ Status DBImpl::RunManualCompaction( } else if (!scheduled) { if (compaction == nullptr) { manual.done = true; + if (final_output_level) { + // No compaction needed or there is a conflicting compaction. + // Still set `final_output_level` to the level where we would + // have compacted to. + *final_output_level = output_level; + if (output_level == ColumnFamilyData::kCompactToBaseLevel) { + *final_output_level = cfd->current()->storage_info()->base_level(); + } + } bg_cv_.SignalAll(); continue; } @@ -1986,6 +2006,9 @@ Status DBImpl::RunManualCompaction( } scheduled = true; TEST_SYNC_POINT("DBImpl::RunManualCompaction:Scheduled"); + if (final_output_level) { + *final_output_level = compaction->output_level(); + } } } diff --git a/db/db_sst_test.cc b/db/db_sst_test.cc index b036e1ef969..68d818ba02a 100644 --- a/db/db_sst_test.cc +++ b/db/db_sst_test.cc @@ -758,9 +758,10 @@ TEST_F(DBSSTTest, RateLimitedWALDelete) { // We created 4 sst files in L0 ASSERT_EQ("4", FilesPerLevel(0)); - // Compaction will move the 4 files in L0 to trash and create 1 L1 file + // Compaction will move the 4 files in L0 to trash and create 1 L1 file. + // Use kForceOptimized to not rewrite the new L1 file. 
CompactRangeOptions cro; - cro.bottommost_level_compaction = BottommostLevelCompaction::kForce; + cro.bottommost_level_compaction = BottommostLevelCompaction::kForceOptimized; ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); ASSERT_OK(dbfull()->TEST_WaitForCompact(true)); ASSERT_EQ("0,1", FilesPerLevel(0));