diff --git a/db/blob/db_blob_basic_test.cc b/db/blob/db_blob_basic_test.cc
index ef48844b432..e41933d3549 100644
--- a/db/blob/db_blob_basic_test.cc
+++ b/db/blob/db_blob_basic_test.cc
@@ -1182,6 +1182,30 @@ TEST_F(DBBlobBasicTest, GetMergeBlobWithPut) {
   ASSERT_EQ(Get("Key1"), "v1,v2,v3");
 }
 
+TEST_F(DBBlobBasicTest, GetMergeBlobFromMemoryTier) {
+  Options options = GetDefaultOptions();
+  options.merge_operator = MergeOperators::CreateStringAppendOperator();
+  options.enable_blob_files = true;
+  options.min_blob_size = 0;
+
+  Reopen(options);
+
+  ASSERT_OK(Put(Key(0), "v1"));
+  ASSERT_OK(Flush());
+  ASSERT_OK(Merge(Key(0), "v2"));
+  ASSERT_OK(Flush());
+
+  // Regular `Get()` loads data block to cache.
+  std::string value;
+  ASSERT_OK(db_->Get(ReadOptions(), Key(0), &value));
+  ASSERT_EQ("v1,v2", value);
+
+  // Base value blob is still uncached, so an in-memory read will fail.
+  ReadOptions read_options;
+  read_options.read_tier = kBlockCacheTier;
+  ASSERT_TRUE(db_->Get(read_options, Key(0), &value).IsIncomplete());
+}
+
 TEST_F(DBBlobBasicTest, MultiGetMergeBlobWithPut) {
   constexpr size_t num_keys = 3;
 
diff --git a/db/table_cache.cc b/db/table_cache.cc
index 13cbbe58418..02956c7c29c 100644
--- a/db/table_cache.cc
+++ b/db/table_cache.cc
@@ -395,7 +395,7 @@ uint64_t TableCache::CreateRowCacheKeyPrefix(const ReadOptions& options,
 
 bool TableCache::GetFromRowCache(const Slice& user_key, IterKey& row_cache_key,
                                  size_t prefix_size, GetContext* get_context,
-                                 SequenceNumber seq_no) {
+                                 Status* read_status, SequenceNumber seq_no) {
   bool found = false;
 
   row_cache_key.TrimAppend(prefix_size, user_key.data(), user_key.size());
@@ -414,8 +414,8 @@ bool TableCache::GetFromRowCache(const Slice& user_key, IterKey& row_cache_key,
     row_cache.RegisterReleaseAsCleanup(row_handle, value_pinner);
     // If row cache hit, knowing cache key is the same to row_cache_key,
     // can use row_cache_key's seq no to construct InternalKey.
-    replayGetContextLog(*row_cache.Value(row_handle), user_key, get_context,
-                        &value_pinner, seq_no);
+    *read_status = replayGetContextLog(*row_cache.Value(row_handle), user_key,
+                                       get_context, &value_pinner, seq_no);
     RecordTick(ioptions_.stats, ROW_CACHE_HIT);
     found = true;
   } else {
@@ -440,21 +440,20 @@ Status TableCache::Get(
   // Check row cache if enabled.
   // Reuse row_cache_key sequence number when row cache hits.
+  Status s;
   if (ioptions_.row_cache && !get_context->NeedToReadSequence()) {
     auto user_key = ExtractUserKey(k);
     uint64_t cache_entry_seq_no =
         CreateRowCacheKeyPrefix(options, fd, k, get_context, row_cache_key);
     done = GetFromRowCache(user_key, row_cache_key, row_cache_key.Size(),
-                           get_context, cache_entry_seq_no);
+                           get_context, &s, cache_entry_seq_no);
     if (!done) {
       row_cache_entry = &row_cache_entry_buffer;
     }
   }
 
-  Status s;
   TableReader* t = fd.table_reader;
   TypedHandle* handle = nullptr;
-  if (!done) {
-    assert(s.ok());
+  if (s.ok() && !done) {
     if (t == nullptr) {
       s = FindTable(options, file_options_, internal_comparator, file_meta,
                     &handle, block_protection_bytes_per_key, prefix_extractor,
diff --git a/db/table_cache.h b/db/table_cache.h
index ae3fc93c376..cf3cd25c914 100644
--- a/db/table_cache.h
+++ b/db/table_cache.h
@@ -273,6 +273,7 @@ class TableCache {
   // user key to row_cache_key at offset prefix_size
   bool GetFromRowCache(const Slice& user_key, IterKey& row_cache_key,
                        size_t prefix_size, GetContext* get_context,
+                       Status* read_status,
                        SequenceNumber seq_no = kMaxSequenceNumber);
 
   const ImmutableOptions& ioptions_;
@@ -286,4 +287,4 @@ class TableCache {
   std::string db_session_id_;
 };
 
-}  // namespace ROCKSDB_NAMESPACE
\ No newline at end of file
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/db/table_cache_sync_and_async.h b/db/table_cache_sync_and_async.h
index a06b7d43164..f069c8b8055 100644
--- a/db/table_cache_sync_and_async.h
+++ b/db/table_cache_sync_and_async.h
@@ -50,8 +50,14 @@ DEFINE_SYNC_AND_ASYNC(Status, TableCache::MultiGet)
 
       GetContext* get_context = miter->get_context;
 
-      if (GetFromRowCache(user_key, row_cache_key, row_cache_key_prefix_size,
-                          get_context)) {
+      Status read_status;
+      bool ret =
+          GetFromRowCache(user_key, row_cache_key, row_cache_key_prefix_size,
+                          get_context, &read_status);
+      if (!read_status.ok()) {
+        CO_RETURN read_status;
+      }
+      if (ret) {
         table_range.SkipKey(miter);
       } else {
         row_cache_entries.emplace_back();
diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc
index b46619eddc9..432f9ffa62b 100644
--- a/table/block_based/block_based_table_reader.cc
+++ b/table/block_based/block_based_table_reader.cc
@@ -2335,11 +2335,18 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
               biter.key(), &parsed_key, false /* log_err_key */);  // TODO
           if (!pik_status.ok()) {
             s = pik_status;
+            break;
           }
 
-          if (!get_context->SaveValue(
-                  parsed_key, biter.value(), &matched,
-                  biter.IsValuePinned() ? &biter : nullptr)) {
+          Status read_status;
+          bool ret = get_context->SaveValue(
+              parsed_key, biter.value(), &matched, &read_status,
+              biter.IsValuePinned() ? &biter : nullptr);
+          if (!read_status.ok()) {
+            s = read_status;
+            break;
+          }
+          if (!ret) {
            if (get_context->State() == GetContext::GetState::kFound) {
              does_referenced_key_exist = true;
              referenced_data_size = biter.key().size() + biter.value().size();
@@ -2348,7 +2355,9 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
             break;
           }
         }
-        s = biter.status();
+        if (s.ok()) {
+          s = biter.status();
+        }
         if (!s.ok()) {
           break;
         }
diff --git a/table/block_based/block_based_table_reader_sync_and_async.h b/table/block_based/block_based_table_reader_sync_and_async.h
index 98cf73dcacf..6350560c1d3 100644
--- a/table/block_based/block_based_table_reader_sync_and_async.h
+++ b/table/block_based/block_based_table_reader_sync_and_async.h
@@ -735,9 +735,17 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::MultiGet)
               biter->key(), &parsed_key, false /* log_err_key */);  // TODO
           if (!pik_status.ok()) {
             s = pik_status;
+            break;
+          }
+          Status read_status;
+          bool ret = get_context->SaveValue(
+              parsed_key, biter->value(), &matched, &read_status,
+              value_pinner ? value_pinner : nullptr);
+          if (!read_status.ok()) {
+            s = read_status;
+            break;
           }
-          if (!get_context->SaveValue(parsed_key, biter->value(), &matched,
-                                      value_pinner)) {
+          if (!ret) {
             if (get_context->State() == GetContext::GetState::kFound) {
               does_referenced_key_exist = true;
               referenced_data_size =
@@ -746,7 +754,9 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::MultiGet)
             done = true;
             break;
           }
-          s = biter->status();
+          if (s.ok()) {
+            s = biter->status();
+          }
         }
         // Write the block cache access.
         // XXX: There appear to be 'break' statements above that bypass this
diff --git a/table/cuckoo/cuckoo_table_reader.cc b/table/cuckoo/cuckoo_table_reader.cc
index d74a0b041f8..5be1ebc19ec 100644
--- a/table/cuckoo/cuckoo_table_reader.cc
+++ b/table/cuckoo/cuckoo_table_reader.cc
@@ -186,7 +186,10 @@ Status CuckooTableReader::Get(const ReadOptions& /*readOptions*/,
           return s;
         }
         bool dont_care __attribute__((__unused__));
-        get_context->SaveValue(found_ikey, value, &dont_care);
+        get_context->SaveValue(found_ikey, value, &dont_care, &s);
+        if (!s.ok()) {
+          return s;
+        }
       }
       // We don't support merge operations. So, we return here.
      return Status::OK();
diff --git a/table/get_context.cc b/table/get_context.cc
index 16251f93a9c..763bd8d197e 100644
--- a/table/get_context.cc
+++ b/table/get_context.cc
@@ -221,7 +221,7 @@ void GetContext::ReportCounters() {
 
 bool GetContext::SaveValue(const ParsedInternalKey& parsed_key,
                            const Slice& value, bool* matched,
-                           Cleanable* value_pinner) {
+                           Status* read_status, Cleanable* value_pinner) {
   assert(matched);
   assert((state_ != kMerge && parsed_key.type != kTypeMerge) ||
          merge_context_ != nullptr);
@@ -356,8 +356,8 @@ bool GetContext::SaveValue(const ParsedInternalKey& parsed_key,
           // merge_context_->operand_list
           if (type == kTypeBlobIndex) {
             PinnableSlice pin_val;
-            if (GetBlobValue(parsed_key.user_key, unpacked_value, &pin_val) ==
-                false) {
+            if (GetBlobValue(parsed_key.user_key, unpacked_value, &pin_val,
+                             read_status) == false) {
              return false;
            }
            Slice blob_value(pin_val);
@@ -383,8 +383,8 @@ bool GetContext::SaveValue(const ParsedInternalKey& parsed_key,
           assert(merge_operator_ != nullptr);
           if (type == kTypeBlobIndex) {
             PinnableSlice pin_val;
-            if (GetBlobValue(parsed_key.user_key, unpacked_value, &pin_val) ==
-                false) {
+            if (GetBlobValue(parsed_key.user_key, unpacked_value, &pin_val,
+                             read_status) == false) {
              return false;
            }
            Slice blob_value(pin_val);
@@ -547,14 +547,14 @@ void GetContext::MergeWithWideColumnBaseValue(const Slice& entity) {
 }
 
 bool GetContext::GetBlobValue(const Slice& user_key, const Slice& blob_index,
-                              PinnableSlice* blob_value) {
+                              PinnableSlice* blob_value, Status* read_status) {
   constexpr FilePrefetchBuffer* prefetch_buffer = nullptr;
   constexpr uint64_t* bytes_read = nullptr;
 
-  Status status = blob_fetcher_->FetchBlob(
-      user_key, blob_index, prefetch_buffer, blob_value, bytes_read);
-  if (!status.ok()) {
-    if (status.IsIncomplete()) {
+  *read_status = blob_fetcher_->FetchBlob(user_key, blob_index, prefetch_buffer,
+                                          blob_value, bytes_read);
+  if (!read_status->ok()) {
+    if (read_status->IsIncomplete()) {
       // FIXME: this code is not covered by unit tests
       MarkKeyMayExist();
       return false;
@@ -577,9 +577,9 @@ void GetContext::push_operand(const Slice& value, Cleanable* value_pinner) {
   }
 }
 
-void replayGetContextLog(const Slice& replay_log, const Slice& user_key,
-                         GetContext* get_context, Cleanable* value_pinner,
-                         SequenceNumber seq_no) {
+Status replayGetContextLog(const Slice& replay_log, const Slice& user_key,
+                           GetContext* get_context, Cleanable* value_pinner,
+                           SequenceNumber seq_no) {
   Slice s = replay_log;
   Slice ts;
   size_t ts_sz = get_context->TimestampSize();
@@ -610,8 +610,13 @@ void replayGetContextLog(const Slice& replay_log, const Slice& user_key,
 
     (void)ret;
 
-    get_context->SaveValue(ikey, value, &dont_care, value_pinner);
+    Status read_status;
+    get_context->SaveValue(ikey, value, &dont_care, &read_status, value_pinner);
+    if (!read_status.ok()) {
+      return read_status;
+    }
   }
+  return Status::OK();
 }
 
 }  // namespace ROCKSDB_NAMESPACE
diff --git a/table/get_context.h b/table/get_context.h
index da41631fc40..ada479001c9 100644
--- a/table/get_context.h
+++ b/table/get_context.h
@@ -135,7 +135,8 @@ class GetContext {
   // Returns True if more keys need to be read (due to merges) or
   // False if the complete value has been found.
   bool SaveValue(const ParsedInternalKey& parsed_key, const Slice& value,
-                 bool* matched, Cleanable* value_pinner = nullptr);
+                 bool* matched, Status* read_status,
+                 Cleanable* value_pinner = nullptr);
 
   // Simplified version of the previous function. Should only be used when we
   // know that the operation is a Put.
@@ -204,7 +205,7 @@ class GetContext {
   void MergeWithWideColumnBaseValue(const Slice& entity);
 
   bool GetBlobValue(const Slice& user_key, const Slice& blob_index,
-                    PinnableSlice* blob_value);
+                    PinnableSlice* blob_value, Status* read_status);
 
   void appendToReplayLog(ValueType type, Slice value, Slice ts);
 
@@ -250,9 +251,9 @@ class GetContext {
 // Call this to replay a log and bring the get_context up to date. The replay
 // log must have been created by another GetContext object, whose replay log
 // must have been set by calling GetContext::SetReplayLog().
-void replayGetContextLog(const Slice& replay_log, const Slice& user_key,
-                         GetContext* get_context,
-                         Cleanable* value_pinner = nullptr,
-                         SequenceNumber seq_no = kMaxSequenceNumber);
+Status replayGetContextLog(const Slice& replay_log, const Slice& user_key,
+                           GetContext* get_context,
+                           Cleanable* value_pinner = nullptr,
+                           SequenceNumber seq_no = kMaxSequenceNumber);
 
 }  // namespace ROCKSDB_NAMESPACE
diff --git a/table/mock_table.cc b/table/mock_table.cc
index ef12060d741..14fbb3f1d07 100644
--- a/table/mock_table.cc
+++ b/table/mock_table.cc
@@ -220,7 +220,13 @@ Status MockTableReader::Get(const ReadOptions&, const Slice& key,
     }
 
     bool dont_care __attribute__((__unused__));
-    if (!get_context->SaveValue(parsed_key, iter->value(), &dont_care)) {
+    Status read_status;
+    bool ret = get_context->SaveValue(parsed_key, iter->value(), &dont_care,
+                                      &read_status);
+    if (!read_status.ok()) {
+      return read_status;
+    }
+    if (!ret) {
       break;
     }
   }
diff --git a/table/plain/plain_table_reader.cc b/table/plain/plain_table_reader.cc
index 595dfb10d26..d3c968f73a9 100644
--- a/table/plain/plain_table_reader.cc
+++ b/table/plain/plain_table_reader.cc
@@ -614,8 +614,12 @@ Status PlainTableReader::Get(const ReadOptions& /*ro*/, const Slice& target,
     // can we enable the fast path?
     if (internal_comparator_.Compare(found_key, parsed_target) >= 0) {
       bool dont_care __attribute__((__unused__));
-      if (!get_context->SaveValue(found_key, found_value, &dont_care,
-                                  dummy_cleanable_.get())) {
+      bool ret = get_context->SaveValue(found_key, found_value, &dont_care, &s,
+                                        dummy_cleanable_.get());
+      if (!s.ok()) {
+        return s;
+      }
+      if (!ret) {
         break;
       }
     }
diff --git a/unreleased_history/bug_fixes/blockcachetier_blob_as_mergebase.md b/unreleased_history/bug_fixes/blockcachetier_blob_as_mergebase.md
new file mode 100644
index 00000000000..13ee51a827c
--- /dev/null
+++ b/unreleased_history/bug_fixes/blockcachetier_blob_as_mergebase.md
@@ -0,0 +1 @@
+* Fixed `kBlockCacheTier` reads to return `Status::Incomplete` when I/O is needed to fetch a merge chain's base value from a blob file.
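
For context, the sketch below shows how an application might consume the corrected behavior described in the changelog entry; it is illustrative only and not part of this patch. The helper name `GetPreferInMemory` and the fallback policy are assumptions, while the RocksDB calls it uses (`ReadOptions::read_tier`, `kBlockCacheTier`, `Status::IsIncomplete()`, `DB::Get()`) are the public API exercised by the new `GetMergeBlobFromMemoryTier` test above.

```cpp
#include <string>

#include "rocksdb/db.h"
#include "rocksdb/options.h"
#include "rocksdb/status.h"

// Hypothetical helper (not part of the patch): try a block-cache-only read
// first, then fall back to a normal read that is allowed to touch disk.
rocksdb::Status GetPreferInMemory(rocksdb::DB* db, const rocksdb::Slice& key,
                                  std::string* value) {
  rocksdb::ReadOptions in_memory_opts;
  // Restrict the read to data already resident in the block cache; no I/O.
  in_memory_opts.read_tier = rocksdb::kBlockCacheTier;
  rocksdb::Status s = db->Get(in_memory_opts, key, value);
  if (s.IsIncomplete()) {
    // With this fix, an uncached blob base value of a merge chain reaches
    // this branch instead of triggering blob-file I/O under the hood.
    s = db->Get(rocksdb::ReadOptions(), key, value);  // default kReadAllTier
  }
  return s;
}
```

With the fix, a `kBlockCacheTier` read over a merge chain whose base value lives in an uncached blob file reports `Status::Incomplete()` instead of performing hidden blob I/O, so the fallback branch above is taken.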