grafana · pracucci · Mar 7, 2023 · Feb 20, 2023
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -49,7 +49,7 @@ Querying with using `{__mimir_storage__="ephemeral"}` selector no longer works.
 * [ENHANCEMENT] Query-frontend: track `cortex_frontend_query_response_codec_duration_seconds` and `cortex_frontend_query_response_codec_payload_bytes` metrics to measure the time taken and bytes read / written while encoding and decoding query result payloads. #4110
 * [ENHANCEMENT] Alertmanager: expose additional upstream metrics `cortex_alertmanager_dispatcher_aggregation_groups`, `cortex_alertmanager_dispatcher_alert_processing_duration_seconds`. #4151
 * [ENHANCEMENT] Querier and query-frontend: add experimental, more performant protobuf query result response format enabled with `-query-frontend.query-result-response-format=protobuf`. #4153 #4304 #4318 #4375
-* [ENHANCEMENT] Store-gateway: use more efficient chunks fetching and caching. This should reduce CPU, memory utilization, and receive bandwidth of a store-gateway. Enable with `-blocks-storage.bucket-store.chunks-cache.fine-grained-chunks-caching-enabled=true`. #4163 #4174 #4227
+* [ENHANCEMENT] Store-gateway: use more efficient chunks fetching and caching. This should reduce CPU, memory utilization, and receive bandwidth of a store-gateway. Enable with `-blocks-storage.bucket-store.chunks-cache.fine-grained-chunks-caching-enabled=true`. #4163 #4174 #4227 #4255
 * [ENHANCEMENT] Query-frontend: Wait for in-flight queries to finish before shutting down. #4073 #4170
 * [ENHANCEMENT] Store-gateway: added `encode` and `other` stage to `cortex_bucket_store_series_request_stage_duration_seconds` metric. #4179
 * [ENHANCEMENT] Ingester: log state of TSDB when shipping or forced compaction can't be done due to unexpected state of TSDB. #4211

@@ -64,25 +64,12 @@ func getCacheOptions(slabs *pool.SafeSlabPool[byte]) []cache.Option {
 	var opts []cache.Option
 
 	if slabs != nil {
-		opts = append(opts, cache.WithAllocator(&slabPoolAdapter{pool: slabs}))
+		opts = append(opts, cache.WithAllocator(pool.NewSafeSlabPoolAllocator(slabs)))
 	}
 
 	return opts
 }
 
-type slabPoolAdapter struct {
-	pool *pool.SafeSlabPool[byte]
-}
-
-func (s *slabPoolAdapter) Get(sz int) *[]byte {
-	b := s.pool.Get(sz)
-	return &b
-}
-
-func (s *slabPoolAdapter) Put(_ *[]byte) {
-	// no-op
-}
-
 // CachingBucket implementation that provides some caching features, based on passed configuration.
 type CachingBucket struct {
 	objstore.Bucket

@@ -15,6 +15,7 @@ import (
 	"github.com/prometheus/client_golang/prometheus/promauto"
 	"github.com/prometheus/prometheus/tsdb/chunks"
 
+	"github.com/grafana/mimir/pkg/util/pool"
 	"github.com/grafana/mimir/pkg/util/spanlogger"
 )
 
@@ -27,7 +28,7 @@ type Range struct {
 }
 
 type Cache interface {
-	FetchMultiChunks(ctx context.Context, userID string, ranges []Range) (hits map[Range][]byte)
+	FetchMultiChunks(ctx context.Context, userID string, ranges []Range, chunksPool *pool.SafeSlabPool[byte]) (hits map[Range][]byte)
 	StoreChunks(ctx context.Context, userID string, ranges map[Range][]byte)
 }
 
@@ -43,8 +44,8 @@ func NewTracingCache(c Cache, l log.Logger) TracingCache {
 	}
 }
 
-func (c TracingCache) FetchMultiChunks(ctx context.Context, userID string, ranges []Range) (hits map[Range][]byte) {
-	hits = c.c.FetchMultiChunks(ctx, userID, ranges)
+func (c TracingCache) FetchMultiChunks(ctx context.Context, userID string, ranges []Range, chunksPool *pool.SafeSlabPool[byte]) (hits map[Range][]byte) {
+	hits = c.c.FetchMultiChunks(ctx, userID, ranges, chunksPool)
 
 	l := spanlogger.FromContext(ctx, c.l)
 	level.Debug(l).Log(
@@ -79,7 +80,7 @@ type ChunksCache struct {
 
 type NoopCache struct{}
 
-func (NoopCache) FetchMultiChunks(ctx context.Context, userID string, ranges []Range) (hits map[Range][]byte) {
+func (NoopCache) FetchMultiChunks(ctx context.Context, userID string, ranges []Range, chunksPool *pool.SafeSlabPool[byte]) (hits map[Range][]byte) {
 	return nil
 }
 
@@ -107,7 +108,7 @@ func NewChunksCache(logger log.Logger, client cache.Cache, reg prometheus.Regist
 	return c, nil
 }
 
-func (c *ChunksCache) FetchMultiChunks(ctx context.Context, userID string, ranges []Range) (hits map[Range][]byte) {
+func (c *ChunksCache) FetchMultiChunks(ctx context.Context, userID string, ranges []Range, chunksPool *pool.SafeSlabPool[byte]) (hits map[Range][]byte) {
 	keysMap := make(map[string]Range, len(ranges))
 	keys := make([]string, len(ranges))
 	for i, r := range ranges {
@@ -116,7 +117,7 @@ func (c *ChunksCache) FetchMultiChunks(ctx context.Context, userID string, range
 		keys[i] = k
 	}
 
-	hitBytes := c.cache.Fetch(ctx, keys)
+	hitBytes := c.cache.Fetch(ctx, keys, cache.WithAllocator(pool.NewSafeSlabPoolAllocator(chunksPool)))
 	if len(hitBytes) > 0 {
 		hits = make(map[Range][]byte, len(hitBytes))
 		for key, b := range hitBytes {

@@ -99,7 +99,7 @@ func TestDskitChunksCache_FetchMultiChunks(t *testing.T) {
 			}
 
 			// Fetch postings from cached and assert on it.
-			hits := c.FetchMultiChunks(ctx, testData.fetchUserID, testData.fetchRanges)
+			hits := c.FetchMultiChunks(ctx, testData.fetchUserID, testData.fetchRanges, nil)
 			assert.Equal(t, testData.expectedHits, hits)
 
 			// Assert on metrics.

@@ -30,8 +30,10 @@ const (
 	// number of chunks (across series).
 	seriesChunksSlabSize = 1000
 
-	// Selected so that chunks typically fit within the slab size (16 KiB)
-	chunkBytesSlabSize = 16 * 1024
+	// Selected so that many chunks fit within the slab size with low fragmentation, either when
+	// fine-grained chunks cache is enabled (byte slices have variable size and contain many chunks) or disabled (byte slices
+	// are at most 16KB each).
+	chunkBytesSlabSize = 160 * 1024
 )
 
 var (
@@ -388,13 +390,16 @@ func (c *loadingSeriesChunksSetIterator) Next() (retHasNext bool) {
 		}
 	}()
 
+	// Create a batched memory pool that can be released all at once.
+	chunksPool := pool.NewSafeSlabPool[byte](chunkBytesSlicePool, chunkBytesSlabSize)
+
 	// The series slice is guaranteed to have at least the requested capacity,
 	// so can safely expand it.
 	nextSet.series = nextSet.series[:nextUnloaded.len()]
 
 	var cachedRanges map[chunkscache.Range][]byte
 	if c.cache != nil {
-		cachedRanges = c.cache.FetchMultiChunks(c.ctx, c.userID, toCacheKeys(nextUnloaded.series))
+		cachedRanges = c.cache.FetchMultiChunks(c.ctx, c.userID, toCacheKeys(nextUnloaded.series), chunksPool)
 		c.recordCachedChunks(cachedRanges)
 	}
 	c.chunkReaders.reset()
@@ -434,9 +439,6 @@ func (c *loadingSeriesChunksSetIterator) Next() (retHasNext bool) {
 		}
 	}
 
-	// Create a batched memory pool that can be released all at once.
-	chunksPool := pool.NewSafeSlabPool[byte](chunkBytesSlicePool, chunkBytesSlabSize)
-
 	err := c.chunkReaders.load(nextSet.series, chunksPool, c.stats)
 	if err != nil {
 		c.err = errors.Wrap(err, "loading chunks")

@@ -520,7 +520,7 @@ func (b testBlock) toSeriesChunkRefs(seriesIndex int) seriesChunkRefs {
 	return b.toSeriesChunkRefsWithNRanges(seriesIndex, 1)
 }
 
-func TestRangeLoadingSeriesChunksSetIterator(t *testing.T) {
+func TestLoadingSeriesChunksSetIterator(t *testing.T) {
 	block1 := testBlock{
 		ulid:   ulid.MustNew(1, nil),
 		series: generateSeriesEntries(t, 10),
@@ -1290,7 +1290,7 @@ func newInMemoryChunksCache() chunkscache.Cache {
 	}
 }
 
-func (c *inMemoryChunksCache) FetchMultiChunks(ctx context.Context, userID string, ranges []chunkscache.Range) map[chunkscache.Range][]byte {
+func (c *inMemoryChunksCache) FetchMultiChunks(ctx context.Context, userID string, ranges []chunkscache.Range, chunksPool *pool.SafeSlabPool[byte]) map[chunkscache.Range][]byte {
 	hits := make(map[chunkscache.Range][]byte, len(ranges))
 	for _, r := range ranges {
 		if cached, ok := c.cached[userID][r]; ok {

@@ -238,3 +238,42 @@ func (b *SafeSlabPool[T]) Get(size int) []T {
 
 	return b.wrapped.Get(size)
 }
+
+// SafeSlabPoolAllocator adapts a SafeSlabPool[byte] to the Get/Put allocator passed to
+// cache.WithAllocator.
+//
+// Both methods are safe to call on a nil receiver. NewSafeSlabPoolAllocator returns a nil
+// *SafeSlabPoolAllocator for a nil pool; once that pointer is stored in an interface value the
+// interface compares non-nil, so the consumer may still end up calling Get or Put on it.
+type SafeSlabPoolAllocator struct {
+	pool *SafeSlabPool[byte]
+}
+
+// NewSafeSlabPoolAllocator wraps the input SafeSlabPool[byte] into an allocator suitable to be used with
+// a cache client. This function returns nil if the input SafeSlabPool[byte] is nil.
+func NewSafeSlabPoolAllocator(pool *SafeSlabPool[byte]) *SafeSlabPoolAllocator {
+	if pool == nil {
+		return nil
+	}
+
+	return &SafeSlabPoolAllocator{
+		pool: pool,
+	}
+}
+
+// Get returns a pointer to a byte slice for the requested size. The slice comes from the wrapped
+// pool, or from a plain make([]byte, sz) when the allocator or its pool is nil.
+func (a *SafeSlabPoolAllocator) Get(sz int) *[]byte {
+	var b []byte
+	if a == nil || a.pool == nil {
+		b = make([]byte, sz)
+	} else {
+		b = a.pool.Get(sz)
+	}
+	return &b
+}
+
+// Put is a no-op: slices handed out by Get are not returned to the pool one by one.
+func (a *SafeSlabPoolAllocator) Put(_ *[]byte) {
+	// no-op
+}