From e65955b351c35ccb59741317794eac56505aa604 Mon Sep 17 00:00:00 2001 From: Filip Petkovski Date: Tue, 31 Jan 2023 09:46:55 -0500 Subject: [PATCH 1/2] Copy chunk bytes in TSDB store before sending to client During head compaction, mmapped memory gets released while gRPC is marshaling bytes from that same memory region. This leads to a fatal segfault and crashes the receiver. The segfault happens when marshaling chunks specifically. This commit modifies the TSDB store server to copy chunk bytes before sending them to the client. I tried running this for a while and saw no significant increase in memory usage. Signed-off-by: Filip Petkovski --- pkg/store/tsdb.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pkg/store/tsdb.go b/pkg/store/tsdb.go index 1d3a687915..00d21b5919 100644 --- a/pkg/store/tsdb.go +++ b/pkg/store/tsdb.go @@ -202,13 +202,15 @@ func (s *TSDBStore) Series(r *storepb.SeriesRequest, srv storepb.Store_SeriesSer return status.Errorf(codes.Internal, "TSDBStore: found not populated chunk returned by SeriesSet at ref: %v", chk.Ref) } + chunkBytes := make([]byte, len(chk.Chunk.Bytes())) + copy(chunkBytes, chk.Chunk.Bytes()) c := storepb.AggrChunk{ MinTime: chk.MinTime, MaxTime: chk.MaxTime, Raw: &storepb.Chunk{ Type: storepb.Chunk_Encoding(chk.Chunk.Encoding() - 1), // Proto chunk encoding is one off to TSDB one. 
- Data: chk.Chunk.Bytes(), - Hash: hashChunk(hasher, chk.Chunk.Bytes(), enableChunkHashCalculation), + Data: chunkBytes, + Hash: hashChunk(hasher, chunkBytes, enableChunkHashCalculation), }, } frameBytesLeft -= c.Size() From 681a17a5444f47ddd9b63669ffc7444416658fde Mon Sep 17 00:00:00 2001 From: Filip Petkovski Date: Fri, 10 Mar 2023 09:11:07 +0100 Subject: [PATCH 2/2] Add CHANGELOG entry Signed-off-by: Filip Petkovski --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 01d3f368ae..479f7f78a3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,8 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re ### Fixed +- [#6203](https://github.com/thanos-io/thanos/pull/6203) Receive: Fix panic in head compaction under high query load. + ### Changed ### Removed