streaming store-gateway: avoid copying postings when hashing them #3779

Merged
9 commits merged on Dec 20, 2022
pkg/storegateway/indexcache/cache.go (32 changes: 15 additions & 17 deletions)

@@ -7,10 +7,11 @@ package indexcache

 import (
 	"context"
-	"crypto/sha1"
 	"encoding/base64"
+	"reflect"
 	"sort"
 	"strings"
+	"unsafe"

 	"github.com/oklog/ulid"
 	"github.com/prometheus/client_golang/prometheus"

@@ -91,23 +92,20 @@ type PostingsKey string

 // CanonicalPostingsKey creates a canonical version of PostingsKey
 func CanonicalPostingsKey(postings []storage.SeriesRef) PostingsKey {
-	hashable := make([]byte, len(postings)*8)
-	for i, posting := range postings {
-		for octet := 0; octet < 8; octet++ {
-			hashable[i+octet] = byte(posting >> (octet * 8))
-		}
-	}
-
-	// We hash the postings list twice to minimize the chance of collisions
-	hasher1, _ := blake2b.New256(nil) // This will never return an error
-	hasher2 := sha1.New()
-
-	_, _ = hasher1.Write(hashable)
-	_, _ = hasher2.Write(hashable)
-
-	checksum := hasher2.Sum(hasher1.Sum(nil))
-
-	return PostingsKey(base64.RawURLEncoding.EncodeToString(checksum))
+	hashable := unsafeCastPostingsToBytes(postings)
+	checksum := blake2b.Sum256(hashable)
+	return PostingsKey(base64.RawURLEncoding.EncodeToString(checksum[:]))
 }

+// unsafeCastPostingsToBytes returns the postings as a slice of bytes with minimal allocations.
+// It casts the memory region of the underlying array to a slice of bytes.
+func unsafeCastPostingsToBytes(postings []storage.SeriesRef) []byte {
+	byteSlice := make([]byte, 0)
+	slicePtr := (*reflect.SliceHeader)(unsafe.Pointer(&byteSlice))
+	slicePtr.Data = (*reflect.SliceHeader)(unsafe.Pointer(&postings)).Data
+	slicePtr.Len = len(postings) * 8
+	slicePtr.Cap = slicePtr.Len
+	return byteSlice
+}
+
 // LabelMatchersKey represents a canonical key for a []*matchers.Matchers slice

Review thread on the line "slicePtr.Len = len(postings) * 8":

Collaborator: [nit, non blocking] Instead of hardcoding 8, can we read the size of storage.SeriesRef instead?

Contributor (author): Yeah, I thought about that, but the tradeoff was using the unsafe package more 😄. I did the change in e57efad, see what you think.
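The contents of commit e57efad are not shown on this page. A version that derives the element width from storage.SeriesRef rather than hardcoding 8 might look roughly like the sketch below; this only illustrates the reviewer's suggestion, not necessarily what the commit contains, and the postingSize constant is a name chosen here for illustration.

package indexcache

import (
	"reflect"
	"unsafe"

	"github.com/prometheus/prometheus/storage"
)

// postingSize is the in-memory width of one posting. unsafe.Sizeof is
// evaluated at compile time, so deriving the width this way adds no
// runtime cost, but it does mean one more use of the unsafe package.
const postingSize = int(unsafe.Sizeof(storage.SeriesRef(0)))

// unsafeCastPostingsToBytes reinterprets the postings' backing array as bytes
// without copying, using the derived element size instead of a literal 8.
func unsafeCastPostingsToBytes(postings []storage.SeriesRef) []byte {
	byteSlice := make([]byte, 0)
	slicePtr := (*reflect.SliceHeader)(unsafe.Pointer(&byteSlice))
	slicePtr.Data = (*reflect.SliceHeader)(unsafe.Pointer(&postings)).Data
	slicePtr.Len = len(postings) * postingSize
	slicePtr.Cap = slicePtr.Len
	return byteSlice
}

On Go 1.17 and later, unsafe.Slice can express the same reinterpretation without the now-deprecated reflect.SliceHeader, for example unsafe.Slice((*byte)(unsafe.Pointer(&postings[0])), len(postings)*postingSize), with a separate guard for empty slices since indexing postings[0] panics there.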
pkg/storegateway/indexcache/cache_test.go (31 changes: 31 additions & 0 deletions)

@@ -55,3 +55,34 @@ func BenchmarkCanonicalPostingsKey(b *testing.B) {

 		})
 	}
 }
+
+func TestUnsafeCastPostingsToBytes(t *testing.T) {
+	slowPostingsToBytes := func(postings []storage.SeriesRef) []byte {
+		byteSlice := make([]byte, len(postings)*8)
+		for i, posting := range postings {
+			for octet := 0; octet < 8; octet++ {
+				byteSlice[i*8+octet] = byte(posting >> (octet * 8))
+			}
+		}
+		return byteSlice
+	}
+	t.Run("base case", func(t *testing.T) {
+		postings := []storage.SeriesRef{1, 2}
+		assert.Equal(t, slowPostingsToBytes(postings), unsafeCastPostingsToBytes(postings))
+	})
+	t.Run("zero-length postings", func(t *testing.T) {
+		postings := make([]storage.SeriesRef, 0)
+		assert.Equal(t, slowPostingsToBytes(postings), unsafeCastPostingsToBytes(postings))
+	})
+	t.Run("nil postings", func(t *testing.T) {
+		assert.Equal(t, []byte(nil), unsafeCastPostingsToBytes(nil))
+	})
+	t.Run("more than 256 postings", func(t *testing.T) {
+		// Only casting a slice pointer truncates all postings to only their last byte.
+		postings := make([]storage.SeriesRef, 300)
+		for i := range postings {
+			postings[i] = storage.SeriesRef(i + 1)
+		}
+		assert.Equal(t, slowPostingsToBytes(postings), unsafeCastPostingsToBytes(postings))
+	})
+}
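The point of the PR is to avoid allocating and filling a copy of the postings before hashing them. A micro-benchmark along the lines below would make that difference visible by contrasting the byte-by-byte copy with the zero-copy cast; this is a sketch only, it assumes it sits next to the test above in package indexcache, and BenchmarkPostingsToBytes is a name made up here.

package indexcache

import (
	"testing"

	"github.com/prometheus/prometheus/storage"
)

// BenchmarkPostingsToBytes contrasts the copying conversion that
// CanonicalPostingsKey used to perform with the zero-copy cast it uses now.
func BenchmarkPostingsToBytes(b *testing.B) {
	postings := make([]storage.SeriesRef, 1_000_000)
	for i := range postings {
		postings[i] = storage.SeriesRef(i)
	}

	b.Run("copy", func(b *testing.B) {
		for i := 0; i < b.N; i++ {
			// Allocate a fresh buffer and serialize every posting, as the old code did.
			out := make([]byte, len(postings)*8)
			for j, p := range postings {
				for octet := 0; octet < 8; octet++ {
					out[j*8+octet] = byte(p >> (octet * 8))
				}
			}
		}
	})

	b.Run("unsafe cast", func(b *testing.B) {
		for i := 0; i < b.N; i++ {
			// Reinterpret the existing backing array; no allocation, no copy.
			_ = unsafeCastPostingsToBytes(postings)
		}
	})
}

The copy variant allocates len(postings)*8 bytes per key, while the cast reuses the postings' memory; the trade-off, which the test above verifies, is relying on the in-memory layout of storage.SeriesRef.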