diff --git a/CHANGELOG.md b/CHANGELOG.md index 0749086679cb..c4ab9cf64280 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ CHANGES: +* storage/raft: The storage configuration now accepts a new `max_entry_size` config that will limit + the total size in bytes of any entry committed via raft. It defaults to `"1048576"` (1MiB). [[GH-9027](https://github.com/hashicorp/vault/pull/9027)] * token: Token creation with custom token ID via `id` will no longer allow periods (`.`) as part of the input string. The final generated token value may contain periods, such as the `s.` prefix for service token indication. [[GH-8646](https://github.com/hashicorp/vault/pull/8646/files)] diff --git a/physical/raft/raft.go b/physical/raft/raft.go index 12afd5316bad..c8535ea7cd23 100644 --- a/physical/raft/raft.go +++ b/physical/raft/raft.go @@ -52,6 +52,8 @@ var ( snapshotsRetained = 2 restoreOpDelayDuration = 5 * time.Second + + defaultMaxEntrySize = uint64(2 * raftchunking.ChunkSize) ) // RaftBackend implements the backend interfaces and uses the raft protocol to @@ -107,6 +109,11 @@ type RaftBackend struct { // permitPool is used to limit the number of concurrent storage calls. permitPool *physical.PermitPool + + // maxEntrySize imposes a size limit (in bytes) on a raft entry (put or transaction). + // It is suggested to use a value of 2x the Raft chunking size for optimal + // performance. + maxEntrySize uint64 } // LeaderJoinInfo contains information required by a node to join itself as a @@ -226,6 +233,7 @@ func NewRaftBackend(conf map[string]string, logger log.Logger) (physical.Backend var log raft.LogStore var stable raft.StableStore var snap raft.SnapshotStore + var devMode bool if devMode { store := raft.NewInmemStore() @@ -303,16 +311,27 @@ func NewRaftBackend(conf map[string]string, logger log.Logger) (physical.Backend } } + maxEntrySize := defaultMaxEntrySize + if maxEntrySizeCfg := conf["max_entry_size"]; len(maxEntrySizeCfg) != 0 { + i, err := strconv.Atoi(maxEntrySizeCfg) + if err != nil { + return nil, fmt.Errorf("failed to parse 'max_entry_size': %w", err) + } + + maxEntrySize = uint64(i) + } + return &RaftBackend{ - logger: logger, - fsm: fsm, - conf: conf, - logStore: log, - stableStore: stable, - snapStore: snap, - dataDir: path, - localID: localID, - permitPool: physical.NewPermitPool(physical.DefaultParallelOperations), + logger: logger, + fsm: fsm, + conf: conf, + logStore: log, + stableStore: stable, + snapStore: snap, + dataDir: path, + localID: localID, + permitPool: physical.NewPermitPool(physical.DefaultParallelOperations), + maxEntrySize: maxEntrySize, }, nil } @@ -925,10 +944,23 @@ func (b *RaftBackend) Get(ctx context.Context, path string) (*physical.Entry, er b.permitPool.Acquire() defer b.permitPool.Release() - return b.fsm.Get(ctx, path) + entry, err := b.fsm.Get(ctx, path) + if entry != nil { + valueLen := len(entry.Value) + if uint64(valueLen) > b.maxEntrySize { + b.logger.Warn( + "retrieved entry value is too large; see https://www.vaultproject.io/docs/configuration/storage/raft#raft-parameters", + "size", valueLen, "suggested", b.maxEntrySize, + ) + } + } + + return entry, err } -// Put inserts an entry in the log for the put operation +// Put inserts an entry in the log for the put operation. It will return an +// error if the resulting entry encoding exceeds the configured max_entry_size +// or if the call to applyLog fails. func (b *RaftBackend) Put(ctx context.Context, entry *physical.Entry) error { defer metrics.MeasureSince([]string{"raft-storage", "put"}, time.Now()) command := &LogData{ @@ -1009,6 +1041,13 @@ func (b *RaftBackend) applyLog(ctx context.Context, command *LogData) error { return err } + cmdSize := len(commandBytes) + if uint64(cmdSize) > b.maxEntrySize { + return fmt.Errorf("%s; got %d bytes, max: %d bytes", physical.ErrValueTooLarge, cmdSize, b.maxEntrySize) + } + + defer metrics.AddSample([]string{"raft-storage", "entry_size"}, float32(cmdSize)) + var chunked bool var applyFuture raft.ApplyFuture switch { diff --git a/physical/raft/raft_test.go b/physical/raft/raft_test.go index 930e46a40d07..b1c85395e0ca 100644 --- a/physical/raft/raft_test.go +++ b/physical/raft/raft_test.go @@ -4,11 +4,13 @@ import ( "context" "crypto/md5" "encoding/base64" - fmt "fmt" + "fmt" "io" "io/ioutil" + "math/rand" "os" "path/filepath" + "strings" "testing" "time" @@ -203,6 +205,67 @@ func TestRaft_Backend(t *testing.T) { physical.ExerciseBackend(t, b) } +func TestRaft_Backend_LargeValue(t *testing.T) { + b, dir := getRaft(t, true, true) + defer os.RemoveAll(dir) + + value := make([]byte, defaultMaxEntrySize+1) + rand.Read(value) + entry := &physical.Entry{Key: "foo", Value: value} + + err := b.Put(context.Background(), entry) + if err == nil { + t.Fatal("expected error for put entry") + } + + if !strings.Contains(err.Error(), physical.ErrValueTooLarge) { + t.Fatalf("expected %q, got %v", physical.ErrValueTooLarge, err) + } + + out, err := b.Get(context.Background(), entry.Key) + if err != nil { + t.Fatalf("unexpected error after failed put: %v", err) + } + if out != nil { + t.Fatal("expected response entry to be nil after a failed put") + } +} + +func TestRaft_TransactionalBackend_LargeValue(t *testing.T) { + b, dir := getRaft(t, true, true) + defer os.RemoveAll(dir) + + value := make([]byte, defaultMaxEntrySize+1) + rand.Read(value) + + txns := []*physical.TxnEntry{ + &physical.TxnEntry{ + Operation: physical.PutOperation, + Entry: &physical.Entry{ + Key: "foo", + Value: value, + }, + }, + } + + err := b.Transaction(context.Background(), txns) + if err == nil { + t.Fatal("expected error for transactions") + } + + if !strings.Contains(err.Error(), physical.ErrValueTooLarge) { + t.Fatalf("expected %q, got %v", physical.ErrValueTooLarge, err) + } + + out, err := b.Get(context.Background(), txns[0].Entry.Key) + if err != nil { + t.Fatalf("unexpected error after failed put: %v", err) + } + if out != nil { + t.Fatal("expected response entry to be nil after a failed put") + } +} + func TestRaft_Backend_ListPrefix(t *testing.T) { b, dir := getRaft(t, true, true) defer os.RemoveAll(dir) diff --git a/website/pages/docs/configuration/storage/raft.mdx b/website/pages/docs/configuration/storage/raft.mdx index e3bad53b8edb..b16b130c6611 100644 --- a/website/pages/docs/configuration/storage/raft.mdx +++ b/website/pages/docs/configuration/storage/raft.mdx @@ -94,6 +94,16 @@ time. To use Raft for HA coordination users must also use Raft for storage. still need to be unsealed manually. See the section below that describes the parameters accepted by the `retry_join` stanza. +- `max_entry_size` `(integer: 1048576)` - This configures the maximum number of + bytes for a raft entry. It applies to both Put operations and transactions. + Any put or transaction operation exceeding this configuration value will cause + the respective operation to fail. Raft has a suggested max size of data in a + raft log entry. This is based on current architecture, default timing, etc. + Integrated storage also uses a chunk size that is the threshold used for + breaking a large value into chunks. By default, the chunk size is the same as + raft's max size log entry. The default value for this configuration is 1048576 + -- two times the chunking size. + ### `retry_join` stanza - `leader_api_addr` `(string: "")` - Address of a possible leader node. diff --git a/website/pages/docs/internals/telemetry.mdx b/website/pages/docs/internals/telemetry.mdx index 393022f09323..66d182087801 100644 --- a/website/pages/docs/internals/telemetry.mdx +++ b/website/pages/docs/internals/telemetry.mdx @@ -374,6 +374,7 @@ These metrics relate to raft based [integrated storage][integrated-storage]. | `vault.raft-storage.put` | Time to insert log entry to persist path. | ms | timer | | `vault.raft-storage.list` | Time to list all entries under the prefix from the FSM. | ms | timer | | `vault.raft-storage.transaction` | Time to insert operations into a single log. | ms | timer | +| `vault.raft-storage.entry_size` | The total size of a Raft entry during log application in bytes. | bytes | sample | ## Integrated Raft Storage Leadership Changes